redstorm 0.6.6 → 0.7.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -1
- data/README.md +8 -7
- data/examples/dsl/exclamation_topology.rb +2 -4
- data/examples/dsl/exclamation_topology2.rb +4 -5
- data/examples/dsl/hello_world_topology.rb +7 -0
- data/examples/dsl/kafka_topology.rb +5 -1
- data/examples/dsl/redis_word_count_topology.rb +5 -9
- data/examples/dsl/ruby_version_topology.rb +2 -0
- data/examples/dsl/word_count_topology.rb +4 -5
- data/examples/trident/word_count_query.rb +33 -0
- data/examples/trident/word_count_topology.rb +153 -0
- data/ivy/storm_dependencies.xml +1 -1
- data/ivy/topology_dependencies.xml +3 -2
- data/lib/red_storm.rb +5 -2
- data/lib/red_storm/configurator.rb +12 -0
- data/lib/red_storm/dsl/batch_bolt.rb +34 -0
- data/lib/red_storm/dsl/batch_committer_bolt.rb +9 -0
- data/lib/red_storm/dsl/batch_spout.rb +53 -0
- data/lib/red_storm/dsl/bolt.rb +7 -2
- data/lib/red_storm/dsl/output_collector.rb +8 -0
- data/lib/red_storm/dsl/spout.rb +3 -1
- data/lib/red_storm/dsl/topology.rb +2 -2
- data/lib/red_storm/dsl/tuple.rb +2 -0
- data/lib/red_storm/topology_launcher.rb +14 -10
- data/lib/red_storm/version.rb +1 -1
- data/redstorm.gemspec +1 -0
- data/src/main/redstorm/storm/jruby/JRubyBatchBolt.java +53 -35
- data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +77 -42
- data/src/main/redstorm/storm/jruby/JRubyBolt.java +54 -34
- data/src/main/redstorm/storm/jruby/JRubySpout.java +62 -40
- data/src/main/redstorm/storm/jruby/JRubyTransactionalBolt.java +57 -35
- data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterBolt.java +6 -17
- data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterSpout.java +14 -26
- data/src/main/redstorm/storm/jruby/JRubyTransactionalSpout.java +60 -37
- data/src/main/redstorm/storm/jruby/JRubyTridentFunction.java +66 -0
- metadata +16 -23
- data/lib/red_storm/proxy/batch_bolt.rb +0 -63
- data/lib/red_storm/proxy/batch_committer_bolt.rb +0 -52
- data/lib/red_storm/proxy/batch_spout.rb +0 -59
- data/lib/red_storm/proxy/bolt.rb +0 -63
- data/lib/red_storm/proxy/proxy_function.rb +0 -40
- data/lib/red_storm/proxy/spout.rb +0 -87
- data/lib/red_storm/proxy/transactional_committer_spout.rb +0 -47
- data/lib/red_storm/proxy/transactional_spout.rb +0 -46
- data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +0 -51
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d7b22ba82aa7cc0135889207e53253b16c129efb
|
4
|
+
data.tar.gz: ae678b9666da95da30f4f2a23a344d0fd44df022
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1bb9e682e7b053f7a2df1c043ab5821df427cf62bfcd26e1281253574f097e88f623bc93db9e73ff1494c865ea2ea3e61e9c7004a0b6c17819dd5f903676f1cc
|
7
|
+
data.tar.gz: bb6148070c8de87f56339c517e6ed8eed4d873a35db5212dc98b720d929fdfe9ba960dd51273995bfee0b2d6a473e4399f7c8c0e0abbe54e2a6bf65d0bcd27f0
|
data/CHANGELOG.md
CHANGED
@@ -97,4 +97,9 @@
|
|
97
97
|
- [issue #76](https://github.com/colinsurprenant/redstorm/issues/76) - avoid shelling out to storm jar command for cluster submission
|
98
98
|
|
99
99
|
# 0.6.6, 07-25-2013
|
100
|
-
- updated example Kafka topology for new dependencies for Storm KafkaSpout
|
100
|
+
- updated example Kafka topology for new dependencies for Storm KafkaSpout
|
101
|
+
|
102
|
+
# 0.7.0.beta1, 03-2014
|
103
|
+
- refactored the proxy classes for better performance
|
104
|
+
- Storm 0.9.1-incubating and JRuby 1.7.11
|
105
|
+
- added Trident example in `examples/trident/`
|
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# RedStorm - JRuby on Storm
|
2
2
|
|
3
|
+
[](http://badge.fury.io/rb/redstorm)
|
3
4
|
[](http://travis-ci.org/colinsurprenant/redstorm)
|
5
|
+
[](https://codeclimate.com/github/colinsurprenant/redstorm)
|
6
|
+
[](https://coveralls.io/r/colinsurprenant/redstorm?branch=master)
|
4
7
|
|
5
8
|
RedStorm provides a Ruby DSL using JRuby integration for the [Storm](https://github.com/nathanmarz/storm/) distributed realtime computation system.
|
6
9
|
|
@@ -13,17 +16,15 @@ Check also these related projects:
|
|
13
16
|
|
14
17
|
## Documentation
|
15
18
|
|
16
|
-
<!--
|
17
|
-
---
|
18
|
-
This is the documentation for the **current 0.6.6-beta2 version of RedStorm** - the **[latest released Gem is v0.6.5](https://github.com/colinsurprenant/redstorm/wiki/RedStorm-Gem-v0.6.5-Documentation)**
|
19
|
-
|
20
|
-
---
|
21
|
-
-->
|
22
19
|
Chances are new versions of RedStorm will introduce changes that will break compatibility or change the development workflow. To prevent out-of-sync documentation, version-specific documentation is kept [in the wiki](https://github.com/colinsurprenant/redstorm/wiki) when necessary.
|
23
20
|
|
24
21
|
## Dependencies
|
25
22
|
|
26
|
-
|
23
|
+
#### Stable 0.6.6
|
24
|
+
- Tested on **OSX 10.8.3**, **Ubuntu Linux 12.10** using **Storm 0.9.0-wip16**, **JRuby 1.7.4**, **OpenJDK 7**
|
25
|
+
|
26
|
+
#### Current 0.7.0.beta1
|
27
|
+
- Tested on **OSX 10.9.1**, **Ubuntu Linux 12.10** using **Storm 0.9.1-incubating**, **JRuby 1.7.11**, **OpenJDK 7**
|
27
28
|
|
28
29
|
## Installation
|
29
30
|
|
@@ -21,15 +21,14 @@ module RedStorm
|
|
21
21
|
|
22
22
|
bolt ExclamationBolt, :id => :ExclamationBolt2, :parallelism => 2 do
|
23
23
|
source ExclamationBolt, :shuffle
|
24
|
+
debug true
|
24
25
|
end
|
25
26
|
|
26
27
|
configure do |env|
|
27
|
-
debug
|
28
|
+
debug false
|
28
29
|
max_task_parallelism 4
|
29
|
-
|
30
|
-
|
31
|
-
max_spout_pending(1000)
|
32
|
-
end
|
30
|
+
num_workers 1
|
31
|
+
max_spout_pending 1000
|
33
32
|
end
|
34
33
|
|
35
34
|
on_submit do |env|
|
@@ -48,10 +48,14 @@ class KafkaTopology < RedStorm::DSL::Topology
|
|
48
48
|
bolt SplitStringBolt do
|
49
49
|
output_fields :word
|
50
50
|
source KafkaSpout, :shuffle
|
51
|
+
debug true
|
51
52
|
end
|
52
53
|
|
53
54
|
configure do |env|
|
54
|
-
debug
|
55
|
+
debug false
|
56
|
+
max_task_parallelism 4
|
57
|
+
num_workers 1
|
58
|
+
max_spout_pending 1000
|
55
59
|
end
|
56
60
|
|
57
61
|
on_submit do |env|
|
@@ -39,19 +39,15 @@ module RedStorm
|
|
39
39
|
spout RedisWordSpout
|
40
40
|
|
41
41
|
bolt WordCountBolt, :parallelism => 2 do
|
42
|
+
debug true
|
42
43
|
source RedisWordSpout, :fields => ["word"]
|
43
44
|
end
|
44
45
|
|
45
46
|
configure do |env|
|
46
|
-
debug
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
when :cluster
|
51
|
-
max_task_parallelism 2
|
52
|
-
num_workers 2
|
53
|
-
max_spout_pending(1000)
|
54
|
-
end
|
47
|
+
debug false
|
48
|
+
max_task_parallelism 2
|
49
|
+
num_workers 1
|
50
|
+
max_spout_pending 1000
|
55
51
|
end
|
56
52
|
end
|
57
53
|
end
|
@@ -13,16 +13,15 @@ module RedStorm
|
|
13
13
|
end
|
14
14
|
|
15
15
|
bolt WordCountBolt, :parallelism => 2 do
|
16
|
+
debug true
|
16
17
|
source SplitSentenceBolt, :fields => ["word"]
|
17
18
|
end
|
18
19
|
|
19
20
|
configure :word_count do |env|
|
20
|
-
debug
|
21
|
+
debug false
|
21
22
|
max_task_parallelism 4
|
22
|
-
|
23
|
-
|
24
|
-
max_spout_pending(1000)
|
25
|
-
end
|
23
|
+
num_workers 1
|
24
|
+
max_spout_pending 1000
|
26
25
|
end
|
27
26
|
|
28
27
|
on_submit do |env|
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require "red_storm"
|
2
|
+
require "json"
|
3
|
+
|
4
|
+
java_import "backtype.storm.utils.DRPCClient"
|
5
|
+
|
6
|
+
# Usage:
|
7
|
+
#
|
8
|
+
# This is a DRPC client that will query a Storm cluster trident drpc topology.
|
9
|
+
# See the trident word_count_topology.rb for running the drpc topology.
|
10
|
+
#
|
11
|
+
# Edit the host and port below.
|
12
|
+
|
13
|
+
module Example
|
14
|
+
|
15
|
+
# this is not a topology, the redstorm topology_launcher will launch any class with the
|
16
|
+
# start method in the correct storm environment
|
17
|
+
|
18
|
+
class TridentWordCountQuery
|
19
|
+
RedStorm::Configuration.topology_class = self
|
20
|
+
|
21
|
+
def start(env)
|
22
|
+
puts("TridentWordCountQuery starting")
|
23
|
+
|
24
|
+
client = DRPCClient.new("localhost", 3772)
|
25
|
+
loop do
|
26
|
+
json_result = client.execute("words", "cat the dog jumped")
|
27
|
+
puts("DRPC execute=#{JSON.parse(json_result)[0][0]}")
|
28
|
+
|
29
|
+
sleep(2)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require 'red_storm'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
java_import "backtype.storm.LocalCluster"
|
5
|
+
java_import "backtype.storm.LocalDRPC"
|
6
|
+
java_import "backtype.storm.StormSubmitter"
|
7
|
+
java_import "backtype.storm.generated.StormTopology"
|
8
|
+
java_import "backtype.storm.tuple.Fields"
|
9
|
+
java_import "backtype.storm.tuple.Values"
|
10
|
+
java_import "storm.trident.TridentState"
|
11
|
+
java_import "storm.trident.TridentTopology"
|
12
|
+
java_import "storm.trident.operation.BaseFunction"
|
13
|
+
java_import "storm.trident.operation.TridentCollector"
|
14
|
+
java_import "storm.trident.operation.builtin.Count"
|
15
|
+
java_import "storm.trident.operation.builtin.FilterNull"
|
16
|
+
java_import "storm.trident.operation.builtin.MapGet"
|
17
|
+
java_import "storm.trident.operation.builtin.Sum"
|
18
|
+
java_import "storm.trident.testing.FixedBatchSpout"
|
19
|
+
java_import "storm.trident.testing.MemoryMapState"
|
20
|
+
java_import "storm.trident.tuple.TridentTuple"
|
21
|
+
|
22
|
+
java_import 'redstorm.storm.jruby.JRubyTridentFunction'
|
23
|
+
|
24
|
+
REQUIRE_PATH = Pathname.new(__FILE__).relative_path_from(Pathname.new(RedStorm::BASE_PATH)).to_s
|
25
|
+
|
26
|
+
# Usage:
|
27
|
+
#
|
28
|
+
# Local mode:
|
29
|
+
#
|
30
|
+
# $ redstorm install
|
31
|
+
# $ redstorm examples
|
32
|
+
# $ redstorm local examples/trident/word_count_topology.rb
|
33
|
+
#
|
34
|
+
# Cluster mode:
|
35
|
+
#
|
36
|
+
# $ redstorm install
|
37
|
+
# $ redstorm examples
|
38
|
+
# $ redstorm jar examples
|
39
|
+
# $ redstorm cluster examples/trident/word_count_topology.rb
|
40
|
+
#
|
41
|
+
# After submission, wait a bit for the topology to start up, then launch the drpc query example:
|
42
|
+
# Edit word_count_query.rb to set the host/port of your cluster drpc daemon.
|
43
|
+
#
|
44
|
+
# $ redstorm local examples/trident/word_count_query.rb
|
45
|
+
|
46
|
+
module Examples
|
47
|
+
class TridentSplit
|
48
|
+
|
49
|
+
def execute(tuple, collector)
|
50
|
+
tuple[0].split(" ").each do |word|
|
51
|
+
collector.emit(Values.new(word))
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def prepare(conf, context); end
|
56
|
+
def cleanup;end
|
57
|
+
end
|
58
|
+
|
59
|
+
class TridentWordCountTopology
|
60
|
+
RedStorm::Configuration.topology_class = self
|
61
|
+
|
62
|
+
def build_topology(local_drpc)
|
63
|
+
spout = FixedBatchSpout.new(
|
64
|
+
Fields.new("sentence"), 3,
|
65
|
+
Values.new("the cow jumped over the moon"),
|
66
|
+
Values.new("the man went to the store and bought some candy"),
|
67
|
+
Values.new("four score and seven years ago"),
|
68
|
+
Values.new("how many apples can you eat"),
|
69
|
+
Values.new("to be or not to be the person")
|
70
|
+
)
|
71
|
+
spout.cycle = true
|
72
|
+
|
73
|
+
topology = TridentTopology.new
|
74
|
+
|
75
|
+
stream = topology.new_stream("spout1", spout)
|
76
|
+
.parallelism_hint(3)
|
77
|
+
.each(
|
78
|
+
Fields.new("sentence"),
|
79
|
+
JRubyTridentFunction.new(REQUIRE_PATH, "Examples::TridentSplit"),
|
80
|
+
Fields.new("word")
|
81
|
+
)
|
82
|
+
.groupBy(
|
83
|
+
Fields.new("word")
|
84
|
+
)
|
85
|
+
.persistentAggregate(
|
86
|
+
MemoryMapState::Factory.new,
|
87
|
+
Count.new,
|
88
|
+
Fields.new("count")
|
89
|
+
)
|
90
|
+
.parallelism_hint(3)
|
91
|
+
|
92
|
+
# topology.newDRPCStream("words", drpc)
|
93
|
+
topology.newDRPCStream("words", local_drpc)
|
94
|
+
.each(
|
95
|
+
Fields.new("args"),
|
96
|
+
JRubyTridentFunction.new(REQUIRE_PATH, "Examples::TridentSplit"),
|
97
|
+
Fields.new("word")
|
98
|
+
)
|
99
|
+
.groupBy(
|
100
|
+
Fields.new("word")
|
101
|
+
)
|
102
|
+
.stateQuery(
|
103
|
+
stream,
|
104
|
+
Fields.new("word"),
|
105
|
+
MapGet.new,
|
106
|
+
Fields.new("count")
|
107
|
+
)
|
108
|
+
.each(
|
109
|
+
Fields.new("count"),
|
110
|
+
FilterNull.new
|
111
|
+
)
|
112
|
+
.aggregate(
|
113
|
+
Fields.new("count"),
|
114
|
+
Sum.new,
|
115
|
+
Fields.new("sum")
|
116
|
+
)
|
117
|
+
|
118
|
+
topology.build
|
119
|
+
end
|
120
|
+
|
121
|
+
def display_drpc(client)
|
122
|
+
loop do
|
123
|
+
sleep(2)
|
124
|
+
|
125
|
+
json_result = client.execute("words", "cat the dog jumped")
|
126
|
+
puts("DRPC execute=#{JSON.parse(json_result)[0][0]}")
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
def start(env)
|
131
|
+
conf = Backtype::Config.new
|
132
|
+
conf.debug = false
|
133
|
+
conf.max_spout_pending = 20
|
134
|
+
|
135
|
+
case env
|
136
|
+
when :local
|
137
|
+
local_drpc = LocalDRPC.new
|
138
|
+
submitter = LocalCluster.new
|
139
|
+
conf.num_workers = 1 # set to 1 in local, see https://issues.apache.org/jira/browse/STORM-113
|
140
|
+
when :cluster
|
141
|
+
local_drpc = nil
|
142
|
+
submitter = StormSubmitter
|
143
|
+
conf.put("drpc.servers", ["localhost"])
|
144
|
+
conf.num_workers = 3
|
145
|
+
end
|
146
|
+
|
147
|
+
submitter.submit_topology("trident_word_count", conf, build_topology(local_drpc));
|
148
|
+
|
149
|
+
display_drpc(local_drpc) if local_drpc
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
end
|
data/ivy/storm_dependencies.xml
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
<ivy-module version="2.0" xmlns:m="http://ant.apache.org/ivy/maven">
|
3
3
|
<info organisation="redstorm" module="storm-deps"/>
|
4
4
|
<dependencies>
|
5
|
-
<dependency org="storm" name="storm" rev="0.9.
|
5
|
+
<dependency org="org.apache.storm" name="storm-core" rev="0.9.1-incubating" conf="default" transitive="true" />
|
6
6
|
<override org="org.slf4j" module="slf4j-log4j12" rev="1.6.3"/>
|
7
7
|
</dependencies>
|
8
8
|
</ivy-module>
|
@@ -2,11 +2,12 @@
|
|
2
2
|
<ivy-module version="2.0" xmlns:m="http://ant.apache.org/ivy/maven">
|
3
3
|
<info organisation="redstorm" module="topology-deps"/>
|
4
4
|
<dependencies>
|
5
|
-
<dependency org="org.jruby" name="jruby-core" rev="1.7.
|
5
|
+
<dependency org="org.jruby" name="jruby-core" rev="1.7.11" conf="default" transitive="true"/>
|
6
|
+
<dependency org="org.jruby" name="jruby-stdlib" rev="1.7.11" conf="default" transitive="true"/>
|
6
7
|
|
7
8
|
<!-- explicitly specify jffi to also fetch the native jar. make sure to update jffi version matching jruby-core version -->
|
8
9
|
<!-- this is the only way I found using Ivy to fetch the native jar -->
|
9
|
-
<dependency org="com.github.jnr" name="jffi" rev="1.2.
|
10
|
+
<dependency org="com.github.jnr" name="jffi" rev="1.2.7" conf="default" transitive="true">
|
10
11
|
<artifact name="jffi" type="jar" />
|
11
12
|
<artifact name="jffi" type="jar" m:classifier="native"/>
|
12
13
|
</dependency>
|
data/lib/red_storm.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
|
3
1
|
require 'red_storm/version'
|
4
2
|
require 'red_storm/environment'
|
5
3
|
require 'red_storm/configuration'
|
4
|
+
require 'red_storm/configurator'
|
6
5
|
require 'red_storm/dsl/bolt'
|
6
|
+
require 'red_storm/dsl/batch_bolt'
|
7
|
+
require 'red_storm/dsl/batch_committer_bolt'
|
7
8
|
require 'red_storm/dsl/spout'
|
9
|
+
require 'red_storm/dsl/batch_spout'
|
8
10
|
require 'red_storm/dsl/topology'
|
9
11
|
require 'red_storm/dsl/drpc_topology'
|
10
12
|
require 'red_storm/dsl/tuple'
|
13
|
+
require 'red_storm/dsl/output_collector'
|
@@ -1,3 +1,15 @@
|
|
1
|
+
# This hack gets rid of the "Use RbConfig instead of obsolete and deprecated Config"
|
2
|
+
# deprecation warning that is triggered by "java_import 'backtype.storm.Config'".
|
3
|
+
begin
|
4
|
+
Object.send :remove_const, :Config
|
5
|
+
Config = RbConfig
|
6
|
+
rescue NameError
|
7
|
+
end
|
8
|
+
|
9
|
+
module Backtype
|
10
|
+
java_import 'backtype.storm.Config'
|
11
|
+
end
|
12
|
+
|
1
13
|
module RedStorm
|
2
14
|
|
3
15
|
class Configurator
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module RedStorm
|
2
|
+
module DSL
|
3
|
+
|
4
|
+
class BatchBolt < Bolt
|
5
|
+
attr_reader :id
|
6
|
+
|
7
|
+
def self.java_proxy; "Java::RedstormStormJruby::JRubyBatchBolt"; end
|
8
|
+
|
9
|
+
def self.on_finish_batch(method_name = nil, &on_finish_batch_block)
|
10
|
+
body = block_given? ? on_finish_batch_block : lambda {self.send((method_name || :on_finish_batch).to_sym)}
|
11
|
+
define_method(:on_finish_batch, body)
|
12
|
+
end
|
13
|
+
|
14
|
+
def prepare(config, context, collector, id)
|
15
|
+
@collector = collector
|
16
|
+
@context = context
|
17
|
+
@config = config
|
18
|
+
@id = id
|
19
|
+
|
20
|
+
on_init
|
21
|
+
end
|
22
|
+
|
23
|
+
def finish_batch
|
24
|
+
on_finish_batch
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
# default noop optional dsl callbacks
|
30
|
+
def on_finish_batch; end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|