kb-redstorm 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/CHANGELOG.md +74 -0
  2. data/LICENSE.md +13 -0
  3. data/README.md +375 -0
  4. data/Rakefile +11 -0
  5. data/bin/redstorm +15 -0
  6. data/examples/native/Gemfile +2 -0
  7. data/examples/native/cluster_word_count_topology.rb +25 -0
  8. data/examples/native/exclamation_bolt.rb +21 -0
  9. data/examples/native/local_exclamation_topology.rb +31 -0
  10. data/examples/native/local_exclamation_topology2.rb +48 -0
  11. data/examples/native/local_redis_word_count_topology.rb +69 -0
  12. data/examples/native/local_word_count_topology.rb +27 -0
  13. data/examples/native/random_sentence_spout.rb +30 -0
  14. data/examples/native/split_sentence_bolt.rb +20 -0
  15. data/examples/native/word_count_bolt.rb +26 -0
  16. data/examples/shell/resources/splitsentence.py +9 -0
  17. data/examples/shell/resources/storm.py +206 -0
  18. data/examples/shell/shell_topology.rb +41 -0
  19. data/examples/simple/exclamation_bolt.rb +10 -0
  20. data/examples/simple/exclamation_topology.rb +45 -0
  21. data/examples/simple/exclamation_topology2.rb +45 -0
  22. data/examples/simple/kafka_topology.rb +55 -0
  23. data/examples/simple/random_sentence_spout.rb +21 -0
  24. data/examples/simple/redis_word_count_topology.rb +61 -0
  25. data/examples/simple/ruby_version_topology.rb +32 -0
  26. data/examples/simple/split_sentence_bolt.rb +33 -0
  27. data/examples/simple/word_count_bolt.rb +19 -0
  28. data/examples/simple/word_count_topology.rb +38 -0
  29. data/ivy/settings.xml +11 -0
  30. data/lib/red_storm.rb +9 -0
  31. data/lib/red_storm/application.rb +85 -0
  32. data/lib/red_storm/configuration.rb +16 -0
  33. data/lib/red_storm/configurator.rb +26 -0
  34. data/lib/red_storm/environment.rb +41 -0
  35. data/lib/red_storm/loggable.rb +15 -0
  36. data/lib/red_storm/proxy/batch_spout.rb +71 -0
  37. data/lib/red_storm/proxy/bolt.rb +63 -0
  38. data/lib/red_storm/proxy/proxy_function.rb +48 -0
  39. data/lib/red_storm/proxy/spout.rb +87 -0
  40. data/lib/red_storm/simple_bolt.rb +135 -0
  41. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  42. data/lib/red_storm/simple_spout.rb +184 -0
  43. data/lib/red_storm/simple_topology.rb +209 -0
  44. data/lib/red_storm/topology_launcher.rb +54 -0
  45. data/lib/red_storm/version.rb +3 -0
  46. data/lib/tasks/red_storm.rake +272 -0
  47. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +89 -0
  48. data/src/main/redstorm/storm/jruby/JRubyBolt.java +88 -0
  49. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +59 -0
  50. data/src/main/redstorm/storm/jruby/JRubyShellBolt.java +26 -0
  51. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +26 -0
  52. data/src/main/redstorm/storm/jruby/JRubySpout.java +107 -0
  53. metadata +134 -0
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
# Rake entry point: defines the :spec task, makes it the default task and
# loads the RedStorm rake tasks.
require 'rspec/core/rake_task'

# The block handed to the :spec task definition prints the interpreter
# version and sets RedStorm::SPECS_CONTEXT.
RSpec::Core::RakeTask.new(:spec) do
  system('ruby -v')
  module RedStorm
    SPECS_CONTEXT = true
  end
end

task(:default => :spec)

load('lib/tasks/red_storm.rake')
data/bin/redstorm ADDED
@@ -0,0 +1,15 @@
1
#!/usr/bin/env ruby

# Command-line launcher: loads RedStorm::Application and runs it with a copy
# of the command-line arguments.

require 'rubygems'
require 'rake'

begin
  # resolves when installed as a gem, since lib is in the gem require_paths
  require 'red_storm/application'
rescue LoadError
  # fall back to the local lib directory when run inside the RedStorm dev project
  $LOAD_PATH.unshift('./lib')
  require 'red_storm/application'
end

RedStorm::Application.run(ARGV.dup)
@@ -0,0 +1,2 @@
1
# Gem dependencies for the native examples.
# The explicit HTTPS URL replaces the deprecated `:rubygems` symbol shorthand.
source 'https://rubygems.org'
gem 'redis'
@@ -0,0 +1,25 @@
1
+ require 'red_storm'
2
+ require 'examples/native/random_sentence_spout'
3
+ require 'examples/native/split_sentence_bolt'
4
+ require 'examples/native/word_count_bolt'
5
+
6
module RedStorm
  module Examples
    # Word-count topology for cluster submission: wires
    # RandomSentenceSpout -> SplitSentenceBolt -> WordCountBolt and submits the
    # result through StormSubmitter under the name "word_count".
    class ClusterWordCountTopology
      RedStorm::Configuration.topology_class = self

      # Builds and submits the topology.
      #
      # base_class_path - handed to every JRubySpout/JRubyBolt proxy constructor.
      # env             - not used by this topology.
      def start(base_class_path, env)
        topology = TopologyBuilder.new

        sentence_spout = JRubySpout.new(base_class_path, "RedStorm::Examples::RandomSentenceSpout")
        topology.setSpout('RandomSentenceSpout', sentence_spout, 5)

        splitter = JRubyBolt.new(base_class_path, "RedStorm::Examples::SplitSentenceBolt")
        topology.setBolt('SplitSentenceBolt', splitter, 4).shuffleGrouping('RandomSentenceSpout')

        counter = JRubyBolt.new(base_class_path, "RedStorm::Examples::WordCountBolt")
        topology.setBolt('WordCountBolt', counter, 4).fieldsGrouping('SplitSentenceBolt', Fields.new("word"))

        storm_conf = Backtype::Config.new
        storm_conf.setDebug(true)
        storm_conf.setNumWorkers(20)
        storm_conf.setMaxSpoutPending(1000)

        StormSubmitter.submitTopology("word_count", storm_conf, topology.createTopology)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module RedStorm
  module Examples
    # Bolt that re-emits the first string field of each tuple with "!!!" appended.
    class ExclamationBolt
      # Keeps the output collector for later use by #execute.
      def prepare(conf, context, collector)
        @collector = collector
      end

      # Emits the suffixed string anchored to the input tuple, then acks the input.
      def execute(tuple)
        shouted = tuple.getString(0) + "!!!"
        @collector.emit(tuple, Values.new(shouted))
        @collector.ack(tuple)
      end

      # No component-specific configuration is provided; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+
3
+ require 'lib/red_storm'
4
+ require 'examples/native/exclamation_bolt'
5
+
6
+ # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
7
+
8
module RedStorm
  module Examples
    # Local-cluster topology chaining TestWordSpout through two
    # ExclamationBolt stages; it runs for five seconds and is then torn down.
    class LocalExclamationTopology
      RedStorm::Configuration.topology_class = self

      # Builds the topology, runs it on a LocalCluster, then kills it and
      # shuts the cluster down.
      #
      # base_class_path - handed to the JRubyBolt proxy constructors.
      # env             - not used by this topology.
      def start(base_class_path, env)
        topology = TopologyBuilder.new

        topology.setSpout('TestWordSpout', TestWordSpout.new, 10)

        first_stage = JRubyBolt.new(base_class_path, 'RedStorm::Examples::ExclamationBolt')
        topology.setBolt('ExclamationBolt1', first_stage, 3).shuffleGrouping('TestWordSpout')

        second_stage = JRubyBolt.new(base_class_path, 'RedStorm::Examples::ExclamationBolt')
        topology.setBolt('ExclamationBolt2', second_stage, 3).shuffleGrouping('ExclamationBolt1')

        storm_conf = Backtype::Config.new
        storm_conf.setDebug(true)

        local_cluster = LocalCluster.new
        local_cluster.submitTopology("exclamation", storm_conf, topology.createTopology)
        sleep(5)
        local_cluster.killTopology("exclamation")
        local_cluster.shutdown
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+
3
+ require 'lib/red_storm'
4
+
5
module RedStorm
  module Examples
    # Bolt that wraps the first string field of each tuple in exclamation marks.
    class ExclamationBolt2
      # Keeps the output collector for later use by #execute.
      def prepare(conf, context, collector)
        @collector = collector
      end

      # Emits "!<string>!" anchored to the input tuple, then acks the input.
      def execute(tuple)
        wrapped = "!#{tuple.getString(0)}!"
        @collector.emit(tuple, Values.new(wrapped))
        @collector.ack(tuple)
      end

      # No component-specific configuration is provided; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end
    end

    # This example topology uses the Storm TestWordSpout and the JRuby
    # ExclamationBolt2 defined above, chained in two stages on a LocalCluster.
    class LocalExclamationTopology2
      RedStorm::Configuration.topology_class = self

      # Builds the topology, runs it locally for five seconds, then kills it
      # and shuts the cluster down.
      #
      # base_class_path - handed to the JRubyBolt proxy constructors.
      # env             - not used by this topology.
      def start(base_class_path, env)
        topology = TopologyBuilder.new

        topology.setSpout('TestWordSpout', TestWordSpout.new, 10)

        first_stage = JRubyBolt.new(base_class_path, "RedStorm::Examples::ExclamationBolt2")
        topology.setBolt('ExclamationBolt21', first_stage, 3).shuffleGrouping('TestWordSpout')

        second_stage = JRubyBolt.new(base_class_path, "RedStorm::Examples::ExclamationBolt2")
        topology.setBolt('ExclamationBolt22', second_stage, 2).shuffleGrouping('ExclamationBolt21')

        storm_conf = Backtype::Config.new
        storm_conf.setDebug(true)

        local_cluster = LocalCluster.new
        local_cluster.submitTopology("exclamation", storm_conf, topology.createTopology)
        sleep(5)
        local_cluster.killTopology("exclamation")
        local_cluster.shutdown
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ require 'bundler/setup'
2
+ require 'redis'
3
+ require 'thread'
4
+ require 'lib/red_storm'
5
+ require 'examples/native/word_count_bolt'
6
+
7
module RedStorm
  module Examples
    # RedisWordSpout reads the Redis list "test" on localhost:6379 and emits
    # each item popped from it as a one-field tuple.
    class RedisWordSpout
      # Keeps the collector, creates the in-process queue and starts the
      # background Redis reader thread.
      def open(conf, context, collector)
        @collector = collector
        @q = Queue.new
        @redis_reader = detach_redis_reader
      end

      # Emits one queued item if available.
      # Per doc nextTuple should not block, so sleep briefly when there is no data.
      def next_tuple
        if @q.empty?
          sleep(0.1)
        else
          @collector.emit(Values.new(@q.pop))
        end
      end

      # No component-specific configuration is provided; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end

      private

      # Starts a thread that loops on BLPOP against the "test" list and pushes
      # the popped value (second element of the reply) onto the queue.
      # Returns the Thread.
      def detach_redis_reader
        Thread.new do
          Thread.current.abort_on_exception = true

          redis = Redis.new(:host => "localhost", :port => 6379)
          loop do
            reply = redis.blpop("test", 0)
            @q << reply[1] if reply
          end
        end
      end
    end

    # Local-cluster topology feeding RedisWordSpout into WordCountBolt.
    class LocalRedisWordCountTopology
      RedStorm::Configuration.topology_class = self

      # Builds the topology, runs it on a LocalCluster for 600 seconds, then
      # shuts the cluster down.
      #
      # base_class_path - handed to the JRubySpout/JRubyBolt proxy constructors.
      # env             - not used by this topology.
      def start(base_class_path, env)
        topology = TopologyBuilder.new

        word_spout = JRubySpout.new(base_class_path, "RedStorm::Examples::RedisWordSpout")
        topology.setSpout('RedisWordSpout', word_spout, 1)

        counter = JRubyBolt.new(base_class_path, "RedStorm::Examples::WordCountBolt")
        topology.setBolt('WordCountBolt', counter, 3).fieldsGrouping('RedisWordSpout', Fields.new("word"))

        storm_conf = Backtype::Config.new
        storm_conf.setDebug(true)
        storm_conf.setMaxTaskParallelism(3)

        local_cluster = LocalCluster.new
        local_cluster.submitTopology("redis_word_count", storm_conf, topology.createTopology)
        sleep(600)
        local_cluster.shutdown
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'lib/red_storm'
2
+ require 'examples/native/random_sentence_spout'
3
+ require 'examples/native/split_sentence_bolt'
4
+ require 'examples/native/word_count_bolt'
5
+
6
+
7
module Examples
  # Local-cluster word-count topology: RandomSentenceSpout ->
  # SplitSentenceBolt -> WordCountBolt, run for five seconds.
  class LocalWordCountTopology
    RedStorm::Configuration.topology_class = self

    # Builds the topology, runs it on a LocalCluster for five seconds, then
    # shuts the cluster down.
    #
    # base_class_path - handed to the JRubySpout/JRubyBolt proxy constructors.
    # env             - not used by this topology.
    def start(base_class_path, env)
      topology = TopologyBuilder.new

      sentence_spout = JRubySpout.new(base_class_path, "RedStorm::Examples::RandomSentenceSpout")
      topology.setSpout('RandomSentenceSpout', sentence_spout, 5)

      splitter = JRubyBolt.new(base_class_path, "RedStorm::Examples::SplitSentenceBolt")
      topology.setBolt('SplitSentenceBolt', splitter, 8).shuffleGrouping('RandomSentenceSpout')

      counter = JRubyBolt.new(base_class_path, "RedStorm::Examples::WordCountBolt")
      topology.setBolt('WordCountBolt', counter, 12).fieldsGrouping('SplitSentenceBolt', Fields.new("word"))

      storm_conf = Backtype::Config.new
      storm_conf.setDebug(true)
      storm_conf.setMaxTaskParallelism(3)

      local_cluster = LocalCluster.new
      local_cluster.submitTopology("word_count", storm_conf, topology.createTopology)
      sleep(5)
      local_cluster.shutdown
    end
  end
end
@@ -0,0 +1,30 @@
1
module RedStorm
  module Examples
    # Spout that emits a randomly chosen sentence from a fixed list on every
    # next_tuple call.
    class RandomSentenceSpout
      # Sets up the list of candidate sentences.
      def initialize
        @sentences = [
          "the cow jumped over the moon",
          "an apple a day keeps the doctor away",
          "four score and seven years ago",
          "snow white and the seven dwarfs",
          "i am at two with nature"
        ]
      end

      # Keeps the output collector for later use by #next_tuple.
      def open(conf, context, collector)
        @collector = collector
      end

      # Picks a random index into the sentence list and emits that sentence.
      def next_tuple
        pick = rand(@sentences.length)
        @collector.emit(Values.new(@sentences[pick]))
      end

      # No component-specific configuration is provided; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module RedStorm
  module Examples
    # Bolt that splits the first string field of each tuple on single spaces
    # and emits one tuple per word.
    class SplitSentenceBolt
      # Keeps the output collector for later use by #execute.
      def prepare(conf, context, collector)
        @collector = collector
      end

      # Emits every word anchored to the input tuple, then acks the input.
      # Anchoring and acking mirror ExclamationBolt; without the ack an
      # upstream spout that tracks tuples would never see this tuple complete.
      def execute(tuple)
        tuple.getString(0).split(" ").each do |word|
          @collector.emit(tuple, Values.new(word))
        end
        @collector.ack(tuple)
      end

      # No component-specific configuration is provided; returns nil.
      def get_component_configuration
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        declarer.declare(Fields.new("word"))
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module RedStorm
  module Examples
    # Bolt that keeps a running per-word count in memory and emits
    # (word, count) after every input tuple.
    class WordCountBolt
      # Starts with an empty tally; unseen words count from zero.
      def initialize
        @counts = Hash.new(0)
      end

      # Keeps the output collector for later use by #execute.
      def prepare(conf, context, collector)
        @collector = collector
      end

      # Increments the count for the tuple's first string field, emits the
      # updated (word, count) pair anchored to the input, then acks the input.
      # Anchoring and acking mirror ExclamationBolt; without the ack an
      # upstream spout that tracks tuples would never see this tuple complete.
      def execute(tuple)
        word = tuple.getString(0)
        @counts[word] += 1
        @collector.emit(tuple, Values.new(word, @counts[word]))
        @collector.ack(tuple)
      end

      # No component-specific configuration is provided; returns nil.
      def get_component_configuration
      end

      # Declares the two output fields "word" and "count".
      def declare_output_fields(declarer)
        declarer.declare(Fields.new("word", "count"))
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ import storm
2
+
3
class SplitSentenceBolt(storm.BasicBolt):
    """Bolt that splits the first value of each tuple on single spaces and
    emits one single-element tuple per piece."""

    def process(self, tup):
        sentence = tup.values[0]
        for token in sentence.split(" "):
            storm.emit([token])


SplitSentenceBolt().run()
@@ -0,0 +1,206 @@
1
+ import sys
2
+ import os
3
+ import traceback
4
+ from collections import deque
5
+
6
+ try:
7
+ import simplejson as json
8
+ except ImportError:
9
+ import json
10
+
11
def json_encode(x):
    """Serialize x to a JSON string with json.dumps."""
    return json.dumps(x)


def json_decode(x):
    """Parse the JSON text x with json.loads."""
    return json.loads(x)
13
+
14
# reads lines and reconstructs newlines appropriately
def readMsg():
    """Read one message from stdin and return it JSON-decoded.

    A message is every line up to (not including) a line that is exactly
    "end"; the lines are re-joined with newlines before decoding.

    Raises EOFError if stdin is closed before the "end" line arrives; without
    this check readline() keeps returning "" and the loop never terminates.
    """
    lines = []
    while True:
        line = sys.stdin.readline()
        if not line:
            raise EOFError("Read EOF from stdin")
        # strip only the line terminator, never a payload character
        if line.endswith("\n"):
            line = line[:-1]
        if line == "end":
            break
        lines.append(line)
    return json_decode("\n".join(lines))
23
+
24
+ MODE = None
25
+ ANCHOR_TUPLE = None
26
+
27
# commands read while waiting for a task-id list are parked here
pending_commands = deque()


def readTaskIds():
    """Return the next task-id message (a list).

    A previously queued task-id list is returned first. Otherwise messages are
    read until a list arrives; every non-list message read on the way is
    appended to pending_commands.
    """
    if pending_taskids:
        return pending_taskids.popleft()
    while True:
        msg = readMsg()
        if type(msg) is list:
            return msg
        pending_commands.append(msg)
39
+
40
# task-id lists read while waiting for a command/tuple are parked here
pending_taskids = deque()


def readCommand():
    """Return the next command message (anything that is not a list).

    A previously queued command is returned first. Otherwise messages are read
    until a non-list arrives; every list read on the way is appended to
    pending_taskids.
    """
    if pending_commands:
        return pending_commands.popleft()
    while True:
        msg = readMsg()
        if type(msg) is not list:
            return msg
        pending_taskids.append(msg)
52
+
53
def readTuple():
    """Read the next command and wrap its "id", "comp", "stream", "task" and
    "tuple" entries, in that order, in a Tuple."""
    cmd = readCommand()
    fields = [cmd[key] for key in ("id", "comp", "stream", "task", "tuple")]
    return Tuple(*fields)
56
+
57
def sendMsgToParent(msg):
    """Write msg to stdout as one JSON line followed by an "end" line, then flush.

    Uses sys.stdout.write instead of the print statement so the function is
    valid on both Python 2 and Python 3; the bytes written are unchanged.
    """
    sys.stdout.write(json_encode(msg) + "\n")
    sys.stdout.write("end\n")
    sys.stdout.flush()
61
+
62
def sync():
    """Send a {'command': 'sync'} message to the parent."""
    sync_msg = {'command': 'sync'}
    sendMsgToParent(sync_msg)
64
+
65
def sendpid(heartbeatdir):
    """Report this process id to the parent, then create an empty file named
    after the pid inside heartbeatdir."""
    pid = os.getpid()
    sendMsgToParent({'pid': pid})
    pid_path = heartbeatdir + "/" + str(pid)
    handle = open(pid_path, "w")
    handle.close()
69
+
70
def emit(*args, **kwargs):
    """Emit via __emit with the given arguments, then read and return the
    task-id list that follows."""
    __emit(*args, **kwargs)
    task_ids = readTaskIds()
    return task_ids
73
+
74
def emitDirect(task, *args, **kwargs):
    """Emit a tuple directly to the given task.

    Forwards to __emit with directTask=task. Unlike emit(), no task-id reply
    is read. The keyword must be the string "directTask"; the bare name
    directTask is not defined and raised NameError on every call.
    """
    kwargs["directTask"] = task
    __emit(*args, **kwargs)
77
+
78
def __emit(*args, **kwargs):
    """Dispatch to emitBolt or emitSpout according to the module-level MODE;
    does nothing when MODE is neither Bolt nor Spout."""
    mode = MODE
    if mode == Bolt:
        emitBolt(*args, **kwargs)
        return
    if mode == Spout:
        emitSpout(*args, **kwargs)
84
+
85
def emitBolt(tup, stream=None, anchors = [], directTask=None):
    """Send an "emit" command for a bolt.

    tup        -- the values to emit, sent under "tuple".
    stream     -- optional stream id, sent under "stream" when not None.
    anchors    -- tuples whose ids are sent under "anchors"; ignored when the
                  module-level ANCHOR_TUPLE is set, which then becomes the
                  sole anchor.
    directTask -- optional task id, sent under "task" when not None.
    """
    if ANCHOR_TUPLE is not None:
        anchors = [ANCHOR_TUPLE]
    m = {"command": "emit"}
    if stream is not None:
        m["stream"] = stream
    # a real list: on Python 3 map() returns an iterator that is not JSON serializable
    m["anchors"] = [a.id for a in anchors]
    if directTask is not None:
        m["task"] = directTask
    m["tuple"] = tup
    sendMsgToParent(m)
97
+
98
def emitSpout(tup, stream=None, id=None, directTask=None):
    """Send an "emit" command for a spout.

    tup is sent under "tuple"; id, stream and directTask are sent under "id",
    "stream" and "task" respectively, each only when not None.
    """
    m = {"command": "emit"}
    optional = (("id", id), ("stream", stream), ("task", directTask))
    for key, value in optional:
        if value is not None:
            m[key] = value
    m["tuple"] = tup
    sendMsgToParent(m)
108
+
109
def ack(tup):
    """Send an "ack" command carrying tup.id to the parent."""
    ack_msg = dict(command="ack", id=tup.id)
    sendMsgToParent(ack_msg)
111
+
112
def fail(tup):
    """Send a "fail" command carrying tup.id to the parent."""
    fail_msg = dict(command="fail", id=tup.id)
    sendMsgToParent(fail_msg)
114
+
115
def log(msg):
    """Send a "log" command carrying msg to the parent."""
    log_msg = dict(command="log", msg=msg)
    sendMsgToParent(log_msg)
117
+
118
def initComponent():
    """Read the setup message, report the pid using its 'pidDir' entry, and
    return [conf, context] taken from its 'conf' and 'context' entries."""
    setup = readMsg()
    sendpid(setup['pidDir'])
    conf = setup['conf']
    context = setup['context']
    return [conf, context]
122
+
123
class Tuple:
    """Plain record holding a tuple's id, component, stream, task and values."""

    def __init__(self, id, component, stream, task, values):
        self.id = id
        self.component = component
        self.stream = stream
        self.task = task
        self.values = values

    def __repr__(self):
        # attributes are listed in sorted name order as " name=repr(value)"
        attrs = self.__dict__
        parts = [' %s=%r' % (name, attrs[name]) for name in sorted(attrs)]
        return '<%s%s>' % (self.__class__.__name__, ''.join(parts))
135
+
136
class Bolt:
    """Base class for bolts: subclasses override initialize() and process()."""

    def initialize(self, stormconf, context):
        """Hook called once by run() with the conf and context; no-op by default."""
        pass

    def process(self, tuple):
        """Hook called by run() for every tuple read; no-op by default."""
        pass

    def run(self):
        """Set MODE to Bolt, initialize the component, then read tuples forever
        and pass each to process().

        Any Exception ends the loop and its traceback is sent through log().
        """
        global MODE
        MODE = Bolt
        conf, context = initComponent()
        self.initialize(conf, context)
        try:
            while True:
                tup = readTuple()
                self.process(tup)
        except Exception:
            # format_exc() takes an optional depth limit, not the exception object
            log(traceback.format_exc())
154
+
155
class BasicBolt:
    """Base class for bolts whose emits are anchored to the current input
    tuple and whose input is acked after process() returns."""

    def initialize(self, stormconf, context):
        """Hook called once by run() with the conf and context; no-op by default."""
        pass

    def process(self, tuple):
        """Hook called by run() for every tuple read; no-op by default."""
        pass

    def run(self):
        """Set MODE to Bolt, initialize the component, then loop: read a tuple,
        store it in ANCHOR_TUPLE, call process() and ack the tuple.

        Any Exception ends the loop and its traceback is sent through log().
        """
        global MODE
        MODE = Bolt
        global ANCHOR_TUPLE
        conf, context = initComponent()
        self.initialize(conf, context)
        try:
            while True:
                tup = readTuple()
                ANCHOR_TUPLE = tup
                self.process(tup)
                ack(tup)
        except Exception:
            # format_exc() takes an optional depth limit, not the exception object
            log(traceback.format_exc())
176
+
177
class Spout:
    """Base class for spouts: subclasses override initialize(), nextTuple(),
    ack() and fail()."""

    def initialize(self, conf, context):
        """Hook called once by run() with the conf and context; no-op by default."""
        pass

    def ack(self, id):
        """Hook called when an "ack" command arrives; no-op by default."""
        pass

    def fail(self, id):
        """Hook called when a "fail" command arrives; no-op by default."""
        pass

    def nextTuple(self):
        """Hook called when a "next" command arrives; no-op by default."""
        pass

    def run(self):
        """Set MODE to Spout, initialize the component, then loop: read a
        command, dispatch it to nextTuple()/ack()/fail() and send a sync.

        Any Exception ends the loop and its traceback is sent through log().
        """
        global MODE
        MODE = Spout
        conf, context = initComponent()
        self.initialize(conf, context)
        try:
            while True:
                msg = readCommand()
                command = msg["command"]
                if command == "next":
                    self.nextTuple()
                elif command == "ack":
                    self.ack(msg["id"])
                elif command == "fail":
                    self.fail(msg["id"])
                sync()
        except Exception:
            # format_exc() takes an optional depth limit, not the exception object
            log(traceback.format_exc())