kb-redstorm 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/CHANGELOG.md +74 -0
  2. data/LICENSE.md +13 -0
  3. data/README.md +375 -0
  4. data/Rakefile +11 -0
  5. data/bin/redstorm +15 -0
  6. data/examples/native/Gemfile +2 -0
  7. data/examples/native/cluster_word_count_topology.rb +25 -0
  8. data/examples/native/exclamation_bolt.rb +21 -0
  9. data/examples/native/local_exclamation_topology.rb +31 -0
  10. data/examples/native/local_exclamation_topology2.rb +48 -0
  11. data/examples/native/local_redis_word_count_topology.rb +69 -0
  12. data/examples/native/local_word_count_topology.rb +27 -0
  13. data/examples/native/random_sentence_spout.rb +30 -0
  14. data/examples/native/split_sentence_bolt.rb +20 -0
  15. data/examples/native/word_count_bolt.rb +26 -0
  16. data/examples/shell/resources/splitsentence.py +9 -0
  17. data/examples/shell/resources/storm.py +206 -0
  18. data/examples/shell/shell_topology.rb +41 -0
  19. data/examples/simple/exclamation_bolt.rb +10 -0
  20. data/examples/simple/exclamation_topology.rb +45 -0
  21. data/examples/simple/exclamation_topology2.rb +45 -0
  22. data/examples/simple/kafka_topology.rb +55 -0
  23. data/examples/simple/random_sentence_spout.rb +21 -0
  24. data/examples/simple/redis_word_count_topology.rb +61 -0
  25. data/examples/simple/ruby_version_topology.rb +32 -0
  26. data/examples/simple/split_sentence_bolt.rb +33 -0
  27. data/examples/simple/word_count_bolt.rb +19 -0
  28. data/examples/simple/word_count_topology.rb +38 -0
  29. data/ivy/settings.xml +11 -0
  30. data/lib/red_storm.rb +9 -0
  31. data/lib/red_storm/application.rb +85 -0
  32. data/lib/red_storm/configuration.rb +16 -0
  33. data/lib/red_storm/configurator.rb +26 -0
  34. data/lib/red_storm/environment.rb +41 -0
  35. data/lib/red_storm/loggable.rb +15 -0
  36. data/lib/red_storm/proxy/batch_spout.rb +71 -0
  37. data/lib/red_storm/proxy/bolt.rb +63 -0
  38. data/lib/red_storm/proxy/proxy_function.rb +48 -0
  39. data/lib/red_storm/proxy/spout.rb +87 -0
  40. data/lib/red_storm/simple_bolt.rb +135 -0
  41. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  42. data/lib/red_storm/simple_spout.rb +184 -0
  43. data/lib/red_storm/simple_topology.rb +209 -0
  44. data/lib/red_storm/topology_launcher.rb +54 -0
  45. data/lib/red_storm/version.rb +3 -0
  46. data/lib/tasks/red_storm.rake +272 -0
  47. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +89 -0
  48. data/src/main/redstorm/storm/jruby/JRubyBolt.java +88 -0
  49. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +59 -0
  50. data/src/main/redstorm/storm/jruby/JRubyShellBolt.java +26 -0
  51. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +26 -0
  52. data/src/main/redstorm/storm/jruby/JRubySpout.java +107 -0
  53. metadata +134 -0
data/Rakefile ADDED
@@ -0,0 +1,11 @@
require 'rspec/core/rake_task'

# Defines the :spec task. The block passed to the task prints the Ruby
# interpreter version and defines RedStorm::SPECS_CONTEXT.
RSpec::Core::RakeTask.new(:spec) do
  system('ruby -v')

  module RedStorm
    SPECS_CONTEXT = true
  end
end

# A bare `rake` invocation runs the :spec task.
task :default => :spec

# Load the RedStorm rake tasks shipped under lib/tasks.
load 'lib/tasks/red_storm.rake'
data/bin/redstorm ADDED
@@ -0,0 +1,15 @@
#!/usr/bin/env ruby

require 'rubygems'
require 'rake'

# Load the application entry point. The first require succeeds when running
# from the installed gem (lib is on the gem's require_paths); the fallback
# adds ./lib to the load path for use inside the RedStorm dev project.
begin
  require 'red_storm/application'
rescue LoadError
  $LOAD_PATH.unshift('./lib')
  require 'red_storm/application'
end

# Hand a copy of the command-line arguments to the application.
arguments = ARGV.dup
RedStorm::Application.run(arguments)
@@ -0,0 +1,2 @@
# Gem dependencies for the native examples.
# The explicit HTTPS URL replaces the deprecated `source :rubygems` symbol form.
source 'https://rubygems.org'

# Needed by the Redis word-count example topology.
gem 'redis'
@@ -0,0 +1,25 @@
1
+ require 'red_storm'
2
+ require 'examples/native/random_sentence_spout'
3
+ require 'examples/native/split_sentence_bolt'
4
+ require 'examples/native/word_count_bolt'
5
+
module RedStorm
  module Examples
    # Word-count topology submitted through StormSubmitter:
    # RandomSentenceSpout -> SplitSentenceBolt -> WordCountBolt.
    class ClusterWordCountTopology
      # Register this class as the topology class in the RedStorm configuration.
      RedStorm::Configuration.topology_class = self

      # Builds the topology and submits it under the name "word_count".
      #
      # base_class_path - passed unchanged to every JRubySpout/JRubyBolt proxy.
      # env             - not used by this topology.
      def start(base_class_path, env)
        sentence_spout = JRubySpout.new(base_class_path, "RedStorm::Examples::RandomSentenceSpout")
        split_bolt = JRubyBolt.new(base_class_path, "RedStorm::Examples::SplitSentenceBolt")
        count_bolt = JRubyBolt.new(base_class_path, "RedStorm::Examples::WordCountBolt")

        builder = TopologyBuilder.new
        builder.setSpout('RandomSentenceSpout', sentence_spout, 5)
        builder.setBolt('SplitSentenceBolt', split_bolt, 4).shuffleGrouping('RandomSentenceSpout')
        # Group on the "word" field so a given word always reaches the same counter.
        builder.setBolt('WordCountBolt', count_bolt, 4).fieldsGrouping('SplitSentenceBolt', Fields.new("word"))

        conf = Backtype::Config.new
        conf.setDebug(true)
        conf.setNumWorkers(20)
        conf.setMaxSpoutPending(1000)

        StormSubmitter.submitTopology("word_count", conf, builder.createTopology)
      end
    end
  end
end
@@ -0,0 +1,21 @@
module RedStorm
  module Examples
    # Bolt that re-emits the first field of each tuple with "!!!" appended.
    class ExclamationBolt
      # Stores the output collector for later use in #execute.
      def prepare(conf, context, collector)
        @collector = collector
      end

      # Emits the exclaimed string, passing the incoming tuple as the first
      # emit argument, then acks the incoming tuple.
      def execute(tuple)
        exclaimed = tuple.getString(0) + "!!!"
        @collector.emit(tuple, Values.new(exclaimed))
        @collector.ack(tuple)
      end

      # This bolt has no component-specific configuration; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+
3
+ require 'lib/red_storm'
4
+ require 'examples/native/exclamation_bolt'
5
+
6
+ # this example topology uses the Storm TestWordSpout and our own JRuby ExclamationBolt
7
+
module RedStorm
  module Examples
    # Feeds the Storm TestWordSpout through two chained ExclamationBolt
    # stages on a LocalCluster.
    class LocalExclamationTopology
      # Register this class as the topology class in the RedStorm configuration.
      RedStorm::Configuration.topology_class = self

      # Builds the topology, runs it locally for five seconds, then kills it
      # and shuts the local cluster down.
      #
      # base_class_path - passed unchanged to the JRubyBolt proxies.
      # env             - not used by this topology.
      def start(base_class_path, env)
        bolt_class = 'RedStorm::Examples::ExclamationBolt'
        first_stage = JRubyBolt.new(base_class_path, bolt_class)
        second_stage = JRubyBolt.new(base_class_path, bolt_class)

        builder = TopologyBuilder.new
        builder.setSpout('TestWordSpout', TestWordSpout.new, 10)
        builder.setBolt('ExclamationBolt1', first_stage, 3).shuffleGrouping('TestWordSpout')
        builder.setBolt('ExclamationBolt2', second_stage, 3).shuffleGrouping('ExclamationBolt1')

        conf = Backtype::Config.new
        conf.setDebug(true)

        topology_name = "exclamation"
        cluster = LocalCluster.new
        cluster.submitTopology(topology_name, conf, builder.createTopology)
        sleep(5)
        cluster.killTopology(topology_name)
        cluster.shutdown
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ java_import 'backtype.storm.testing.TestWordSpout'
2
+
3
+ require 'lib/red_storm'
4
+
module RedStorm
  module Examples
    # Bolt that wraps the first field of each tuple in exclamation marks.
    class ExclamationBolt2
      # Stores the output collector for later use in #execute.
      def prepare(conf, context, collector)
        @collector = collector
      end

      # Emits "!<value>!", passing the incoming tuple as the first emit
      # argument, then acks the incoming tuple.
      def execute(tuple)
        wrapped = "!#{tuple.getString(0)}!"
        @collector.emit(tuple, Values.new(wrapped))
        @collector.ack(tuple)
      end

      # This bolt has no component-specific configuration; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end
    end

    # This example topology uses the Storm TestWordSpout and the JRuby
    # ExclamationBolt2 defined above, chained twice.
    class LocalExclamationTopology2
      # Register this class as the topology class in the RedStorm configuration.
      RedStorm::Configuration.topology_class = self

      # Builds the topology, runs it on a LocalCluster for five seconds, then
      # kills it and shuts the cluster down.
      #
      # base_class_path - passed unchanged to the JRubyBolt proxies.
      # env             - not used by this topology.
      def start(base_class_path, env)
        first_stage = JRubyBolt.new(base_class_path, "RedStorm::Examples::ExclamationBolt2")
        second_stage = JRubyBolt.new(base_class_path, "RedStorm::Examples::ExclamationBolt2")

        builder = TopologyBuilder.new
        builder.setSpout('TestWordSpout', TestWordSpout.new, 10)
        builder.setBolt('ExclamationBolt21', first_stage, 3).shuffleGrouping('TestWordSpout')
        builder.setBolt('ExclamationBolt22', second_stage, 2).shuffleGrouping('ExclamationBolt21')

        conf = Backtype::Config.new
        conf.setDebug(true)

        topology_name = "exclamation"
        cluster = LocalCluster.new
        cluster.submitTopology(topology_name, conf, builder.createTopology)
        sleep(5)
        cluster.killTopology(topology_name)
        cluster.shutdown
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
+ require 'bundler/setup'
2
+ require 'redis'
3
+ require 'thread'
4
+ require 'lib/red_storm'
5
+ require 'examples/native/word_count_bolt'
6
+
module RedStorm
  module Examples
    # RedisWordSpout reads the Redis list "test" on localhost:6379 and emits
    # each item popped from it as a one-field tuple.
    class RedisWordSpout
      # Stores the collector, creates the in-process queue and starts the
      # background thread that fills it from Redis.
      def open(conf, context, collector)
        @collector = collector
        @q = Queue.new
        @redis_reader = detach_redis_reader
      end

      # Per the Storm docs nextTuple should not block, so when the local
      # queue is empty this sleeps briefly rather than waiting on it.
      def next_tuple
        if @q.empty?
          sleep(0.1)
        else
          @collector.emit(Values.new(@q.pop))
        end
      end

      # This spout has no component-specific configuration; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end

      private

      # Starts and returns a thread that loops on BLPOP against the "test"
      # list (timeout 0) and pushes each popped value onto @q. The thread has
      # abort_on_exception set, so a failure in it is not silently dropped.
      def detach_redis_reader
        Thread.new do
          Thread.current.abort_on_exception = true

          redis = Redis.new(:host => "localhost", :port => 6379)
          loop do
            data = redis.blpop("test", 0)
            # data[1] is pushed as the word; data[0] is presumably the list name.
            @q << data[1] if data
          end
        end
      end
    end

    # Local topology that counts the words delivered by RedisWordSpout.
    class LocalRedisWordCountTopology
      # Register this class as the topology class in the RedStorm configuration.
      RedStorm::Configuration.topology_class = self

      # Builds the topology, runs it on a LocalCluster for 600 seconds and
      # then shuts the cluster down.
      #
      # base_class_path - passed unchanged to the JRubySpout/JRubyBolt proxies.
      # env             - not used by this topology.
      def start(base_class_path, env)
        word_spout = JRubySpout.new(base_class_path, "RedStorm::Examples::RedisWordSpout")
        count_bolt = JRubyBolt.new(base_class_path, "RedStorm::Examples::WordCountBolt")

        builder = TopologyBuilder.new
        builder.setSpout('RedisWordSpout', word_spout, 1)
        builder.setBolt('WordCountBolt', count_bolt, 3).fieldsGrouping('RedisWordSpout', Fields.new("word"))

        conf = Backtype::Config.new
        conf.setDebug(true)
        conf.setMaxTaskParallelism(3)

        cluster = LocalCluster.new
        cluster.submitTopology("redis_word_count", conf, builder.createTopology)
        sleep(600)
        cluster.shutdown
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'lib/red_storm'
2
+ require 'examples/native/random_sentence_spout'
3
+ require 'examples/native/split_sentence_bolt'
4
+ require 'examples/native/word_count_bolt'
5
+
6
+
module Examples
  # Word-count topology run on a LocalCluster:
  # RandomSentenceSpout -> SplitSentenceBolt -> WordCountBolt.
  class LocalWordCountTopology
    # Register this class as the topology class in the RedStorm configuration.
    RedStorm::Configuration.topology_class = self

    # Builds the topology, runs it locally for five seconds and then shuts
    # the cluster down.
    #
    # base_class_path - passed unchanged to the JRubySpout/JRubyBolt proxies.
    # env             - not used by this topology.
    def start(base_class_path, env)
      sentence_spout = JRubySpout.new(base_class_path, "RedStorm::Examples::RandomSentenceSpout")
      split_bolt = JRubyBolt.new(base_class_path, "RedStorm::Examples::SplitSentenceBolt")
      count_bolt = JRubyBolt.new(base_class_path, "RedStorm::Examples::WordCountBolt")

      builder = TopologyBuilder.new
      builder.setSpout('RandomSentenceSpout', sentence_spout, 5)
      builder.setBolt('SplitSentenceBolt', split_bolt, 8).shuffleGrouping('RandomSentenceSpout')
      builder.setBolt('WordCountBolt', count_bolt, 12).fieldsGrouping('SplitSentenceBolt', Fields.new("word"))

      conf = Backtype::Config.new
      conf.setDebug(true)
      conf.setMaxTaskParallelism(3)

      cluster = LocalCluster.new
      cluster.submitTopology("word_count", conf, builder.createTopology)
      sleep(5)
      cluster.shutdown
    end
  end
end
@@ -0,0 +1,30 @@
module RedStorm
  module Examples
    # Spout that emits one randomly chosen sentence from a fixed list on
    # every next_tuple call.
    class RandomSentenceSpout
      # Sets up the fixed pool of sentences to pick from.
      def initialize
        @sentences = [
          "the cow jumped over the moon",
          "an apple a day keeps the doctor away",
          "four score and seven years ago",
          "snow white and the seven dwarfs",
          "i am at two with nature"
        ]
      end

      # Stores the output collector for later use in #next_tuple.
      def open(conf, context, collector)
        @collector = collector
      end

      # Picks a random index into the sentence list and emits that sentence.
      def next_tuple
        index = rand(@sentences.length)
        @collector.emit(Values.new(@sentences[index]))
      end

      # This spout has no component-specific configuration; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end
    end
  end
end
@@ -0,0 +1,20 @@
module RedStorm
  module Examples
    # Bolt that splits the first field of each tuple on single spaces and
    # emits every resulting word as its own tuple.
    class SplitSentenceBolt
      # Stores the output collector for later use in #execute.
      def prepare(conf, context, collector)
        @collector = collector
      end

      # Emits one tuple per word; the incoming tuple is neither passed to
      # emit nor acked here.
      def execute(tuple)
        words = tuple.getString(0).split(" ")
        words.each do |word|
          @collector.emit(Values.new(word))
        end
      end

      # This bolt has no component-specific configuration; returns nil.
      def get_component_configuration
        nil
      end

      # Declares a single output field named "word".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word")
        declarer.declare(output_fields)
      end
    end
  end
end
@@ -0,0 +1,26 @@
module RedStorm
  module Examples
    # Bolt that keeps an in-memory count per word and emits the updated
    # (word, count) pair for every incoming tuple.
    class WordCountBolt
      # Counts start at zero for words not seen before.
      def initialize
        @counts = Hash.new(0)
      end

      # Stores the output collector for later use in #execute.
      def prepare(conf, context, collector)
        @collector = collector
      end

      # Increments the count of the tuple's first field and emits the word
      # together with its new count.
      def execute(tuple)
        word = tuple.getString(0)
        total = (@counts[word] += 1)
        @collector.emit(Values.new(word, total))
      end

      # This bolt has no component-specific configuration; returns nil.
      def get_component_configuration
        nil
      end

      # Declares the two output fields "word" and "count".
      def declare_output_fields(declarer)
        output_fields = Fields.new("word", "count")
        declarer.declare(output_fields)
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ import storm
2
+
class SplitSentenceBolt(storm.BasicBolt):
    """Bolt that splits the first value of each tuple on single spaces."""

    def process(self, tup):
        """Emit every word of the incoming sentence as its own one-field tuple."""
        sentence = tup.values[0]
        for word in sentence.split(" "):
            storm.emit([word])
8
+
9
+ SplitSentenceBolt().run()
@@ -0,0 +1,206 @@
1
+ import sys
2
+ import os
3
+ import traceback
4
+ from collections import deque
5
+
6
+ try:
7
+ import simplejson as json
8
+ except ImportError:
9
+ import json
10
+
11
+ json_encode = lambda x: json.dumps(x)
12
+ json_decode = lambda x: json.loads(x)
13
+
# Reads lines up to the "end" sentinel and reconstructs the newlines between them.
def readMsg():
    """Read one message from stdin and return it decoded from JSON.

    A message is every line up to (not including) a line that is exactly
    "end"; the lines are re-joined with newlines before decoding.

    Raises:
        Exception: if stdin reaches EOF before the "end" sentinel. Without
            this check ``readline()`` keeps returning "" forever and the
            loop would never terminate once the parent closes the pipe.
    """
    lines = []
    while True:
        raw = sys.stdin.readline()
        if not raw:
            raise Exception("Read EOF from stdin")
        line = raw[0:-1]  # drop the trailing newline
        if line == "end":
            break
        lines.append(line)
    return json_decode("\n".join(lines))
23
+
24
+ MODE = None
25
+ ANCHOR_TUPLE = None
26
+
27
+ #queue up commands we read while trying to read taskids
28
+ pending_commands = deque()
29
+
def readTaskIds():
    """Return the next task-id list.

    A list queued earlier in pending_taskids is returned first. Otherwise
    messages are read until a list arrives; every non-list message read on
    the way is queued in pending_commands.
    """
    if pending_taskids:
        return pending_taskids.popleft()
    while True:
        msg = readMsg()
        if type(msg) is list:
            return msg
        pending_commands.append(msg)
39
+
40
+ #queue up taskids we read while trying to read commands/tuples
41
+ pending_taskids = deque()
42
+
def readCommand():
    """Return the next non-list message.

    A message queued earlier in pending_commands is returned first.
    Otherwise messages are read until a non-list arrives; every list read
    on the way is queued in pending_taskids.
    """
    if pending_commands:
        return pending_commands.popleft()
    while True:
        msg = readMsg()
        if type(msg) is not list:
            return msg
        pending_taskids.append(msg)
52
+
def readTuple():
    """Read the next command and wrap its fields in a Tuple."""
    cmd = readCommand()
    fields = [cmd[key] for key in ("id", "comp", "stream", "task", "tuple")]
    return Tuple(*fields)
56
+
def sendMsgToParent(msg):
    """Write msg to stdout as one JSON line followed by an "end" line, then flush."""
    out = sys.stdout
    out.write(json_encode(msg) + "\n")
    out.write("end\n")
    out.flush()
61
+
def sync():
    """Send a 'sync' command to the parent."""
    message = {'command':'sync'}
    sendMsgToParent(message)
64
+
def sendpid(heartbeatdir):
    """Report this process id to the parent and create an empty file named
    after the pid inside heartbeatdir."""
    pid = os.getpid()
    sendMsgToParent({'pid':pid})
    pid_path = heartbeatdir + "/" + str(pid)
    with open(pid_path, "w"):
        pass
69
+
def emit(*args, **kwargs):
    """Emit a tuple (arguments are forwarded to __emit) and return the
    task-id list read back afterwards."""
    __emit(*args, **kwargs)
    task_ids = readTaskIds()
    return task_ids
73
+
def emitDirect(task, *args, **kwargs):
    """Emit a tuple to one specific task.

    ``task`` is forwarded as the ``directTask`` keyword argument of
    emitBolt/emitSpout; the remaining arguments are passed through to
    __emit unchanged. Unlike emit(), no task-id list is read back.
    """
    # The key must be the string "directTask"; the bare name was undefined
    # and raised NameError on every call.
    kwargs["directTask"] = task
    __emit(*args, **kwargs)
77
+
def __emit(*args, **kwargs):
    """Dispatch to emitBolt or emitSpout depending on the current MODE.

    Does nothing when MODE is neither Bolt nor Spout.
    """
    if MODE == Bolt:
        emitBolt(*args, **kwargs)
        return
    if MODE == Spout:
        emitSpout(*args, **kwargs)
84
+
def emitBolt(tup, stream=None, anchors=[], directTask=None):
    """Send an "emit" command for a bolt.

    tup        -- the values to emit.
    stream     -- included as "stream" only when not None.
    anchors    -- tuples whose ids are sent as "anchors"; replaced by
                  [ANCHOR_TUPLE] whenever ANCHOR_TUPLE is set.
    directTask -- included as "task" only when not None.
    """
    if ANCHOR_TUPLE is not None:
        anchors = [ANCHOR_TUPLE]

    message = {"command": "emit"}
    if stream is not None:
        message["stream"] = stream
    message["anchors"] = [anchor.id for anchor in anchors]
    if directTask is not None:
        message["task"] = directTask
    message["tuple"] = tup
    sendMsgToParent(message)
97
+
def emitSpout(tup, stream=None, id=None, directTask=None):
    """Send an "emit" command for a spout.

    tup is always sent as "tuple"; id, stream and directTask are included
    (as "id", "stream" and "task") only when they are not None.
    """
    message = {"command": "emit"}
    optional_fields = (("id", id), ("stream", stream), ("task", directTask))
    for key, value in optional_fields:
        if value is not None:
            message[key] = value
    message["tuple"] = tup
    sendMsgToParent(message)
108
+
def ack(tup):
    """Send an "ack" command carrying the id of tup."""
    message = {"command": "ack", "id": tup.id}
    sendMsgToParent(message)
111
+
def fail(tup):
    """Send a "fail" command carrying the id of tup."""
    message = {"command": "fail", "id": tup.id}
    sendMsgToParent(message)
114
+
def log(msg):
    """Send a "log" command carrying msg to the parent."""
    message = {"command": "log", "msg": msg}
    sendMsgToParent(message)
117
+
def initComponent():
    """Read the setup message, report this process's pid using the message's
    'pidDir', and return [conf, context] taken from the message."""
    setup_info = readMsg()
    sendpid(setup_info['pidDir'])
    conf = setup_info['conf']
    context = setup_info['context']
    return [conf, context]
122
+
class Tuple:
    """Plain record holding the id, component, stream, task and values of a tuple."""

    def __init__(self, id, component, stream, task, values):
        self.id = id
        self.component = component
        self.stream = stream
        self.task = task
        self.values = values

    def __repr__(self):
        """Render as <ClassName attr=value ...> with attributes sorted by name."""
        attrs = vars(self)
        parts = [' %s=%r' % (name, attrs[name]) for name in sorted(attrs)]
        return '<%s%s>' % (self.__class__.__name__, ''.join(parts))
135
+
class Bolt:
    """Base class for bolts; subclasses override initialize() and process()."""

    def initialize(self, stormconf, context):
        """Hook called once by run() with the conf and context from setup."""
        pass

    def process(self, tuple):
        """Hook called by run() for every tuple read."""
        pass

    def run(self):
        """Set MODE to Bolt, initialize the component and process tuples
        until an exception occurs; the traceback is then sent via log()."""
        global MODE
        MODE = Bolt
        conf, context = initComponent()
        self.initialize(conf, context)
        try:
            while True:
                tup = readTuple()
                self.process(tup)
        except Exception:
            # format_exc() takes an optional frame limit, not the exception;
            # it formats the exception currently being handled.
            log(traceback.format_exc())
154
+
155
+ class BasicBolt:
156
+ def initialize(self, stormconf, context):
157
+ pass
158
+
159
+ def process(self, tuple):
160
+ pass
161
+
162
+ def run(self):
163
+ global MODE
164
+ MODE = Bolt
165
+ global ANCHOR_TUPLE
166
+ conf, context = initComponent()
167
+ self.initialize(conf, context)
168
+ try:
169
+ while True:
170
+ tup = readTuple()
171
+ ANCHOR_TUPLE = tup
172
+ self.process(tup)
173
+ ack(tup)
174
+ except Exception, e:
175
+ log(traceback.format_exc(e))
176
+
class Spout:
    """Base class for spouts; subclasses override initialize(), ack(),
    fail() and nextTuple()."""

    def initialize(self, conf, context):
        """Hook called once by run() with the conf and context from setup."""
        pass

    def ack(self, id):
        """Hook called when an "ack" command arrives for id."""
        pass

    def fail(self, id):
        """Hook called when a "fail" command arrives for id."""
        pass

    def nextTuple(self):
        """Hook called when a "next" command arrives."""
        pass

    def run(self):
        """Set MODE to Spout, initialize the component, then loop: read a
        command, dispatch it to nextTuple/ack/fail and send a sync. The loop
        ends on the first exception, whose traceback is sent via log()."""
        global MODE
        MODE = Spout
        conf, context = initComponent()
        self.initialize(conf, context)
        try:
            while True:
                msg = readCommand()
                command = msg["command"]
                if command == "next":
                    self.nextTuple()
                elif command == "ack":
                    self.ack(msg["id"])
                elif command == "fail":
                    self.fail(msg["id"])
                # A sync is sent after every command, recognised or not.
                sync()
        except Exception:
            # format_exc() takes an optional frame limit, not the exception;
            # it formats the exception currently being handled.
            log(traceback.format_exc())