kb-redstorm 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/CHANGELOG.md +74 -0
  2. data/LICENSE.md +13 -0
  3. data/README.md +375 -0
  4. data/Rakefile +11 -0
  5. data/bin/redstorm +15 -0
  6. data/examples/native/Gemfile +2 -0
  7. data/examples/native/cluster_word_count_topology.rb +25 -0
  8. data/examples/native/exclamation_bolt.rb +21 -0
  9. data/examples/native/local_exclamation_topology.rb +31 -0
  10. data/examples/native/local_exclamation_topology2.rb +48 -0
  11. data/examples/native/local_redis_word_count_topology.rb +69 -0
  12. data/examples/native/local_word_count_topology.rb +27 -0
  13. data/examples/native/random_sentence_spout.rb +30 -0
  14. data/examples/native/split_sentence_bolt.rb +20 -0
  15. data/examples/native/word_count_bolt.rb +26 -0
  16. data/examples/shell/resources/splitsentence.py +9 -0
  17. data/examples/shell/resources/storm.py +206 -0
  18. data/examples/shell/shell_topology.rb +41 -0
  19. data/examples/simple/exclamation_bolt.rb +10 -0
  20. data/examples/simple/exclamation_topology.rb +45 -0
  21. data/examples/simple/exclamation_topology2.rb +45 -0
  22. data/examples/simple/kafka_topology.rb +55 -0
  23. data/examples/simple/random_sentence_spout.rb +21 -0
  24. data/examples/simple/redis_word_count_topology.rb +61 -0
  25. data/examples/simple/ruby_version_topology.rb +32 -0
  26. data/examples/simple/split_sentence_bolt.rb +33 -0
  27. data/examples/simple/word_count_bolt.rb +19 -0
  28. data/examples/simple/word_count_topology.rb +38 -0
  29. data/ivy/settings.xml +11 -0
  30. data/lib/red_storm.rb +9 -0
  31. data/lib/red_storm/application.rb +85 -0
  32. data/lib/red_storm/configuration.rb +16 -0
  33. data/lib/red_storm/configurator.rb +26 -0
  34. data/lib/red_storm/environment.rb +41 -0
  35. data/lib/red_storm/loggable.rb +15 -0
  36. data/lib/red_storm/proxy/batch_spout.rb +71 -0
  37. data/lib/red_storm/proxy/bolt.rb +63 -0
  38. data/lib/red_storm/proxy/proxy_function.rb +48 -0
  39. data/lib/red_storm/proxy/spout.rb +87 -0
  40. data/lib/red_storm/simple_bolt.rb +135 -0
  41. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  42. data/lib/red_storm/simple_spout.rb +184 -0
  43. data/lib/red_storm/simple_topology.rb +209 -0
  44. data/lib/red_storm/topology_launcher.rb +54 -0
  45. data/lib/red_storm/version.rb +3 -0
  46. data/lib/tasks/red_storm.rake +272 -0
  47. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +89 -0
  48. data/src/main/redstorm/storm/jruby/JRubyBolt.java +88 -0
  49. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +59 -0
  50. data/src/main/redstorm/storm/jruby/JRubyShellBolt.java +26 -0
  51. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +26 -0
  52. data/src/main/redstorm/storm/jruby/JRubySpout.java +107 -0
  53. metadata +134 -0
@@ -0,0 +1,87 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+ module RedStorm
6
+
7
# Bolt definition used for the bolts of a linear DRPC topology. It has no
# named sources: it carries a single grouping which #define_grouping applies
# to the declarer it is given.
class InputBoltDefinition < SimpleTopology::BoltDefinition
  attr_writer :grouping

  def initialize(*args)
    super
    @grouping = :none
  end

  # DSL accessor for the grouping.
  #
  # Without argument, returns the current grouping. With one argument,
  # records it: either a bare grouper symbol (:shuffle, :global, ...) or a
  # single-entry hash mapping the grouper to its parameters, in the same
  # shape BoltDefinition#source accepts, e.g. {:fields => ["word"]}.
  #
  # Raises ArgumentError when more than one argument is given.
  def grouping(*args)
    return @grouping if args.empty?
    raise(ArgumentError, "wrong number of arguments (#{args.size} for 0..1)") if args.size > 1
    @grouping = args.first
  end

  # Applies the configured grouping to +declarer+.
  #
  # Raises RuntimeError when the grouper is not a supported symbol.
  def define_grouping(declarer)
    # normalize both accepted shapes into a grouper symbol and its parameters
    grouper, params = @grouping.is_a?(Hash) ? @grouping.first : [@grouping, nil]

    case grouper
    when :fields
      declarer.fieldsGrouping(Fields.new(*([params].flatten.map(&:to_s))))
    when :global
      declarer.globalGrouping()
    when :shuffle
      declarer.shuffleGrouping()
    when :local_or_shuffle
      declarer.localOrShuffleGrouping()
    when :none
      declarer.noneGrouping()
    when :all
      declarer.allGrouping()
    when :direct
      declarer.directGrouping()
    else
      raise("unknown grouper=#{grouper.inspect}")
    end
  end
end
41
+
42
+ class SimpleDRPCTopology < SimpleTopology
43
+
44
# Spouts cannot be declared on a DRPC topology: this always raises
# TopologyDefinitionError. Arguments and block are accepted and ignored so
# that a regular `spout SomeClass, ...` declaration reports that error
# instead of an ArgumentError.
def self.spout(*_args, &_block)
  raise TopologyDefinitionError, "DRPC spout is already defined"
end
47
+
48
# Builds the linear DRPC topology from the declared bolts and submits it.
#
# base_class_path - handed to each bolt definition's new_instance
# env             - :local submits to a new LocalCluster (kept in @cluster)
#                   together with a new LocalDRPC; any other value submits
#                   through StormSubmitter
#
# Once submitted, the on_submit block runs in the context of this instance
# with env and the LocalDRPC (nil when not local).
def start(base_class_path, env)
  topology_class = self.class
  builder = Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder.new(topology_class.topology_name)

  topology_class.bolts.each do |bolt|
    declarer = builder.addBolt(bolt.new_instance(base_class_path), bolt.parallelism.to_java)
    declarer.addConfigurations(bolt.config)
    bolt.define_grouping(declarer)
  end

  # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
  defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

  configurator = Configurator.new(defaults)
  configurator.instance_exec(env, &topology_class.configure_block)

  drpc = nil
  if env == :local
    drpc = LocalDRPC.new
    @cluster = LocalCluster.new
    @cluster.submitTopology(topology_class.topology_name, configurator.config, builder.createLocalTopology(drpc))
  else
    StormSubmitter.submitTopology(topology_class.topology_name, configurator.config, builder.createRemoteTopology)
  end

  # Lambdas enforce their arity, and the default / method-name submit blocks
  # are one-argument lambdas: only pass as many of (env, drpc) as a lambda
  # declares. Plain blocks keep receiving both.
  submit_block = topology_class.submit_block
  submit_args = [env, drpc]
  if submit_block.respond_to?(:lambda?) && submit_block.lambda? && submit_block.arity >= 0
    submit_args = submit_args.first(submit_block.arity)
  end
  instance_exec(*submit_args, &submit_block)
end
74
+
75
# Declares a bolt of the DRPC topology.
#
# bolt_class - bolt class; its underscored name is the default id
# args       - optional constructor arguments (the last positional argument
#              left once the options are removed), optionally followed by an
#              options hash; :id and :parallelism are read from it
# bolt_block - required definition body, run in the context of the new
#              InputBoltDefinition
#
# Raises TopologyDefinitionError when no block is given.
def self.input_bolt(bolt_class, *args, &bolt_block)
  options = args.last.is_a?(Hash) ? args.pop : {}
  ctor_args = args.empty? ? [] : args.pop
  defaults = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}
  settings = defaults.merge(options)

  definition = InputBoltDefinition.new(bolt_class, ctor_args, settings[:id], settings[:parallelism])
  unless block_given?
    raise(TopologyDefinitionError, "#{definition.clazz.name}, #{definition.id}, bolt definition body required")
  end

  definition.instance_exec(&bolt_block)
  self.components << definition
end
85
+ end
86
+
87
+ end
@@ -0,0 +1,184 @@
1
+ require 'java'
2
+ require 'red_storm/configurator'
3
+
4
+ module RedStorm
5
+
6
# Base class for spouts written with the class-level DSL below. Subclasses
# declare their output fields and callbacks at class level; the instance
# methods of the "spout proxy interface" then run those callbacks in the
# context of the spout instance.
class SimpleSpout
  attr_reader :config, :context, :collector

  class << self
    # DSL class methods

    # Registers the block later evaluated against a Configurator by
    # #get_component_configuration.
    def configure(&configure_block)
      @configure_block = configure_block || lambda {}
    end

    def log
      @log ||= Java::OrgApacheLog4j::Logger.getLogger(name)
    end

    # Declares the output field names; they are stored as strings.
    def output_fields(*fields)
      @fields = fields.map { |field| field.to_s }
    end

    # Registers what #next_tuple runs: the given block, else the instance
    # method named by the first argument, else :on_send. A trailing hash is
    # merged into the send options (:emit, :reliable).
    def on_send(*args, &on_send_block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      method_name = args.first

      send_options.merge!(options)
      @on_send_block = on_send_block || lambda { send(method_name || :on_send) }
    end

    # The remaining on_* methods register a callback the same way: the given
    # block, else the named instance method, else the method of the same
    # name as the DSL call.

    def on_init(method_name = nil, &on_init_block)
      @on_init_block = on_init_block || lambda { send(method_name || :on_init) }
    end

    def on_close(method_name = nil, &on_close_block)
      @on_close_block = on_close_block || lambda { send(method_name || :on_close) }
    end

    def on_activate(method_name = nil, &on_activate_block)
      @on_activate_block = on_activate_block || lambda { send(method_name || :on_activate) }
    end

    def on_deactivate(method_name = nil, &on_deactivate_block)
      @on_deactivate_block = on_deactivate_block || lambda { send(method_name || :on_deactivate) }
    end

    def on_ack(method_name = nil, &on_ack_block)
      @on_ack_block = on_ack_block || lambda { |msg_id| send(method_name || :on_ack, msg_id) }
    end

    def on_fail(method_name = nil, &on_fail_block)
      @on_fail_block = on_fail_block || lambda { |msg_id| send(method_name || :on_fail, msg_id) }
    end

    # Readers for the class-level DSL state, each with its default. They are
    # called with an explicit receiver from instances, so they are public.

    def fields
      @fields ||= []
    end

    def configure_block
      @configure_block ||= lambda {}
    end

    def on_send_block
      @on_send_block ||= lambda { send(:on_send) }
    end

    def on_init_block
      @on_init_block ||= lambda { send(:on_init) }
    end

    def on_close_block
      @on_close_block ||= lambda { send(:on_close) }
    end

    def on_activate_block
      @on_activate_block ||= lambda { send(:on_activate) }
    end

    def on_deactivate_block
      @on_deactivate_block ||= lambda { send(:on_deactivate) }
    end

    def on_ack_block
      @on_ack_block ||= lambda { |msg_id| send(:on_ack, msg_id) }
    end

    def on_fail_block
      @on_fail_block ||= lambda { |msg_id| send(:on_fail, msg_id) }
    end

    def send_options
      @send_options ||= {:emit => true, :reliable => false}
    end

    def emit?
      !!send_options[:emit]
    end

    def reliable?
      !!send_options[:reliable]
    end
  end

  # DSL instance methods

  # Emits +values+ through the collector together with +message_id+.
  def reliable_emit(message_id, *values)
    tuple = Values.new(*values)
    @collector.emit(tuple, message_id)
  end

  # Emits +values+ through the collector without a message id.
  def unreliable_emit(*values)
    tuple = Values.new(*values)
    @collector.emit(tuple)
  end
  alias_method :emit, :unreliable_emit

  def log
    self.class.log
  end

  # Spout proxy interface

  # Runs the on_send callback. When the :emit option is on, a truthy result
  # is flattened and emitted (in reliable mode its first element is the
  # message id); a nil/false result sleeps 0.1s instead.
  def next_tuple
    produced = instance_exec(&self.class.on_send_block)
    return unless self.class.emit?
    return sleep(0.1) unless produced

    tuple = [produced].flatten
    if self.class.reliable?
      message_id, *payload = tuple
      reliable_emit(message_id, *payload)
    else
      unreliable_emit(*tuple)
    end
  end

  # Stores the three arguments, then runs the on_init callback.
  def open(config, context, collector)
    @config = config
    @context = context
    @collector = collector
    instance_exec(&self.class.on_init_block)
  end

  def close
    instance_exec(&self.class.on_close_block)
  end

  def activate
    instance_exec(&self.class.on_activate_block)
  end

  def deactivate
    instance_exec(&self.class.on_deactivate_block)
  end

  def declare_output_fields(declarer)
    declared = Fields.new(self.class.fields)
    declarer.declare(declared)
  end

  def ack(msg_id)
    instance_exec(msg_id, &self.class.on_ack_block)
  end

  def fail(msg_id)
    instance_exec(msg_id, &self.class.on_fail_block)
  end

  # Evaluates the configure block against a new Configurator and returns the
  # resulting config.
  def get_component_configuration
    configurator = Configurator.new
    configurator.instance_exec(&self.class.configure_block)
    configurator.config
  end

  private

  # default optional noop dsl methods/callbacks
  def on_init; end
  def on_close; end
  def on_activate; end
  def on_deactivate; end
  def on_ack(msg_id); end
  def on_fail(msg_id); end
end
184
+ end
@@ -0,0 +1,209 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+
6
+ module RedStorm
7
+
8
# Error raised by the topology DSL for an invalid declaration.
class TopologyDefinitionError < StandardError
end
9
+
10
+ class SimpleTopology
11
+ attr_reader :cluster # LocalCluster reference usable in on_submit block, for example
12
+
13
+ DEFAULT_SPOUT_PARALLELISM = 1
14
+ DEFAULT_BOLT_PARALLELISM = 1
15
+
16
# State shared by spout and bolt definitions: the component class, its
# constructor arguments, id, parallelism and declared output fields.
class ComponentDefinition < Configurator
  attr_reader :clazz, :constructor_args, :parallelism
  attr_accessor :id # the constructor stores the id as a string

  def initialize(component_class, constructor_args, id, parallelism)
    super()
    @clazz, @constructor_args = component_class, constructor_args
    @id = id.to_s
    @parallelism = parallelism
    @output_fields = []
  end

  # Without arguments, returns the declared output fields; with arguments,
  # replaces them with the arguments converted to strings.
  def output_fields(*args)
    return @output_fields if args.empty?
    @output_fields = args.map { |field| field.to_s }
  end

  # True when the first namespace of the component class name is "java",
  # compared case-insensitively.
  def is_java?
    top_namespace = @clazz.name.split('::').first
    top_namespace.downcase == 'java'
  end
end
37
+
38
# Definition of a spout declared in a topology.
class SpoutDefinition < ComponentDefinition

  # Instantiates the spout object handed to the topology builder.
  # WARNING non-dry see BoltDefinition#new_instance
  #
  # The shell spout receives the constructor arguments as one array plus the
  # output fields, any other Java class receives the constructor arguments
  # splatted, and everything else is wrapped in a JRubySpout.
  def new_instance(base_class_path)
    if @clazz.name == "Java::RedstormStormJruby::JRubyShellSpout"
      return @clazz.new(constructor_args, @output_fields)
    end
    return @clazz.new(*constructor_args) if is_java?

    JRubySpout.new(base_class_path, @clazz.name, @output_fields)
  end
end
52
+
53
# Definition of a bolt declared in a topology, together with the sources it
# consumes and the grouping used for each of them.
class BoltDefinition < ComponentDefinition
  attr_accessor :sources, :command

  def initialize(*args)
    super
    @sources = []
  end

  # DSL: records one source. A Class source is stored under its underscored
  # name, anything else under its string form. A non-hash grouping is stored
  # as {grouping => nil}.
  def source(source_id, grouping)
    resolved_id = source_id.is_a?(Class) ? SimpleTopology.underscore(source_id) : source_id.to_s
    grouping_spec = grouping.is_a?(Hash) ? grouping : {grouping => nil}
    @sources << [resolved_id, grouping_spec]
  end

  # Declares on +declarer+ the grouping of every recorded source.
  #
  # Raises RuntimeError when a grouper is not a supported symbol.
  def define_grouping(declarer)
    @sources.each do |source_id, grouping|
      # only the first entry of the grouping hash is considered
      grouper, params = grouping.first

      case grouper
      when :fields then declarer.fieldsGrouping(source_id, Fields.new(*([params].flatten.map(&:to_s))))
      when :global then declarer.globalGrouping(source_id)
      when :shuffle then declarer.shuffleGrouping(source_id)
      when :local_or_shuffle then declarer.localOrShuffleGrouping(source_id)
      when :none then declarer.noneGrouping(source_id)
      when :all then declarer.allGrouping(source_id)
      when :direct then declarer.directGrouping(source_id)
      else raise("unknown grouper=#{grouper.inspect}")
      end
    end
  end

  # Instantiates the bolt object handed to the topology builder.
  # WARNING non-dry see SpoutDefinition#new_instance
  #
  # The shell bolt receives the constructor arguments as one array plus the
  # output fields, any other Java class receives the constructor arguments
  # splatted, and everything else is wrapped in a JRubyBolt.
  def new_instance(base_class_path)
    if @clazz.name == "Java::RedstormStormJruby::JRubyShellBolt"
      return @clazz.new(constructor_args, @output_fields)
    end
    return @clazz.new(*constructor_args) if is_java?

    JRubyBolt.new(base_class_path, @clazz.name, @output_fields)
  end
end
102
+
103
# Returns the log4j logger named after this class, created on first use.
def self.log
  return @log if @log
  @log = Java::OrgApacheLog4j::Logger.getLogger(self.name)
end
106
+
107
+ # def self.spout(spout_class, contructor_args = [], options = {}, &spout_block)
108
# DSL: declares a spout.
#
# spout_class - spout class; its underscored name is the default id
# args        - optional constructor arguments (the last positional argument
#               left once the options are removed), optionally followed by
#               an options hash; :id and :parallelism are read from it
# spout_block - optional body, run in the context of the new SpoutDefinition
def self.spout(spout_class, *args, &spout_block)
  options = args.last.is_a?(Hash) ? args.pop : {}
  ctor_args = args.empty? ? [] : args.pop
  defaults = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}
  settings = defaults.merge(options)

  definition = SpoutDefinition.new(spout_class, ctor_args, settings[:id], settings[:parallelism])
  definition.instance_exec(&spout_block) if block_given?
  self.components << definition
end
117
+
118
+ # def self.bolt(bolt_class, contructor_args = [], options = {}, &bolt_block)
119
# DSL: declares a bolt.
#
# bolt_class - bolt class; its underscored name is the default id
# args       - optional constructor arguments (the last positional argument
#              left once the options are removed), optionally followed by an
#              options hash; :id and :parallelism are read from it
# bolt_block - required definition body, run in the context of the new
#              BoltDefinition
#
# Raises TopologyDefinitionError when no block is given.
def self.bolt(bolt_class, *args, &bolt_block)
  options = args.last.is_a?(Hash) ? args.pop : {}
  ctor_args = args.empty? ? [] : args.pop
  default_id = options[:id] || self.underscore(bolt_class)
  settings = {:id => default_id, :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

  definition = BoltDefinition.new(bolt_class, ctor_args, settings[:id], settings[:parallelism])
  unless block_given?
    raise(TopologyDefinitionError, "#{definition.clazz.name}, #{definition.id}, bolt definition body required")
  end

  definition.instance_exec(&bolt_block)
  self.components << definition
end
129
+
130
# DSL: registers this class as Configuration.topology_class and optionally
# records a topology name and a configure block. A falsy name or a missing
# block leaves the previously stored value untouched.
def self.configure(name = nil, &configure_block)
  Configuration.topology_class = self
  name && (@topology_name = name)
  block_given? ? (@configure_block = configure_block) : nil
end
135
+
136
# DSL: registers what runs once the topology has been submitted.
#
# With a block, the block itself is stored. Otherwise a lambda is stored
# that calls the instance method +method_name+ with env, falling back to
# :on_submit when no name is given (same convention as the SimpleSpout
# callbacks) instead of sending nil.
def self.on_submit(method_name = nil, &submit_block)
  @submit_block = block_given? ? submit_block : lambda { |env| self.send(method_name || :on_submit, env) }
end
139
+
140
+ # topology proxy interface
141
+
142
# Builds the topology from the declared spouts and bolts and submits it.
#
# base_class_path - handed to each component definition's new_instance
# env             - :local submits to a new LocalCluster (kept in @cluster);
#                   any other value submits through StormSubmitter
#
# Component ids are validated first; once submitted, the on_submit block
# runs in the context of this instance with env.
def start(base_class_path, env)
  topology = self.class
  topology.resolve_ids!(topology.components)

  builder = TopologyBuilder.new
  topology.spouts.each do |spout|
    spout_declarer = builder.setSpout(spout.id, spout.new_instance(base_class_path), spout.parallelism.to_java)
    spout_declarer.addConfigurations(spout.config)
  end
  topology.bolts.each do |bolt|
    bolt_declarer = builder.setBolt(bolt.id, bolt.new_instance(base_class_path), bolt.parallelism.to_java)
    bolt_declarer.addConfigurations(bolt.config)
    bolt.define_grouping(bolt_declarer)
  end

  # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
  compat_option = "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"
  configurator = Configurator.new({"topology.worker.childopts" => compat_option})
  configurator.instance_exec(env, &topology.configure_block)

  if env == :local
    @cluster = LocalCluster.new
    submitter = @cluster
  else
    submitter = StormSubmitter
  end
  submitter.submitTopology(topology.topology_name, configurator.config, builder.createTopology)
  instance_exec(env, &topology.submit_block)
end
166
+
167
+ private
168
+
169
# Validates component ids, walking the components in reverse order.
#
# Raises RuntimeError when an id is used by more than one component, or when
# a component that responds to :sources references an id no component has.
def self.resolve_ids!(components)
  known_ids = components.map { |component| component.id }

  components.reverse.each do |component|
    # verify duplicate implicit ids
    if known_ids.count(component.id) > 1
      raise("duplicate id in #{component.clazz.name} on id=#{component.id}")
    end

    # verify source_id references
    next unless component.respond_to?(:sources)
    component.sources.each do |source_id, _grouping|
      next if known_ids.include?(source_id)
      raise("cannot resolve #{component.clazz.name} source id=#{source_id}")
    end
  end
end
180
+
181
# Returns the declared components that are SpoutDefinition instances.
def self.spouts
  self.components.grep(SpoutDefinition)
end
184
+
185
# Returns the declared components that are BoltDefinition instances.
def self.bolts
  self.components.grep(BoltDefinition)
end
188
+
189
# Returns the list of component definitions declared on this class, created
# empty on first use.
def self.components
  return @components if @components
  @components = []
end
192
+
193
# Returns the topology name, defaulting on first use to the underscored
# class name.
def self.topology_name
  return @topology_name if @topology_name
  @topology_name = self.underscore(self.name)
end
196
+
197
# Returns the registered configure block, defaulting on first use to a
# one-argument lambda that does nothing.
def self.configure_block
  return @configure_block if @configure_block
  @configure_block = lambda { |_env| }
end
200
+
201
# Returns the registered on_submit block, defaulting on first use to a
# one-argument lambda that does nothing.
def self.submit_block
  return @submit_block if @submit_block
  @submit_block = lambda { |_env| }
end
204
+
205
# Converts a class (or class name) to its underscored form: only the last
# "::" segment is kept, an underscore is inserted before each capital letter
# that follows another character, and the result is lowercased.
#
#   underscore("RedStorm::WordCountBolt") # => "word_count_bolt"
#
# The non-destructive downcase is used on purpose: downcase! returns nil
# when the string is already lowercase.
def self.underscore(camel_case)
  camel_case.to_s.split('::').last.gsub(/(.)([A-Z])/, '\1_\2').downcase
end
208
+ end
209
+ end