kb-redstorm 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/CHANGELOG.md +74 -0
  2. data/LICENSE.md +13 -0
  3. data/README.md +375 -0
  4. data/Rakefile +11 -0
  5. data/bin/redstorm +15 -0
  6. data/examples/native/Gemfile +2 -0
  7. data/examples/native/cluster_word_count_topology.rb +25 -0
  8. data/examples/native/exclamation_bolt.rb +21 -0
  9. data/examples/native/local_exclamation_topology.rb +31 -0
  10. data/examples/native/local_exclamation_topology2.rb +48 -0
  11. data/examples/native/local_redis_word_count_topology.rb +69 -0
  12. data/examples/native/local_word_count_topology.rb +27 -0
  13. data/examples/native/random_sentence_spout.rb +30 -0
  14. data/examples/native/split_sentence_bolt.rb +20 -0
  15. data/examples/native/word_count_bolt.rb +26 -0
  16. data/examples/shell/resources/splitsentence.py +9 -0
  17. data/examples/shell/resources/storm.py +206 -0
  18. data/examples/shell/shell_topology.rb +41 -0
  19. data/examples/simple/exclamation_bolt.rb +10 -0
  20. data/examples/simple/exclamation_topology.rb +45 -0
  21. data/examples/simple/exclamation_topology2.rb +45 -0
  22. data/examples/simple/kafka_topology.rb +55 -0
  23. data/examples/simple/random_sentence_spout.rb +21 -0
  24. data/examples/simple/redis_word_count_topology.rb +61 -0
  25. data/examples/simple/ruby_version_topology.rb +32 -0
  26. data/examples/simple/split_sentence_bolt.rb +33 -0
  27. data/examples/simple/word_count_bolt.rb +19 -0
  28. data/examples/simple/word_count_topology.rb +38 -0
  29. data/ivy/settings.xml +11 -0
  30. data/lib/red_storm.rb +9 -0
  31. data/lib/red_storm/application.rb +85 -0
  32. data/lib/red_storm/configuration.rb +16 -0
  33. data/lib/red_storm/configurator.rb +26 -0
  34. data/lib/red_storm/environment.rb +41 -0
  35. data/lib/red_storm/loggable.rb +15 -0
  36. data/lib/red_storm/proxy/batch_spout.rb +71 -0
  37. data/lib/red_storm/proxy/bolt.rb +63 -0
  38. data/lib/red_storm/proxy/proxy_function.rb +48 -0
  39. data/lib/red_storm/proxy/spout.rb +87 -0
  40. data/lib/red_storm/simple_bolt.rb +135 -0
  41. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  42. data/lib/red_storm/simple_spout.rb +184 -0
  43. data/lib/red_storm/simple_topology.rb +209 -0
  44. data/lib/red_storm/topology_launcher.rb +54 -0
  45. data/lib/red_storm/version.rb +3 -0
  46. data/lib/tasks/red_storm.rake +272 -0
  47. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +89 -0
  48. data/src/main/redstorm/storm/jruby/JRubyBolt.java +88 -0
  49. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +59 -0
  50. data/src/main/redstorm/storm/jruby/JRubyShellBolt.java +26 -0
  51. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +26 -0
  52. data/src/main/redstorm/storm/jruby/JRubySpout.java +107 -0
  53. metadata +134 -0
@@ -0,0 +1,87 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+ module RedStorm
6
+
7
# Bolt definition for the bolts of a linear DRPC topology.
#
# Unlike SimpleTopology::BoltDefinition, which records a grouping per named
# source, this definition holds a single grouping that is applied to the
# declarer returned when the bolt is added to the DRPC builder (the grouping
# methods below are called without a source id).
class InputBoltDefinition < SimpleTopology::BoltDefinition
  attr_writer :grouping

  # Accepts the same arguments as BoltDefinition#initialize; the grouping
  # defaults to :none until the DSL sets one.
  def initialize(*args)
    super
    @grouping = :none
  end

  # DSL getter/setter for the bolt grouping.
  #
  # grouping - a grouper Symbol (:shuffle, :global, ...) or a one-entry Hash
  #            of grouper => params (e.g. :fields => ["word"]). When omitted
  #            the current grouping is returned unchanged.
  #
  # Returns the grouping now in effect.
  def grouping(grouping = nil)
    return @grouping if grouping.nil?
    @grouping = grouping
  end

  # Applies the configured grouping to the given declarer.
  #
  # declarer - object responding to the Storm grouping methods
  #            (fieldsGrouping, globalGrouping, shuffleGrouping, ...).
  #
  # Raises RuntimeError when the grouper is not one of the known symbols.
  def define_grouping(declarer)
    # Normalize to a (grouper, params) pair, mirroring the {grouper => params}
    # shape that BoltDefinition#source stores for regular bolts.
    grouper, params = @grouping.is_a?(Hash) ? @grouping.first : [@grouping, nil]

    case grouper
    when :fields
      declarer.fieldsGrouping(Fields.new(*([params].flatten.map(&:to_s))))
    when :global
      declarer.globalGrouping()
    when :shuffle
      declarer.shuffleGrouping()
    when :local_or_shuffle
      declarer.localOrShuffleGrouping()
    when :none
      declarer.noneGrouping()
    when :all
      declarer.allGrouping()
    when :direct
      declarer.directGrouping()
    else
      raise("unknown grouper=#{grouper.inspect}")
    end
  end
end
41
+
42
# Topology DSL for linear DRPC topologies. Bolts are declared with input_bolt
# and chained through a LinearDRPCTopologyBuilder; declaring a spout is an
# error.
class SimpleDRPCTopology < SimpleTopology

  # Spouts cannot be declared on a DRPC topology. The arguments of
  # SimpleTopology.spout are accepted (and ignored) so that a DSL call such as
  # `spout SomeSpout` reports the definition error instead of an ArgumentError.
  #
  # Raises TopologyDefinitionError always.
  def self.spout(*args, &spout_block)
    raise TopologyDefinitionError, "DRPC spout is already defined"
  end

  # Builds the DRPC topology from the declared bolts, submits it and then runs
  # the on_submit block.
  #
  # base_class_path - passed through to each bolt definition's new_instance.
  # env             - :local submits to a new LocalCluster with a LocalDRPC;
  #                   any other value submits through StormSubmitter.
  def start(base_class_path, env)
    builder = Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder.new(self.class.topology_name)

    self.class.bolts.each do |bolt|
      declarer = builder.addBolt(bolt.new_instance(base_class_path), bolt.parallelism.to_java)
      declarer.addConfigurations(bolt.config)
      bolt.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

    configurator = Configurator.new(defaults)
    configurator.instance_exec(env, &self.class.configure_block)

    drpc = nil
    if env == :local
      drpc = LocalDRPC.new
      submitter = @cluster = LocalCluster.new
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createLocalTopology(drpc))
    else
      submitter = StormSubmitter
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createRemoteTopology)
    end

    # The inherited default submit block and the method-name form of on_submit
    # are lambdas taking only env; lambdas enforce their arity, so pass them
    # only as many arguments as they declare. Plain blocks receive (env, drpc).
    submit_block = self.class.submit_block
    submit_args = [env, drpc]
    if submit_block.respond_to?(:lambda?) && submit_block.lambda? && submit_block.arity >= 0
      submit_args = submit_args.first(submit_block.arity)
    end
    instance_exec(*submit_args, &submit_block)
  end

  # DSL: declares a bolt of the DRPC chain.
  #
  # bolt_class - the bolt class.
  # args       - optional constructor arguments followed by an optional
  #              options Hash (:id, :parallelism).
  # bolt_block - required definition body, evaluated on the
  #              InputBoltDefinition.
  #
  # Raises TopologyDefinitionError when no block is given.
  def self.input_bolt(bolt_class, *args, &bolt_block)
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

    bolt = InputBoltDefinition.new(bolt_class, constructor_args, bolt_options[:id], bolt_options[:parallelism])
    raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end
end
86
+
87
+ end
@@ -0,0 +1,184 @@
1
+ require 'java'
2
+ require 'red_storm/configurator'
3
+
4
+ module RedStorm
5
+
6
# Base class for spouts written with the simple DSL.
#
# Subclasses register their callbacks through the class-level DSL
# (output_fields, on_send, on_init, ...). The instance methods of the spout
# proxy interface run those callbacks with instance_exec, so they execute in
# the context of the spout instance.
class SimpleSpout
  attr_reader :config, :context, :collector

  # ---- DSL class methods ----

  # Stores the block that get_component_configuration evaluates on a new
  # Configurator; an empty lambda is stored when no block is given.
  def self.configure(&configure_block)
    @configure_block = configure_block || lambda {}
  end

  # Logger named after the class, created on first use.
  def self.log
    @log = Java::OrgApacheLog4j::Logger.getLogger(name) unless @log
    @log
  end

  # Declares the output field names; each one is converted to a String.
  def self.output_fields(*fields)
    @fields = fields.map { |field| field.to_s }
  end

  # Registers the tuple producer: either a block, or the name of an instance
  # method (default :on_send). A trailing Hash is merged into the send options
  # (:emit, :reliable).
  def self.on_send(*args, &on_send_block)
    extra_options = args.last.is_a?(Hash) ? args.pop : {}
    target = args.first || :on_send

    send_options.merge!(extra_options)
    @on_send_block = on_send_block || lambda { send(target) }
  end

  # Registers the callback run by #open (block or method name, default :on_init).
  def self.on_init(method_name = nil, &on_init_block)
    target = method_name || :on_init
    @on_init_block = on_init_block || lambda { send(target) }
  end

  # Registers the callback run by #close (block or method name, default :on_close).
  def self.on_close(method_name = nil, &on_close_block)
    target = method_name || :on_close
    @on_close_block = on_close_block || lambda { send(target) }
  end

  # Registers the callback run by #activate (block or method name, default :on_activate).
  def self.on_activate(method_name = nil, &on_activate_block)
    target = method_name || :on_activate
    @on_activate_block = on_activate_block || lambda { send(target) }
  end

  # Registers the callback run by #deactivate (block or method name, default :on_deactivate).
  def self.on_deactivate(method_name = nil, &on_deactivate_block)
    target = method_name || :on_deactivate
    @on_deactivate_block = on_deactivate_block || lambda { send(target) }
  end

  # Registers the callback run by #ack with the message id (block or method
  # name, default :on_ack).
  def self.on_ack(method_name = nil, &on_ack_block)
    target = method_name || :on_ack
    @on_ack_block = on_ack_block || lambda { |msg_id| send(target, msg_id) }
  end

  # Registers the callback run by #fail with the message id (block or method
  # name, default :on_fail).
  def self.on_fail(method_name = nil, &on_fail_block)
    target = method_name || :on_fail
    @on_fail_block = on_fail_block || lambda { |msg_id| send(target, msg_id) }
  end

  # ---- class-level state readers (public: the instance methods below reach
  # them through self.class) ----

  def self.fields
    @fields ||= []
  end

  def self.configure_block
    @configure_block ||= lambda {}
  end

  def self.on_send_block
    @on_send_block ||= lambda { send(:on_send) }
  end

  def self.on_init_block
    @on_init_block ||= lambda { send(:on_init) }
  end

  def self.on_close_block
    @on_close_block ||= lambda { send(:on_close) }
  end

  def self.on_activate_block
    @on_activate_block ||= lambda { send(:on_activate) }
  end

  def self.on_deactivate_block
    @on_deactivate_block ||= lambda { send(:on_deactivate) }
  end

  def self.on_ack_block
    @on_ack_block ||= lambda { |msg_id| send(:on_ack, msg_id) }
  end

  def self.on_fail_block
    @on_fail_block ||= lambda { |msg_id| send(:on_fail, msg_id) }
  end

  # Options controlling next_tuple; emitting is on and reliability off by default.
  def self.send_options
    @send_options ||= {:emit => true, :reliable => false}
  end

  def self.emit?
    send_options[:emit] ? true : false
  end

  def self.reliable?
    send_options[:reliable] ? true : false
  end

  # ---- DSL instance methods ----

  # Emits the values as one tuple tagged with message_id.
  def reliable_emit(message_id, *values)
    tuple = Values.new(*values)
    @collector.emit(tuple, message_id)
  end

  # Emits the values as one tuple without a message id.
  def unreliable_emit(*values)
    tuple = Values.new(*values)
    @collector.emit(tuple)
  end
  alias_method :emit, :unreliable_emit

  def log
    self.class.log
  end

  # ---- Spout proxy interface ----

  # Runs the on_send callback. When emitting is enabled, its result is
  # flattened and emitted; in reliable mode the first element is taken as the
  # message id. A nil/false result sleeps for 0.1s instead of emitting.
  def next_tuple
    output = instance_exec(&self.class.on_send_block)
    return nil unless self.class.emit?
    return sleep(0.1) unless output

    values = [output].flatten
    return unreliable_emit(*values) unless self.class.reliable?

    message_id = values.shift
    reliable_emit(message_id, *values)
  end

  # Stores the config, context and collector, then runs the on_init callback.
  def open(config, context, collector)
    @config, @context, @collector = config, context, collector
    init_block = self.class.on_init_block
    instance_exec(&init_block)
  end

  def close
    close_block = self.class.on_close_block
    instance_exec(&close_block)
  end

  def activate
    activate_block = self.class.on_activate_block
    instance_exec(&activate_block)
  end

  def deactivate
    deactivate_block = self.class.on_deactivate_block
    instance_exec(&deactivate_block)
  end

  # Declares the class-level output fields on the given declarer.
  def declare_output_fields(declarer)
    output_fields = Fields.new(self.class.fields)
    declarer.declare(output_fields)
  end

  def ack(msg_id)
    ack_block = self.class.on_ack_block
    instance_exec(msg_id, &ack_block)
  end

  def fail(msg_id)
    fail_block = self.class.on_fail_block
    instance_exec(msg_id, &fail_block)
  end

  # Evaluates the configure block on a fresh Configurator and returns its config.
  def get_component_configuration
    configurator = Configurator.new
    configure_block = self.class.configure_block
    configurator.instance_exec(&configure_block)
    configurator.config
  end

  private

  # default optional noop dsl methods/callbacks

  def on_init
  end

  def on_close
  end

  def on_activate
  end

  def on_deactivate
  end

  def on_ack(msg_id)
  end

  def on_fail(msg_id)
  end
end
184
+ end
@@ -0,0 +1,209 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+
6
+ module RedStorm
7
+
8
# Raised by the topology DSL when a definition is invalid, e.g. a bolt
# declared without a definition body.
class TopologyDefinitionError < StandardError
end
9
+
10
# Base class for topologies declared with the simple DSL.
#
# Subclasses declare components with the class-level `spout` and `bolt`
# methods, optional settings with `configure`, and an optional post-submit
# hook with `on_submit`. #start turns the declarations into a Storm topology
# and submits it.
class SimpleTopology
  attr_reader :cluster # LocalCluster reference usable in on_submit block, for example

  DEFAULT_SPOUT_PARALLELISM = 1
  DEFAULT_BOLT_PARALLELISM = 1

  # State shared by spout and bolt declarations: the component class, its
  # constructor arguments, its id, its parallelism and its output fields.
  class ComponentDefinition < Configurator
    attr_reader :clazz, :constructor_args, :parallelism
    attr_accessor :id # ids are forced to string

    def initialize(component_class, constructor_args, id, parallelism)
      super()
      @clazz = component_class
      @constructor_args = constructor_args
      @id = id.to_s
      @parallelism = parallelism
      @output_fields = []
    end

    # DSL getter/setter: with arguments, stores them as Strings; without,
    # returns the current output fields.
    def output_fields(*args)
      args.empty? ? @output_fields : @output_fields = args.map(&:to_s)
    end

    # True when the first namespace segment of the class name is "Java"
    # (compared case-insensitively).
    def is_java?
      @clazz.name.split('::').first.downcase == 'java'
    end
  end

  class SpoutDefinition < ComponentDefinition

    # Builds the spout object handed to the topology builder.
    # WARNING non-dry see BoltDefinition#new_instance
    def new_instance(base_class_path)
      if @clazz.name == "Java::RedstormStormJruby::JRubyShellSpout"
        @clazz.new(constructor_args, @output_fields)
      elsif is_java?
        @clazz.new(*constructor_args)
      else
        # any other class is wrapped in the JRubySpout proxy
        JRubySpout.new(base_class_path, @clazz.name, @output_fields)
      end
    end
  end

  class BoltDefinition < ComponentDefinition
    attr_accessor :sources, :command

    def initialize(*args)
      super
      @sources = []
    end

    # DSL: records an input source for this bolt.
    #
    # source_id - a component class (converted with SimpleTopology.underscore)
    #             or an explicit id (converted with to_s).
    # grouping  - a grouper Symbol, or a Hash of grouper => params; a bare
    #             Symbol is stored as {grouper => nil}.
    def source(source_id, grouping)
      @sources << [source_id.is_a?(Class) ? SimpleTopology.underscore(source_id) : source_id.to_s, grouping.is_a?(Hash) ? grouping : {grouping => nil}]
    end

    # Applies every recorded source/grouping pair to the given declarer.
    #
    # Raises RuntimeError when a grouper is not one of the known symbols.
    def define_grouping(declarer)
      @sources.each do |source_id, grouping|
        grouper, params = grouping.first

        case grouper
        when :fields
          declarer.fieldsGrouping(source_id, Fields.new(*([params].flatten.map(&:to_s))))
        when :global
          declarer.globalGrouping(source_id)
        when :shuffle
          declarer.shuffleGrouping(source_id)
        when :local_or_shuffle
          declarer.localOrShuffleGrouping(source_id)
        when :none
          declarer.noneGrouping(source_id)
        when :all
          declarer.allGrouping(source_id)
        when :direct
          declarer.directGrouping(source_id)
        else
          raise("unknown grouper=#{grouper.inspect}")
        end
      end
    end

    # Builds the bolt object handed to the topology builder.
    # WARNING non-dry see SpoutDefinition#new_instance
    def new_instance(base_class_path)
      if @clazz.name == "Java::RedstormStormJruby::JRubyShellBolt"
        @clazz.new(constructor_args, @output_fields)
      elsif is_java?
        @clazz.new(*constructor_args)
      else
        # any other class is wrapped in the JRubyBolt proxy
        JRubyBolt.new(base_class_path, @clazz.name, @output_fields)
      end
    end
  end

  # Logger named after the class, created on first use.
  def self.log
    @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
  end

  # DSL: declares a spout.
  #
  # spout_class - the spout class.
  # args        - optional constructor arguments followed by an optional
  #               options Hash (:id, :parallelism).
  # spout_block - optional definition body, evaluated on the SpoutDefinition.
  def self.spout(spout_class, *args, &spout_block)
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)

    spout = SpoutDefinition.new(spout_class, constructor_args, spout_options[:id], spout_options[:parallelism])
    spout.instance_exec(&spout_block) if block_given?
    self.components << spout
  end

  # DSL: declares a bolt.
  #
  # bolt_class - the bolt class.
  # args       - optional constructor arguments followed by an optional
  #              options Hash (:id, :parallelism).
  # bolt_block - required definition body, evaluated on the BoltDefinition.
  #
  # Raises TopologyDefinitionError when no block is given.
  def self.bolt(bolt_class, *args, &bolt_block)
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    # an explicit :id in options overrides the class-derived default through the merge
    bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

    bolt = BoltDefinition.new(bolt_class, constructor_args, bolt_options[:id], bolt_options[:parallelism])
    raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end

  # DSL: registers this class as Configuration.topology_class and optionally
  # sets the topology name and the block evaluated on the Configurator in #start.
  def self.configure(name = nil, &configure_block)
    Configuration.topology_class = self
    @topology_name = name if name
    @configure_block = configure_block if block_given?
  end

  # DSL: registers the hook run after submission, either as a block or as the
  # name of an instance method that receives env. One of the two is expected:
  # without a block the stored lambda sends method_name as given.
  def self.on_submit(method_name = nil, &submit_block)
    @submit_block = block_given? ? submit_block : lambda {|env| self.send(method_name, env)}
  end

  # topology proxy interface

  # Validates component ids, builds the topology from the declared spouts and
  # bolts, submits it and then runs the on_submit hook.
  #
  # base_class_path - passed through to each definition's new_instance.
  # env             - :local submits to a new LocalCluster (kept in @cluster);
  #                   any other value submits through StormSubmitter.
  def start(base_class_path, env)
    self.class.resolve_ids!(self.class.components)

    builder = TopologyBuilder.new
    self.class.spouts.each do |spout|
      declarer = builder.setSpout(spout.id, spout.new_instance(base_class_path), spout.parallelism.to_java)
      declarer.addConfigurations(spout.config)
    end
    self.class.bolts.each do |bolt|
      declarer = builder.setBolt(bolt.id, bolt.new_instance(base_class_path), bolt.parallelism.to_java)
      declarer.addConfigurations(bolt.config)
      bolt.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

    configurator = Configurator.new(defaults)
    configurator.instance_exec(env, &self.class.configure_block)

    submitter = (env == :local) ? @cluster = LocalCluster.new : StormSubmitter
    submitter.submitTopology(self.class.topology_name, configurator.config, builder.createTopology)
    instance_exec(env, &self.class.submit_block)
  end

  # The class-level helpers below are reached through self.class from
  # instances, so they are public. (A bare `private` does not apply to
  # `def self.` methods in any case.)

  # Verifies that component ids are unique and that every bolt source refers
  # to a declared component id.
  #
  # Raises RuntimeError on a duplicate id or an unresolvable source id.
  def self.resolve_ids!(components)
    ids = components.map(&:id)
    components.reverse_each do |component|
      # verify duplicate implicit ids
      raise("duplicate id in #{component.clazz.name} on id=#{component.id}") if ids.count(component.id) > 1

      # verify source_id references
      next unless component.respond_to?(:sources)
      component.sources.each do |source_id, _grouping|
        raise("cannot resolve #{component.clazz.name} source id=#{source_id}") unless ids.include?(source_id)
      end
    end
  end

  def self.spouts
    self.components.select{|c| c.is_a?(SpoutDefinition)}
  end

  def self.bolts
    self.components.select{|c| c.is_a?(BoltDefinition)}
  end

  def self.components
    @components ||= []
  end

  # Explicit name given to configure, otherwise derived from the class name.
  def self.topology_name
    @topology_name ||= self.underscore(self.name)
  end

  def self.configure_block
    @configure_block ||= lambda{|env|}
  end

  def self.submit_block
    @submit_block ||= lambda{|env|}
  end

  # Converts the last namespace segment of a class (or name) to snake case,
  # e.g. "Foo::WordCountBolt" => "word_count_bolt".
  #
  # Always returns a String: the non-bang downcase is used because downcase!
  # returns nil when the string contains nothing to change, and an empty name
  # yields "".
  def self.underscore(camel_case)
    camel_case.to_s.split('::').last.to_s.gsub(/(.)([A-Z])/, '\1_\2').downcase
  end
end
209
+ end