redstorm 0.6.5 → 0.6.6.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. data/CHANGELOG.md +12 -1
  2. data/README.md +66 -47
  3. data/Rakefile +1 -1
  4. data/examples/dsl/exclamation_bolt.rb +10 -0
  5. data/examples/{simple → dsl}/exclamation_topology.rb +5 -5
  6. data/examples/{simple → dsl}/exclamation_topology2.rb +5 -5
  7. data/examples/{simple → dsl}/hello_world_topology.rb +4 -4
  8. data/examples/{simple → dsl}/kafka_topology.rb +17 -18
  9. data/examples/{simple → dsl}/random_sentence_spout.rb +1 -1
  10. data/examples/{simple → dsl}/redis_word_count_topology.rb +6 -7
  11. data/examples/{simple → dsl}/ruby_version_topology.rb +9 -9
  12. data/examples/{simple → dsl}/split_sentence_bolt.rb +6 -6
  13. data/examples/{simple → dsl}/word_count_bolt.rb +2 -2
  14. data/examples/{simple → dsl}/word_count_topology.rb +6 -6
  15. data/examples/shell/shell_topology.rb +2 -2
  16. data/ivy/storm_dependencies.xml +2 -2
  17. data/ivy/topology_dependencies.xml +10 -2
  18. data/lib/red_storm.rb +6 -5
  19. data/lib/red_storm/application.rb +5 -5
  20. data/lib/red_storm/dsl/bolt.rb +155 -0
  21. data/lib/red_storm/dsl/drpc_topology.rb +92 -0
  22. data/lib/red_storm/dsl/spout.rb +194 -0
  23. data/lib/red_storm/dsl/topology.rb +227 -0
  24. data/lib/red_storm/dsl/tuple.rb +34 -0
  25. data/lib/red_storm/environment.rb +8 -8
  26. data/lib/red_storm/topology_launcher.rb +2 -2
  27. data/lib/red_storm/version.rb +1 -1
  28. data/lib/tasks/red_storm.rake +45 -27
  29. data/redstorm.gemspec +4 -4
  30. metadata +31 -34
  31. data/examples/simple/exclamation_bolt.rb +0 -10
  32. data/lib/red_storm/simple_bolt.rb +0 -135
  33. data/lib/red_storm/simple_drpc_topology.rb +0 -87
  34. data/lib/red_storm/simple_spout.rb +0 -184
  35. data/lib/red_storm/simple_topology.rb +0 -219
@@ -1,87 +0,0 @@
1
- require 'java'
2
- require 'red_storm/configuration'
3
- require 'red_storm/configurator'
4
-
5
- module RedStorm
6
-
7
# Bolt definition used by the DRPC topology DSL.
#
# On top of SimpleTopology::BoltDefinition it carries a single grouping
# (default :none) which #define_grouping applies to the declarer without
# naming a source component.
class InputBoltDefinition < SimpleTopology::BoltDefinition
  # Plain writer; reading goes through the DSL method #grouping below.
  attr_writer :grouping

  def initialize(*args)
    super
    @grouping = :none
  end

  # DSL statement and reader for the grouping.
  #
  #   grouping :shuffle             # grouping without parameters
  #   grouping :fields => ["id"]    # grouping with parameters
  #   grouping                      # returns the current grouping
  #
  # @param args [Array] empty to read; one Symbol or one-entry Hash to set
  # @return the current grouping
  # @raise [ArgumentError] when more than one argument is given
  def grouping(*args)
    return @grouping if args.empty?
    raise(ArgumentError, "wrong number of arguments (#{args.size} for 0..1)") if args.size > 1
    @grouping = args.first
  end

  # Applies the configured grouping to the given declarer.
  #
  # The grouping is either a bare grouper (:shuffle) or a one-entry Hash
  # mapping the grouper to its parameters (:fields => [...]), the same shape
  # BoltDefinition#source stores for regular bolts.
  #
  # @param declarer the object receiving the xxxGrouping call
  # @raise [RuntimeError] on an unknown grouper, or a :fields grouping without field names
  def define_grouping(declarer)
    grouper, params = @grouping.is_a?(Hash) ? @grouping.first : [@grouping, nil]

    case grouper
    when :fields
      field_names = [params].flatten.compact.map(&:to_s)
      raise("fields grouping requires at least one field name") if field_names.empty?
      declarer.fieldsGrouping(Fields.new(*field_names))
    when :global
      declarer.globalGrouping()
    when :shuffle
      declarer.shuffleGrouping()
    when :local_or_shuffle
      declarer.localOrShuffleGrouping()
    when :none
      declarer.noneGrouping()
    when :all
      declarer.allGrouping()
    when :direct
      declarer.directGrouping()
    else
      raise("unknown grouper=#{grouper.inspect}")
    end
  end
end
40
-
41
# Topology DSL for linear DRPC topologies.
#
# The topology is assembled with Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder
# from the declared bolts only; declaring a spout is rejected.
class SimpleDRPCTopology < SimpleTopology

  # Rejects any spout declaration.
  #
  # Accepts and ignores any arguments so that a regular `spout SomeClass, ...`
  # statement reports the TopologyDefinitionError below rather than an
  # ArgumentError about the argument count.
  #
  # @raise [TopologyDefinitionError] always
  def self.spout(*_args)
    raise TopologyDefinitionError, "DRPC spout is already defined"
  end

  # Builds the DRPC topology from the declared bolts and submits it.
  #
  # @param base_class_path passed through to each bolt's new_instance
  # @param env [Symbol] :local submits to a new LocalCluster (kept in @cluster)
  #   together with a LocalDRPC; any other value submits through StormSubmitter
  # @return the result of the on_submit block, which is run with (env, drpc);
  #   drpc is nil unless env is :local
  def start(base_class_path, env)
    builder = Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder.new(self.class.topology_name)

    self.class.bolts.each do |bolt|
      declarer = builder.addBolt(bolt.new_instance(base_class_path), bolt.parallelism.to_java)
      declarer.addConfigurations(bolt.config)
      bolt.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

    configurator = Configurator.new(defaults)
    configurator.instance_exec(env, &self.class.configure_block)

    drpc = nil
    if env == :local
      drpc = LocalDRPC.new
      submitter = @cluster = LocalCluster.new
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createLocalTopology(drpc))
    else
      submitter = StormSubmitter
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createRemoteTopology)
    end
    instance_exec(env, drpc, &self.class.submit_block)
  end

  # DSL statement declaring a bolt of the DRPC chain.
  #
  # @param bolt_class [Class] the bolt implementation
  # @param args optional constructor arguments followed by an optional options
  #   Hash (:id, :parallelism); defaults are the underscored class name and
  #   DEFAULT_BOLT_PARALLELISM
  # @param bolt_block required definition body, evaluated on the new InputBoltDefinition
  # @raise [TopologyDefinitionError] when no block is given
  def self.input_bolt(bolt_class, *args, &bolt_block)
    set_topology_class!
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

    bolt = InputBoltDefinition.new(bolt_class, constructor_args, bolt_options[:id], bolt_options[:parallelism])
    raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end
end
86
-
87
- end
@@ -1,184 +0,0 @@
1
- require 'java'
2
- require 'red_storm/configurator'
3
-
4
- module RedStorm
5
-
6
# Base class for spouts written with the DSL.
#
# Subclasses describe themselves with class-level statements (output_fields,
# on_send, on_init, ...). Each statement stores a block in a class-level
# instance variable; the instance-level proxy methods (next_tuple, open,
# close, ...) run the stored block with instance_exec so that it executes
# with the spout instance as self.
class SimpleSpout
  attr_reader :config, :context, :collector

  class << self
    # ---- DSL statements ----

    # Stores the block later evaluated on a Configurator (see get_component_configuration).
    def configure(&configure_block)
      @configure_block = configure_block || lambda {}
    end

    # Logger named after the class, created on first use.
    def log
      @log ||= Java::OrgApacheLog4j::Logger.getLogger(name)
    end

    # Declares the output field names; they are stored as strings.
    def output_fields(*fields)
      @fields = fields.map { |field| field.to_s }
    end

    # Declares how the next tuple is produced: an optional method name, an
    # optional trailing options Hash merged into send_options, and/or a block.
    # Without a block the named method (default :on_send) is called.
    def on_send(*args, &on_send_block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      method_name = args.first

      send_options.merge!(options)
      @on_send_block = on_send_block || lambda { send(method_name || :on_send) }
    end

    # The lifecycle statements below all follow the same rule: use the given
    # block, otherwise call the named method, otherwise the default callback.

    def on_init(method_name = nil, &on_init_block)
      @on_init_block = on_init_block || lambda { send(method_name || :on_init) }
    end

    def on_close(method_name = nil, &on_close_block)
      @on_close_block = on_close_block || lambda { send(method_name || :on_close) }
    end

    def on_activate(method_name = nil, &on_activate_block)
      @on_activate_block = on_activate_block || lambda { send(method_name || :on_activate) }
    end

    def on_deactivate(method_name = nil, &on_deactivate_block)
      @on_deactivate_block = on_deactivate_block || lambda { send(method_name || :on_deactivate) }
    end

    def on_ack(method_name = nil, &on_ack_block)
      @on_ack_block = on_ack_block || lambda { |msg_id| send(method_name || :on_ack, msg_id) }
    end

    def on_fail(method_name = nil, &on_fail_block)
      @on_fail_block = on_fail_block || lambda { |msg_id| send(method_name || :on_fail, msg_id) }
    end

    # ---- accessors for the stored declarations, with their defaults ----
    # These are called from instance methods through self.class, so they are public.

    def fields
      @fields ||= []
    end

    def configure_block
      @configure_block ||= lambda {}
    end

    def on_send_block
      @on_send_block ||= lambda { send(:on_send) }
    end

    def on_init_block
      @on_init_block ||= lambda { send(:on_init) }
    end

    def on_close_block
      @on_close_block ||= lambda { send(:on_close) }
    end

    def on_activate_block
      @on_activate_block ||= lambda { send(:on_activate) }
    end

    def on_deactivate_block
      @on_deactivate_block ||= lambda { send(:on_deactivate) }
    end

    def on_ack_block
      @on_ack_block ||= lambda { |msg_id| send(:on_ack, msg_id) }
    end

    def on_fail_block
      @on_fail_block ||= lambda { |msg_id| send(:on_fail, msg_id) }
    end

    def send_options
      @send_options ||= {:emit => true, :reliable => false}
    end

    def emit?
      send_options[:emit] ? true : false
    end

    def reliable?
      send_options[:reliable] ? true : false
    end
  end

  # ---- DSL instance methods ----

  # Emits the values together with a message id.
  def reliable_emit(message_id, *values)
    @collector.emit(Values.new(*values), message_id)
  end

  # Emits the values without a message id.
  def unreliable_emit(*values)
    @collector.emit(Values.new(*values))
  end
  alias_method :emit, :unreliable_emit

  def log
    self.class.log
  end

  # ---- Spout proxy interface ----

  # Runs the on_send block. When the :emit option is on, a truthy result is
  # flattened and emitted (in reliable mode its first element is the message
  # id); a nil/false result makes the spout sleep for 0.1 second instead.
  def next_tuple
    output = instance_exec(&self.class.on_send_block)
    return nil unless self.class.emit?
    return sleep(0.1) unless output

    values = [output].flatten
    return unreliable_emit(*values) unless self.class.reliable?

    message_id, *payload = values
    reliable_emit(message_id, *payload)
  end

  # Stores the arguments in the matching readers, then runs the on_init block.
  def open(config, context, collector)
    @collector = collector
    @context = context
    @config = config
    instance_exec(&self.class.on_init_block)
  end

  def close
    instance_exec(&self.class.on_close_block)
  end

  def activate
    instance_exec(&self.class.on_activate_block)
  end

  def deactivate
    instance_exec(&self.class.on_deactivate_block)
  end

  def declare_output_fields(declarer)
    declarer.declare(Fields.new(self.class.fields))
  end

  def ack(msg_id)
    instance_exec(msg_id, &self.class.on_ack_block)
  end

  def fail(msg_id)
    instance_exec(msg_id, &self.class.on_fail_block)
  end

  # Evaluates the configure block on a fresh Configurator and returns its config.
  def get_component_configuration
    configurator = Configurator.new
    configurator.instance_exec(&self.class.configure_block)
    configurator.config
  end

  private

  # default optional noop dsl methods/callbacks
  def on_init; end
  def on_close; end
  def on_activate; end
  def on_deactivate; end
  def on_ack(msg_id); end
  def on_fail(msg_id); end
end
184
- end
@@ -1,219 +0,0 @@
1
- require 'java'
2
- require 'red_storm/configuration'
3
- require 'red_storm/configurator'
4
-
5
-
6
- module RedStorm
7
-
8
# Error raised for invalid topology DSL declarations, e.g. a bolt declared
# without a definition block.
class TopologyDefinitionError < StandardError
end
9
-
10
# Base class for topologies declared with the DSL.
#
# A subclass declares its components with class-level statements (spout,
# bolt, configure, on_submit). The declarations are accumulated in
# class-level instance variables and turned into a topology by #start.
class SimpleTopology
  attr_reader :cluster # LocalCluster reference usable in on_submit block, for example

  DEFAULT_SPOUT_PARALLELISM = 1
  DEFAULT_BOLT_PARALLELISM = 1

  # State shared by spout and bolt declarations: implementation class,
  # constructor arguments, id, parallelism and output fields.
  class ComponentDefinition < Configurator
    attr_reader :clazz, :constructor_args, :parallelism
    attr_accessor :id # converted to a String by the constructor

    def initialize(component_class, constructor_args, id, parallelism)
      super()
      @clazz = component_class
      @constructor_args = constructor_args
      @id = id.to_s
      @parallelism = parallelism
      @output_fields = []
    end

    # With arguments: sets the output fields (stored as strings).
    # Without arguments: returns the current output fields.
    def output_fields(*args)
      args.empty? ? @output_fields : @output_fields = args.map(&:to_s)
    end

    # True when the first segment of the class name is "java" (case-insensitive).
    def is_java?
      @clazz.name.split('::').first.downcase == 'java'
    end
  end

  class SpoutDefinition < ComponentDefinition

    # Instantiates the spout: the JRubyShellSpout and other Java classes are
    # constructed directly, any other class is wrapped in a JRubySpout.
    #
    # WARNING non-dry see BoltDefinition#new_instance
    def new_instance(base_class_path)
      if @clazz.name == "Java::RedstormStormJruby::JRubyShellSpout"
        @clazz.new(constructor_args, @output_fields)
      elsif is_java?
        @clazz.new(*constructor_args)
      else
        JRubySpout.new(base_class_path, @clazz.name, @output_fields)
      end
    end
  end

  class BoltDefinition < ComponentDefinition
    attr_accessor :sources, :command

    def initialize(*args)
      super
      @sources = []
    end

    # DSL statement adding an input source to the bolt.
    #
    # @param source_id [Class, #to_s] a Class is converted to its underscored name
    # @param grouping [Symbol, Hash] a bare grouper is stored as {grouper => nil}
    def source(source_id, grouping)
      resolved_id = source_id.is_a?(Class) ? SimpleTopology.underscore(source_id) : source_id.to_s
      @sources << [resolved_id, grouping.is_a?(Hash) ? grouping : {grouping => nil}]
    end

    # Declares one grouping per registered source on the given declarer.
    #
    # @raise [RuntimeError] on an unknown grouper
    def define_grouping(declarer)
      @sources.each do |source_id, grouping|
        grouper, params = grouping.first

        case grouper
        when :fields
          declarer.fieldsGrouping(source_id, Fields.new(*([params].flatten.map(&:to_s))))
        when :global
          declarer.globalGrouping(source_id)
        when :shuffle
          declarer.shuffleGrouping(source_id)
        when :local_or_shuffle
          declarer.localOrShuffleGrouping(source_id)
        when :none
          declarer.noneGrouping(source_id)
        when :all
          declarer.allGrouping(source_id)
        when :direct
          declarer.directGrouping(source_id)
        else
          raise("unknown grouper=#{grouper.inspect}")
        end
      end
    end

    # Instantiates the bolt: the JRubyShellBolt and other Java classes are
    # constructed directly, any other class is wrapped in a JRubyBolt.
    #
    # WARNING non-dry see SpoutDefinition#new_instance
    def new_instance(base_class_path)
      if @clazz.name == "Java::RedstormStormJruby::JRubyShellBolt"
        @clazz.new(constructor_args, @output_fields)
      elsif is_java?
        @clazz.new(*constructor_args)
      else
        JRubyBolt.new(base_class_path, @clazz.name, @output_fields)
      end
    end
  end

  # Logger named after the class, created on first use.
  def self.log
    @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
  end

  # DSL statement declaring a spout.
  #
  # @param spout_class [Class] the spout implementation
  # @param args optional constructor arguments followed by an optional options
  #   Hash (:id, :parallelism); defaults are the underscored class name and
  #   DEFAULT_SPOUT_PARALLELISM
  # @param spout_block optional definition body, evaluated on the SpoutDefinition
  def self.spout(spout_class, *args, &spout_block)
    set_topology_class!
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)

    spout = SpoutDefinition.new(spout_class, constructor_args, spout_options[:id], spout_options[:parallelism])
    spout.instance_exec(&spout_block) if block_given?
    self.components << spout
  end

  # DSL statement declaring a bolt; same arguments as spout, but the
  # definition body is mandatory.
  #
  # @raise [TopologyDefinitionError] when no block is given
  def self.bolt(bolt_class, *args, &bolt_block)
    set_topology_class!
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

    bolt = BoltDefinition.new(bolt_class, constructor_args, bolt_options[:id], bolt_options[:parallelism])
    raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end

  # DSL statement setting the topology name and/or the configuration block.
  # The block is later evaluated on a Configurator with env as its argument.
  def self.configure(name = nil, &configure_block)
    set_topology_class!
    @topology_name = name.to_s if name
    @configure_block = configure_block if block_given?
  end

  # DSL statement registering what runs right after submission: the given
  # block, otherwise the named instance method, otherwise #on_submit.
  # Falling back to :on_submit avoids storing a callback that would fail on
  # send(nil, env) when neither a block nor a method name is given.
  def self.on_submit(method_name = nil, &submit_block)
    @submit_block = block_given? ? submit_block : lambda {|env| self.send(method_name || :on_submit, env)}
  end

  # topology proxy interface

  # Validates the declared ids, builds the topology from the declared spouts
  # and bolts, submits it, then runs the on_submit block.
  #
  # @param base_class_path passed through to each component's new_instance
  # @param env [Symbol] :local submits to a new LocalCluster (kept in @cluster),
  #   any other value submits through StormSubmitter
  # @return the result of the on_submit block
  def start(base_class_path, env)
    self.class.resolve_ids!(self.class.components)

    builder = TopologyBuilder.new
    self.class.spouts.each do |spout|
      declarer = builder.setSpout(spout.id, spout.new_instance(base_class_path), spout.parallelism.to_java)
      declarer.addConfigurations(spout.config)
    end
    self.class.bolts.each do |bolt|
      declarer = builder.setBolt(bolt.id, bolt.new_instance(base_class_path), bolt.parallelism.to_java)
      declarer.addConfigurations(bolt.config)
      bolt.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

    configurator = Configurator.new(defaults)
    configurator.instance_exec(env, &self.class.configure_block)

    submitter = (env == :local) ? @cluster = LocalCluster.new : StormSubmitter
    submitter.submitTopology(self.class.topology_name, configurator.config, builder.createTopology)
    instance_exec(env, &self.class.submit_block)
  end

  # NOTE: the class-level helpers below are public. A bare `private` does not
  # apply to `def self.` methods, and #start calls them through self.class.

  # this is a quirk to figure out the topology class at load time when the topology file
  # is required in the TopologyLauncher. Since we want to make the "configure" DSL statement
  # optional we can hook into any/all the other DSL statements that will be called at load time
  # and set it there. This is somewhat inelegant but it works.
  def self.set_topology_class!
    Configuration.topology_class = self
  end

  # Checks that no id is used twice and that every bolt source id refers to a
  # declared component. Components are checked last-declared first.
  #
  # @raise [RuntimeError] on a duplicate id or an unresolvable source id
  def self.resolve_ids!(components)
    # count each id once instead of rescanning the id list per component
    occurrences = Hash.new(0)
    components.each { |component| occurrences[component.id] += 1 }

    components.reverse.each do |component|
      raise("duplicate id in #{component.clazz.name} on id=#{component.id}") if occurrences[component.id] > 1
      next unless component.respond_to?(:sources)
      component.sources.each do |source_id, _grouping|
        raise("cannot resolve #{component.clazz.name} source id=#{source_id}") unless occurrences.key?(source_id)
      end
    end
  end

  def self.spouts
    self.components.select{|c| c.is_a?(SpoutDefinition)}
  end

  def self.bolts
    self.components.select{|c| c.is_a?(BoltDefinition)}
  end

  def self.components
    @components ||= []
  end

  def self.topology_name
    @topology_name ||= self.underscore(self.name)
  end

  def self.configure_block
    @configure_block ||= lambda{|env|}
  end

  def self.submit_block
    @submit_block ||= lambda{|env|}
  end

  # Converts the last segment of a (possibly namespaced) CamelCase name to
  # snake_case, e.g. "Foo::WordCountBolt" => "word_count_bolt".
  #
  # Uses the non-destructive downcase: downcase! returns nil when the string
  # has nothing to downcase, which would turn an all-lowercase name into nil.
  def self.underscore(camel_case)
    camel_case.to_s.split('::').last.to_s.gsub(/(.)([A-Z])/,'\1_\2').downcase
  end

  private

  # default optional noop callback used by on_submit when neither a block nor
  # a method name is given
  def on_submit(env); end
end
219
- end