redstorm 0.6.5 → 0.6.6.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/CHANGELOG.md +12 -1
  2. data/README.md +66 -47
  3. data/Rakefile +1 -1
  4. data/examples/dsl/exclamation_bolt.rb +10 -0
  5. data/examples/{simple → dsl}/exclamation_topology.rb +5 -5
  6. data/examples/{simple → dsl}/exclamation_topology2.rb +5 -5
  7. data/examples/{simple → dsl}/hello_world_topology.rb +4 -4
  8. data/examples/{simple → dsl}/kafka_topology.rb +17 -18
  9. data/examples/{simple → dsl}/random_sentence_spout.rb +1 -1
  10. data/examples/{simple → dsl}/redis_word_count_topology.rb +6 -7
  11. data/examples/{simple → dsl}/ruby_version_topology.rb +9 -9
  12. data/examples/{simple → dsl}/split_sentence_bolt.rb +6 -6
  13. data/examples/{simple → dsl}/word_count_bolt.rb +2 -2
  14. data/examples/{simple → dsl}/word_count_topology.rb +6 -6
  15. data/examples/shell/shell_topology.rb +2 -2
  16. data/ivy/storm_dependencies.xml +2 -2
  17. data/ivy/topology_dependencies.xml +10 -2
  18. data/lib/red_storm.rb +6 -5
  19. data/lib/red_storm/application.rb +5 -5
  20. data/lib/red_storm/dsl/bolt.rb +155 -0
  21. data/lib/red_storm/dsl/drpc_topology.rb +92 -0
  22. data/lib/red_storm/dsl/spout.rb +194 -0
  23. data/lib/red_storm/dsl/topology.rb +227 -0
  24. data/lib/red_storm/dsl/tuple.rb +34 -0
  25. data/lib/red_storm/environment.rb +8 -8
  26. data/lib/red_storm/topology_launcher.rb +2 -2
  27. data/lib/red_storm/version.rb +1 -1
  28. data/lib/tasks/red_storm.rake +45 -27
  29. data/redstorm.gemspec +4 -4
  30. metadata +31 -34
  31. data/examples/simple/exclamation_bolt.rb +0 -10
  32. data/lib/red_storm/simple_bolt.rb +0 -135
  33. data/lib/red_storm/simple_drpc_topology.rb +0 -87
  34. data/lib/red_storm/simple_spout.rb +0 -184
  35. data/lib/red_storm/simple_topology.rb +0 -219
@@ -1,87 +0,0 @@
1
- require 'java'
2
- require 'red_storm/configuration'
3
- require 'red_storm/configurator'
4
-
5
- module RedStorm
6
-
7
# Bolt definition used by SimpleDRPCTopology.input_bolt. Unlike a regular
# BoltDefinition it carries a single input grouping and declares it on the
# declarer without a source id.
class InputBoltDefinition < SimpleTopology::BoltDefinition
  attr_writer :grouping

  def initialize(*args)
    super
    @grouping = :none
  end

  # DSL getter/setter for the input grouping.
  #
  #   grouping :shuffle                  # plain grouping
  #   grouping :fields => ["word"]       # grouping with parameters
  #   grouping                           # => current grouping
  #
  # The hash form follows the same {grouper => params} convention as
  # BoltDefinition#source.
  def grouping(*args)
    return @grouping if args.empty?
    raise(ArgumentError, "wrong number of arguments (#{args.size} for 0..1)") if args.size > 1
    @grouping = args.first
  end

  # Declares the configured grouping on the given declarer.
  #
  # Raises ArgumentError for a :fields grouping without field names and
  # RuntimeError for an unknown grouping.
  def define_grouping(declarer)
    # normalize both the bare symbol and the {grouper => params} forms
    grouper, params = @grouping.is_a?(Hash) ? @grouping.first : [@grouping, nil]

    case grouper
    when :fields
      field_names = [params].flatten.compact.map { |name| name.to_s }
      raise(ArgumentError, "fields grouping requires at least one field name") if field_names.empty?
      declarer.fieldsGrouping(Fields.new(*field_names))
    when :global
      declarer.globalGrouping()
    when :shuffle
      declarer.shuffleGrouping()
    when :local_or_shuffle
      declarer.localOrShuffleGrouping()
    when :none
      declarer.noneGrouping()
    when :all
      declarer.allGrouping()
    when :direct
      declarer.directGrouping()
    else
      raise("unknown grouper=#{grouper.inspect}")
    end
  end
end
40
-
41
# Topology DSL variant built with LinearDRPCTopologyBuilder. Bolts are
# declared with input_bolt; declaring a spout is rejected.
class SimpleDRPCTopology < SimpleTopology

  # Spouts cannot be declared in a DRPC topology. Accepts (and ignores) the
  # same arguments as SimpleTopology.spout so that a normal `spout SomeClass`
  # statement raises TopologyDefinitionError rather than ArgumentError.
  def self.spout(*_args, &_spout_block)
    raise TopologyDefinitionError, "DRPC spout is already defined"
  end

  # Builds the linear DRPC topology from the declared bolts and submits it.
  #
  # base_class_path - passed through to each bolt definition's new_instance
  # env             - :local submits to a new LocalCluster together with a
  #                   LocalDRPC; any other value submits through StormSubmitter
  #
  # The on_submit block is finally executed with (env, drpc); drpc is nil
  # unless env is :local.
  def start(base_class_path, env)
    builder = Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder.new(self.class.topology_name)

    self.class.bolts.each do |bolt|
      declarer = builder.addBolt(bolt.new_instance(base_class_path), bolt.parallelism.to_java)
      declarer.addConfigurations(bolt.config)
      bolt.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

    configurator = Configurator.new(defaults)
    configurator.instance_exec(env, &self.class.configure_block)

    drpc = nil
    if env == :local
      drpc = LocalDRPC.new
      submitter = @cluster = LocalCluster.new
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createLocalTopology(drpc))
    else
      submitter = StormSubmitter
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createRemoteTopology)
    end
    instance_exec(env, drpc, &self.class.submit_block)
  end

  # DSL statement declaring a bolt of the DRPC chain.
  #
  # bolt_class - the bolt class
  # args       - optional constructor args followed by an optional options
  #              hash (:id, :parallelism)
  # bolt_block - required definition body, evaluated in the context of the
  #              new InputBoltDefinition
  #
  # Raises TopologyDefinitionError when no block is given.
  def self.input_bolt(bolt_class, *args, &bolt_block)
    set_topology_class!
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

    bolt = InputBoltDefinition.new(bolt_class, constructor_args, bolt_options[:id], bolt_options[:parallelism])
    raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end
end
86
-
87
- end
@@ -1,184 +0,0 @@
1
- require 'java'
2
- require 'red_storm/configurator'
3
-
4
- module RedStorm
5
-
6
# Base class for spouts written with the DSL. Subclasses declare their
# behaviour with class-level statements (output_fields, on_send, on_init, ...)
# which are stored on the class and executed in the context of the spout
# instance by the proxy interface methods (next_tuple, open, close, ...).
class SimpleSpout
  attr_reader :config, :context, :collector

  class << self
    # ---- DSL statements --------------------------------------------------

    # Stores the component configuration block (a no-op when omitted).
    def configure(&configure_block)
      @configure_block = configure_block || lambda {}
    end

    # Per-class log4j logger, created on first use.
    def log
      @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
    end

    # Declares the output field names; they are stored as strings.
    def output_fields(*fields)
      @fields = fields.map { |field| field.to_s }
    end

    # Declares how tuples are produced: either a block, or the name of an
    # instance method (default :on_send). A trailing hash is merged into the
    # send options (:emit, :reliable).
    def on_send(*args, &on_send_block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      handler = args.first || :on_send

      send_options.merge!(options)
      @on_send_block = on_send_block || lambda { send(handler) }
    end

    # The remaining callbacks all take a block or an instance method name,
    # defaulting to the method named like the statement.

    def on_init(method_name = nil, &on_init_block)
      handler = method_name || :on_init
      @on_init_block = on_init_block || lambda { send(handler) }
    end

    def on_close(method_name = nil, &on_close_block)
      handler = method_name || :on_close
      @on_close_block = on_close_block || lambda { send(handler) }
    end

    def on_activate(method_name = nil, &on_activate_block)
      handler = method_name || :on_activate
      @on_activate_block = on_activate_block || lambda { send(handler) }
    end

    def on_deactivate(method_name = nil, &on_deactivate_block)
      handler = method_name || :on_deactivate
      @on_deactivate_block = on_deactivate_block || lambda { send(handler) }
    end

    def on_ack(method_name = nil, &on_ack_block)
      handler = method_name || :on_ack
      @on_ack_block = on_ack_block || lambda { |msg_id| send(handler, msg_id) }
    end

    def on_fail(method_name = nil, &on_fail_block)
      handler = method_name || :on_fail
      @on_fail_block = on_fail_block || lambda { |msg_id| send(handler, msg_id) }
    end

    # ---- stored definitions, read by the instance methods ---------------

    def fields
      @fields ||= []
    end

    def configure_block
      @configure_block ||= lambda {}
    end

    def on_send_block
      @on_send_block ||= lambda { send(:on_send) }
    end

    def on_init_block
      @on_init_block ||= lambda { send(:on_init) }
    end

    def on_close_block
      @on_close_block ||= lambda { send(:on_close) }
    end

    def on_activate_block
      @on_activate_block ||= lambda { send(:on_activate) }
    end

    def on_deactivate_block
      @on_deactivate_block ||= lambda { send(:on_deactivate) }
    end

    def on_ack_block
      @on_ack_block ||= lambda { |msg_id| send(:on_ack, msg_id) }
    end

    def on_fail_block
      @on_fail_block ||= lambda { |msg_id| send(:on_fail, msg_id) }
    end

    def send_options
      @send_options ||= {:emit => true, :reliable => false}
    end

    def emit?
      send_options[:emit] ? true : false
    end

    def reliable?
      send_options[:reliable] ? true : false
    end
  end

  # ---- DSL instance methods ----------------------------------------------

  # Emits the values anchored to message_id.
  def reliable_emit(message_id, *values)
    @collector.emit(Values.new(*values), message_id)
  end

  # Emits the values without a message id.
  def unreliable_emit(*values)
    @collector.emit(Values.new(*values))
  end
  alias_method :emit, :unreliable_emit

  def log
    self.class.log
  end

  # ---- Spout proxy interface ---------------------------------------------

  # Runs the on_send definition. When emitting is enabled its result is
  # flattened and emitted (the first element being the message id in
  # reliable mode); a nil/false result sleeps for 0.1s instead.
  def next_tuple
    output = instance_exec(&self.class.on_send_block)
    return unless self.class.emit?
    return sleep(0.1) unless output

    values = [output].flatten
    return unreliable_emit(*values) unless self.class.reliable?

    message_id, *payload = values
    reliable_emit(message_id, *payload)
  end

  # Keeps the config, context and collector, then runs the on_init definition.
  def open(config, context, collector)
    @config = config
    @context = context
    @collector = collector
    instance_exec(&self.class.on_init_block)
  end

  def close
    instance_exec(&self.class.on_close_block)
  end

  def activate
    instance_exec(&self.class.on_activate_block)
  end

  def deactivate
    instance_exec(&self.class.on_deactivate_block)
  end

  def declare_output_fields(declarer)
    output_fields = Fields.new(self.class.fields)
    declarer.declare(output_fields)
  end

  def ack(msg_id)
    instance_exec(msg_id, &self.class.on_ack_block)
  end

  def fail(msg_id)
    instance_exec(msg_id, &self.class.on_fail_block)
  end

  # Evaluates the configure block in a fresh Configurator and returns its config.
  def get_component_configuration
    configurator = Configurator.new
    configurator.instance_exec(&self.class.configure_block)
    configurator.config
  end

  private

  # default optional noop dsl methods/callbacks

  def on_init
  end

  def on_close
  end

  def on_activate
  end

  def on_deactivate
  end

  def on_ack(msg_id)
  end

  def on_fail(msg_id)
  end
end
184
- end
@@ -1,219 +0,0 @@
1
- require 'java'
2
- require 'red_storm/configuration'
3
- require 'red_storm/configurator'
4
-
5
-
6
- module RedStorm
7
-
8
# Raised by the topology DSL statements when a topology definition is invalid.
class TopologyDefinitionError < StandardError
end
9
-
10
# Base class for topologies written with the DSL. Subclasses declare their
# components with the class-level statements (spout, bolt, configure,
# on_submit); #start then builds the Storm topology from these definitions
# and submits it.
class SimpleTopology
  attr_reader :cluster # LocalCluster reference usable in on_submit block, for example

  DEFAULT_SPOUT_PARALLELISM = 1
  DEFAULT_BOLT_PARALLELISM = 1

  # State shared by spout and bolt definitions: the component class, its
  # constructor args, id, parallelism and declared output fields.
  class ComponentDefinition < Configurator
    attr_reader :clazz, :constructor_args, :parallelism
    attr_accessor :id # the id given at construction is forced to string

    def initialize(component_class, constructor_args, id, parallelism)
      super()
      @clazz = component_class
      @constructor_args = constructor_args
      @id = id.to_s
      @parallelism = parallelism
      @output_fields = []
    end

    # Without arguments returns the declared output fields, otherwise
    # replaces them with the given names (stored as strings).
    def output_fields(*args)
      args.empty? ? @output_fields : @output_fields = args.map(&:to_s)
    end

    # True when the first namespace element of the class name is "Java".
    def is_java?
      @clazz.name.split('::').first.downcase == 'java'
    end
  end

  class SpoutDefinition < ComponentDefinition

    # Instantiates the object handed to the topology builder: the shell
    # spout and other Java classes directly, anything else wrapped in a
    # JRubySpout proxy.
    #
    # WARNING non-dry see BoltDefinition#new_instance
    def new_instance(base_class_path)
      if @clazz.name == "Java::RedstormStormJruby::JRubyShellSpout"
        @clazz.new(constructor_args, @output_fields)
      elsif is_java?
        @clazz.new(*constructor_args)
      else
        JRubySpout.new(base_class_path, @clazz.name, @output_fields)
      end
    end
  end

  class BoltDefinition < ComponentDefinition
    attr_accessor :sources, :command

    def initialize(*args)
      super
      @sources = []
    end

    # DSL statement adding an input source.
    #
    # source_id - a component class (converted with SimpleTopology.underscore)
    #             or any other id (converted with to_s)
    # grouping  - a {grouper => params} hash, or a bare grouper which is
    #             stored as {grouper => nil}
    def source(source_id, grouping)
      resolved_id = source_id.is_a?(Class) ? SimpleTopology.underscore(source_id) : source_id.to_s
      @sources << [resolved_id, grouping.is_a?(Hash) ? grouping : {grouping => nil}]
    end

    # Declares the grouping of every registered source on the declarer.
    # Raises RuntimeError on an unknown grouper.
    def define_grouping(declarer)
      @sources.each do |source_id, grouping|
        grouper, params = grouping.first

        case grouper
        when :fields
          declarer.fieldsGrouping(source_id, Fields.new(*([params].flatten.map(&:to_s))))
        when :global
          declarer.globalGrouping(source_id)
        when :shuffle
          declarer.shuffleGrouping(source_id)
        when :local_or_shuffle
          declarer.localOrShuffleGrouping(source_id)
        when :none
          declarer.noneGrouping(source_id)
        when :all
          declarer.allGrouping(source_id)
        when :direct
          declarer.directGrouping(source_id)
        else
          raise("unknown grouper=#{grouper.inspect}")
        end
      end
    end

    # Instantiates the object handed to the topology builder: the shell
    # bolt and other Java classes directly, anything else wrapped in a
    # JRubyBolt proxy.
    #
    # WARNING non-dry see SpoutDefinition#new_instance
    def new_instance(base_class_path)
      if @clazz.name == "Java::RedstormStormJruby::JRubyShellBolt"
        @clazz.new(constructor_args, @output_fields)
      elsif is_java?
        @clazz.new(*constructor_args)
      else
        JRubyBolt.new(base_class_path, @clazz.name, @output_fields)
      end
    end
  end

  # Per-class log4j logger, created on first use.
  def self.log
    @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
  end

  # DSL statement declaring a spout.
  #
  # spout_class - the spout class
  # args        - optional constructor args followed by an optional options
  #               hash (:id, :parallelism)
  # spout_block - optional definition body, evaluated in the context of the
  #               new SpoutDefinition
  def self.spout(spout_class, *args, &spout_block)
    set_topology_class!
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)

    spout = SpoutDefinition.new(spout_class, constructor_args, spout_options[:id], spout_options[:parallelism])
    spout.instance_exec(&spout_block) if block_given?
    self.components << spout
  end

  # DSL statement declaring a bolt. Same arguments as .spout, but the
  # definition body is required.
  #
  # Raises TopologyDefinitionError when no block is given.
  def self.bolt(bolt_class, *args, &bolt_block)
    set_topology_class!
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

    bolt = BoltDefinition.new(bolt_class, constructor_args, bolt_options[:id], bolt_options[:parallelism])
    raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end

  # DSL statement setting the optional topology name and the optional
  # configuration block (executed with env in a Configurator by #start).
  def self.configure(name = nil, &configure_block)
    set_topology_class!
    @topology_name = name.to_s if name
    @configure_block = configure_block if block_given?
  end

  # DSL statement registering what runs after submission: a block, or the
  # name of an instance method; both receive env.
  #
  # Raises TopologyDefinitionError when neither is given. Previously this
  # stored a handler calling send(nil, env), which only failed in #start
  # after the topology had already been submitted.
  def self.on_submit(method_name = nil, &submit_block)
    unless block_given? || method_name
      raise(TopologyDefinitionError, "on_submit requires a method name or a block")
    end
    @submit_block = block_given? ? submit_block : lambda {|env| self.send(method_name, env)}
  end

  # topology proxy interface

  # Builds the topology from the declared components and submits it.
  #
  # base_class_path - passed through to each component definition's new_instance
  # env             - :local submits to a new LocalCluster (kept in @cluster);
  #                   any other value submits through StormSubmitter
  #
  # The on_submit block is executed with env once the topology is submitted.
  def start(base_class_path, env)
    self.class.resolve_ids!(self.class.components)

    builder = TopologyBuilder.new
    self.class.spouts.each do |spout|
      declarer = builder.setSpout(spout.id, spout.new_instance(base_class_path), spout.parallelism.to_java)
      declarer.addConfigurations(spout.config)
    end
    self.class.bolts.each do |bolt|
      declarer = builder.setBolt(bolt.id, bolt.new_instance(base_class_path), bolt.parallelism.to_java)
      declarer.addConfigurations(bolt.config)
      bolt.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

    configurator = Configurator.new(defaults)
    configurator.instance_exec(env, &self.class.configure_block)

    submitter = (env == :local) ? @cluster = LocalCluster.new : StormSubmitter
    submitter.submitTopology(self.class.topology_name, configurator.config, builder.createTopology)
    instance_exec(env, &self.class.submit_block)
  end

  # NOTE: `private` has no effect on the `def self.` methods below; they stay
  # public and are called with an explicit receiver (self.class.components,
  # SimpleTopology.underscore, ...).
  private

  # this is a quirk to figure out the topology class at load time when the topology file
  # is required in the TopologyLauncher. Since we want to make the "configure" DSL statement
  # optional we can hook into any/all the other DSL statements that will be called at load time
  # and set it there. This is somewhat inelegant but it works.
  def self.set_topology_class!
    Configuration.topology_class = self
  end

  # Validates the component ids: raises RuntimeError on a duplicate id or on
  # a bolt source id that matches no declared component.
  def self.resolve_ids!(components)
    ids = components.map(&:id)
    components.reverse.each do |component|
      # verify duplicate implicit ids
      raise("duplicate id in #{component.clazz.name} on id=#{component.id}") if ids.select{|id| id == component.id}.size > 1

      # verify source_id references
      if component.respond_to?(:sources)
        component.sources.each do |source_id, _grouping|
          raise("cannot resolve #{component.clazz.name} source id=#{source_id}") unless ids.include?(source_id)
        end
      end
    end
  end

  def self.spouts
    self.components.select{|c| c.is_a?(SpoutDefinition)}
  end

  def self.bolts
    self.components.select{|c| c.is_a?(BoltDefinition)}
  end

  def self.components
    @components ||= []
  end

  # Explicit name given to .configure, else the underscored class name.
  def self.topology_name
    @topology_name ||= self.underscore(self.name)
  end

  def self.configure_block
    @configure_block ||= lambda{|env|}
  end

  def self.submit_block
    @submit_block ||= lambda{|env|}
  end

  # Converts the last namespace element of a class (or name) to snake case,
  # e.g. "RedStorm::WordCountBolt" => "word_count_bolt".
  #
  # Always returns a String: uses the non-destructive downcase (downcase!
  # returns nil when the string has no uppercase characters) and tolerates an
  # empty name, for which split yields no elements.
  def self.underscore(camel_case)
    camel_case.to_s.split('::').last.to_s.gsub(/(.)([A-Z])/,'\1_\2').downcase
  end
end
219
- end