redstorm 0.6.5 → 0.6.6.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. data/CHANGELOG.md +12 -1
  2. data/README.md +66 -47
  3. data/Rakefile +1 -1
  4. data/examples/dsl/exclamation_bolt.rb +10 -0
  5. data/examples/{simple → dsl}/exclamation_topology.rb +5 -5
  6. data/examples/{simple → dsl}/exclamation_topology2.rb +5 -5
  7. data/examples/{simple → dsl}/hello_world_topology.rb +4 -4
  8. data/examples/{simple → dsl}/kafka_topology.rb +17 -18
  9. data/examples/{simple → dsl}/random_sentence_spout.rb +1 -1
  10. data/examples/{simple → dsl}/redis_word_count_topology.rb +6 -7
  11. data/examples/{simple → dsl}/ruby_version_topology.rb +9 -9
  12. data/examples/{simple → dsl}/split_sentence_bolt.rb +6 -6
  13. data/examples/{simple → dsl}/word_count_bolt.rb +2 -2
  14. data/examples/{simple → dsl}/word_count_topology.rb +6 -6
  15. data/examples/shell/shell_topology.rb +2 -2
  16. data/ivy/storm_dependencies.xml +2 -2
  17. data/ivy/topology_dependencies.xml +10 -2
  18. data/lib/red_storm.rb +6 -5
  19. data/lib/red_storm/application.rb +5 -5
  20. data/lib/red_storm/dsl/bolt.rb +155 -0
  21. data/lib/red_storm/dsl/drpc_topology.rb +92 -0
  22. data/lib/red_storm/dsl/spout.rb +194 -0
  23. data/lib/red_storm/dsl/topology.rb +227 -0
  24. data/lib/red_storm/dsl/tuple.rb +34 -0
  25. data/lib/red_storm/environment.rb +8 -8
  26. data/lib/red_storm/topology_launcher.rb +2 -2
  27. data/lib/red_storm/version.rb +1 -1
  28. data/lib/tasks/red_storm.rake +45 -27
  29. data/redstorm.gemspec +4 -4
  30. metadata +31 -34
  31. data/examples/simple/exclamation_bolt.rb +0 -10
  32. data/lib/red_storm/simple_bolt.rb +0 -135
  33. data/lib/red_storm/simple_drpc_topology.rb +0 -87
  34. data/lib/red_storm/simple_spout.rb +0 -184
  35. data/lib/red_storm/simple_topology.rb +0 -219
@@ -0,0 +1,194 @@
1
+ require 'java'
2
+ require 'red_storm/configurator'
3
+ require 'red_storm/environment'
4
+ require 'pathname'
5
+
6
module RedStorm
  module DSL

    # Raised when a spout subclass cannot be registered, e.g. when the path of
    # the file defining it cannot be extracted from the call stack.
    class SpoutError < StandardError; end

    # Base class for spouts written with the RedStorm DSL.
    #
    # Subclasses declare their behaviour with the class-level DSL statements
    # (output_fields, on_send, on_init, ...). Each on_* statement defines the
    # instance method of the same name, which the proxy interface methods
    # further below (next_tuple, open, close, ...) invoke.
    class Spout
      attr_reader :config, :context, :collector

      # DSL class methods

      # Stores the block that #get_component_configuration evaluates against a
      # Configurator. Without a block an empty lambda is stored.
      def self.configure(&configure_block)
        @configure_block = block_given? ? configure_block : lambda {}
      end

      # Per-class log4j logger, named after the class and created on first use.
      def self.log
        @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
      end

      # Declares the output field names; every name is converted to a String.
      def self.output_fields(*fields)
        @fields = fields.map(&:to_s)
      end

      # Defines the on_send callback invoked by #next_tuple.
      #
      # Accepts an optional method name and an optional trailing options Hash
      # that is merged into send_options (:emit and :reliable). With a block,
      # the block becomes the callback body; otherwise the callback forwards to
      # the named instance method (default :on_send).
      def self.on_send(*args, &on_send_block)
        options = args.last.is_a?(Hash) ? args.pop : {}
        method_name = args.first

        self.send_options.merge!(options)

        # indirecting through a lambda defers the method lookup at invocation time
        # and the performance penalty is negligible
        body = block_given? ? on_send_block : lambda { self.send((method_name || :on_send).to_sym) }
        define_method(:on_send, body)
      end

      # Defines the on_init callback invoked at the end of #open.
      def self.on_init(method_name = nil, &on_init_block)
        body = block_given? ? on_init_block : lambda { self.send((method_name || :on_init).to_sym) }
        define_method(:on_init, body)
      end

      # Defines the on_close callback invoked by #close.
      def self.on_close(method_name = nil, &on_close_block)
        body = block_given? ? on_close_block : lambda { self.send((method_name || :on_close).to_sym) }
        define_method(:on_close, body)
      end

      # Defines the on_activate callback invoked by #activate.
      def self.on_activate(method_name = nil, &on_activate_block)
        body = block_given? ? on_activate_block : lambda { self.send((method_name || :on_activate).to_sym) }
        define_method(:on_activate, body)
      end

      # Defines the on_deactivate callback invoked by #deactivate.
      def self.on_deactivate(method_name = nil, &on_deactivate_block)
        body = block_given? ? on_deactivate_block : lambda { self.send((method_name || :on_deactivate).to_sym) }
        define_method(:on_deactivate, body)
      end

      # Defines the on_ack callback invoked by #ack; it receives the message id.
      def self.on_ack(method_name = nil, &on_ack_block)
        body = block_given? ? on_ack_block : lambda { |msg_id| self.send((method_name || :on_ack).to_sym, msg_id) }
        define_method(:on_ack, body)
      end

      # Defines the on_fail callback invoked by #fail; it receives the message id.
      def self.on_fail(method_name = nil, &on_fail_block)
        body = block_given? ? on_fail_block : lambda { |msg_id| self.send((method_name || :on_fail).to_sym, msg_id) }
        define_method(:on_fail, body)
      end

      # Class-level state.
      #
      # These are singleton methods, which the `private` keyword does not
      # affect, and instances call them with an explicit receiver
      # (self.class.fields, self.class.emit?, ...). They are therefore kept in
      # the public section rather than below `private`.

      # Declared output field names, empty until output_fields is called.
      def self.fields
        @fields ||= []
      end

      # Block registered with configure, defaulting to an empty lambda.
      def self.configure_block
        @configure_block ||= lambda {}
      end

      # Options controlling #next_tuple; defaults to emitting unreliably.
      def self.send_options
        @send_options ||= {:emit => true, :reliable => false}
      end

      # True when #next_tuple should emit the value returned by on_send.
      def self.emit?
        !!self.send_options[:emit]
      end

      # True when #next_tuple should treat the first returned value as a message id.
      def self.reliable?
        !!self.send_options[:reliable]
      end

      # below non-dry see Bolt class
      #
      # Records, for every subclass, the path of the file that defines it,
      # relative to RedStorm::BASE_PATH. The path is taken from the first
      # caller frame. Raises SpoutError when no "path:line" can be extracted.
      def self.inherited(subclass)
        super
        location = caller.first
        path = location.to_s[/^(.+):\d+.*$/, 1]
        raise(SpoutError, "unable to extract base topology class path from #{location.inspect}") unless path
        subclass.base_class_path = Pathname.new(path).relative_path_from(Pathname.new(RedStorm::BASE_PATH)).to_s
      end

      def self.base_class_path=(path)
        @base_class_path = path
      end

      def self.base_class_path
        @base_class_path
      end

      # DSL instance methods

      # Emits the values as a tuple tagged with message_id.
      def reliable_emit(message_id, *values)
        @collector.emit(Values.new(*values), message_id)
      end

      # Emits the values as a tuple without a message id.
      def unreliable_emit(*values)
        @collector.emit(Values.new(*values))
      end
      alias_method :emit, :unreliable_emit

      def log
        self.class.log
      end

      # Spout proxy interface

      # Runs on_send and, when emitting is enabled, emits what it returned.
      # In reliable mode the first returned value is used as the message id.
      # When on_send returns nil/false, sleeps 0.1s instead of emitting.
      def next_tuple
        output = on_send

        if self.class.emit?
          if output
            values = [output].flatten
            if self.class.reliable?
              message_id = values.shift
              reliable_emit(message_id, *values)
            else
              unreliable_emit(*values)
            end
          else
            sleep(0.1)
          end
        end
      end

      # Stores the collector, context and config, then runs on_init.
      def open(config, context, collector)
        @collector = collector
        @context = context
        @config = config

        on_init
      end

      def close
        on_close
      end

      def activate
        on_activate
      end

      def deactivate
        on_deactivate
      end

      # Declares the class-level output fields on the given declarer.
      def declare_output_fields(declarer)
        declarer.declare(Fields.new(self.class.fields))
      end

      def ack(msg_id)
        on_ack(msg_id)
      end

      def fail(msg_id)
        on_fail(msg_id)
      end

      # Evaluates the configure block against a fresh Configurator and returns
      # the resulting config.
      def get_component_configuration
        configurator = Configurator.new
        configurator.instance_exec(&self.class.configure_block)
        configurator.config
      end

      private

      # default optional noop dsl methods/callbacks
      def on_init; end
      def on_close; end
      def on_activate; end
      def on_deactivate; end
      def on_ack(msg_id); end
      def on_fail(msg_id); end
    end
  end

  # for backward compatibility
  SimpleSpout = DSL::Spout

end
@@ -0,0 +1,227 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+ java_import 'backtype.storm.topology.TopologyBuilder'
6
+
7
+ module RedStorm
8
+ module DSL
9
+
10
# Raised when a topology definition is invalid.
class TopologyDefinitionError < StandardError
end
11
+
12
+ class Topology
13
+ attr_reader :cluster # LocalCluster reference usable in on_submit block, for example
14
+
15
+ DEFAULT_SPOUT_PARALLELISM = 1
16
+ DEFAULT_BOLT_PARALLELISM = 1
17
+
18
# Common description of a topology component (spout or bolt): the class to
# instantiate, its constructor arguments, its id and its parallelism.
class ComponentDefinition < Configurator
  attr_accessor :id # the constructor stores the id as a String
  attr_reader :clazz, :constructor_args, :parallelism

  def initialize(component_class, constructor_args, id, parallelism)
    super()
    @output_fields = []
    @parallelism = parallelism
    @id = id.to_s
    @constructor_args = constructor_args
    @clazz = component_class
  end

  # Without arguments returns the current output fields; with arguments
  # replaces them with the stringified arguments and returns the new list.
  def output_fields(*args)
    return @output_fields if args.empty?
    @output_fields = args.map(&:to_s)
  end

  # True when the first namespace segment of the class name is "java"
  # (compared case-insensitively).
  def is_java?
    top_level_namespace = @clazz.name.split('::').first
    top_level_namespace.downcase == 'java'
  end
end
39
+
40
# Definition of a spout component inside a topology.
class SpoutDefinition < ComponentDefinition

  # Builds the spout instance to register with the topology builder.
  # WARNING non-dry see BoltDefinition#new_instance
  def new_instance
    # the shell spout receives the argument list itself plus the output fields
    if @clazz.name == "Java::RedstormStormJruby::JRubyShellSpout"
      return @clazz.new(constructor_args, @output_fields)
    end

    # any other Java class is constructed directly with the splatted arguments
    return @clazz.new(*constructor_args) if is_java?

    # remaining classes are wrapped in a JRubySpout built from the class's
    # base_class_path, its name and the output fields
    JRubySpout.new(@clazz.base_class_path, @clazz.name, @output_fields)
  end
end
53
+
54
# Definition of a bolt component inside a topology, including the sources it
# consumes from and the grouping used for each of them.
class BoltDefinition < ComponentDefinition
  attr_accessor :sources, :command

  def initialize(*args)
    super
    @sources = []
  end

  # Registers an input source. A Class source is converted to its underscored
  # id, anything else to a String. A non-Hash grouping (e.g. :shuffle) is
  # normalized to {grouping => nil}.
  def source(source_id, grouping)
    resolved_id = source_id.is_a?(Class) ? Topology.underscore(source_id) : source_id.to_s
    normalized_grouping = grouping.is_a?(Hash) ? grouping : {grouping => nil}
    @sources << [resolved_id, normalized_grouping]
  end

  # Applies every registered source/grouping pair to the given declarer.
  # Only the first entry of each grouping Hash is used.
  # Raises a RuntimeError for an unknown grouper.
  def define_grouping(declarer)
    @sources.each do |source_id, grouping|
      grouper, params = grouping.first

      case grouper
      when :fields then declarer.fieldsGrouping(source_id, Fields.new(*([params].flatten.map(&:to_s))))
      when :global then declarer.globalGrouping(source_id)
      when :shuffle then declarer.shuffleGrouping(source_id)
      when :local_or_shuffle then declarer.localOrShuffleGrouping(source_id)
      when :none then declarer.noneGrouping(source_id)
      when :all then declarer.allGrouping(source_id)
      when :direct then declarer.directGrouping(source_id)
      else raise("unknown grouper=#{grouper.inspect}")
      end
    end
  end

  # Builds the bolt instance to register with the topology builder.
  # WARNING non-dry see SpoutDefinition#new_instance
  def new_instance
    # the shell bolt receives the argument list itself plus the output fields
    if @clazz.name == "Java::RedstormStormJruby::JRubyShellBolt"
      return @clazz.new(constructor_args, @output_fields)
    end

    # any other Java class is constructed directly with the splatted arguments
    return @clazz.new(*constructor_args) if is_java?

    # remaining classes are wrapped in a JRubyBolt built from the class's
    # base_class_path, its name and the output fields
    JRubyBolt.new(@clazz.base_class_path, @clazz.name, @output_fields)
  end
end
102
+
103
# Log4j logger named after the receiver, created on first use and then reused.
def self.log
  return @log if @log
  @log = Java::OrgApacheLog4j::Logger.getLogger(self.name)
end
106
+
107
+ # def self.spout(spout_class, constructor_args = [], options = {}, &spout_block)
108
# DSL statement declaring a spout.
#
# After the class, accepts an optional constructor-arguments value and an
# optional trailing options Hash (:id, :parallelism). The id defaults to the
# underscored class name. The optional block is evaluated in the context of
# the new SpoutDefinition, which is then appended to the components.
def self.spout(spout_class, *args, &spout_block)
  set_topology_class!
  options = args.last.is_a?(Hash) ? args.pop : {}
  constructor_args = args.empty? ? [] : args.pop
  defaults = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}
  settings = defaults.merge(options)

  definition = SpoutDefinition.new(spout_class, constructor_args, settings[:id], settings[:parallelism])
  if block_given?
    definition.instance_exec(&spout_block)
  end
  self.components << definition
end
118
+
119
+ # def self.bolt(bolt_class, constructor_args = [], options = {}, &bolt_block)
120
# DSL statement declaring a bolt.
#
# After the class, accepts an optional constructor-arguments value and an
# optional trailing options Hash (:id, :parallelism). The id defaults to the
# underscored class name. A block is mandatory: it is evaluated in the
# context of the new BoltDefinition, which is then appended to the components.
#
# Raises TopologyDefinitionError when no block is given.
def self.bolt(bolt_class, *args, &bolt_block)
  set_topology_class!
  options = args.last.is_a?(Hash) ? args.pop : {}
  constructor_args = args.empty? ? [] : args.pop
  defaults = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}
  settings = defaults.merge(options)

  definition = BoltDefinition.new(bolt_class, constructor_args, settings[:id], settings[:parallelism])
  unless block_given?
    raise(TopologyDefinitionError, "#{definition.clazz.name}, #{definition.id}, bolt definition body required")
  end
  definition.instance_exec(&bolt_block)
  self.components << definition
end
131
+
132
# DSL statement setting the topology name and/or the configuration block.
# Both are optional; a value that is not supplied leaves the current one
# untouched. Returns the stored block, or nil when no block was given.
def self.configure(name = nil, &configure_block)
  set_topology_class!
  if name
    @topology_name = name.to_s
  end
  return unless block_given?
  @configure_block = configure_block
end
137
+
138
# DSL statement registering the callback run after the topology is submitted.
#
# With a block, the block is stored as the callback. Otherwise the callback
# forwards to the instance method named by method_name, passing env. When
# neither is given it forwards to :on_submit, following the convention of the
# spout DSL callbacks, instead of failing later on send(nil, env).
def self.on_submit(method_name = nil, &submit_block)
  @submit_block = block_given? ? submit_block : lambda { |env| self.send((method_name || :on_submit).to_sym, env) }
end
141
+
142
# Validates the component ids, registers every spout and bolt definition on a
# new TopologyBuilder (instance, parallelism, configuration and, for bolts,
# groupings) and returns the created topology.
def self.build_topology
  resolve_ids!(components)

  topology_builder = TopologyBuilder.new

  spouts.each do |definition|
    spout_declarer = topology_builder.setSpout(definition.id, definition.new_instance, definition.parallelism.to_java)
    spout_declarer.addConfigurations(definition.config)
  end

  bolts.each do |definition|
    bolt_declarer = topology_builder.setBolt(definition.id, definition.new_instance, definition.parallelism.to_java)
    bolt_declarer.addConfigurations(definition.config)
    definition.define_grouping(bolt_declarer)
  end

  topology_builder.createTopology
end
157
+
158
# Builds the topology, evaluates the configure block against a Configurator
# and submits the topology: to a new LocalCluster (kept in @cluster) when env
# is :local, otherwise through StormSubmitter. Finally runs the submit block
# in the context of this instance.
def start(env)
  topology = self.class.build_topology

  # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
  worker_defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

  configurator = Configurator.new(worker_defaults)
  configurator.instance_exec(env, &self.class.configure_block)

  if env == :local
    @cluster = LocalCluster.new
    submitter = @cluster
  else
    submitter = StormSubmitter
  end

  submitter.submitTopology(self.class.topology_name, configurator.config, topology)
  instance_exec(env, &self.class.submit_block)
end
171
+
172
+ private
173
+
174
+ # this is a quirk to figure out the topology class at load time when the topology file
175
+ # is required in the TopologyLauncher. Since we want to make the "configure" DSL statement
176
+ # optional we can hook into any/all the other DSL statements that will be called at load time
177
+ # and set it there. This is somewhat inelegant but it works.
178
# Records the receiver as the topology class in Configuration.
def self.set_topology_class!
  Configuration.topology_class = self
end
181
+
182
# Validates the ids of the given component definitions.
#
# Walks the components from last to first and raises a RuntimeError when
# - a component id is used by more than one component, or
# - a component that has sources references a source id no component declares.
#
# The ids are counted once up front, so each check is a hash lookup instead of
# a scan of the whole id list per component.
def self.resolve_ids!(components)
  occurrences = Hash.new(0)
  components.each { |component| occurrences[component.id] += 1 }

  components.reverse_each do |component|
    # verify duplicate implicit ids
    raise("duplicate id in #{component.clazz.name} on id=#{component.id}") if occurrences[component.id] > 1

    # verify source_id references
    next unless component.respond_to?(:sources)
    component.sources.each do |source_id, _grouping|
      raise("cannot resolve #{component.clazz.name} source id=#{source_id}") unless occurrences.key?(source_id)
    end
  end
end
193
+
194
# Registered components that are spout definitions, in declaration order.
def self.spouts
  self.components.grep(SpoutDefinition)
end
197
+
198
# Registered components that are bolt definitions, in declaration order.
def self.bolts
  self.components.grep(BoltDefinition)
end
201
+
202
# List of registered component definitions, created empty on first access.
def self.components
  @components = [] unless @components
  @components
end
205
+
206
# Topology name: the one set through configure, otherwise the underscored
# name of the receiver, computed once and then remembered.
def self.topology_name
  return @topology_name if @topology_name
  @topology_name = self.underscore(self.name)
end
209
+
210
# Configuration block set through configure; defaults to a no-op lambda
# taking the environment.
def self.configure_block
  @configure_block = lambda { |env| } unless @configure_block
  @configure_block
end
213
+
214
# Submit callback set through on_submit; defaults to a no-op lambda taking
# the environment.
def self.submit_block
  @submit_block = lambda { |env| } unless @submit_block
  @submit_block
end
217
+
218
# Converts a class or a camel-case name to its underscored, lower-case form,
# keeping only the last "::" segment, e.g. "Foo::WordCountBolt" becomes
# "word_count_bolt".
#
# Uses the non-destructive downcase: downcase! returns nil when the string is
# already lower-case, which made this method return nil for such names.
def self.underscore(camel_case)
  camel_case.to_s.split('::').last.gsub(/(.)([A-Z])/, '\1_\2').downcase
end
221
+ end
222
+ end
223
+
224
+ # for backward compatibility
225
+ SimpleTopology = DSL::Topology
226
+
227
+ end
@@ -0,0 +1,34 @@
1
+ java_import 'backtype.storm.tuple.Tuple'
2
+ java_import 'backtype.storm.tuple.TupleImpl'
3
+
4
module RedStorm
  module DSL
    # Raised by TupleImpl#value for an index that is neither a position nor a
    # field name.
    class TupleError < StandardError
    end
  end
end
9
+
10
# Ruby-friendly accessors added to Storm's TupleImpl.
class TupleImpl

  # Returns a tuple value by position (Integer) or by field name (String or
  # Symbol). Also available as #[].
  #
  # Matches on Integer rather than Fixnum: Fixnum is deprecated since Ruby 2.4
  # and removed in Ruby 3.2, while Integer is available on every version.
  #
  # Raises RedStorm::DSL::TupleError for any other index class.
  def value(i)
    case i
    when Integer
      getValue(i)
    when String
      getValueByField(i)
    when Symbol
      getValueByField(i.to_s)
    else
      raise(RedStorm::DSL::TupleError, "unsupported tuple index class=#{i.class.to_s} for #{i.inspect}")
    end
  end
  alias_method :[], :value

  # Position of the given field; the name is converted to a String first.
  def field_index(field)
    fieldIndex(field.to_s)
  end

  # Whether the tuple has the given field; the name is converted to a String first.
  def contains?(field)
    contains(field.to_s)
  end

end