redstorm 0.6.5 → 0.6.6.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +12 -1
- data/README.md +66 -47
- data/Rakefile +1 -1
- data/examples/dsl/exclamation_bolt.rb +10 -0
- data/examples/{simple → dsl}/exclamation_topology.rb +5 -5
- data/examples/{simple → dsl}/exclamation_topology2.rb +5 -5
- data/examples/{simple → dsl}/hello_world_topology.rb +4 -4
- data/examples/{simple → dsl}/kafka_topology.rb +17 -18
- data/examples/{simple → dsl}/random_sentence_spout.rb +1 -1
- data/examples/{simple → dsl}/redis_word_count_topology.rb +6 -7
- data/examples/{simple → dsl}/ruby_version_topology.rb +9 -9
- data/examples/{simple → dsl}/split_sentence_bolt.rb +6 -6
- data/examples/{simple → dsl}/word_count_bolt.rb +2 -2
- data/examples/{simple → dsl}/word_count_topology.rb +6 -6
- data/examples/shell/shell_topology.rb +2 -2
- data/ivy/storm_dependencies.xml +2 -2
- data/ivy/topology_dependencies.xml +10 -2
- data/lib/red_storm.rb +6 -5
- data/lib/red_storm/application.rb +5 -5
- data/lib/red_storm/dsl/bolt.rb +155 -0
- data/lib/red_storm/dsl/drpc_topology.rb +92 -0
- data/lib/red_storm/dsl/spout.rb +194 -0
- data/lib/red_storm/dsl/topology.rb +227 -0
- data/lib/red_storm/dsl/tuple.rb +34 -0
- data/lib/red_storm/environment.rb +8 -8
- data/lib/red_storm/topology_launcher.rb +2 -2
- data/lib/red_storm/version.rb +1 -1
- data/lib/tasks/red_storm.rake +45 -27
- data/redstorm.gemspec +4 -4
- metadata +31 -34
- data/examples/simple/exclamation_bolt.rb +0 -10
- data/lib/red_storm/simple_bolt.rb +0 -135
- data/lib/red_storm/simple_drpc_topology.rb +0 -87
- data/lib/red_storm/simple_spout.rb +0 -184
- data/lib/red_storm/simple_topology.rb +0 -219
@@ -0,0 +1,194 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'red_storm/configurator'
|
3
|
+
require 'red_storm/environment'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
module RedStorm
|
7
|
+
module DSL
|
8
|
+
|
9
|
+
class SpoutError < StandardError; end
|
10
|
+
|
11
|
+
class Spout
|
12
|
+
attr_reader :config, :context, :collector
|
13
|
+
|
14
|
+
# DSL class methods
|
15
|
+
|
16
|
+
def self.configure(&configure_block)
|
17
|
+
@configure_block = block_given? ? configure_block : lambda {}
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.log
|
21
|
+
@log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.output_fields(*fields)
|
25
|
+
@fields = fields.map(&:to_s)
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.on_send(*args, &on_send_block)
|
29
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
30
|
+
method_name = args.first
|
31
|
+
|
32
|
+
self.send_options.merge!(options)
|
33
|
+
|
34
|
+
# indirecting through a lambda defers the method lookup at invocation time
|
35
|
+
# and the performance penalty is negligible
|
36
|
+
body = block_given? ? on_send_block : lambda{self.send((method_name || :on_send).to_sym)}
|
37
|
+
define_method(:on_send, body)
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.on_init(method_name = nil, &on_init_block)
|
41
|
+
body = block_given? ? on_init_block : lambda {self.send((method_name || :on_init).to_sym)}
|
42
|
+
define_method(:on_init, body)
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.on_close(method_name = nil, &on_close_block)
|
46
|
+
body = block_given? ? on_close_block : lambda {self.send((method_name || :on_close).to_sym)}
|
47
|
+
define_method(:on_close, body)
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.on_activate(method_name = nil, &on_activate_block)
|
51
|
+
# @on_activate_block = block_given? ? on_activate_block : lambda {self.send(method_name || :on_activate)}
|
52
|
+
body = block_given? ? on_activate_block : lambda {self.send((method_name || :on_activate).to_sym)}
|
53
|
+
define_method(:on_activate, body)
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.on_deactivate(method_name = nil, &on_deactivate_block)
|
57
|
+
# @on_deactivate_block = block_given? ? on_deactivate_block : lambda {self.send(method_name || :on_deactivate)}
|
58
|
+
body = block_given? ? on_deactivate_block : lambda {self.send((method_name || :on_deactivate).to_sym)}
|
59
|
+
define_method(:on_deactivate, body)
|
60
|
+
end
|
61
|
+
|
62
|
+
def self.on_ack(method_name = nil, &on_ack_block)
|
63
|
+
body = block_given? ? on_ack_block : lambda {|msg_id| self.send((method_name || :on_ack).to_sym, msg_id)}
|
64
|
+
define_method(:on_ack, body)
|
65
|
+
end
|
66
|
+
|
67
|
+
def self.on_fail(method_name = nil, &on_fail_block)
|
68
|
+
body = block_given? ? on_fail_block : lambda {|msg_id| self.send((method_name || :on_fail).to_sym, msg_id)}
|
69
|
+
define_method(:on_fail, body)
|
70
|
+
end
|
71
|
+
|
72
|
+
# DSL instance methods
|
73
|
+
|
74
|
+
def reliable_emit(message_id, *values)
|
75
|
+
@collector.emit(Values.new(*values), message_id)
|
76
|
+
end
|
77
|
+
|
78
|
+
def unreliable_emit(*values)
|
79
|
+
@collector.emit(Values.new(*values))
|
80
|
+
end
|
81
|
+
alias_method :emit, :unreliable_emit
|
82
|
+
|
83
|
+
def log
|
84
|
+
self.class.log
|
85
|
+
end
|
86
|
+
|
87
|
+
# Spout proxy interface
|
88
|
+
|
89
|
+
# Spout proxy interface: produce the next tuple.
#
# on_send is always invoked first. Its result is only emitted when the class
# is configured to emit (self.class.emit?):
# * a nil/false result means nothing is available, so we sleep 0.1s;
# * otherwise the result is wrapped in an array and flattened (nested arrays
#   are flattened too) to obtain the tuple values;
# * in reliable mode the first value is taken as the message id and the
#   remaining values are emitted with it, otherwise all values are emitted
#   without a message id.
#
# Returns nil when emitting is disabled, otherwise whatever sleep or the emit
# helper returned.
def next_tuple
  result = on_send

  return nil unless self.class.emit?
  return sleep(0.1) unless result

  payload = [result].flatten
  return unreliable_emit(*payload) unless self.class.reliable?

  id = payload.shift
  reliable_emit(id, *payload)
end
|
106
|
+
|
107
|
+
def open(config, context, collector)
|
108
|
+
@collector = collector
|
109
|
+
@context = context
|
110
|
+
@config = config
|
111
|
+
|
112
|
+
on_init
|
113
|
+
end
|
114
|
+
|
115
|
+
def close
|
116
|
+
on_close
|
117
|
+
end
|
118
|
+
|
119
|
+
def activate
|
120
|
+
on_activate
|
121
|
+
end
|
122
|
+
|
123
|
+
def deactivate
|
124
|
+
on_deactivate
|
125
|
+
end
|
126
|
+
|
127
|
+
def declare_output_fields(declarer)
|
128
|
+
declarer.declare(Fields.new(self.class.fields))
|
129
|
+
end
|
130
|
+
|
131
|
+
def ack(msg_id)
|
132
|
+
on_ack(msg_id)
|
133
|
+
end
|
134
|
+
|
135
|
+
def fail(msg_id)
|
136
|
+
on_fail(msg_id)
|
137
|
+
end
|
138
|
+
|
139
|
+
def get_component_configuration
|
140
|
+
configurator = Configurator.new
|
141
|
+
configurator.instance_exec(&self.class.configure_block)
|
142
|
+
configurator.config
|
143
|
+
end
|
144
|
+
|
145
|
+
private
|
146
|
+
|
147
|
+
# default optional noop dsl methods/callbacks
|
148
|
+
def on_init; end
|
149
|
+
def on_close; end
|
150
|
+
def on_activate; end
|
151
|
+
def on_deactivate; end
|
152
|
+
def on_ack(msg_id); end
|
153
|
+
def on_fail(msg_id); end
|
154
|
+
|
155
|
+
def self.fields
|
156
|
+
@fields ||= []
|
157
|
+
end
|
158
|
+
|
159
|
+
def self.configure_block
|
160
|
+
@configure_block ||= lambda {}
|
161
|
+
end
|
162
|
+
|
163
|
+
def self.send_options
|
164
|
+
@send_options ||= {:emit => true, :reliable => false}
|
165
|
+
end
|
166
|
+
|
167
|
+
def self.emit?
|
168
|
+
!!self.send_options[:emit]
|
169
|
+
end
|
170
|
+
|
171
|
+
def self.reliable?
|
172
|
+
!!self.send_options[:reliable]
|
173
|
+
end
|
174
|
+
|
175
|
+
# below non-dry see Bolt class
|
176
|
+
def self.inherited(subclass)
|
177
|
+
path = (caller.first.to_s =~ /^(.+):\d+.*$/) ? $1 : raise(SpoutError, "unable to extract base topology class path from #{caller.first.inspect}")
|
178
|
+
subclass.base_class_path = Pathname.new(path).relative_path_from(Pathname.new(RedStorm::BASE_PATH)).to_s
|
179
|
+
end
|
180
|
+
|
181
|
+
def self.base_class_path=(path)
|
182
|
+
@base_class_path = path
|
183
|
+
end
|
184
|
+
|
185
|
+
def self.base_class_path
|
186
|
+
@base_class_path
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
# for backward compatibility
|
192
|
+
SimpleSpout = DSL::Spout
|
193
|
+
|
194
|
+
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
require 'java'
|
2
|
+
require 'red_storm/configuration'
|
3
|
+
require 'red_storm/configurator'
|
4
|
+
|
5
|
+
java_import 'backtype.storm.topology.TopologyBuilder'
|
6
|
+
|
7
|
+
module RedStorm
|
8
|
+
module DSL
|
9
|
+
|
10
|
+
class TopologyDefinitionError < StandardError; end
|
11
|
+
|
12
|
+
class Topology
|
13
|
+
attr_reader :cluster # LocalCluster reference usable in on_submit block, for example
|
14
|
+
|
15
|
+
DEFAULT_SPOUT_PARALLELISM = 1
|
16
|
+
DEFAULT_BOLT_PARALLELISM = 1
|
17
|
+
|
18
|
+
class ComponentDefinition < Configurator
|
19
|
+
attr_reader :clazz, :constructor_args, :parallelism
|
20
|
+
attr_accessor :id # ids are forced to string
|
21
|
+
|
22
|
+
def initialize(component_class, constructor_args, id, parallelism)
|
23
|
+
super()
|
24
|
+
@clazz = component_class
|
25
|
+
@constructor_args = constructor_args
|
26
|
+
@id = id.to_s
|
27
|
+
@parallelism = parallelism
|
28
|
+
@output_fields = []
|
29
|
+
end
|
30
|
+
|
31
|
+
# Combined reader/writer for the component's output field names.
#
# Called without arguments it returns the current list; called with one or
# more names it replaces the list with their string forms and returns it.
def output_fields(*args)
  return @output_fields if args.empty?

  @output_fields = args.map { |field| field.to_s }
end
|
34
|
+
|
35
|
+
# True when the first '::'-separated segment of the component class name is
# "java", compared case-insensitively (e.g. "Java::Some::Klass").
def is_java?
  root_namespace = @clazz.name.split('::').first
  root_namespace.downcase == 'java'
end
|
38
|
+
end
|
39
|
+
|
40
|
+
class SpoutDefinition < ComponentDefinition
|
41
|
+
|
42
|
+
# WARNING non-dry see BoltDefinition#new_instance
|
43
|
+
def new_instance
|
44
|
+
if @clazz.name == "Java::RedstormStormJruby::JRubyShellSpout"
|
45
|
+
@clazz.new(constructor_args, @output_fields)
|
46
|
+
elsif is_java?
|
47
|
+
@clazz.new(*constructor_args)
|
48
|
+
else
|
49
|
+
JRubySpout.new(@clazz.base_class_path, @clazz.name, @output_fields)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
class BoltDefinition < ComponentDefinition
|
55
|
+
attr_accessor :sources, :command
|
56
|
+
|
57
|
+
def initialize(*args)
|
58
|
+
super
|
59
|
+
@sources = []
|
60
|
+
end
|
61
|
+
|
62
|
+
# Registers an input source for this bolt.
#
# source_id - a Class (converted to an id with Topology.underscore) or any
#             other object (converted with to_s).
# grouping  - either a Hash, stored as given, or a bare grouping key which is
#             stored as {grouping => nil}.
#
# Appends the [id, grouping] pair to @sources and returns @sources.
def source(source_id, grouping)
  resolved_id = source_id.is_a?(Class) ? Topology.underscore(source_id) : source_id.to_s
  normalized_grouping = grouping.is_a?(Hash) ? grouping : {grouping => nil}

  @sources << [resolved_id, normalized_grouping]
end
|
65
|
+
|
66
|
+
# Declares, on the given declarer, one input grouping per registered source.
#
# Each entry of @sources is a [source_id, grouping] pair. Only the first
# key/value of the grouping hash is used: the key selects the grouping kind
# and, for :fields, the value (a single name or a list of names) gives the
# grouping fields, which are converted to strings.
#
# Raises RuntimeError for an unrecognized grouping kind.
def define_grouping(declarer)
  @sources.each do |source_id, grouping|
    kind, params = grouping.first

    if :fields == kind
      names = [params].flatten.map(&:to_s)
      declarer.fieldsGrouping(source_id, Fields.new(*names))
    elsif :global == kind
      declarer.globalGrouping(source_id)
    elsif :shuffle == kind
      declarer.shuffleGrouping(source_id)
    elsif :local_or_shuffle == kind
      declarer.localOrShuffleGrouping(source_id)
    elsif :none == kind
      declarer.noneGrouping(source_id)
    elsif :all == kind
      declarer.allGrouping(source_id)
    elsif :direct == kind
      declarer.directGrouping(source_id)
    else
      raise("unknown grouper=#{kind.inspect}")
    end
  end
end
|
90
|
+
|
91
|
+
def new_instance
|
92
|
+
# WARNING non-dry see SpoutDefinition#new_instance
|
93
|
+
if @clazz.name == "Java::RedstormStormJruby::JRubyShellBolt"
|
94
|
+
@clazz.new(constructor_args, @output_fields)
|
95
|
+
elsif is_java?
|
96
|
+
@clazz.new(*constructor_args)
|
97
|
+
else
|
98
|
+
JRubyBolt.new(@clazz.base_class_path, @clazz.name, @output_fields)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def self.log
|
104
|
+
@log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
|
105
|
+
end
|
106
|
+
|
107
|
+
# def self.spout(spout_class, contructor_args = [], options = {}, &spout_block)
|
108
|
+
def self.spout(spout_class, *args, &spout_block)
|
109
|
+
set_topology_class!
|
110
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
111
|
+
contructor_args = !args.empty? ? args.pop : []
|
112
|
+
spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)
|
113
|
+
|
114
|
+
spout = SpoutDefinition.new(spout_class, contructor_args, spout_options[:id], spout_options[:parallelism])
|
115
|
+
spout.instance_exec(&spout_block) if block_given?
|
116
|
+
self.components << spout
|
117
|
+
end
|
118
|
+
|
119
|
+
# def self.bolt(bolt_class, contructor_args = [], options = {}, &bolt_block)
|
120
|
+
def self.bolt(bolt_class, *args, &bolt_block)
|
121
|
+
set_topology_class!
|
122
|
+
options = args.last.is_a?(Hash) ? args.pop : {}
|
123
|
+
contructor_args = !args.empty? ? args.pop : []
|
124
|
+
bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)
|
125
|
+
|
126
|
+
bolt = BoltDefinition.new(bolt_class, contructor_args, bolt_options[:id], bolt_options[:parallelism])
|
127
|
+
raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
|
128
|
+
bolt.instance_exec(&bolt_block)
|
129
|
+
self.components << bolt
|
130
|
+
end
|
131
|
+
|
132
|
+
def self.configure(name = nil, &configure_block)
|
133
|
+
set_topology_class!
|
134
|
+
@topology_name = name.to_s if name
|
135
|
+
@configure_block = configure_block if block_given?
|
136
|
+
end
|
137
|
+
|
138
|
+
# DSL statement registering the callback executed after topology submission.
#
# method_name  - optional name (Symbol or String) of the method to call with
#                the environment; defaults to :on_submit when omitted, in line
#                with the other DSL hooks which default to their own name.
# submit_block - optional block taking the environment; when given it takes
#                precedence over method_name.
#
# The stored callable is run through instance_exec by #start, so inside the
# lambda `self` is the object it is instance_exec'ed on, not this class.
# Previously calling on_submit with neither a block nor a name stored a lambda
# that did send(nil, env) and failed with a TypeError at submit time.
def self.on_submit(method_name = nil, &submit_block)
  @submit_block = block_given? ? submit_block : lambda {|env| self.send((method_name || :on_submit).to_sym, env)}
end
|
141
|
+
|
142
|
+
def self.build_topology
|
143
|
+
resolve_ids!(components)
|
144
|
+
|
145
|
+
builder = TopologyBuilder.new
|
146
|
+
spouts.each do |spout|
|
147
|
+
declarer = builder.setSpout(spout.id, spout.new_instance, spout.parallelism.to_java)
|
148
|
+
declarer.addConfigurations(spout.config)
|
149
|
+
end
|
150
|
+
bolts.each do |bolt|
|
151
|
+
declarer = builder.setBolt(bolt.id, bolt.new_instance, bolt.parallelism.to_java)
|
152
|
+
declarer.addConfigurations(bolt.config)
|
153
|
+
bolt.define_grouping(declarer)
|
154
|
+
end
|
155
|
+
builder.createTopology
|
156
|
+
end
|
157
|
+
|
158
|
+
def start(env)
|
159
|
+
topology = self.class.build_topology
|
160
|
+
|
161
|
+
# set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
|
162
|
+
defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}
|
163
|
+
|
164
|
+
configurator = Configurator.new(defaults)
|
165
|
+
configurator.instance_exec(env, &self.class.configure_block)
|
166
|
+
|
167
|
+
submitter = (env == :local) ? @cluster = LocalCluster.new : StormSubmitter
|
168
|
+
submitter.submitTopology(self.class.topology_name, configurator.config, topology)
|
169
|
+
instance_exec(env, &self.class.submit_block)
|
170
|
+
end
|
171
|
+
|
172
|
+
private
|
173
|
+
|
174
|
+
# this is a quirk to figure out the topology class at load time when the topology file
|
175
|
+
# is required in the TopologyLauncher. Since we want to make the "configure" DSL statement
|
176
|
+
# optional we can hook into any/all the other DSL statements that will be called at load time
|
177
|
+
# and set it there. This is somewhat inelegant but it works.
|
178
|
+
def self.set_topology_class!
|
179
|
+
Configuration.topology_class = self
|
180
|
+
end
|
181
|
+
|
182
|
+
# Validates the component ids of a topology definition.
#
# Walking the components from last to first, raises RuntimeError when
# * a component's id occurs more than once among all component ids, or
# * a component that responds to :sources references a source id that is not
#   the id of any component.
#
# Returns the reversed component list when everything checks out.
def self.resolve_ids!(components)
  known_ids = components.map { |c| c.id }

  components.reverse.each do |component|
    # verify duplicate implicit ids
    occurrences = known_ids.count { |known| known == component.id }
    raise("duplicate id in #{component.clazz.name} on id=#{component.id}") if occurrences > 1

    # verify source_id references
    next unless component.respond_to?(:sources)

    component.sources.each do |source_id, _grouping|
      next if known_ids.include?(source_id)
      raise("cannot resolve #{component.clazz.name} source id=#{source_id}")
    end
  end
end
|
193
|
+
|
194
|
+
def self.spouts
|
195
|
+
self.components.select{|c| c.is_a?(SpoutDefinition)}
|
196
|
+
end
|
197
|
+
|
198
|
+
def self.bolts
|
199
|
+
self.components.select{|c| c.is_a?(BoltDefinition)}
|
200
|
+
end
|
201
|
+
|
202
|
+
def self.components
|
203
|
+
@components ||= []
|
204
|
+
end
|
205
|
+
|
206
|
+
def self.topology_name
|
207
|
+
@topology_name ||= self.underscore(self.name)
|
208
|
+
end
|
209
|
+
|
210
|
+
def self.configure_block
|
211
|
+
@configure_block ||= lambda{|env|}
|
212
|
+
end
|
213
|
+
|
214
|
+
def self.submit_block
|
215
|
+
@submit_block ||= lambda{|env|}
|
216
|
+
end
|
217
|
+
|
218
|
+
# Converts a class or camel-case name into a lower-case underscored id.
#
# camel_case - anything responding to to_s (Class, String, Symbol). Only the
#              last '::' segment is kept, an underscore is inserted before
#              each capital letter that follows another character, and the
#              result is lower-cased.
#
# Always returns a String. The non-bang downcase is required here: downcase!
# returns nil when the string contains nothing to lower-case, which made an
# already lower-case name (e.g. "splitter") come back as nil.
def self.underscore(camel_case)
  camel_case.to_s.split('::').last.gsub(/(.)([A-Z])/, '\1_\2').downcase
end
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
# for backward compatibility
|
225
|
+
SimpleTopology = DSL::Topology
|
226
|
+
|
227
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
java_import 'backtype.storm.tuple.Tuple'
|
2
|
+
java_import 'backtype.storm.tuple.TupleImpl'
|
3
|
+
|
4
|
+
module RedStorm
|
5
|
+
module DSL
|
6
|
+
class TupleError < StandardError; end
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
# Ruby-friendly accessors added to TupleImpl (java_import'ed at the top of
# this file from backtype.storm.tuple).
class TupleImpl

  # Fetches a tuple value by position or by field name.
  #
  # i - an Integer position (delegates to getValue), or a String/Symbol field
  #     name (delegates to getValueByField with the string form).
  #
  # Raises RedStorm::DSL::TupleError for any other index class.
  #
  # Integer is matched instead of Fixnum: Fixnum has been deprecated since
  # Ruby 2.4 and no longer exists on 3.2+, where referencing it raises
  # NameError; on older runtimes Fixnum values are Integers so they still match.
  def value(i)
    case i
    when Integer
      getValue(i)
    when String
      getValueByField(i)
    when Symbol
      getValueByField(i.to_s)
    else
      raise(RedStorm::DSL::TupleError, "unsupported tuple index class=#{i.class.to_s} for #{i.inspect}")
    end
  end
  alias_method :[], :value

  # Position of the given field (String or Symbol), via fieldIndex.
  def field_index(field)
    fieldIndex(field.to_s)
  end

  # Whether the tuple contains the given field (String or Symbol), via contains.
  def contains?(field)
    contains(field.to_s)
  end

end
|