kb-redstorm 0.6.5 → 0.6.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +9 -0
- data/README.md +206 -103
- data/examples/native/cluster_word_count_topology.rb +5 -5
- data/examples/native/local_exclamation_topology.rb +8 -8
- data/examples/native/local_exclamation_topology2.rb +7 -7
- data/examples/native/local_redis_word_count_topology.rb +7 -8
- data/examples/native/local_word_count_topology.rb +5 -5
- data/examples/simple/exclamation_topology.rb +7 -11
- data/examples/simple/exclamation_topology2.rb +10 -12
- data/examples/simple/hello_world_topology.rb +22 -0
- data/examples/simple/kafka_topology.rb +15 -15
- data/examples/simple/redis_word_count_topology.rb +3 -5
- data/examples/simple/ruby_version_topology.rb +7 -1
- data/examples/simple/word_count_topology.rb +8 -10
- data/ivy/settings.xml +1 -0
- data/ivy/storm_dependencies.xml +8 -0
- data/ivy/topology_dependencies.xml +7 -0
- data/lib/red_storm/application.rb +7 -5
- data/lib/red_storm/configurator.rb +1 -1
- data/lib/red_storm/proxy/batch_bolt.rb +63 -0
- data/lib/red_storm/proxy/batch_committer_bolt.rb +52 -0
- data/lib/red_storm/proxy/batch_spout.rb +12 -24
- data/lib/red_storm/proxy/proxy_function.rb +1 -9
- data/lib/red_storm/proxy/transactional_committer_spout.rb +47 -0
- data/lib/red_storm/proxy/transactional_spout.rb +46 -0
- data/lib/red_storm/simple_drpc_topology.rb +2 -2
- data/lib/red_storm/simple_topology.rb +14 -4
- data/lib/red_storm/topology_launcher.rb +16 -0
- data/lib/red_storm/version.rb +1 -1
- data/lib/tasks/red_storm.rake +69 -108
- data/redstorm.gemspec +24 -0
- data/src/main/redstorm/storm/jruby/JRubyBatchBolt.java +90 -0
- data/src/main/redstorm/storm/jruby/JRubyBatchCommitterBolt.java +9 -0
- data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +25 -26
- data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +1 -9
- data/src/main/redstorm/storm/jruby/JRubyTransactionalBolt.java +90 -0
- data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterBolt.java +31 -0
- data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterSpout.java +44 -0
- data/src/main/redstorm/storm/jruby/JRubyTransactionalSpout.java +89 -0
- metadata +80 -62
- data/examples/native/Gemfile +0 -2
@@ -106,6 +106,7 @@ module RedStorm
|
|
106
106
|
|
107
107
|
# def self.spout(spout_class, contructor_args = [], options = {}, &spout_block)
|
108
108
|
def self.spout(spout_class, *args, &spout_block)
|
109
|
+
set_topology_class!
|
109
110
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
110
111
|
contructor_args = !args.empty? ? args.pop : []
|
111
112
|
spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)
|
@@ -117,9 +118,10 @@ module RedStorm
|
|
117
118
|
|
118
119
|
# def self.bolt(bolt_class, contructor_args = [], options = {}, &bolt_block)
|
119
120
|
def self.bolt(bolt_class, *args, &bolt_block)
|
121
|
+
set_topology_class!
|
120
122
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
121
123
|
contructor_args = !args.empty? ? args.pop : []
|
122
|
-
bolt_options = {:id =>
|
124
|
+
bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)
|
123
125
|
|
124
126
|
bolt = BoltDefinition.new(bolt_class, contructor_args, bolt_options[:id], bolt_options[:parallelism])
|
125
127
|
raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
|
@@ -128,8 +130,8 @@ module RedStorm
|
|
128
130
|
end
|
129
131
|
|
130
132
|
def self.configure(name = nil, &configure_block)
|
131
|
-
|
132
|
-
@topology_name = name if name
|
133
|
+
set_topology_class!
|
134
|
+
@topology_name = name.to_s if name
|
133
135
|
@configure_block = configure_block if block_given?
|
134
136
|
end
|
135
137
|
|
@@ -158,7 +160,7 @@ module RedStorm
|
|
158
160
|
|
159
161
|
configurator = Configurator.new(defaults)
|
160
162
|
configurator.instance_exec(env, &self.class.configure_block)
|
161
|
-
|
163
|
+
|
162
164
|
submitter = (env == :local) ? @cluster = LocalCluster.new : StormSubmitter
|
163
165
|
submitter.submitTopology(self.class.topology_name, configurator.config, builder.createTopology)
|
164
166
|
instance_exec(env, &self.class.submit_block)
|
@@ -166,6 +168,14 @@ module RedStorm
|
|
166
168
|
|
167
169
|
private
|
168
170
|
|
171
|
+
# this is a quirk to figure out the topology class at load time when the topology file
|
172
|
+
# is required in the TopologyLauncher. Since we want to make the "configure" DSL statement
|
173
|
+
# optional we can hook into any/all the other DSL statements that will be called at load time
|
174
|
+
# and set it there. This is somewhat inelegant but it works.
|
175
|
+
def self.set_topology_class!
|
176
|
+
Configuration.topology_class = self
|
177
|
+
end
|
178
|
+
|
169
179
|
def self.resolve_ids!(components)
|
170
180
|
# verify duplicate implicit ids
|
171
181
|
ids = components.map(&:id)
|
@@ -1,5 +1,10 @@
|
|
1
1
|
require 'java'
|
2
2
|
|
3
|
+
# This hack gets rid of the "Use RbConfig instead of obsolete and deprecated Config"
|
4
|
+
# deprecation warning that is triggered by "java_import 'backtype.storm.Config'".
|
5
|
+
Object.send :remove_const, :Config
|
6
|
+
Config = RbConfig
|
7
|
+
|
3
8
|
# see https://github.com/colinsurprenant/redstorm/issues/7
|
4
9
|
module Backtype
|
5
10
|
java_import 'backtype.storm.Config'
|
@@ -9,6 +14,7 @@ java_import 'backtype.storm.LocalCluster'
|
|
9
14
|
java_import 'backtype.storm.LocalDRPC'
|
10
15
|
java_import 'backtype.storm.StormSubmitter'
|
11
16
|
java_import 'backtype.storm.topology.TopologyBuilder'
|
17
|
+
java_import 'backtype.storm.coordination.BatchBoltExecutor'
|
12
18
|
java_import 'backtype.storm.drpc.LinearDRPCTopologyBuilder'
|
13
19
|
java_import 'backtype.storm.tuple.Fields'
|
14
20
|
java_import 'backtype.storm.tuple.Tuple'
|
@@ -16,7 +22,12 @@ java_import 'backtype.storm.tuple.Values'
|
|
16
22
|
|
17
23
|
java_import 'redstorm.storm.jruby.JRubyBolt'
|
18
24
|
java_import 'redstorm.storm.jruby.JRubySpout'
|
25
|
+
java_import 'redstorm.storm.jruby.JRubyBatchBolt'
|
26
|
+
java_import 'redstorm.storm.jruby.JRubyBatchCommitterBolt'
|
19
27
|
java_import 'redstorm.storm.jruby.JRubyBatchSpout'
|
28
|
+
java_import 'redstorm.storm.jruby.JRubyTransactionalSpout'
|
29
|
+
java_import 'redstorm.storm.jruby.JRubyTransactionalBolt'
|
30
|
+
java_import 'redstorm.storm.jruby.JRubyTransactionalCommitterBolt'
|
20
31
|
|
21
32
|
java_package 'redstorm'
|
22
33
|
|
@@ -41,6 +52,11 @@ class TopologyLauncher
|
|
41
52
|
|
42
53
|
require "#{class_path}"
|
43
54
|
|
55
|
+
if RedStorm::Configuration.topology_class.nil? || !RedStorm::Configuration.topology_class.method_defined?(:start)
|
56
|
+
puts("\nERROR: invalid topology class. make sure your topology class is a subclass of one of the DSL topology classes or that your class sets RedStorm::Configuration.topology_class and defines the start method\n\n")
|
57
|
+
exit(1)
|
58
|
+
end
|
59
|
+
|
44
60
|
topology_name = RedStorm::Configuration.topology_class.respond_to?(:topology_name) ? "/#{RedStorm::Configuration.topology_class.topology_name}" : ''
|
45
61
|
puts("RedStorm v#{RedStorm::VERSION} starting topology #{RedStorm::Configuration.topology_class.name}#{topology_name} in #{env.to_s} environment")
|
46
62
|
RedStorm::Configuration.topology_class.new.start(class_path, env)
|
data/lib/red_storm/version.rb
CHANGED
data/lib/tasks/red_storm.rake
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
begin
|
2
2
|
require 'ant'
|
3
3
|
rescue
|
4
|
-
puts("
|
5
|
-
puts(" and $ANT_HOME is defined properly.")
|
4
|
+
puts("ERROR: unable to load Ant, make sure Ant is installed, in your PATH and $ANT_HOME is defined properly")
|
6
5
|
puts("\nerror detail:\n#{$!}")
|
7
6
|
exit(1)
|
8
7
|
end
|
@@ -11,29 +10,18 @@ require 'jruby/jrubyc'
|
|
11
10
|
require 'red_storm'
|
12
11
|
require 'red_storm/application'
|
13
12
|
|
14
|
-
|
15
|
-
DEP_JRUBY_VERSION = "1.7.3"
|
16
|
-
INSTALL_IVY_VERSION = "2.2.0"
|
17
|
-
|
18
|
-
DEFAULT_DEPENDENCIES = {
|
19
|
-
:storm_artifacts => [
|
20
|
-
"storm:storm:#{DEP_STORM_VERSION}, transitive=true",
|
21
|
-
],
|
22
|
-
:topology_artifacts => [
|
23
|
-
"org.jruby:jruby-complete:#{DEP_JRUBY_VERSION}, transitive=false",
|
24
|
-
],
|
25
|
-
}
|
13
|
+
INSTALL_IVY_VERSION = "2.3.0"
|
26
14
|
|
27
15
|
task :launch, :env, :ruby_mode, :class_file do |t, args|
|
28
16
|
# use ruby mode parameter or default to current interpreter version
|
29
17
|
version_token = RedStorm.jruby_mode_token(args[:ruby_mode])
|
30
|
-
|
18
|
+
|
31
19
|
command = case args[:env]
|
32
20
|
when "local"
|
33
21
|
RedStorm::Application.local_storm_command(args[:class_file], args[:ruby_mode])
|
34
22
|
when "cluster"
|
35
23
|
unless File.exist?(TARGET_CLUSTER_JAR)
|
36
|
-
puts("error: cluster jar file #{TARGET_CLUSTER_JAR} not found. Generate it
|
24
|
+
puts("error: cluster jar file #{TARGET_CLUSTER_JAR} not found. Generate it using $redstorm jar DIR1 [DIR2, ...]")
|
37
25
|
exit(1)
|
38
26
|
end
|
39
27
|
RedStorm::Application.cluster_storm_command(args[:class_file], args[:ruby_mode])
|
@@ -46,25 +34,25 @@ task :launch, :env, :ruby_mode, :class_file do |t, args|
|
|
46
34
|
end
|
47
35
|
|
48
36
|
task :clean do
|
49
|
-
ant.delete
|
37
|
+
ant.delete 'dir' => TARGET_DIR
|
50
38
|
end
|
51
39
|
|
52
40
|
task :clean_jar do
|
53
|
-
ant.delete
|
41
|
+
ant.delete 'file' => TARGET_CLUSTER_JAR
|
54
42
|
end
|
55
43
|
|
56
44
|
task :setup do
|
57
45
|
puts("\n--> Setting up target directories")
|
58
|
-
ant.mkdir
|
59
|
-
ant.mkdir
|
60
|
-
ant.mkdir
|
61
|
-
ant.mkdir
|
62
|
-
ant.mkdir
|
63
|
-
ant.mkdir
|
64
|
-
ant.path
|
65
|
-
fileset
|
66
|
-
fileset
|
67
|
-
end
|
46
|
+
ant.mkdir 'dir' => TARGET_DIR
|
47
|
+
ant.mkdir 'dir' => TARGET_CLASSES_DIR
|
48
|
+
ant.mkdir 'dir' => TARGET_DEPENDENCY_DIR
|
49
|
+
ant.mkdir 'dir' => TARGET_SRC_DIR
|
50
|
+
ant.mkdir 'dir' => TARGET_GEM_DIR
|
51
|
+
ant.mkdir 'dir' => TARGET_SPECS_DIR
|
52
|
+
ant.path 'id' => 'classpath' do
|
53
|
+
fileset 'dir' => TARGET_DEPENDENCY_DIR
|
54
|
+
fileset 'dir' => TARGET_CLASSES_DIR
|
55
|
+
end
|
68
56
|
end
|
69
57
|
|
70
58
|
task :install => [:deps, :build] do
|
@@ -112,18 +100,23 @@ task :bundle, [:groups] => :setup do |t, args|
|
|
112
100
|
defaulted_args = {:groups => 'default'}.merge(args.to_hash.delete_if{|k, v| v.to_s.empty?})
|
113
101
|
groups = defaulted_args[:groups].split(':').map(&:to_sym)
|
114
102
|
Bundler.definition.specs_for(groups).each do |spec|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
103
|
+
next if spec.name == 'bundler'
|
104
|
+
|
105
|
+
# try to avoid infinite recursion
|
106
|
+
next if TARGET_GEM_DIR.start_with?(spec.full_gem_path)
|
107
|
+
|
108
|
+
destination_path = "#{TARGET_GEM_DIR}/#{spec.full_name}"
|
109
|
+
next if File.directory?(destination_path)
|
110
|
+
|
111
|
+
puts("installing gem #{spec.full_name} into #{destination_path}")
|
112
|
+
# copy the actual gem dir
|
113
|
+
FileUtils.cp_r(spec.full_gem_path, destination_path)
|
114
|
+
# copy the evaluated gemspec into the specifications/ dir (we
|
115
|
+
# may not have enough info to reconstruct once we delete the
|
116
|
+
# .git directory)
|
117
|
+
File.open(File.join(TARGET_SPECS_DIR, File.basename(spec.loaded_from)), 'w'){|f| f.write(spec.to_ruby)}
|
118
|
+
# strip the .git directory from git dependencies, it can be huge
|
119
|
+
FileUtils.rm_rf("#{destination_path}/.git")
|
127
120
|
end
|
128
121
|
end
|
129
122
|
|
@@ -131,21 +124,21 @@ namespace :ivy do
|
|
131
124
|
task :download do
|
132
125
|
mkdir_p DST_IVY_DIR
|
133
126
|
ant.get({
|
134
|
-
|
135
|
-
|
136
|
-
|
127
|
+
'src' => "http://repo1.maven.org/maven2/org/apache/ivy/ivy/#{INSTALL_IVY_VERSION}/ivy-#{INSTALL_IVY_VERSION}.jar",
|
128
|
+
'dest' => "#{DST_IVY_DIR}/ivy-#{INSTALL_IVY_VERSION}.jar",
|
129
|
+
'usetimestamp' => true,
|
137
130
|
})
|
138
131
|
end
|
139
132
|
|
140
133
|
task :install => :download do
|
141
|
-
ant.path
|
142
|
-
fileset
|
134
|
+
ant.path 'id' => 'ivy.lib.path' do
|
135
|
+
fileset 'dir' => DST_IVY_DIR, 'includes' => '*.jar'
|
143
136
|
end
|
144
137
|
|
145
138
|
ant.taskdef({
|
146
|
-
|
147
|
-
|
148
|
-
|
139
|
+
'resource' => "org/apache/ivy/ant/antlib.xml",
|
140
|
+
'classpathref' => "ivy.lib.path",
|
141
|
+
#'uri' => "antlib:org.apache.ivy.ant",
|
149
142
|
})
|
150
143
|
end
|
151
144
|
end
|
@@ -153,37 +146,32 @@ end
|
|
153
146
|
task :deps => "ivy:install" do
|
154
147
|
puts("\n--> Installing dependencies")
|
155
148
|
|
156
|
-
|
157
|
-
ant.configure :file => File.exists?(CUSTOM_IVY_SETTINGS) ? CUSTOM_IVY_SETTINGS : DEFAULT_IVY_SETTINGS
|
149
|
+
ant.configure 'file' => File.exists?(CUSTOM_IVY_SETTINGS) ? CUSTOM_IVY_SETTINGS : DEFAULT_IVY_SETTINGS
|
158
150
|
|
159
|
-
|
160
|
-
|
161
|
-
ivy_retrieve(*artifact.split(':').concat([transitive.split(/\s*=\s*/).last, "#{TARGET_DEPENDENCY_DIR}/storm", "default"]))
|
162
|
-
end
|
151
|
+
ant.resolve 'file' => File.exists?(CUSTOM_IVY_STORM_DEPENDENCIES) ? CUSTOM_IVY_STORM_DEPENDENCIES : DEFAULT_IVY_STORM_DEPENDENCIES
|
152
|
+
ant.retrieve 'pattern' => "#{TARGET_DEPENDENCY_DIR}/storm/[conf]/[artifact]-[revision].[ext]", 'sync' => "true"
|
163
153
|
|
164
|
-
|
165
|
-
|
166
|
-
ivy_retrieve(*artifact.split(':').concat([transitive.split(/\s*=\s*/).last, "#{TARGET_DEPENDENCY_DIR}/topology", "default"]))
|
167
|
-
end
|
154
|
+
ant.resolve 'file' => File.exists?(CUSTOM_IVY_TOPOLOGY_DEPENDENCIES) ? CUSTOM_IVY_TOPOLOGY_DEPENDENCIES : DEFAULT_IVY_TOPOLOGY_DEPENDENCIES
|
155
|
+
ant.retrieve 'pattern' => "#{TARGET_DEPENDENCY_DIR}/topology/[conf]/[artifact]-[revision].[ext]", 'sync' => "true"
|
168
156
|
end
|
169
157
|
|
170
158
|
task :jar, [:include_dir] => [:clean_jar] do |t, args|
|
171
159
|
puts("\n--> Generating JAR file #{TARGET_CLUSTER_JAR}")
|
172
160
|
|
173
|
-
ant.jar
|
161
|
+
ant.jar 'destfile' => TARGET_CLUSTER_JAR do
|
174
162
|
# rejar all topology jars
|
175
163
|
Dir["target/dependency/topology/default/*.jar"].each do |jar|
|
176
164
|
puts("Extracting #{jar}")
|
177
|
-
zipfileset
|
165
|
+
zipfileset 'src' => jar, 'includes' => "**/*"
|
178
166
|
end
|
179
|
-
fileset
|
180
|
-
include
|
167
|
+
fileset 'dir' => TARGET_DIR do
|
168
|
+
include 'name' => "gems/**"
|
181
169
|
end
|
182
|
-
fileset
|
170
|
+
fileset 'dir' => TARGET_CLASSES_DIR
|
183
171
|
# red_storm.rb and red_storm/* must be in root of jar so that "require 'red_storm'"
|
184
172
|
# in bolts/spouts works in jar context
|
185
|
-
fileset
|
186
|
-
exclude
|
173
|
+
fileset 'dir' => TARGET_LIB_DIR do
|
174
|
+
exclude 'name' => "tasks/**"
|
187
175
|
end
|
188
176
|
if args[:include_dir]
|
189
177
|
dirs = args[:include_dir].split(":")
|
@@ -193,19 +181,19 @@ task :jar, [:include_dir] => [:clean_jar] do |t, args|
|
|
193
181
|
resources_dirs = Dir.glob("#{dir}/**/resources")
|
194
182
|
resources_dirs.each do |resources_dir|
|
195
183
|
resources_parent = resources_dir.gsub("/resources", "")
|
196
|
-
fileset
|
197
|
-
include
|
184
|
+
fileset 'dir' => resources_parent do
|
185
|
+
include 'name' => "resources/**/*"
|
198
186
|
end
|
199
187
|
end
|
200
188
|
end
|
201
189
|
|
202
190
|
# include complete source dir tree (note we don't care about potential duplicated resources dir)
|
203
|
-
fileset
|
204
|
-
dirs.each{|dir| include
|
191
|
+
fileset 'dir' => CWD do
|
192
|
+
dirs.each{|dir| include 'name' => "#{dir}/**/*"}
|
205
193
|
end
|
206
194
|
end
|
207
195
|
manifest do
|
208
|
-
attribute
|
196
|
+
attribute 'name' => "Main-Class", 'value' => "redstorm.TopologyLauncher"
|
209
197
|
end
|
210
198
|
end
|
211
199
|
puts("\nRedStorm generated JAR file #{TARGET_CLUSTER_JAR}")
|
@@ -214,19 +202,19 @@ end
|
|
214
202
|
def build_java_dir(source_folder)
|
215
203
|
puts("\n--> Compiling Java")
|
216
204
|
ant.javac(
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
205
|
+
'srcdir' => source_folder,
|
206
|
+
'destdir' => TARGET_CLASSES_DIR,
|
207
|
+
'classpathref' => 'classpath',
|
208
|
+
'source' => "1.6",
|
209
|
+
'target' => "1.6",
|
210
|
+
'debug' => "yes",
|
211
|
+
'includeantruntime' => "no",
|
212
|
+
'verbose' => false,
|
213
|
+
'listfiles' => true
|
226
214
|
) do
|
227
215
|
# compilerarg :value => "-Xlint:unchecked"
|
228
|
-
end
|
229
|
-
end
|
216
|
+
end
|
217
|
+
end
|
230
218
|
|
231
219
|
def build_jruby(source_path)
|
232
220
|
puts("\n--> Compiling JRuby")
|
@@ -243,30 +231,3 @@ def build_jruby(source_path)
|
|
243
231
|
status = JRuby::Compiler::compile_argv(argv)
|
244
232
|
end
|
245
233
|
end
|
246
|
-
|
247
|
-
def truefalse(s)
|
248
|
-
return true if s.to_s.downcase =~ /1|yes|true/
|
249
|
-
return false if s.to_s.downcase =~ /0|no|false/
|
250
|
-
nil
|
251
|
-
end
|
252
|
-
|
253
|
-
def ivy_retrieve(org, mod, rev, transitive, dir, conf)
|
254
|
-
ant.resolve({
|
255
|
-
:organisation => org,
|
256
|
-
:module => mod,
|
257
|
-
:revision => rev,
|
258
|
-
:inline => true,
|
259
|
-
:transitive => truefalse(transitive),
|
260
|
-
:conf => conf,
|
261
|
-
})
|
262
|
-
|
263
|
-
ant.retrieve({
|
264
|
-
:organisation => org,
|
265
|
-
:module => mod,
|
266
|
-
:revision => rev,
|
267
|
-
:pattern => "#{dir}/[conf]/[artifact]-[revision].[ext]",
|
268
|
-
:inline => true,
|
269
|
-
:transitive => truefalse(transitive),
|
270
|
-
:conf => conf,
|
271
|
-
})
|
272
|
-
end
|
data/redstorm.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
libdir = File.expand_path('../lib/', __FILE__)
|
2
|
+
$:.unshift libdir unless $:.include?(libdir)
|
3
|
+
|
4
|
+
require 'red_storm/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = 'kb-redstorm'
|
8
|
+
s.version = RedStorm::VERSION
|
9
|
+
s.authors = ['Colin Surprenant']
|
10
|
+
s.email = ['colin.surprenant@gmail.com']
|
11
|
+
s.homepage = 'https://github.com/colinsurprenant/redstorm'
|
12
|
+
s.summary = 'JRuby on Storm'
|
13
|
+
s.description = 'JRuby integration & DSL for the Storm distributed realtime computation system'
|
14
|
+
|
15
|
+
s.rubyforge_project = 'redstorm'
|
16
|
+
|
17
|
+
s.files = Dir.glob("{lib/**/*}") + Dir.glob("{ivy/*.xml}") + Dir.glob("{examples/**/*}") + Dir.glob("{src/**/*.java}") + Dir.glob("{bin/**/*}") + %w(redstorm.gemspec Rakefile README.md CHANGELOG.md LICENSE.md)
|
18
|
+
s.require_paths = ['lib']
|
19
|
+
s.bindir = 'bin'
|
20
|
+
s.executables = ['redstorm']
|
21
|
+
|
22
|
+
s.add_development_dependency 'rspec', '~> 2.11.0'
|
23
|
+
s.add_runtime_dependency 'rake'
|
24
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
package redstorm.storm.jruby;
|
2
|
+
|
3
|
+
import backtype.storm.task.OutputCollector;
|
4
|
+
import backtype.storm.task.TopologyContext;
|
5
|
+
import backtype.storm.topology.base.BaseBatchBolt;
|
6
|
+
import backtype.storm.coordination.BatchOutputCollector;
|
7
|
+
import backtype.storm.coordination.IBatchBolt;
|
8
|
+
import backtype.storm.topology.OutputFieldsDeclarer;
|
9
|
+
import backtype.storm.tuple.Tuple;
|
10
|
+
import backtype.storm.tuple.Fields;
|
11
|
+
import java.util.Map;
|
12
|
+
|
13
|
+
/**
|
14
|
+
* the JRubyBatchBolt class is a simple proxy class to the actual batch bolt implementation in JRuby.
|
15
|
+
* this proxy is required to bypass the serialization/deserialization process when dispatching
|
16
|
+
* the bolts to the workers. JRuby does not yet support serialization from Java
|
17
|
+
* (Java serialization call on a JRuby class).
|
18
|
+
*
|
19
|
+
* Note that the JRuby bolt proxy class is instantiated in the prepare method which is called after
|
20
|
+
* deserialization at the worker and in the declareOutputFields method which is called once before
|
21
|
+
* serialization at topology creation.
|
22
|
+
*/
|
23
|
+
public class JRubyBatchBolt extends BaseBatchBolt {
|
24
|
+
IBatchBolt _proxyBolt;
|
25
|
+
String _realBoltClassName;
|
26
|
+
String _baseClassPath;
|
27
|
+
String[] _fields;
|
28
|
+
|
29
|
+
/**
|
30
|
+
* create a new JRubyBatchBolt
|
31
|
+
*
|
32
|
+
* @param baseClassPath the topology/project base JRuby class file path
|
33
|
+
* @param realBoltClassName the fully qualified JRuby bolt implementation class name
|
34
|
+
*/
|
35
|
+
public JRubyBatchBolt(String baseClassPath, String realBoltClassName, String[] fields) {
|
36
|
+
_baseClassPath = baseClassPath;
|
37
|
+
_realBoltClassName = realBoltClassName;
|
38
|
+
_fields = fields;
|
39
|
+
}
|
40
|
+
|
41
|
+
@Override
|
42
|
+
public void prepare(final Map stormConf, final TopologyContext context, final BatchOutputCollector collector, final Object id) {
|
43
|
+
// create instance of the jruby class here, after deserialization in the workers.
|
44
|
+
_proxyBolt = newProxyBolt(_baseClassPath, _realBoltClassName);
|
45
|
+
_proxyBolt.prepare(stormConf, context, collector, id);
|
46
|
+
}
|
47
|
+
|
48
|
+
@Override
|
49
|
+
public void execute(Tuple input) {
|
50
|
+
_proxyBolt.execute(input);
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
public void finishBatch() {
|
55
|
+
_proxyBolt.finishBatch();
|
56
|
+
}
|
57
|
+
|
58
|
+
@Override
|
59
|
+
public void declareOutputFields(OutputFieldsDeclarer declarer) {
|
60
|
+
// declareOutputFields is executed in the topology creation time, before serialisation.
|
61
|
+
// do not set the _proxyBolt instance variable here to avoid JRuby serialization
|
62
|
+
// issues. Just create tmp bolt instance to call declareOutputFields.
|
63
|
+
if (_fields.length > 0) {
|
64
|
+
declarer.declare(new Fields(_fields));
|
65
|
+
} else {
|
66
|
+
IBatchBolt bolt = newProxyBolt(_baseClassPath, _realBoltClassName);
|
67
|
+
bolt.declareOutputFields(declarer);
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
@Override
|
72
|
+
public Map<String, Object> getComponentConfiguration() {
|
73
|
+
// getComponentConfiguration is executed in the topology creation time, before serialisation.
|
74
|
+
// do not set the _proxyBolt instance variable here to avoid JRuby serialization
|
75
|
+
// issues. Just create tmp bolt instance to call getComponentConfiguration.
|
76
|
+
IBatchBolt bolt = newProxyBolt(_baseClassPath, _realBoltClassName);
|
77
|
+
return bolt.getComponentConfiguration();
|
78
|
+
}
|
79
|
+
|
80
|
+
|
81
|
+
private static IBatchBolt newProxyBolt(String baseClassPath, String realBoltClassName) {
|
82
|
+
try {
|
83
|
+
redstorm.proxy.BatchBolt proxy = new redstorm.proxy.BatchBolt(baseClassPath, realBoltClassName);
|
84
|
+
return proxy;
|
85
|
+
}
|
86
|
+
catch (Exception e) {
|
87
|
+
throw new RuntimeException(e);
|
88
|
+
}
|
89
|
+
}
|
90
|
+
}
|
@@ -0,0 +1,9 @@
|
|
1
|
+
package redstorm.storm.jruby;
|
2
|
+
|
3
|
+
import backtype.storm.transactional.ICommitter;
|
4
|
+
|
5
|
+
public class JRubyBatchCommitterBolt extends JRubyBatchBolt implements ICommitter {
|
6
|
+
public JRubyBatchCommitterBolt(String baseClassPath, String realBoltClassName, String[] fields) {
|
7
|
+
super(baseClassPath, realBoltClassName, fields);
|
8
|
+
}
|
9
|
+
}
|
@@ -1,39 +1,35 @@
|
|
1
1
|
package redstorm.storm.jruby;
|
2
2
|
|
3
|
-
import storm.
|
3
|
+
import backtype.storm.tuple.Fields;
|
4
4
|
import backtype.storm.task.TopologyContext;
|
5
|
+
import storm.trident.operation.TridentCollector;
|
5
6
|
import storm.trident.spout.IBatchSpout;
|
6
|
-
import backtype.storm.topology.OutputFieldsDeclarer;
|
7
|
-
import backtype.storm.tuple.Tuple;
|
8
|
-
import backtype.storm.tuple.Fields;
|
9
7
|
import java.util.Map;
|
10
8
|
|
11
9
|
/**
|
12
|
-
* the
|
10
|
+
* the JRubyBatchSpout class is a simple proxy class to the actual batch spout implementation in JRuby.
|
13
11
|
* this proxy is required to bypass the serialization/deserialization process when dispatching
|
14
12
|
* the spout to the workers. JRuby does not yet support serialization from Java
|
15
|
-
* (Java serialization call on a JRuby class).
|
13
|
+
* (Java serialization call on a JRuby class).
|
16
14
|
*
|
17
|
-
* Note that the JRuby spout proxy class is instanciated in the open method which is called after
|
18
|
-
* deserialization at the worker and in both the declareOutputFields and isDistributed methods which
|
19
|
-
* are called once before serialization at topology creation.
|
15
|
+
* Note that the JRuby spout proxy class is instantiated in the open method which is called after
|
16
|
+
* deserialization at the worker and in both the declareOutputFields and isDistributed methods which
|
17
|
+
* are called once before serialization at topology creation.
|
20
18
|
*/
|
21
19
|
public class JRubyBatchSpout implements IBatchSpout {
|
22
20
|
IBatchSpout _proxySpout;
|
23
21
|
String _realSpoutClassName;
|
24
22
|
String _baseClassPath;
|
25
|
-
|
26
|
-
|
23
|
+
|
27
24
|
/**
|
28
|
-
* create a new
|
29
|
-
*
|
30
|
-
* @param baseClassPath the topology/project base JRuby class file path
|
25
|
+
* create a new JRubyBatchSpout
|
26
|
+
*
|
27
|
+
* @param baseClassPath the topology/project base JRuby class file path
|
31
28
|
* @param realSpoutClassName the fully qualified JRuby spout implementation class name
|
32
29
|
*/
|
33
|
-
public JRubyBatchSpout(String baseClassPath, String realSpoutClassName
|
30
|
+
public JRubyBatchSpout(String baseClassPath, String realSpoutClassName) {
|
34
31
|
_baseClassPath = baseClassPath;
|
35
32
|
_realSpoutClassName = realSpoutClassName;
|
36
|
-
_fields = fields;
|
37
33
|
}
|
38
34
|
|
39
35
|
@Override
|
@@ -44,10 +40,9 @@ public class JRubyBatchSpout implements IBatchSpout {
|
|
44
40
|
}
|
45
41
|
|
46
42
|
@Override
|
47
|
-
public void emitBatch(
|
43
|
+
public void emitBatch(long batchId, TridentCollector collector) {
|
48
44
|
_proxySpout.emitBatch(batchId, collector);
|
49
|
-
}
|
50
|
-
|
45
|
+
}
|
51
46
|
|
52
47
|
@Override
|
53
48
|
public void close() {
|
@@ -55,17 +50,21 @@ public class JRubyBatchSpout implements IBatchSpout {
|
|
55
50
|
}
|
56
51
|
|
57
52
|
@Override
|
58
|
-
public void ack(
|
53
|
+
public void ack(long batchId) {
|
59
54
|
_proxySpout.ack(batchId);
|
60
55
|
}
|
61
56
|
|
62
57
|
@Override
|
63
58
|
public Fields getOutputFields() {
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
59
|
+
if (_proxySpout == null) {
|
60
|
+
// getOutputFields is executed in the topology creation time before serialisation.
|
61
|
+
// do not set the _proxySpout instance variable here to avoid JRuby serialization
|
62
|
+
// issues. Just create tmp spout instance to call getOutputFields.
|
63
|
+
IBatchSpout spout = newProxySpout(_baseClassPath, _realSpoutClassName);
|
64
|
+
return spout.getOutputFields();
|
65
|
+
} else {
|
66
|
+
return _proxySpout.getOutputFields();
|
67
|
+
}
|
69
68
|
}
|
70
69
|
|
71
70
|
@Override
|
@@ -76,7 +75,7 @@ public class JRubyBatchSpout implements IBatchSpout {
|
|
76
75
|
IBatchSpout spout = newProxySpout(_baseClassPath, _realSpoutClassName);
|
77
76
|
return spout.getComponentConfiguration();
|
78
77
|
}
|
79
|
-
|
78
|
+
|
80
79
|
private static IBatchSpout newProxySpout(String baseClassPath, String realSpoutClassName) {
|
81
80
|
try {
|
82
81
|
redstorm.proxy.BatchSpout proxy = new redstorm.proxy.BatchSpout(baseClassPath, realSpoutClassName);
|
@@ -10,40 +10,32 @@ public class JRubyProxyFunction implements Function {
|
|
10
10
|
Function _proxy;
|
11
11
|
String _realClassName;
|
12
12
|
String _baseClassPath;
|
13
|
-
String[] _fields;
|
14
13
|
|
15
|
-
public JRubyProxyFunction(final String baseClassPath, final String realClassName
|
14
|
+
public JRubyProxyFunction(final String baseClassPath, final String realClassName) {
|
16
15
|
_baseClassPath = baseClassPath;
|
17
16
|
_realClassName = realClassName;
|
18
|
-
_fields = fields;
|
19
17
|
}
|
20
18
|
|
21
19
|
|
22
20
|
@Override
|
23
21
|
public void execute(final TridentTuple _tridentTuple, final TridentCollector _tridentCollector) {
|
24
|
-
|
25
22
|
if(_proxy == null) {
|
26
23
|
_proxy = newProxy(_baseClassPath, _realClassName);
|
27
24
|
}
|
28
25
|
_proxy.execute(_tridentTuple, _tridentCollector);
|
29
|
-
|
30
26
|
}
|
31
27
|
|
32
28
|
@Override
|
33
29
|
public void cleanup() {
|
34
|
-
|
35
30
|
_proxy.cleanup();
|
36
|
-
|
37
31
|
}
|
38
32
|
|
39
33
|
@Override
|
40
34
|
public void prepare(final Map _map, final TridentOperationContext _tridentOperationContext) {
|
41
|
-
|
42
35
|
if(_proxy == null) {
|
43
36
|
_proxy = newProxy(_baseClassPath, _realClassName);
|
44
37
|
}
|
45
38
|
_proxy.prepare(_map, _tridentOperationContext);
|
46
|
-
|
47
39
|
}
|
48
40
|
|
49
41
|
|