kb-redstorm 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +9 -0
- data/README.md +206 -103
- data/examples/native/cluster_word_count_topology.rb +5 -5
- data/examples/native/local_exclamation_topology.rb +8 -8
- data/examples/native/local_exclamation_topology2.rb +7 -7
- data/examples/native/local_redis_word_count_topology.rb +7 -8
- data/examples/native/local_word_count_topology.rb +5 -5
- data/examples/simple/exclamation_topology.rb +7 -11
- data/examples/simple/exclamation_topology2.rb +10 -12
- data/examples/simple/hello_world_topology.rb +22 -0
- data/examples/simple/kafka_topology.rb +15 -15
- data/examples/simple/redis_word_count_topology.rb +3 -5
- data/examples/simple/ruby_version_topology.rb +7 -1
- data/examples/simple/word_count_topology.rb +8 -10
- data/ivy/settings.xml +1 -0
- data/ivy/storm_dependencies.xml +8 -0
- data/ivy/topology_dependencies.xml +7 -0
- data/lib/red_storm/application.rb +7 -5
- data/lib/red_storm/configurator.rb +1 -1
- data/lib/red_storm/proxy/batch_bolt.rb +63 -0
- data/lib/red_storm/proxy/batch_committer_bolt.rb +52 -0
- data/lib/red_storm/proxy/batch_spout.rb +12 -24
- data/lib/red_storm/proxy/proxy_function.rb +1 -9
- data/lib/red_storm/proxy/transactional_committer_spout.rb +47 -0
- data/lib/red_storm/proxy/transactional_spout.rb +46 -0
- data/lib/red_storm/simple_drpc_topology.rb +2 -2
- data/lib/red_storm/simple_topology.rb +14 -4
- data/lib/red_storm/topology_launcher.rb +16 -0
- data/lib/red_storm/version.rb +1 -1
- data/lib/tasks/red_storm.rake +69 -108
- data/redstorm.gemspec +24 -0
- data/src/main/redstorm/storm/jruby/JRubyBatchBolt.java +90 -0
- data/src/main/redstorm/storm/jruby/JRubyBatchCommitterBolt.java +9 -0
- data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +25 -26
- data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +1 -9
- data/src/main/redstorm/storm/jruby/JRubyTransactionalBolt.java +90 -0
- data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterBolt.java +31 -0
- data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterSpout.java +44 -0
- data/src/main/redstorm/storm/jruby/JRubyTransactionalSpout.java +89 -0
- metadata +80 -62
- data/examples/native/Gemfile +0 -2
@@ -106,6 +106,7 @@ module RedStorm
|
|
106
106
|
|
107
107
|
# def self.spout(spout_class, contructor_args = [], options = {}, &spout_block)
|
108
108
|
def self.spout(spout_class, *args, &spout_block)
|
109
|
+
set_topology_class!
|
109
110
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
110
111
|
contructor_args = !args.empty? ? args.pop : []
|
111
112
|
spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)
|
@@ -117,9 +118,10 @@ module RedStorm
|
|
117
118
|
|
118
119
|
# def self.bolt(bolt_class, contructor_args = [], options = {}, &bolt_block)
|
119
120
|
def self.bolt(bolt_class, *args, &bolt_block)
|
121
|
+
set_topology_class!
|
120
122
|
options = args.last.is_a?(Hash) ? args.pop : {}
|
121
123
|
contructor_args = !args.empty? ? args.pop : []
|
122
|
-
bolt_options = {:id =>
|
124
|
+
bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)
|
123
125
|
|
124
126
|
bolt = BoltDefinition.new(bolt_class, contructor_args, bolt_options[:id], bolt_options[:parallelism])
|
125
127
|
raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
|
@@ -128,8 +130,8 @@ module RedStorm
|
|
128
130
|
end
|
129
131
|
|
130
132
|
def self.configure(name = nil, &configure_block)
|
131
|
-
|
132
|
-
@topology_name = name if name
|
133
|
+
set_topology_class!
|
134
|
+
@topology_name = name.to_s if name
|
133
135
|
@configure_block = configure_block if block_given?
|
134
136
|
end
|
135
137
|
|
@@ -158,7 +160,7 @@ module RedStorm
|
|
158
160
|
|
159
161
|
configurator = Configurator.new(defaults)
|
160
162
|
configurator.instance_exec(env, &self.class.configure_block)
|
161
|
-
|
163
|
+
|
162
164
|
submitter = (env == :local) ? @cluster = LocalCluster.new : StormSubmitter
|
163
165
|
submitter.submitTopology(self.class.topology_name, configurator.config, builder.createTopology)
|
164
166
|
instance_exec(env, &self.class.submit_block)
|
@@ -166,6 +168,14 @@ module RedStorm
|
|
166
168
|
|
167
169
|
private
|
168
170
|
|
171
|
+
# this is a quirk to figure out the topology class at load time when the topology file
|
172
|
+
# is required in the TopologyLauncher. Since we want to make the "configure" DSL statement
|
173
|
+
# optional we can hook into any/all the other DSL statements that will be called at load time
|
174
|
+
# and set it there. This is somewhat inelegant but it works.
|
175
|
+
def self.set_topology_class!
|
176
|
+
Configuration.topology_class = self
|
177
|
+
end
|
178
|
+
|
169
179
|
def self.resolve_ids!(components)
|
170
180
|
# verify duplicate implicit ids
|
171
181
|
ids = components.map(&:id)
|
@@ -1,5 +1,10 @@
|
|
1
1
|
require 'java'
|
2
2
|
|
3
|
+
# This hack gets rid of the "Use RbConfig instead of obsolete and deprecated Config"
|
4
|
+
# deprecation warning that is triggered by "java_import 'backtype.storm.Config'".
|
5
|
+
Object.send :remove_const, :Config
|
6
|
+
Config = RbConfig
|
7
|
+
|
3
8
|
# see https://github.com/colinsurprenant/redstorm/issues/7
|
4
9
|
module Backtype
|
5
10
|
java_import 'backtype.storm.Config'
|
@@ -9,6 +14,7 @@ java_import 'backtype.storm.LocalCluster'
|
|
9
14
|
java_import 'backtype.storm.LocalDRPC'
|
10
15
|
java_import 'backtype.storm.StormSubmitter'
|
11
16
|
java_import 'backtype.storm.topology.TopologyBuilder'
|
17
|
+
java_import 'backtype.storm.coordination.BatchBoltExecutor'
|
12
18
|
java_import 'backtype.storm.drpc.LinearDRPCTopologyBuilder'
|
13
19
|
java_import 'backtype.storm.tuple.Fields'
|
14
20
|
java_import 'backtype.storm.tuple.Tuple'
|
@@ -16,7 +22,12 @@ java_import 'backtype.storm.tuple.Values'
|
|
16
22
|
|
17
23
|
java_import 'redstorm.storm.jruby.JRubyBolt'
|
18
24
|
java_import 'redstorm.storm.jruby.JRubySpout'
|
25
|
+
java_import 'redstorm.storm.jruby.JRubyBatchBolt'
|
26
|
+
java_import 'redstorm.storm.jruby.JRubyBatchCommitterBolt'
|
19
27
|
java_import 'redstorm.storm.jruby.JRubyBatchSpout'
|
28
|
+
java_import 'redstorm.storm.jruby.JRubyTransactionalSpout'
|
29
|
+
java_import 'redstorm.storm.jruby.JRubyTransactionalBolt'
|
30
|
+
java_import 'redstorm.storm.jruby.JRubyTransactionalCommitterBolt'
|
20
31
|
|
21
32
|
java_package 'redstorm'
|
22
33
|
|
@@ -41,6 +52,11 @@ class TopologyLauncher
|
|
41
52
|
|
42
53
|
require "#{class_path}"
|
43
54
|
|
55
|
+
if RedStorm::Configuration.topology_class.nil? || !RedStorm::Configuration.topology_class.method_defined?(:start)
|
56
|
+
puts("\nERROR: invalid topology class. make sure your topology class is a subclass of one of the DSL topology classes or that your class sets RedStorm::Configuration.topology_class and defines the start method\n\n")
|
57
|
+
exit(1)
|
58
|
+
end
|
59
|
+
|
44
60
|
topology_name = RedStorm::Configuration.topology_class.respond_to?(:topology_name) ? "/#{RedStorm::Configuration.topology_class.topology_name}" : ''
|
45
61
|
puts("RedStorm v#{RedStorm::VERSION} starting topology #{RedStorm::Configuration.topology_class.name}#{topology_name} in #{env.to_s} environment")
|
46
62
|
RedStorm::Configuration.topology_class.new.start(class_path, env)
|
data/lib/red_storm/version.rb
CHANGED
data/lib/tasks/red_storm.rake
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
begin
|
2
2
|
require 'ant'
|
3
3
|
rescue
|
4
|
-
puts("
|
5
|
-
puts(" and $ANT_HOME is defined properly.")
|
4
|
+
puts("ERROR: unable to load Ant, make sure Ant is installed, in your PATH and $ANT_HOME is defined properly")
|
6
5
|
puts("\nerror detail:\n#{$!}")
|
7
6
|
exit(1)
|
8
7
|
end
|
@@ -11,29 +10,18 @@ require 'jruby/jrubyc'
|
|
11
10
|
require 'red_storm'
|
12
11
|
require 'red_storm/application'
|
13
12
|
|
14
|
-
|
15
|
-
DEP_JRUBY_VERSION = "1.7.3"
|
16
|
-
INSTALL_IVY_VERSION = "2.2.0"
|
17
|
-
|
18
|
-
DEFAULT_DEPENDENCIES = {
|
19
|
-
:storm_artifacts => [
|
20
|
-
"storm:storm:#{DEP_STORM_VERSION}, transitive=true",
|
21
|
-
],
|
22
|
-
:topology_artifacts => [
|
23
|
-
"org.jruby:jruby-complete:#{DEP_JRUBY_VERSION}, transitive=false",
|
24
|
-
],
|
25
|
-
}
|
13
|
+
INSTALL_IVY_VERSION = "2.3.0"
|
26
14
|
|
27
15
|
task :launch, :env, :ruby_mode, :class_file do |t, args|
|
28
16
|
# use ruby mode parameter or default to current interpreter version
|
29
17
|
version_token = RedStorm.jruby_mode_token(args[:ruby_mode])
|
30
|
-
|
18
|
+
|
31
19
|
command = case args[:env]
|
32
20
|
when "local"
|
33
21
|
RedStorm::Application.local_storm_command(args[:class_file], args[:ruby_mode])
|
34
22
|
when "cluster"
|
35
23
|
unless File.exist?(TARGET_CLUSTER_JAR)
|
36
|
-
puts("error: cluster jar file #{TARGET_CLUSTER_JAR} not found. Generate it
|
24
|
+
puts("error: cluster jar file #{TARGET_CLUSTER_JAR} not found. Generate it using $redstorm jar DIR1 [DIR2, ...]")
|
37
25
|
exit(1)
|
38
26
|
end
|
39
27
|
RedStorm::Application.cluster_storm_command(args[:class_file], args[:ruby_mode])
|
@@ -46,25 +34,25 @@ task :launch, :env, :ruby_mode, :class_file do |t, args|
|
|
46
34
|
end
|
47
35
|
|
48
36
|
task :clean do
|
49
|
-
ant.delete
|
37
|
+
ant.delete 'dir' => TARGET_DIR
|
50
38
|
end
|
51
39
|
|
52
40
|
task :clean_jar do
|
53
|
-
ant.delete
|
41
|
+
ant.delete 'file' => TARGET_CLUSTER_JAR
|
54
42
|
end
|
55
43
|
|
56
44
|
task :setup do
|
57
45
|
puts("\n--> Setting up target directories")
|
58
|
-
ant.mkdir
|
59
|
-
ant.mkdir
|
60
|
-
ant.mkdir
|
61
|
-
ant.mkdir
|
62
|
-
ant.mkdir
|
63
|
-
ant.mkdir
|
64
|
-
ant.path
|
65
|
-
fileset
|
66
|
-
fileset
|
67
|
-
end
|
46
|
+
ant.mkdir 'dir' => TARGET_DIR
|
47
|
+
ant.mkdir 'dir' => TARGET_CLASSES_DIR
|
48
|
+
ant.mkdir 'dir' => TARGET_DEPENDENCY_DIR
|
49
|
+
ant.mkdir 'dir' => TARGET_SRC_DIR
|
50
|
+
ant.mkdir 'dir' => TARGET_GEM_DIR
|
51
|
+
ant.mkdir 'dir' => TARGET_SPECS_DIR
|
52
|
+
ant.path 'id' => 'classpath' do
|
53
|
+
fileset 'dir' => TARGET_DEPENDENCY_DIR
|
54
|
+
fileset 'dir' => TARGET_CLASSES_DIR
|
55
|
+
end
|
68
56
|
end
|
69
57
|
|
70
58
|
task :install => [:deps, :build] do
|
@@ -112,18 +100,23 @@ task :bundle, [:groups] => :setup do |t, args|
|
|
112
100
|
defaulted_args = {:groups => 'default'}.merge(args.to_hash.delete_if{|k, v| v.to_s.empty?})
|
113
101
|
groups = defaulted_args[:groups].split(':').map(&:to_sym)
|
114
102
|
Bundler.definition.specs_for(groups).each do |spec|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
103
|
+
next if spec.name == 'bundler'
|
104
|
+
|
105
|
+
# try to avoid infinite recursion
|
106
|
+
next if TARGET_GEM_DIR.start_with?(spec.full_gem_path)
|
107
|
+
|
108
|
+
destination_path = "#{TARGET_GEM_DIR}/#{spec.full_name}"
|
109
|
+
next if File.directory?(destination_path)
|
110
|
+
|
111
|
+
puts("installing gem #{spec.full_name} into #{destination_path}")
|
112
|
+
# copy the actual gem dir
|
113
|
+
FileUtils.cp_r(spec.full_gem_path, destination_path)
|
114
|
+
# copy the evaluated gemspec into the specifications/ dir (we
|
115
|
+
# may not have enough info to reconstruct once we delete the
|
116
|
+
# .git directory)
|
117
|
+
File.open(File.join(TARGET_SPECS_DIR, File.basename(spec.loaded_from)), 'w'){|f| f.write(spec.to_ruby)}
|
118
|
+
# strip the .git directory from git dependencies, it can be huge
|
119
|
+
FileUtils.rm_rf("#{destination_path}/.git")
|
127
120
|
end
|
128
121
|
end
|
129
122
|
|
@@ -131,21 +124,21 @@ namespace :ivy do
|
|
131
124
|
task :download do
|
132
125
|
mkdir_p DST_IVY_DIR
|
133
126
|
ant.get({
|
134
|
-
|
135
|
-
|
136
|
-
|
127
|
+
'src' => "http://repo1.maven.org/maven2/org/apache/ivy/ivy/#{INSTALL_IVY_VERSION}/ivy-#{INSTALL_IVY_VERSION}.jar",
|
128
|
+
'dest' => "#{DST_IVY_DIR}/ivy-#{INSTALL_IVY_VERSION}.jar",
|
129
|
+
'usetimestamp' => true,
|
137
130
|
})
|
138
131
|
end
|
139
132
|
|
140
133
|
task :install => :download do
|
141
|
-
ant.path
|
142
|
-
fileset
|
134
|
+
ant.path 'id' => 'ivy.lib.path' do
|
135
|
+
fileset 'dir' => DST_IVY_DIR, 'includes' => '*.jar'
|
143
136
|
end
|
144
137
|
|
145
138
|
ant.taskdef({
|
146
|
-
|
147
|
-
|
148
|
-
|
139
|
+
'resource' => "org/apache/ivy/ant/antlib.xml",
|
140
|
+
'classpathref' => "ivy.lib.path",
|
141
|
+
#'uri' => "antlib:org.apache.ivy.ant",
|
149
142
|
})
|
150
143
|
end
|
151
144
|
end
|
@@ -153,37 +146,32 @@ end
|
|
153
146
|
task :deps => "ivy:install" do
|
154
147
|
puts("\n--> Installing dependencies")
|
155
148
|
|
156
|
-
|
157
|
-
ant.configure :file => File.exists?(CUSTOM_IVY_SETTINGS) ? CUSTOM_IVY_SETTINGS : DEFAULT_IVY_SETTINGS
|
149
|
+
ant.configure 'file' => File.exists?(CUSTOM_IVY_SETTINGS) ? CUSTOM_IVY_SETTINGS : DEFAULT_IVY_SETTINGS
|
158
150
|
|
159
|
-
|
160
|
-
|
161
|
-
ivy_retrieve(*artifact.split(':').concat([transitive.split(/\s*=\s*/).last, "#{TARGET_DEPENDENCY_DIR}/storm", "default"]))
|
162
|
-
end
|
151
|
+
ant.resolve 'file' => File.exists?(CUSTOM_IVY_STORM_DEPENDENCIES) ? CUSTOM_IVY_STORM_DEPENDENCIES : DEFAULT_IVY_STORM_DEPENDENCIES
|
152
|
+
ant.retrieve 'pattern' => "#{TARGET_DEPENDENCY_DIR}/storm/[conf]/[artifact]-[revision].[ext]", 'sync' => "true"
|
163
153
|
|
164
|
-
|
165
|
-
|
166
|
-
ivy_retrieve(*artifact.split(':').concat([transitive.split(/\s*=\s*/).last, "#{TARGET_DEPENDENCY_DIR}/topology", "default"]))
|
167
|
-
end
|
154
|
+
ant.resolve 'file' => File.exists?(CUSTOM_IVY_TOPOLOGY_DEPENDENCIES) ? CUSTOM_IVY_TOPOLOGY_DEPENDENCIES : DEFAULT_IVY_TOPOLOGY_DEPENDENCIES
|
155
|
+
ant.retrieve 'pattern' => "#{TARGET_DEPENDENCY_DIR}/topology/[conf]/[artifact]-[revision].[ext]", 'sync' => "true"
|
168
156
|
end
|
169
157
|
|
170
158
|
task :jar, [:include_dir] => [:clean_jar] do |t, args|
|
171
159
|
puts("\n--> Generating JAR file #{TARGET_CLUSTER_JAR}")
|
172
160
|
|
173
|
-
ant.jar
|
161
|
+
ant.jar 'destfile' => TARGET_CLUSTER_JAR do
|
174
162
|
# rejar all topology jars
|
175
163
|
Dir["target/dependency/topology/default/*.jar"].each do |jar|
|
176
164
|
puts("Extracting #{jar}")
|
177
|
-
zipfileset
|
165
|
+
zipfileset 'src' => jar, 'includes' => "**/*"
|
178
166
|
end
|
179
|
-
fileset
|
180
|
-
include
|
167
|
+
fileset 'dir' => TARGET_DIR do
|
168
|
+
include 'name' => "gems/**"
|
181
169
|
end
|
182
|
-
fileset
|
170
|
+
fileset 'dir' => TARGET_CLASSES_DIR
|
183
171
|
# red_storm.rb and red_storm/* must be in root of jar so that "require 'red_storm'"
|
184
172
|
# in bolts/spouts works in jar context
|
185
|
-
fileset
|
186
|
-
exclude
|
173
|
+
fileset 'dir' => TARGET_LIB_DIR do
|
174
|
+
exclude 'name' => "tasks/**"
|
187
175
|
end
|
188
176
|
if args[:include_dir]
|
189
177
|
dirs = args[:include_dir].split(":")
|
@@ -193,19 +181,19 @@ task :jar, [:include_dir] => [:clean_jar] do |t, args|
|
|
193
181
|
resources_dirs = Dir.glob("#{dir}/**/resources")
|
194
182
|
resources_dirs.each do |resources_dir|
|
195
183
|
resources_parent = resources_dir.gsub("/resources", "")
|
196
|
-
fileset
|
197
|
-
include
|
184
|
+
fileset 'dir' => resources_parent do
|
185
|
+
include 'name' => "resources/**/*"
|
198
186
|
end
|
199
187
|
end
|
200
188
|
end
|
201
189
|
|
202
190
|
# include complete source dir tree (note we don't care about potential duplicated resources dir)
|
203
|
-
fileset
|
204
|
-
dirs.each{|dir| include
|
191
|
+
fileset 'dir' => CWD do
|
192
|
+
dirs.each{|dir| include 'name' => "#{dir}/**/*"}
|
205
193
|
end
|
206
194
|
end
|
207
195
|
manifest do
|
208
|
-
attribute
|
196
|
+
attribute 'name' => "Main-Class", 'value' => "redstorm.TopologyLauncher"
|
209
197
|
end
|
210
198
|
end
|
211
199
|
puts("\nRedStorm generated JAR file #{TARGET_CLUSTER_JAR}")
|
@@ -214,19 +202,19 @@ end
|
|
214
202
|
def build_java_dir(source_folder)
|
215
203
|
puts("\n--> Compiling Java")
|
216
204
|
ant.javac(
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
205
|
+
'srcdir' => source_folder,
|
206
|
+
'destdir' => TARGET_CLASSES_DIR,
|
207
|
+
'classpathref' => 'classpath',
|
208
|
+
'source' => "1.6",
|
209
|
+
'target' => "1.6",
|
210
|
+
'debug' => "yes",
|
211
|
+
'includeantruntime' => "no",
|
212
|
+
'verbose' => false,
|
213
|
+
'listfiles' => true
|
226
214
|
) do
|
227
215
|
# compilerarg :value => "-Xlint:unchecked"
|
228
|
-
end
|
229
|
-
end
|
216
|
+
end
|
217
|
+
end
|
230
218
|
|
231
219
|
def build_jruby(source_path)
|
232
220
|
puts("\n--> Compiling JRuby")
|
@@ -243,30 +231,3 @@ def build_jruby(source_path)
|
|
243
231
|
status = JRuby::Compiler::compile_argv(argv)
|
244
232
|
end
|
245
233
|
end
|
246
|
-
|
247
|
-
def truefalse(s)
|
248
|
-
return true if s.to_s.downcase =~ /1|yes|true/
|
249
|
-
return false if s.to_s.downcase =~ /0|no|false/
|
250
|
-
nil
|
251
|
-
end
|
252
|
-
|
253
|
-
def ivy_retrieve(org, mod, rev, transitive, dir, conf)
|
254
|
-
ant.resolve({
|
255
|
-
:organisation => org,
|
256
|
-
:module => mod,
|
257
|
-
:revision => rev,
|
258
|
-
:inline => true,
|
259
|
-
:transitive => truefalse(transitive),
|
260
|
-
:conf => conf,
|
261
|
-
})
|
262
|
-
|
263
|
-
ant.retrieve({
|
264
|
-
:organisation => org,
|
265
|
-
:module => mod,
|
266
|
-
:revision => rev,
|
267
|
-
:pattern => "#{dir}/[conf]/[artifact]-[revision].[ext]",
|
268
|
-
:inline => true,
|
269
|
-
:transitive => truefalse(transitive),
|
270
|
-
:conf => conf,
|
271
|
-
})
|
272
|
-
end
|
data/redstorm.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
libdir = File.expand_path('../lib/', __FILE__)
|
2
|
+
$:.unshift libdir unless $:.include?(libdir)
|
3
|
+
|
4
|
+
require 'red_storm/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = 'kb-redstorm'
|
8
|
+
s.version = RedStorm::VERSION
|
9
|
+
s.authors = ['Colin Surprenant']
|
10
|
+
s.email = ['colin.surprenant@gmail.com']
|
11
|
+
s.homepage = 'https://github.com/colinsurprenant/redstorm'
|
12
|
+
s.summary = 'JRuby on Storm'
|
13
|
+
s.description = 'JRuby integration & DSL for the Storm distributed realtime computation system'
|
14
|
+
|
15
|
+
s.rubyforge_project = 'redstorm'
|
16
|
+
|
17
|
+
s.files = Dir.glob("{lib/**/*}") + Dir.glob("{ivy/*.xml}") + Dir.glob("{examples/**/*}") + Dir.glob("{src/**/*.java}") + Dir.glob("{bin/**/*}") + %w(redstorm.gemspec Rakefile README.md CHANGELOG.md LICENSE.md)
|
18
|
+
s.require_paths = ['lib']
|
19
|
+
s.bindir = 'bin'
|
20
|
+
s.executables = ['redstorm']
|
21
|
+
|
22
|
+
s.add_development_dependency 'rspec', '~> 2.11.0'
|
23
|
+
s.add_runtime_dependency 'rake'
|
24
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
package redstorm.storm.jruby;
|
2
|
+
|
3
|
+
import backtype.storm.task.OutputCollector;
|
4
|
+
import backtype.storm.task.TopologyContext;
|
5
|
+
import backtype.storm.topology.base.BaseBatchBolt;
|
6
|
+
import backtype.storm.coordination.BatchOutputCollector;
|
7
|
+
import backtype.storm.coordination.IBatchBolt;
|
8
|
+
import backtype.storm.topology.OutputFieldsDeclarer;
|
9
|
+
import backtype.storm.tuple.Tuple;
|
10
|
+
import backtype.storm.tuple.Fields;
|
11
|
+
import java.util.Map;
|
12
|
+
|
13
|
+
/**
|
14
|
+
* the JRubyBatchBolt class is a simple proxy class to the actual batch bolt implementation in JRuby.
|
15
|
+
* this proxy is required to bypass the serialization/deserialization process when dispatching
|
16
|
+
* the bolts to the workers. JRuby does not yet support serialization from Java
|
17
|
+
* (Java serialization call on a JRuby class).
|
18
|
+
*
|
19
|
+
* Note that the JRuby bolt proxy class is instantiated in the prepare method which is called after
|
20
|
+
* deserialization at the worker and in the declareOutputFields method which is called once before
|
21
|
+
* serialization at topology creation.
|
22
|
+
*/
|
23
|
+
public class JRubyBatchBolt extends BaseBatchBolt {
|
24
|
+
IBatchBolt _proxyBolt;
|
25
|
+
String _realBoltClassName;
|
26
|
+
String _baseClassPath;
|
27
|
+
String[] _fields;
|
28
|
+
|
29
|
+
/**
|
30
|
+
* create a new JRubyBatchBolt
|
31
|
+
*
|
32
|
+
* @param baseClassPath the topology/project base JRuby class file path
|
33
|
+
* @param realBoltClassName the fully qualified JRuby bolt implementation class name
|
34
|
+
*/
|
35
|
+
public JRubyBatchBolt(String baseClassPath, String realBoltClassName, String[] fields) {
|
36
|
+
_baseClassPath = baseClassPath;
|
37
|
+
_realBoltClassName = realBoltClassName;
|
38
|
+
_fields = fields;
|
39
|
+
}
|
40
|
+
|
41
|
+
@Override
|
42
|
+
public void prepare(final Map stormConf, final TopologyContext context, final BatchOutputCollector collector, final Object id) {
|
43
|
+
// create instance of the jruby class here, after deserialization in the workers.
|
44
|
+
_proxyBolt = newProxyBolt(_baseClassPath, _realBoltClassName);
|
45
|
+
_proxyBolt.prepare(stormConf, context, collector, id);
|
46
|
+
}
|
47
|
+
|
48
|
+
@Override
|
49
|
+
public void execute(Tuple input) {
|
50
|
+
_proxyBolt.execute(input);
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
public void finishBatch() {
|
55
|
+
_proxyBolt.finishBatch();
|
56
|
+
}
|
57
|
+
|
58
|
+
@Override
|
59
|
+
public void declareOutputFields(OutputFieldsDeclarer declarer) {
|
60
|
+
// declareOutputFields is executed in the topology creation time, before serialisation.
|
61
|
+
// do not set the _proxyBolt instance variable here to avoid JRuby serialization
|
62
|
+
// issues. Just create tmp bolt instance to call declareOutputFields.
|
63
|
+
if (_fields.length > 0) {
|
64
|
+
declarer.declare(new Fields(_fields));
|
65
|
+
} else {
|
66
|
+
IBatchBolt bolt = newProxyBolt(_baseClassPath, _realBoltClassName);
|
67
|
+
bolt.declareOutputFields(declarer);
|
68
|
+
}
|
69
|
+
}
|
70
|
+
|
71
|
+
@Override
|
72
|
+
public Map<String, Object> getComponentConfiguration() {
|
73
|
+
// getComponentConfiguration is executed in the topology creation time, before serialisation.
|
74
|
+
// do not set the _proxyBolt instance variable here to avoid JRuby serialization
|
75
|
+
// issues. Just create tmp bolt instance to call getComponentConfiguration.
|
76
|
+
IBatchBolt bolt = newProxyBolt(_baseClassPath, _realBoltClassName);
|
77
|
+
return bolt.getComponentConfiguration();
|
78
|
+
}
|
79
|
+
|
80
|
+
|
81
|
+
private static IBatchBolt newProxyBolt(String baseClassPath, String realBoltClassName) {
|
82
|
+
try {
|
83
|
+
redstorm.proxy.BatchBolt proxy = new redstorm.proxy.BatchBolt(baseClassPath, realBoltClassName);
|
84
|
+
return proxy;
|
85
|
+
}
|
86
|
+
catch (Exception e) {
|
87
|
+
throw new RuntimeException(e);
|
88
|
+
}
|
89
|
+
}
|
90
|
+
}
|
@@ -0,0 +1,9 @@
|
|
1
|
+
package redstorm.storm.jruby;
|
2
|
+
|
3
|
+
import backtype.storm.transactional.ICommitter;
|
4
|
+
|
5
|
+
public class JRubyBatchCommitterBolt extends JRubyBatchBolt implements ICommitter {
|
6
|
+
public JRubyBatchCommitterBolt(String baseClassPath, String realBoltClassName, String[] fields) {
|
7
|
+
super(baseClassPath, realBoltClassName, fields);
|
8
|
+
}
|
9
|
+
}
|
@@ -1,39 +1,35 @@
|
|
1
1
|
package redstorm.storm.jruby;
|
2
2
|
|
3
|
-
import storm.
|
3
|
+
import backtype.storm.tuple.Fields;
|
4
4
|
import backtype.storm.task.TopologyContext;
|
5
|
+
import storm.trident.operation.TridentCollector;
|
5
6
|
import storm.trident.spout.IBatchSpout;
|
6
|
-
import backtype.storm.topology.OutputFieldsDeclarer;
|
7
|
-
import backtype.storm.tuple.Tuple;
|
8
|
-
import backtype.storm.tuple.Fields;
|
9
7
|
import java.util.Map;
|
10
8
|
|
11
9
|
/**
|
12
|
-
* the
|
10
|
+
* the JRubyBatchSpout class is a simple proxy class to the actual batch spout implementation in JRuby.
|
13
11
|
* this proxy is required to bypass the serialization/deserialization process when dispatching
|
14
12
|
* the spout to the workers. JRuby does not yet support serialization from Java
|
15
|
-
* (Java serialization call on a JRuby class).
|
13
|
+
* (Java serialization call on a JRuby class).
|
16
14
|
*
|
17
|
-
* Note that the JRuby spout proxy class is instanciated in the open method which is called after
|
18
|
-
* deserialization at the worker and in both the declareOutputFields and isDistributed methods which
|
19
|
-
* are called once before serialization at topology creation.
|
15
|
+
* Note that the JRuby spout proxy class is instantiated in the open method which is called after
|
16
|
+
* deserialization at the worker and in both the getOutputFields and getComponentConfiguration methods which
|
17
|
+
* are called once before serialization at topology creation.
|
20
18
|
*/
|
21
19
|
public class JRubyBatchSpout implements IBatchSpout {
|
22
20
|
IBatchSpout _proxySpout;
|
23
21
|
String _realSpoutClassName;
|
24
22
|
String _baseClassPath;
|
25
|
-
|
26
|
-
|
23
|
+
|
27
24
|
/**
|
28
|
-
* create a new
|
29
|
-
*
|
30
|
-
* @param baseClassPath the topology/project base JRuby class file path
|
25
|
+
* create a new JRubyBatchSpout
|
26
|
+
*
|
27
|
+
* @param baseClassPath the topology/project base JRuby class file path
|
31
28
|
* @param realSpoutClassName the fully qualified JRuby spout implementation class name
|
32
29
|
*/
|
33
|
-
public JRubyBatchSpout(String baseClassPath, String realSpoutClassName
|
30
|
+
public JRubyBatchSpout(String baseClassPath, String realSpoutClassName) {
|
34
31
|
_baseClassPath = baseClassPath;
|
35
32
|
_realSpoutClassName = realSpoutClassName;
|
36
|
-
_fields = fields;
|
37
33
|
}
|
38
34
|
|
39
35
|
@Override
|
@@ -44,10 +40,9 @@ public class JRubyBatchSpout implements IBatchSpout {
|
|
44
40
|
}
|
45
41
|
|
46
42
|
@Override
|
47
|
-
public void emitBatch(
|
43
|
+
public void emitBatch(long batchId, TridentCollector collector) {
|
48
44
|
_proxySpout.emitBatch(batchId, collector);
|
49
|
-
}
|
50
|
-
|
45
|
+
}
|
51
46
|
|
52
47
|
@Override
|
53
48
|
public void close() {
|
@@ -55,17 +50,21 @@ public class JRubyBatchSpout implements IBatchSpout {
|
|
55
50
|
}
|
56
51
|
|
57
52
|
@Override
|
58
|
-
public void ack(
|
53
|
+
public void ack(long batchId) {
|
59
54
|
_proxySpout.ack(batchId);
|
60
55
|
}
|
61
56
|
|
62
57
|
@Override
|
63
58
|
public Fields getOutputFields() {
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
59
|
+
if (_proxySpout == null) {
|
60
|
+
// getOutputFields is executed in the topology creation time before serialisation.
|
61
|
+
// do not set the _proxySpout instance variable here to avoid JRuby serialization
|
62
|
+
// issues. Just create tmp spout instance to call getOutputFields.
|
63
|
+
IBatchSpout spout = newProxySpout(_baseClassPath, _realSpoutClassName);
|
64
|
+
return spout.getOutputFields();
|
65
|
+
} else {
|
66
|
+
return _proxySpout.getOutputFields();
|
67
|
+
}
|
69
68
|
}
|
70
69
|
|
71
70
|
@Override
|
@@ -76,7 +75,7 @@ public class JRubyBatchSpout implements IBatchSpout {
|
|
76
75
|
IBatchSpout spout = newProxySpout(_baseClassPath, _realSpoutClassName);
|
77
76
|
return spout.getComponentConfiguration();
|
78
77
|
}
|
79
|
-
|
78
|
+
|
80
79
|
private static IBatchSpout newProxySpout(String baseClassPath, String realSpoutClassName) {
|
81
80
|
try {
|
82
81
|
redstorm.proxy.BatchSpout proxy = new redstorm.proxy.BatchSpout(baseClassPath, realSpoutClassName);
|
@@ -10,40 +10,32 @@ public class JRubyProxyFunction implements Function {
|
|
10
10
|
Function _proxy;
|
11
11
|
String _realClassName;
|
12
12
|
String _baseClassPath;
|
13
|
-
String[] _fields;
|
14
13
|
|
15
|
-
public JRubyProxyFunction(final String baseClassPath, final String realClassName
|
14
|
+
public JRubyProxyFunction(final String baseClassPath, final String realClassName) {
|
16
15
|
_baseClassPath = baseClassPath;
|
17
16
|
_realClassName = realClassName;
|
18
|
-
_fields = fields;
|
19
17
|
}
|
20
18
|
|
21
19
|
|
22
20
|
@Override
|
23
21
|
public void execute(final TridentTuple _tridentTuple, final TridentCollector _tridentCollector) {
|
24
|
-
|
25
22
|
if(_proxy == null) {
|
26
23
|
_proxy = newProxy(_baseClassPath, _realClassName);
|
27
24
|
}
|
28
25
|
_proxy.execute(_tridentTuple, _tridentCollector);
|
29
|
-
|
30
26
|
}
|
31
27
|
|
32
28
|
@Override
|
33
29
|
public void cleanup() {
|
34
|
-
|
35
30
|
_proxy.cleanup();
|
36
|
-
|
37
31
|
}
|
38
32
|
|
39
33
|
@Override
|
40
34
|
public void prepare(final Map _map, final TridentOperationContext _tridentOperationContext) {
|
41
|
-
|
42
35
|
if(_proxy == null) {
|
43
36
|
_proxy = newProxy(_baseClassPath, _realClassName);
|
44
37
|
}
|
45
38
|
_proxy.prepare(_map, _tridentOperationContext);
|
46
|
-
|
47
39
|
}
|
48
40
|
|
49
41
|
|