redstorm 0.6.4 → 0.6.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/CHANGELOG.md +9 -0
  2. data/README.md +206 -103
  3. data/examples/native/cluster_word_count_topology.rb +5 -5
  4. data/examples/native/local_exclamation_topology.rb +8 -8
  5. data/examples/native/local_exclamation_topology2.rb +7 -7
  6. data/examples/native/local_redis_word_count_topology.rb +7 -8
  7. data/examples/native/local_word_count_topology.rb +5 -5
  8. data/examples/simple/exclamation_topology.rb +7 -11
  9. data/examples/simple/exclamation_topology2.rb +10 -12
  10. data/examples/simple/hello_world_topology.rb +22 -0
  11. data/examples/simple/kafka_topology.rb +2 -1
  12. data/examples/simple/redis_word_count_topology.rb +3 -5
  13. data/examples/simple/ruby_version_topology.rb +7 -1
  14. data/examples/simple/word_count_topology.rb +8 -10
  15. data/ivy/settings.xml +1 -0
  16. data/ivy/storm_dependencies.xml +8 -0
  17. data/ivy/topology_dependencies.xml +7 -0
  18. data/lib/red_storm.rb +1 -0
  19. data/lib/red_storm/application.rb +9 -7
  20. data/lib/red_storm/configurator.rb +1 -1
  21. data/lib/red_storm/proxy/batch_bolt.rb +63 -0
  22. data/lib/red_storm/proxy/batch_committer_bolt.rb +52 -0
  23. data/lib/red_storm/proxy/batch_spout.rb +59 -0
  24. data/lib/red_storm/proxy/proxy_function.rb +40 -0
  25. data/lib/red_storm/proxy/transactional_committer_spout.rb +47 -0
  26. data/lib/red_storm/proxy/transactional_spout.rb +46 -0
  27. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  28. data/lib/red_storm/simple_topology.rb +14 -4
  29. data/lib/red_storm/topology_launcher.rb +22 -3
  30. data/lib/red_storm/version.rb +1 -1
  31. data/lib/tasks/red_storm.rake +66 -104
  32. data/redstorm.gemspec +24 -0
  33. data/src/main/redstorm/storm/jruby/JRubyBatchBolt.java +90 -0
  34. data/src/main/redstorm/storm/jruby/JRubyBatchCommitterBolt.java +9 -0
  35. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +88 -0
  36. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +51 -0
  37. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +1 -1
  38. data/src/main/redstorm/storm/jruby/JRubyTransactionalBolt.java +90 -0
  39. data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterBolt.java +31 -0
  40. data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterSpout.java +44 -0
  41. data/src/main/redstorm/storm/jruby/JRubyTransactionalSpout.java +89 -0
  42. metadata +35 -14
  43. data/examples/native/Gemfile +0 -2
@@ -0,0 +1,63 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.coordination.BatchOutputCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'backtype.storm.topology.IRichBolt'
6
+ java_import 'backtype.storm.coordination.IBatchBolt'
7
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
8
+ java_import 'backtype.storm.tuple.Tuple'
9
+ java_import 'java.util.Map'
10
+
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
+ # the Bolt class is a proxy to the real bolt to avoid having to deal with all the
18
+ # Java artifacts when creating a bolt.
19
+ #
20
+ # The real bolt class implementation must define these methods:
21
+ # - prepare(conf, context, collector, id)
22
+ # - execute(tuple)
23
+ # - declare_output_fields
24
+ #
25
+ # and optionally:
26
+ # - finish_batch
27
+ #
28
+ class BatchBolt
29
+ java_implements IBatchBolt
30
+
31
+ java_signature 'IBatchBolt (String base_class_path, String real_bolt_class_name)'
32
+ def initialize(base_class_path, real_bolt_class_name)
33
+ @real_bolt = Object.module_eval(real_bolt_class_name).new
34
+ rescue NameError
35
+ require base_class_path
36
+ @real_bolt = Object.module_eval(real_bolt_class_name).new
37
+ end
38
+
39
+ java_signature 'void prepare(Map, TopologyContext, BatchOutputCollector, Object)'
40
+ def prepare(conf, context, collector, id)
41
+ @real_bolt.prepare(conf, context, collector, id)
42
+ end
43
+
44
+ java_signature 'void execute(Tuple)'
45
+ def execute(tuple)
46
+ @real_bolt.execute(tuple)
47
+ end
48
+
49
+ java_signature 'void finishBatch()'
50
+ def finishBatch
51
+ @real_bolt.finish_batch if @real_bolt.respond_to?(:finish_batch)
52
+ end
53
+
54
+ java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
55
+ def declareOutputFields(declarer)
56
+ @real_bolt.declare_output_fields(declarer)
57
+ end
58
+
59
+ java_signature 'Map<String, Object> getComponentConfiguration()'
60
+ def getComponentConfiguration
61
+ @real_bolt.get_component_configuration
62
+ end
63
+ end
@@ -0,0 +1,52 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.coordination.BatchOutputCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'backtype.storm.coordination.IBatchBolt'
6
+ java_import 'backtype.storm.transactional.ICommitter'
7
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
8
+ java_import 'backtype.storm.tuple.Tuple'
9
+ java_import 'java.util.Map'
10
+
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
+ class BatchCommitterBolt
18
+ java_implements 'ICommitter, IBatchBolt'
19
+
20
+ java_signature 'IBatchCommitterBolt (String base_class_path, String real_bolt_class_name)'
21
+ def initialize(base_class_path, real_bolt_class_name)
22
+ @real_bolt = Object.module_eval(real_bolt_class_name).new
23
+ rescue NameError
24
+ require base_class_path
25
+ @real_bolt = Object.module_eval(real_bolt_class_name).new
26
+ end
27
+
28
+ java_signature 'void prepare(Map, TopologyContext, BatchOutputCollector, Object)'
29
+ def prepare(conf, context, collector, id)
30
+ @real_bolt.prepare(conf, context, collector, id)
31
+ end
32
+
33
+ java_signature 'void execute(Tuple)'
34
+ def execute(tuple)
35
+ @real_bolt.execute(tuple)
36
+ end
37
+
38
+ java_signature 'void finishBatch()'
39
+ def finishBatch
40
+ @real_bolt.finish_batch if @real_bolt.respond_to?(:finish_batch)
41
+ end
42
+
43
+ java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
44
+ def declareOutputFields(declarer)
45
+ @real_bolt.declare_output_fields(declarer)
46
+ end
47
+
48
+ java_signature 'Map<String, Object> getComponentConfiguration()'
49
+ def getComponentConfiguration
50
+ @real_bolt.get_component_configuration
51
+ end
52
+ end
@@ -0,0 +1,59 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.task.TopologyContext'
4
+ java_import 'storm.trident.operation.TridentCollector'
5
+ java_import 'storm.trident.spout.IBatchSpout'
6
+ java_import 'backtype.storm.tuple.Fields'
7
+ java_import 'java.util.Map'
8
+
9
+ module Backtype
10
+ java_import 'backtype.storm.Config'
11
+ end
12
+
13
+ java_package 'redstorm.proxy'
14
+
15
+ # the Spout class is a proxy to the real spout to avoid having to deal with all the
16
+ # Java artifacts when creating a spout.
17
+
18
+ class BatchSpout
19
+ java_implements IBatchSpout
20
+
21
+ java_signature 'IBatchSpout (String base_class_path, String real_spout_class_name)'
22
+ def initialize(base_class_path, real_spout_class_name)
23
+ @real_spout = Object.module_eval(real_spout_class_name).new
24
+ rescue NameError
25
+ require base_class_path
26
+ @real_spout = Object.module_eval(real_spout_class_name).new
27
+ end
28
+
29
+ java_signature 'void open(Map, TopologyContext)'
30
+ def open(conf, context)
31
+ @real_spout.open(conf, context) if @real_spout.respond_to?(:open)
32
+ end
33
+
34
+ java_signature 'void emitBatch(long, TridentCollector)'
35
+ def emitBatch(batch_id, collector)
36
+ @real_spout.emit_batch(batch_id, collector)
37
+ end
38
+
39
+ java_signature 'void close()'
40
+ def close
41
+ @real_spout.close if @real_spout.respond_to?(:close)
42
+ end
43
+
44
+ java_signature 'void ack(long)'
45
+ def ack(batch_id)
46
+ @real_spout.ack(batch_id) if @real_spout.respond_to?(:ack)
47
+ end
48
+
49
+ java_signature 'Fields getOutputFields()'
50
+ def getOutputFields()
51
+ @real_spout.get_output_fields
52
+ end
53
+
54
+ java_signature 'Map<String, Object> getComponentConfiguration()'
55
+ def getComponentConfiguration
56
+ @real_spout.get_component_configuration
57
+ end
58
+
59
+ end
@@ -0,0 +1,40 @@
1
+ require 'java'
2
+
3
+ java_import 'storm.trident.tuple.TridentTuple'
4
+ java_import 'storm.trident.operation.TridentCollector'
5
+ java_import 'storm.trident.operation.TridentOperationContext'
6
+ java_import 'storm.trident.operation.Function'
7
+ java_import 'java.util.Map'
8
+
9
+ module Backtype
10
+ java_import 'backtype.storm.Config'
11
+ end
12
+
13
+ java_package 'redstorm.proxy'
14
+
15
+ class ProxyFunction
16
+ java_implements Function
17
+
18
+ java_signature 'Function (String base_class_path, String real_class_name)'
19
+ def initialize(base_class_path, real_class_name)
20
+ @real = Object.module_eval(real_class_name).new
21
+ rescue NameError
22
+ require base_class_path
23
+ @real = Object.module_eval(real_class_name).new
24
+ end
25
+
26
+ java_signature 'void execute(TridentTuple, TridentCollector)'
27
+ def execute(_trident_tuple, _trident_collector)
28
+ @real.execute(_trident_tuple, _trident_collector)
29
+ end
30
+
31
+ java_signature 'void cleanup()'
32
+ def cleanup()
33
+ @real.cleanup()
34
+ end
35
+
36
+ java_signature 'void prepare(Map, TridentOperationContext)'
37
+ def prepare(_map, _trident_operation_context)
38
+ @real.prepare(_map, _trident_operation_context)
39
+ end
40
+ end
@@ -0,0 +1,47 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.task.TopologyContext'
4
+ java_import 'backtype.storm.transactional.ITransactionalSpout'
5
+ java_import 'backtype.storm.transactional.ICommitterTransactionalSpout'
6
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
7
+ java_import 'java.util.Map'
8
+
9
+ module Backtype
10
+ java_import 'backtype.storm.Config'
11
+ end
12
+
13
+ java_package 'redstorm.proxy'
14
+
15
+
16
+ class TransactionalCommitterSpout
17
+ java_implements 'ICommitterTransactionalSpout'
18
+
19
+ java_signature 'ICommitterTransactionalSpout (String base_class_path, String real_spout_class_name)'
20
+ def initialize(base_class_path, real_spout_class_name)
21
+ @real_spout = Object.module_eval(real_spout_class_name).new
22
+ rescue NameError
23
+ require base_class_path
24
+ @real_spout = Object.module_eval(real_spout_class_name).new
25
+ end
26
+
27
+ java_signature 'ICommitterTransactionalSpout.Emitter getEmitter(Map, TopologyContext)'
28
+ def getEmitter(conf, context)
29
+ @real_spout.get_emitter(conf, context)
30
+ end
31
+
32
+ java_signature 'ITransactionalSpout.Coordinator getCoordinator(Map, TopologyContext)'
33
+ def getCoordinator(conf, context)
34
+ @real_spout.get_coordinator(conf, context)
35
+ end
36
+
37
+ java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
38
+ def declareOutputFields(declarer)
39
+ @real_spout.declare_output_fields(declarer)
40
+ end
41
+
42
+ java_signature 'Map<String, Object> getComponentConfiguration()'
43
+ def getComponentConfiguration
44
+ @real_spout.get_component_configuration
45
+ end
46
+
47
+ end
@@ -0,0 +1,46 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.task.TopologyContext'
4
+ java_import 'backtype.storm.transactional.ITransactionalSpout'
5
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
6
+ java_import 'java.util.Map'
7
+
8
+ module Backtype
9
+ java_import 'backtype.storm.Config'
10
+ end
11
+
12
+ java_package 'redstorm.proxy'
13
+
14
+
15
+ class TransactionalSpout
16
+ java_implements 'ITransactionalSpout'
17
+
18
+ java_signature 'ITransactionalSpout (String base_class_path, String real_spout_class_name)'
19
+ def initialize(base_class_path, real_spout_class_name)
20
+ @real_spout = Object.module_eval(real_spout_class_name).new
21
+ rescue NameError
22
+ require base_class_path
23
+ @real_spout = Object.module_eval(real_spout_class_name).new
24
+ end
25
+
26
+ java_signature 'ITransactionalSpout.Emitter getEmitter(Map, TopologyContext)'
27
+ def getEmitter(conf, context)
28
+ @real_spout.get_emitter(conf, context)
29
+ end
30
+
31
+ java_signature 'ITransactionalSpout.Coordinator getCoordinator(Map, TopologyContext)'
32
+ def getCoordinator(conf, context)
33
+ @real_spout.get_coordinator(conf, context)
34
+ end
35
+
36
+ java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
37
+ def declareOutputFields(declarer)
38
+ @real_spout.declare_output_fields(declarer)
39
+ end
40
+
41
+ java_signature 'Map<String, Object> getComponentConfiguration()'
42
+ def getComponentConfiguration
43
+ @real_spout.get_component_configuration
44
+ end
45
+
46
+ end
@@ -0,0 +1,87 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+ module RedStorm
6
+
7
+ class InputBoltDefinition < SimpleTopology::BoltDefinition
8
+ attr_accessor :grouping
9
+
10
+ def initialize(*args)
11
+ super
12
+ @grouping = :none
13
+ end
14
+
15
+ def grouping(grouping)
16
+ @grouping = grouping
17
+ end
18
+
19
+ def define_grouping(declarer)
20
+ case @grouping
21
+ when :fields
22
+ declarer.fieldsGrouping(Fields.new(*([params].flatten.map(&:to_s))))
23
+ when :global
24
+ declarer.globalGrouping()
25
+ when :shuffle
26
+ declarer.shuffleGrouping()
27
+ when :local_or_shuffle
28
+ declarer.localOrShuffleGrouping()
29
+ when :none
30
+ declarer.noneGrouping()
31
+ when :all
32
+ declarer.allGrouping()
33
+ when :direct
34
+ declarer.directGrouping()
35
+ else
36
+ raise("unknown grouper=#{grouper.inspect}")
37
+ end
38
+ end
39
+ end
40
+
41
+ class SimpleDRPCTopology < SimpleTopology
42
+
43
+ def self.spout
44
+ raise TopologyDefinitionError, "DRPC spout is already defined"
45
+ end
46
+
47
+ def start(base_class_path, env)
48
+ builder = Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder.new(self.class.topology_name)
49
+
50
+ self.class.bolts.each do |bolt|
51
+ declarer = builder.addBolt(bolt.new_instance(base_class_path), bolt.parallelism.to_java)
52
+ declarer.addConfigurations(bolt.config)
53
+ bolt.define_grouping(declarer)
54
+ end
55
+
56
+ # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
57
+ defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}
58
+
59
+ configurator = Configurator.new(defaults)
60
+ configurator.instance_exec(env, &self.class.configure_block)
61
+
62
+ drpc = nil
63
+ if env == :local
64
+ drpc = LocalDRPC.new
65
+ submitter = @cluster = LocalCluster.new
66
+ submitter.submitTopology(self.class.topology_name, configurator.config, builder.createLocalTopology(drpc))
67
+ else
68
+ submitter = StormSubmitter
69
+ submitter.submitTopology(self.class.topology_name, configurator.config, builder.createRemoteTopology)
70
+ end
71
+ instance_exec(env, drpc, &self.class.submit_block)
72
+ end
73
+
74
+ def self.input_bolt(bolt_class, *args, &bolt_block)
75
+ set_topology_class!
76
+ options = args.last.is_a?(Hash) ? args.pop : {}
77
+ contructor_args = !args.empty? ? args.pop : []
78
+ bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)
79
+
80
+ bolt = InputBoltDefinition.new(bolt_class, contructor_args, bolt_options[:id], bolt_options[:parallelism])
81
+ raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
82
+ bolt.instance_exec(&bolt_block)
83
+ self.components << bolt
84
+ end
85
+ end
86
+
87
+ end
@@ -36,7 +36,7 @@ module RedStorm
36
36
  end
37
37
 
38
38
  class SpoutDefinition < ComponentDefinition
39
-
39
+
40
40
  # WARNING non-dry see BoltDefinition#new_instance
41
41
  def new_instance(base_class_path)
42
42
  if @clazz.name == "Java::RedstormStormJruby::JRubyShellSpout"
@@ -49,7 +49,7 @@ module RedStorm
49
49
  # is_java? ? @clazz.new : JRubySpout.new(base_class_path, @clazz.name)
50
50
  end
51
51
  end
52
-
52
+
53
53
  class BoltDefinition < ComponentDefinition
54
54
  attr_accessor :sources, :command
55
55
 
@@ -106,6 +106,7 @@ module RedStorm
106
106
 
107
107
  # def self.spout(spout_class, contructor_args = [], options = {}, &spout_block)
108
108
  def self.spout(spout_class, *args, &spout_block)
109
+ set_topology_class!
109
110
  options = args.last.is_a?(Hash) ? args.pop : {}
110
111
  contructor_args = !args.empty? ? args.pop : []
111
112
  spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)
@@ -117,6 +118,7 @@ module RedStorm
117
118
 
118
119
  # def self.bolt(bolt_class, contructor_args = [], options = {}, &bolt_block)
119
120
  def self.bolt(bolt_class, *args, &bolt_block)
121
+ set_topology_class!
120
122
  options = args.last.is_a?(Hash) ? args.pop : {}
121
123
  contructor_args = !args.empty? ? args.pop : []
122
124
  bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)
@@ -128,8 +130,8 @@ module RedStorm
128
130
  end
129
131
 
130
132
  def self.configure(name = nil, &configure_block)
131
- Configuration.topology_class = self
132
- @topology_name = name if name
133
+ set_topology_class!
134
+ @topology_name = name.to_s if name
133
135
  @configure_block = configure_block if block_given?
134
136
  end
135
137
 
@@ -166,6 +168,14 @@ module RedStorm
166
168
 
167
169
  private
168
170
 
171
+ # this is a quirk to figure out the topology class at load time when the topology file
172
+ # is required in the TopologyLauncher. Since we want to make the "configure" DSL statement
173
+ # optional we can hook into any/all the other DSL statements that will be called at load time
174
+ # and set it there. This is somewhat inelegant but it works.
175
+ def self.set_topology_class!
176
+ Configuration.topology_class = self
177
+ end
178
+
169
179
  def self.resolve_ids!(components)
170
180
  # verify duplicate implicit ids
171
181
  ids = components.map(&:id)