redstorm 0.6.4 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. data/CHANGELOG.md +9 -0
  2. data/README.md +206 -103
  3. data/examples/native/cluster_word_count_topology.rb +5 -5
  4. data/examples/native/local_exclamation_topology.rb +8 -8
  5. data/examples/native/local_exclamation_topology2.rb +7 -7
  6. data/examples/native/local_redis_word_count_topology.rb +7 -8
  7. data/examples/native/local_word_count_topology.rb +5 -5
  8. data/examples/simple/exclamation_topology.rb +7 -11
  9. data/examples/simple/exclamation_topology2.rb +10 -12
  10. data/examples/simple/hello_world_topology.rb +22 -0
  11. data/examples/simple/kafka_topology.rb +2 -1
  12. data/examples/simple/redis_word_count_topology.rb +3 -5
  13. data/examples/simple/ruby_version_topology.rb +7 -1
  14. data/examples/simple/word_count_topology.rb +8 -10
  15. data/ivy/settings.xml +1 -0
  16. data/ivy/storm_dependencies.xml +8 -0
  17. data/ivy/topology_dependencies.xml +7 -0
  18. data/lib/red_storm.rb +1 -0
  19. data/lib/red_storm/application.rb +9 -7
  20. data/lib/red_storm/configurator.rb +1 -1
  21. data/lib/red_storm/proxy/batch_bolt.rb +63 -0
  22. data/lib/red_storm/proxy/batch_committer_bolt.rb +52 -0
  23. data/lib/red_storm/proxy/batch_spout.rb +59 -0
  24. data/lib/red_storm/proxy/proxy_function.rb +40 -0
  25. data/lib/red_storm/proxy/transactional_committer_spout.rb +47 -0
  26. data/lib/red_storm/proxy/transactional_spout.rb +46 -0
  27. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  28. data/lib/red_storm/simple_topology.rb +14 -4
  29. data/lib/red_storm/topology_launcher.rb +22 -3
  30. data/lib/red_storm/version.rb +1 -1
  31. data/lib/tasks/red_storm.rake +66 -104
  32. data/redstorm.gemspec +24 -0
  33. data/src/main/redstorm/storm/jruby/JRubyBatchBolt.java +90 -0
  34. data/src/main/redstorm/storm/jruby/JRubyBatchCommitterBolt.java +9 -0
  35. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +88 -0
  36. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +51 -0
  37. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +1 -1
  38. data/src/main/redstorm/storm/jruby/JRubyTransactionalBolt.java +90 -0
  39. data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterBolt.java +31 -0
  40. data/src/main/redstorm/storm/jruby/JRubyTransactionalCommitterSpout.java +44 -0
  41. data/src/main/redstorm/storm/jruby/JRubyTransactionalSpout.java +89 -0
  42. metadata +35 -14
  43. data/examples/native/Gemfile +0 -2
@@ -0,0 +1,63 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.coordination.BatchOutputCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'backtype.storm.topology.IRichBolt'
6
+ java_import 'backtype.storm.coordination.IBatchBolt'
7
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
8
+ java_import 'backtype.storm.tuple.Tuple'
9
+ java_import 'java.util.Map'
10
+
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
# BatchBolt is a proxy in front of the user's Ruby batch bolt so that the
# user class does not have to deal with the Java artifacts itself.
#
# The real bolt class implementation must define these methods:
# - prepare(conf, context, collector, id)
# - execute(tuple)
# - declare_output_fields(declarer)
# - get_component_configuration
#
# and optionally:
# - finish_batch
#
class BatchBolt
  java_implements IBatchBolt

  # Instantiates the real bolt from its class name. When resolving or
  # instantiating it raises NameError, base_class_path is required and the
  # instantiation is attempted one more time.
  java_signature 'IBatchBolt (String base_class_path, String real_bolt_class_name)'
  def initialize(base_class_path, real_bolt_class_name)
    begin
      real_class = Object.module_eval(real_bolt_class_name)
      @real_bolt = real_class.new
    rescue NameError
      require base_class_path
      real_class = Object.module_eval(real_bolt_class_name)
      @real_bolt = real_class.new
    end
  end

  # Forwards all four arguments, unchanged, to the real bolt.
  java_signature 'void prepare(Map, TopologyContext, BatchOutputCollector, Object)'
  def prepare(conf, context, collector, id)
    @real_bolt.prepare(conf, context, collector, id)
  end

  # Forwards the tuple to the real bolt.
  java_signature 'void execute(Tuple)'
  def execute(tuple)
    @real_bolt.execute(tuple)
  end

  # finish_batch is optional on the real bolt: it is only invoked when the
  # real bolt responds to it.
  java_signature 'void finishBatch()'
  def finishBatch
    if @real_bolt.respond_to?(:finish_batch)
      @real_bolt.finish_batch
    end
  end

  # Lets the real bolt declare its output fields on the given declarer.
  java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
  def declareOutputFields(declarer)
    @real_bolt.declare_output_fields(declarer)
  end

  # Returns whatever the real bolt reports as its component configuration.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_bolt.get_component_configuration
  end
end
@@ -0,0 +1,52 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.coordination.BatchOutputCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'backtype.storm.coordination.IBatchBolt'
6
+ java_import 'backtype.storm.transactional.ICommitter'
7
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
8
+ java_import 'backtype.storm.tuple.Tuple'
9
+ java_import 'java.util.Map'
10
+
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
# BatchCommitterBolt is a proxy in front of the user's Ruby batch bolt; it is
# declared as implementing both ICommitter and IBatchBolt.
#
# The real bolt class must define prepare(conf, context, collector, id),
# execute(tuple), declare_output_fields(declarer) and
# get_component_configuration; finish_batch is optional.
class BatchCommitterBolt
  java_implements 'ICommitter, IBatchBolt'

  # Instantiates the real bolt from its class name. When resolving or
  # instantiating it raises NameError, base_class_path is required and the
  # instantiation is attempted one more time.
  java_signature 'IBatchCommitterBolt (String base_class_path, String real_bolt_class_name)'
  def initialize(base_class_path, real_bolt_class_name)
    begin
      real_class = Object.module_eval(real_bolt_class_name)
      @real_bolt = real_class.new
    rescue NameError
      require base_class_path
      real_class = Object.module_eval(real_bolt_class_name)
      @real_bolt = real_class.new
    end
  end

  # Forwards all four arguments, unchanged, to the real bolt.
  java_signature 'void prepare(Map, TopologyContext, BatchOutputCollector, Object)'
  def prepare(conf, context, collector, id)
    @real_bolt.prepare(conf, context, collector, id)
  end

  # Forwards the tuple to the real bolt.
  java_signature 'void execute(Tuple)'
  def execute(tuple)
    @real_bolt.execute(tuple)
  end

  # finish_batch is optional on the real bolt: it is only invoked when the
  # real bolt responds to it.
  java_signature 'void finishBatch()'
  def finishBatch
    if @real_bolt.respond_to?(:finish_batch)
      @real_bolt.finish_batch
    end
  end

  # Lets the real bolt declare its output fields on the given declarer.
  java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
  def declareOutputFields(declarer)
    @real_bolt.declare_output_fields(declarer)
  end

  # Returns whatever the real bolt reports as its component configuration.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_bolt.get_component_configuration
  end
end
@@ -0,0 +1,59 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.task.TopologyContext'
4
+ java_import 'storm.trident.operation.TridentCollector'
5
+ java_import 'storm.trident.spout.IBatchSpout'
6
+ java_import 'backtype.storm.tuple.Fields'
7
+ java_import 'java.util.Map'
8
+
9
+ module Backtype
10
+ java_import 'backtype.storm.Config'
11
+ end
12
+
13
+ java_package 'redstorm.proxy'
14
+
15
# BatchSpout is a proxy in front of the user's Ruby batch spout so that the
# user class does not have to deal with the Java artifacts itself.
#
# The real spout class must define emit_batch(batch_id, collector),
# get_output_fields and get_component_configuration; open(conf, context),
# close and ack(batch_id) are optional.
class BatchSpout
  java_implements IBatchSpout

  # Instantiates the real spout from its class name. When resolving or
  # instantiating it raises NameError, base_class_path is required and the
  # instantiation is attempted one more time.
  java_signature 'IBatchSpout (String base_class_path, String real_spout_class_name)'
  def initialize(base_class_path, real_spout_class_name)
    begin
      real_class = Object.module_eval(real_spout_class_name)
      @real_spout = real_class.new
    rescue NameError
      require base_class_path
      real_class = Object.module_eval(real_spout_class_name)
      @real_spout = real_class.new
    end
  end

  # open is optional on the real spout.
  java_signature 'void open(Map, TopologyContext)'
  def open(conf, context)
    if @real_spout.respond_to?(:open)
      @real_spout.open(conf, context)
    end
  end

  # Forwards the batch id and collector to the real spout.
  java_signature 'void emitBatch(long, TridentCollector)'
  def emitBatch(batch_id, collector)
    @real_spout.emit_batch(batch_id, collector)
  end

  # close is optional on the real spout.
  java_signature 'void close()'
  def close
    if @real_spout.respond_to?(:close)
      @real_spout.close
    end
  end

  # ack is optional on the real spout.
  java_signature 'void ack(long)'
  def ack(batch_id)
    if @real_spout.respond_to?(:ack)
      @real_spout.ack(batch_id)
    end
  end

  # Returns whatever the real spout reports as its output fields.
  java_signature 'Fields getOutputFields()'
  def getOutputFields
    @real_spout.get_output_fields
  end

  # Returns whatever the real spout reports as its component configuration.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_spout.get_component_configuration
  end

end
@@ -0,0 +1,40 @@
1
+ require 'java'
2
+
3
+ java_import 'storm.trident.tuple.TridentTuple'
4
+ java_import 'storm.trident.operation.TridentCollector'
5
+ java_import 'storm.trident.operation.TridentOperationContext'
6
+ java_import 'storm.trident.operation.Function'
7
+ java_import 'java.util.Map'
8
+
9
+ module Backtype
10
+ java_import 'backtype.storm.Config'
11
+ end
12
+
13
+ java_package 'redstorm.proxy'
14
+
15
# ProxyFunction is a proxy in front of a user's Ruby class that is exposed
# as a Trident Function.
#
# The real class must define execute(tuple, collector), cleanup and
# prepare(conf, context); all three calls are forwarded unconditionally.
class ProxyFunction
  java_implements Function

  # Instantiates the real class from its name. When resolving or
  # instantiating it raises NameError, base_class_path is required and the
  # instantiation is attempted one more time.
  java_signature 'Function (String base_class_path, String real_class_name)'
  def initialize(base_class_path, real_class_name)
    begin
      real_class = Object.module_eval(real_class_name)
      @real = real_class.new
    rescue NameError
      require base_class_path
      real_class = Object.module_eval(real_class_name)
      @real = real_class.new
    end
  end

  # Forwards the tuple and collector to the real object.
  java_signature 'void execute(TridentTuple, TridentCollector)'
  def execute(_trident_tuple, _trident_collector)
    @real.execute(_trident_tuple, _trident_collector)
  end

  # Forwards cleanup to the real object.
  java_signature 'void cleanup()'
  def cleanup
    @real.cleanup
  end

  # Forwards the configuration map and operation context to the real object.
  java_signature 'void prepare(Map, TridentOperationContext)'
  def prepare(_map, _trident_operation_context)
    @real.prepare(_map, _trident_operation_context)
  end
end
@@ -0,0 +1,47 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.task.TopologyContext'
4
+ java_import 'backtype.storm.transactional.ITransactionalSpout'
5
+ java_import 'backtype.storm.transactional.ICommitterTransactionalSpout'
6
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
7
+ java_import 'java.util.Map'
8
+
9
+ module Backtype
10
+ java_import 'backtype.storm.Config'
11
+ end
12
+
13
+ java_package 'redstorm.proxy'
14
+
15
+
16
# TransactionalCommitterSpout is a proxy in front of the user's Ruby spout,
# declared as implementing ICommitterTransactionalSpout.
#
# The real spout class must define get_emitter(conf, context),
# get_coordinator(conf, context), declare_output_fields(declarer) and
# get_component_configuration.
class TransactionalCommitterSpout
  java_implements 'ICommitterTransactionalSpout'

  # Instantiates the real spout from its class name. When resolving or
  # instantiating it raises NameError, base_class_path is required and the
  # instantiation is attempted one more time.
  java_signature 'ICommitterTransactionalSpout (String base_class_path, String real_spout_class_name)'
  def initialize(base_class_path, real_spout_class_name)
    begin
      real_class = Object.module_eval(real_spout_class_name)
      @real_spout = real_class.new
    rescue NameError
      require base_class_path
      real_class = Object.module_eval(real_spout_class_name)
      @real_spout = real_class.new
    end
  end

  # Returns the emitter built by the real spout.
  java_signature 'ICommitterTransactionalSpout.Emitter getEmitter(Map, TopologyContext)'
  def getEmitter(conf, context)
    @real_spout.get_emitter(conf, context)
  end

  # Returns the coordinator built by the real spout.
  java_signature 'ITransactionalSpout.Coordinator getCoordinator(Map, TopologyContext)'
  def getCoordinator(conf, context)
    @real_spout.get_coordinator(conf, context)
  end

  # Lets the real spout declare its output fields on the given declarer.
  java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
  def declareOutputFields(declarer)
    @real_spout.declare_output_fields(declarer)
  end

  # Returns whatever the real spout reports as its component configuration.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_spout.get_component_configuration
  end

end
@@ -0,0 +1,46 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.task.TopologyContext'
4
+ java_import 'backtype.storm.transactional.ITransactionalSpout'
5
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
6
+ java_import 'java.util.Map'
7
+
8
+ module Backtype
9
+ java_import 'backtype.storm.Config'
10
+ end
11
+
12
+ java_package 'redstorm.proxy'
13
+
14
+
15
# TransactionalSpout is a proxy in front of the user's Ruby spout, declared
# as implementing ITransactionalSpout.
#
# The real spout class must define get_emitter(conf, context),
# get_coordinator(conf, context), declare_output_fields(declarer) and
# get_component_configuration.
class TransactionalSpout
  java_implements 'ITransactionalSpout'

  # Instantiates the real spout from its class name. When resolving or
  # instantiating it raises NameError, base_class_path is required and the
  # instantiation is attempted one more time.
  java_signature 'ITransactionalSpout (String base_class_path, String real_spout_class_name)'
  def initialize(base_class_path, real_spout_class_name)
    begin
      real_class = Object.module_eval(real_spout_class_name)
      @real_spout = real_class.new
    rescue NameError
      require base_class_path
      real_class = Object.module_eval(real_spout_class_name)
      @real_spout = real_class.new
    end
  end

  # Returns the emitter built by the real spout.
  java_signature 'ITransactionalSpout.Emitter getEmitter(Map, TopologyContext)'
  def getEmitter(conf, context)
    @real_spout.get_emitter(conf, context)
  end

  # Returns the coordinator built by the real spout.
  java_signature 'ITransactionalSpout.Coordinator getCoordinator(Map, TopologyContext)'
  def getCoordinator(conf, context)
    @real_spout.get_coordinator(conf, context)
  end

  # Lets the real spout declare its output fields on the given declarer.
  java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
  def declareOutputFields(declarer)
    @real_spout.declare_output_fields(declarer)
  end

  # Returns whatever the real spout reports as its component configuration.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_spout.get_component_configuration
  end

end
@@ -0,0 +1,87 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+ module RedStorm
6
+
7
# Bolt definition that carries a single grouping applied directly to the
# declarer it is given (no source id is passed to the grouping calls).
#
# The grouping is either a bare grouper symbol (:global, :shuffle,
# :local_or_shuffle, :none, :all, :direct) or a single-entry hash whose key
# is the grouper and whose value holds its parameters, which is how the
# field names of a fields grouping are supplied:
#
#   grouping :shuffle
#   grouping :fields => ["word"]
#
# The default grouping is :none.
class InputBoltDefinition < SimpleTopology::BoltDefinition
  # only the writer is generated here: a generated reader would be
  # overwritten right away by the one-argument DSL method #grouping below
  attr_writer :grouping

  # Passes all arguments through to the parent definition, then sets the
  # default grouping.
  def initialize(*args)
    super
    @grouping = :none
  end

  # DSL statement used inside the bolt definition block; records the
  # requested grouping (symbol or single-entry hash) and returns it.
  def grouping(grouping)
    @grouping = grouping
  end

  # Applies the recorded grouping to +declarer+.
  #
  # Raises RuntimeError when the grouper is unknown, or when a fields
  # grouping was requested without any field name.
  def define_grouping(declarer)
    # split the recorded grouping into the grouper and its optional
    # parameters; a bare symbol has no parameters
    grouper, params = @grouping.is_a?(Hash) ? @grouping.first : [@grouping, nil]

    case grouper
    when :fields
      field_names = [params].flatten.compact.map(&:to_s)
      raise("fields grouping requires at least one field name") if field_names.empty?
      declarer.fieldsGrouping(Fields.new(*field_names))
    when :global
      declarer.globalGrouping()
    when :shuffle
      declarer.shuffleGrouping()
    when :local_or_shuffle
      declarer.localOrShuffleGrouping()
    when :none
      declarer.noneGrouping()
    when :all
      declarer.allGrouping()
    when :direct
      declarer.directGrouping()
    else
      raise("unknown grouper=#{grouper.inspect}")
    end
  end
end
40
+
41
# Topology DSL variant that assembles its bolts with a
# LinearDRPCTopologyBuilder. Bolts are declared with input_bolt; declaring a
# spout is rejected.
class SimpleDRPCTopology < SimpleTopology

  # The spout DSL statement is not available here: it always raises
  # TopologyDefinitionError.
  def self.spout
    raise TopologyDefinitionError, "DRPC spout is already defined"
  end

  # Builds the topology and submits it.
  #
  # Each declared bolt is added to the builder with its parallelism, its
  # configuration and its grouping. The configure block is then evaluated on
  # a Configurator. When env is :local the topology is submitted to a new
  # LocalCluster (kept in @cluster) together with a new LocalDRPC, otherwise
  # it is submitted through StormSubmitter. Finally the submit block is
  # evaluated on this instance with env and the LocalDRPC (nil when remote).
  def start(base_class_path, env)
    topology_class = self.class
    builder = Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder.new(topology_class.topology_name)

    topology_class.bolts.each do |bolt_definition|
      bolt_instance = bolt_definition.new_instance(base_class_path)
      declarer = builder.addBolt(bolt_instance, bolt_definition.parallelism.to_java)
      declarer.addConfigurations(bolt_definition.config)
      bolt_definition.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    configurator = Configurator.new(
      "topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"
    )
    configurator.instance_exec(env, &topology_class.configure_block)

    if env == :local
      drpc = LocalDRPC.new
      @cluster = LocalCluster.new
      @cluster.submitTopology(topology_class.topology_name, configurator.config, builder.createLocalTopology(drpc))
    else
      drpc = nil
      StormSubmitter.submitTopology(topology_class.topology_name, configurator.config, builder.createRemoteTopology)
    end

    instance_exec(env, drpc, &topology_class.submit_block)
  end

  # DSL statement declaring a bolt of the DRPC topology.
  #
  # args may hold constructor arguments for the bolt, optionally followed by
  # an options hash (:id, :parallelism). A definition block is required; it
  # is evaluated on the new InputBoltDefinition, which is then appended to
  # the topology components.
  def self.input_bolt(bolt_class, *args, &bolt_block)
    set_topology_class!

    overrides = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = args.empty? ? [] : args.pop
    settings = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(overrides)

    bolt = InputBoltDefinition.new(bolt_class, constructor_args, settings[:id], settings[:parallelism])
    unless block_given?
      raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required")
    end

    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end
end
86
+
87
+ end
@@ -36,7 +36,7 @@ module RedStorm
36
36
  end
37
37
 
38
38
  class SpoutDefinition < ComponentDefinition
39
-
39
+
40
40
  # WARNING non-dry see BoltDefinition#new_instance
41
41
  def new_instance(base_class_path)
42
42
  if @clazz.name == "Java::RedstormStormJruby::JRubyShellSpout"
@@ -49,7 +49,7 @@ module RedStorm
49
49
  # is_java? ? @clazz.new : JRubySpout.new(base_class_path, @clazz.name)
50
50
  end
51
51
  end
52
-
52
+
53
53
  class BoltDefinition < ComponentDefinition
54
54
  attr_accessor :sources, :command
55
55
 
@@ -106,6 +106,7 @@ module RedStorm
106
106
 
107
107
  # def self.spout(spout_class, contructor_args = [], options = {}, &spout_block)
108
108
  def self.spout(spout_class, *args, &spout_block)
109
+ set_topology_class!
109
110
  options = args.last.is_a?(Hash) ? args.pop : {}
110
111
  contructor_args = !args.empty? ? args.pop : []
111
112
  spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)
@@ -117,6 +118,7 @@ module RedStorm
117
118
 
118
119
  # def self.bolt(bolt_class, contructor_args = [], options = {}, &bolt_block)
119
120
  def self.bolt(bolt_class, *args, &bolt_block)
121
+ set_topology_class!
120
122
  options = args.last.is_a?(Hash) ? args.pop : {}
121
123
  contructor_args = !args.empty? ? args.pop : []
122
124
  bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)
@@ -128,8 +130,8 @@ module RedStorm
128
130
  end
129
131
 
130
132
  def self.configure(name = nil, &configure_block)
131
- Configuration.topology_class = self
132
- @topology_name = name if name
133
+ set_topology_class!
134
+ @topology_name = name.to_s if name
133
135
  @configure_block = configure_block if block_given?
134
136
  end
135
137
 
@@ -166,6 +168,14 @@ module RedStorm
166
168
 
167
169
  private
168
170
 
171
+ # this is a quirk to figure out the topology class at load time when the topology file
172
+ # is required in the TopologyLauncher. Since we want to make the "configure" DSL statement
173
+ # optional we can hook into any/all the other DSL statements that will be called at load time
174
+ # and set it there. This is somewhat inelegant but it works.
175
+ def self.set_topology_class!
176
+ Configuration.topology_class = self
177
+ end
178
+
169
179
  def self.resolve_ids!(components)
170
180
  # verify duplicate implicit ids
171
181
  ids = components.map(&:id)