kb-redstorm 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/CHANGELOG.md +74 -0
  2. data/LICENSE.md +13 -0
  3. data/README.md +375 -0
  4. data/Rakefile +11 -0
  5. data/bin/redstorm +15 -0
  6. data/examples/native/Gemfile +2 -0
  7. data/examples/native/cluster_word_count_topology.rb +25 -0
  8. data/examples/native/exclamation_bolt.rb +21 -0
  9. data/examples/native/local_exclamation_topology.rb +31 -0
  10. data/examples/native/local_exclamation_topology2.rb +48 -0
  11. data/examples/native/local_redis_word_count_topology.rb +69 -0
  12. data/examples/native/local_word_count_topology.rb +27 -0
  13. data/examples/native/random_sentence_spout.rb +30 -0
  14. data/examples/native/split_sentence_bolt.rb +20 -0
  15. data/examples/native/word_count_bolt.rb +26 -0
  16. data/examples/shell/resources/splitsentence.py +9 -0
  17. data/examples/shell/resources/storm.py +206 -0
  18. data/examples/shell/shell_topology.rb +41 -0
  19. data/examples/simple/exclamation_bolt.rb +10 -0
  20. data/examples/simple/exclamation_topology.rb +45 -0
  21. data/examples/simple/exclamation_topology2.rb +45 -0
  22. data/examples/simple/kafka_topology.rb +55 -0
  23. data/examples/simple/random_sentence_spout.rb +21 -0
  24. data/examples/simple/redis_word_count_topology.rb +61 -0
  25. data/examples/simple/ruby_version_topology.rb +32 -0
  26. data/examples/simple/split_sentence_bolt.rb +33 -0
  27. data/examples/simple/word_count_bolt.rb +19 -0
  28. data/examples/simple/word_count_topology.rb +38 -0
  29. data/ivy/settings.xml +11 -0
  30. data/lib/red_storm.rb +9 -0
  31. data/lib/red_storm/application.rb +85 -0
  32. data/lib/red_storm/configuration.rb +16 -0
  33. data/lib/red_storm/configurator.rb +26 -0
  34. data/lib/red_storm/environment.rb +41 -0
  35. data/lib/red_storm/loggable.rb +15 -0
  36. data/lib/red_storm/proxy/batch_spout.rb +71 -0
  37. data/lib/red_storm/proxy/bolt.rb +63 -0
  38. data/lib/red_storm/proxy/proxy_function.rb +48 -0
  39. data/lib/red_storm/proxy/spout.rb +87 -0
  40. data/lib/red_storm/simple_bolt.rb +135 -0
  41. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  42. data/lib/red_storm/simple_spout.rb +184 -0
  43. data/lib/red_storm/simple_topology.rb +209 -0
  44. data/lib/red_storm/topology_launcher.rb +54 -0
  45. data/lib/red_storm/version.rb +3 -0
  46. data/lib/tasks/red_storm.rake +272 -0
  47. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +89 -0
  48. data/src/main/redstorm/storm/jruby/JRubyBolt.java +88 -0
  49. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +59 -0
  50. data/src/main/redstorm/storm/jruby/JRubyShellBolt.java +26 -0
  51. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +26 -0
  52. data/src/main/redstorm/storm/jruby/JRubySpout.java +107 -0
  53. metadata +134 -0
@@ -0,0 +1,16 @@
1
module RedStorm
  # Module-level holder for a single topology class reference.
  #
  # The module extends itself, so the accessor below is callable directly as
  # RedStorm::Configuration.topology_class / topology_class=, and the value is
  # stored in an instance variable of the module object itself.
  module Configuration
    extend self

    # explicit initialization so the reader returns nil until a class is assigned
    @topology_class = nil

    # topology_class      - returns the stored class, or nil when none was assigned
    # topology_class=(c)  - stores the given class
    attr_accessor :topology_class
  end
end
@@ -0,0 +1,26 @@
1
module RedStorm

  # Small Ruby front-end over a Backtype::Config instance.
  #
  # Values can be stored either with #set (raw key/value put) or by calling a
  # snake_case method, which is forwarded to the matching "setXxx" method of
  # the wrapped config object (for example `max_task_parallelism 1` calls
  # `setMaxTaskParallelism(1)`).
  class Configurator
    # the wrapped Backtype::Config instance
    attr_reader :config

    # Converts a snake_case name into CamelCase: :num_workers => "NumWorkers".
    # A "/" followed by a character is turned into "::" plus that character upcased.
    #
    # This stays a public class method: it is called with an explicit receiver
    # from #method_missing, and a bare `private` keyword never applies to
    # `def self.` methods anyway.
    def self.camel_case(s)
      s.to_s.gsub(/\/(.?)/) { "::#{Regexp.last_match(1).upcase}" }.gsub(/(?:^|_)(.)/) { Regexp.last_match(1).upcase }
    end

    # defaults - optional hash of attribute => value pairs put into the config;
    #            keys are converted with to_s.
    def initialize(defaults = {})
      @config = Backtype::Config.new
      defaults.each { |attribute, value| @config.put(attribute.to_s, value) }
    end

    # Stores value under attribute.to_s in the wrapped config.
    def set(attribute, value)
      @config.put(attribute.to_s, value)
    end

    # Forwards any unknown method `foo_bar(*args)` to `@config.setFooBar(*args)`.
    # An option that the wrapped config does not support surfaces as the
    # NoMethodError raised by that send.
    def method_missing(sym, *args)
      @config.send(config_setter_name(sym), *args)
    end

    # Keeps respond_to? truthful for the dynamically forwarded setters.
    def respond_to_missing?(sym, include_private = false)
      @config.respond_to?(config_setter_name(sym)) || super
    end

    private

    # name of the config setter that corresponds to the given snake_case symbol
    def config_setter_name(sym)
      "set#{self.class.camel_case(sym)}"
    end
  end
end
@@ -0,0 +1,41 @@
1
module RedStorm

  # absolute directory of this file
  LAUNCH_PATH = File.expand_path(File.dirname(__FILE__))
  # true when LAUNCH_PATH ends with ".jar!/red_storm"
  JAR_CONTEXT = !!(LAUNCH_PATH =~ /\.jar!\/red_storm$/)

  if JAR_CONTEXT
    # in a jar everything is resolved relative to the parent of LAUNCH_PATH
    BASE_PATH = File.expand_path(LAUNCH_PATH + '/..')
    REDSTORM_HOME = BASE_PATH
    TARGET_PATH = BASE_PATH
  else
    # otherwise the base is the current working directory and the target
    # directory is "<cwd>/target"
    BASE_PATH = Dir.pwd
    REDSTORM_HOME = File.expand_path(LAUNCH_PATH + '/../..')
    TARGET_PATH = "#{BASE_PATH}/target"
  end

  # point rubygems at "<target>/gems/" unless a SPECS_CONTEXT constant is defined
  unless defined?(SPECS_CONTEXT)
    GEM_PATH = "#{TARGET_PATH}/gems/"
    ENV["GEM_PATH"] = GEM_PATH
    ENV["GEM_HOME"] = GEM_PATH
  end

  # accepted ruby version spellings => JRuby compat mode token
  JRUBY_MODE_TOKENS = {
    "1.8"   => "RUBY1_8",
    "--1.8" => "RUBY1_8",
    "1.9"   => "RUBY1_9",
    "--1.9" => "RUBY1_9"
  }.freeze

  # Returns the "major.minor" part of RUBY_VERSION.
  # Raises a RuntimeError unless that value is "1.8" or "1.9".
  def current_ruby_mode
    mode = RUBY_VERSION[/\d+\.\d+/]
    raise("unknown Ruby version #{mode}") unless mode == "1.8" || mode == "1.9"
    mode
  end

  # Maps ruby_version ("1.8", "--1.8", "1.9", "--1.9", anything responding to
  # to_s) to its token. When the argument is not a known spelling (including
  # nil) the token for the currently running Ruby is returned instead, which
  # raises if current_ruby_mode does.
  def jruby_mode_token(ruby_version = nil)
    JRUBY_MODE_TOKENS[ruby_version.to_s] || JRUBY_MODE_TOKENS[RedStorm.current_ruby_mode]
  end

  module_function :current_ruby_mode, :jruby_mode_token

  # puts("*** LAUNCH_PATH=#{LAUNCH_PATH}")
  # puts("*** JAR_CONTEXT=#{JAR_CONTEXT}")
  # puts("*** BASE_PATH=#{BASE_PATH}")
  # puts("*** REDSTORM_HOME=#{REDSTORM_HOME}")
  # puts("*** TARGET_PATH=#{TARGET_PATH}")
  # puts("*** GEM_PATH=#{GEM_PATH}") if defined?(GEM_PATH)
end
@@ -0,0 +1,15 @@
1
+ require 'java'
2
+
3
module RedStorm
  # Mixin giving a class (and its instances) a memoized log4j logger named
  # after the class.
  #
  #   class MyBolt
  #     include RedStorm::Loggable
  #   end
  #   MyBolt.log      # logger named "MyBolt"
  #   MyBolt.new.log  # same logger
  module Loggable

    # Class-level part of the mixin; added to every including class by the
    # included hook below. Without it the instance-level #log would call a
    # class method that only exists on the Loggable module itself.
    module ClassMethods
      # Returns the logger for the receiver, created on first use from the
      # receiver's name and cached in a per-class instance variable.
      def log
        @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
      end
    end

    # keeps RedStorm::Loggable.log itself available, as before
    extend ClassMethods

    def self.included(base)
      base.extend(ClassMethods)
    end

    # Instance-level shortcut to the logger of the object's class.
    def log
      self.class.log
    end

  end
end
@@ -0,0 +1,71 @@
1
+ require 'java'
2
+
3
+ java_import 'storm.trident.operation.TridentCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'storm.trident.spout.IBatchSpout'
6
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
7
+ java_import 'backtype.storm.tuple.Tuple'
8
+ java_import 'backtype.storm.tuple.Fields'
9
+ java_import 'backtype.storm.tuple.Values'
10
+ java_import 'java.util.Map'
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
+ # the BatchSpout class is a proxy to the real batch spout to avoid having to deal with all the
18
+ # Java artifacts when creating a spout.
19
+ #
20
+ # The real batch spout class implementation must define these methods:
21
+ # - open(conf, context)
22
+ # - emit_batch(batch_id, collector)
23
+ # - get_output_fields
24
+ # - ack(batch_id)
25
+ #
26
+ # and optionally:
27
+ # - close
28
+ #
29
+
30
class BatchSpout
  java_implements IBatchSpout

  # Instantiates the real spout from its class name. When the name cannot be
  # resolved yet (NameError), base_class_path is required and the lookup is
  # retried once.
  # NOTE(review): the class name string is evaluated with module_eval; it must
  # only ever come from trusted topology definitions.
  java_signature 'IBatchSpout (String base_class_path, String real_spout_class_name)'
  def initialize(base_class_path, real_spout_class_name)
    begin
      spout_class = Object.module_eval(real_spout_class_name)
      @real_spout = spout_class.new
    rescue NameError
      require base_class_path
      spout_class = Object.module_eval(real_spout_class_name)
      @real_spout = spout_class.new
    end
  end

  # delegates to the real spout's open(conf, context)
  java_signature 'void open(Map, TopologyContext)'
  def open(conf, context)
    @real_spout.open(conf, context)
  end

  # close is optional on the real spout; it is a no-op when not defined
  java_signature 'void close()'
  def close
    if @real_spout.respond_to?(:close)
      @real_spout.close
    end
  end

  # delegates to the real spout's emit_batch(batch_id, collector)
  java_signature 'void emitBatch(long, TridentCollector)'
  def emitBatch(batch_id, collector)
    @real_spout.emit_batch(batch_id, collector)
  end

  # delegates to the real spout's ack(batch_id)
  java_signature 'void ack(long)'
  def ack(batch_id)
    @real_spout.ack(batch_id)
  end

  # returns whatever the real spout's get_output_fields returns
  java_signature 'Fields getOutputFields()'
  def getOutputFields
    @real_spout.get_output_fields
  end

  # returns whatever the real spout's get_component_configuration returns
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_spout.get_component_configuration
  end

end
@@ -0,0 +1,63 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.task.OutputCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'backtype.storm.topology.IRichBolt'
6
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
7
+ java_import 'backtype.storm.tuple.Tuple'
8
+ java_import 'backtype.storm.tuple.Fields'
9
+ java_import 'backtype.storm.tuple.Values'
10
+ java_import 'java.util.Map'
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
+ # the Bolt class is a proxy to the real bolt to avoid having to deal with all the
18
+ # Java artifacts when creating a bolt.
19
+ #
20
+ # The real bolt class implementation must define these methods:
21
+ # - prepare(conf, context, collector)
22
+ # - execute(tuple)
23
+ # - declare_output_fields
24
+ #
25
+ # and optionally:
26
+ # - cleanup
27
+ #
28
class Bolt
  java_implements IRichBolt

  # Instantiates the real bolt from its class name. When the name cannot be
  # resolved yet (NameError), base_class_path is required and the lookup is
  # retried once.
  # NOTE(review): the class name string is evaluated with module_eval; it must
  # only ever come from trusted topology definitions.
  java_signature 'IRichBolt (String base_class_path, String real_bolt_class_name)'
  def initialize(base_class_path, real_bolt_class_name)
    @real_bolt = Object.module_eval(real_bolt_class_name).new
  rescue NameError
    require base_class_path
    @real_bolt = Object.module_eval(real_bolt_class_name).new
  end

  # delegates to the real bolt's prepare(conf, context, collector)
  java_signature 'void prepare(Map, TopologyContext, OutputCollector)'
  def prepare(conf, context, collector)
    @real_bolt.prepare(conf, context, collector)
  end

  # delegates to the real bolt's execute(tuple)
  java_signature 'void execute(Tuple)'
  def execute(tuple)
    @real_bolt.execute(tuple)
  end

  # cleanup is optional on the real bolt; it is a no-op when not defined
  java_signature 'void cleanup()'
  def cleanup
    @real_bolt.cleanup if @real_bolt.respond_to?(:cleanup)
  end

  # delegates to the real bolt's declare_output_fields(declarer)
  java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
  def declareOutputFields(declarer)
    @real_bolt.declare_output_fields(declarer)
  end

  # get_component_configuration is treated as optional, like cleanup: when the
  # real bolt does not define it this returns nil instead of raising
  # NoMethodError.
  # NOTE(review): assumes Storm accepts a null component configuration as
  # "no component-specific settings" - confirm for the Storm version in use.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_bolt.get_component_configuration if @real_bolt.respond_to?(:get_component_configuration)
  end
end
@@ -0,0 +1,48 @@
1
+ require 'java'
2
+
3
+
4
+ java_import 'storm.trident.tuple.TridentTuple'
5
+
6
+ java_import 'storm.trident.operation.TridentCollector'
7
+
8
+ java_import 'java.util.Map'
9
+
10
+ java_import 'storm.trident.operation.TridentOperationContext'
11
+
12
+ java_import 'storm.trident.operation.Function'
13
+
14
+
15
+ module Backtype
16
+ java_import 'backtype.storm.Config'
17
+ end
18
+
19
+ java_package 'redstorm.proxy'
20
+
21
# the ProxyFunction class is a proxy to the real Trident function so that the
# real implementation can be a plain Ruby class.
#
# The real class is called with:
# - execute(tuple, collector)
# - prepare(conf, context)
#
# and optionally:
# - cleanup
#
class ProxyFunction
  java_implements Function

  # Instantiates the real function from its class name. When the name cannot
  # be resolved yet (NameError), base_class_path is required and the lookup is
  # retried once.
  # NOTE(review): the class name string is evaluated with module_eval; it must
  # only ever come from trusted topology definitions.
  java_signature 'Function (String base_class_path, String real_class_name)'
  def initialize(base_class_path, real_class_name)
    @real = Object.module_eval(real_class_name).new
  rescue NameError
    require base_class_path
    @real = Object.module_eval(real_class_name).new
  end

  # delegates to the real function's execute(tuple, collector)
  java_signature 'void execute(TridentTuple, TridentCollector)'
  def execute(_trident_tuple, _trident_collector)
    @real.execute(_trident_tuple, _trident_collector)
  end

  # cleanup is optional on the real function, matching how the bolt and spout
  # proxies treat cleanup/close; it is a no-op when not defined
  java_signature 'void cleanup()'
  def cleanup
    @real.cleanup if @real.respond_to?(:cleanup)
  end

  # delegates to the real function's prepare(conf, context)
  java_signature 'void prepare(Map, TridentOperationContext)'
  def prepare(_map, _trident_operation_context)
    @real.prepare(_map, _trident_operation_context)
  end

end
@@ -0,0 +1,87 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.spout.SpoutOutputCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'backtype.storm.topology.IRichSpout'
6
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
7
+ java_import 'backtype.storm.tuple.Tuple'
8
+ java_import 'backtype.storm.tuple.Fields'
9
+ java_import 'backtype.storm.tuple.Values'
10
+ java_import 'java.util.Map'
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
+ # the Spout class is a proxy to the real spout to avoid having to deal with all the
18
+ # Java artifacts when creating a spout.
19
+ #
20
+ # The real spout class implementation must define these methods:
21
+ # - open(conf, context, collector)
22
+ # - next_tuple
23
+ # - declare_output_fields
24
+ #
25
+ # and optionally:
26
+ # - ack(msg_id)
27
+ # - fail(msg_id)
28
+ # - close
29
+ #
30
+
31
class Spout
  java_implements IRichSpout

  # Instantiates the real spout from its class name. When the name cannot be
  # resolved yet (NameError), base_class_path is required and the lookup is
  # retried once.
  # NOTE(review): the class name string is evaluated with module_eval; it must
  # only ever come from trusted topology definitions.
  java_signature 'IRichSpout (String base_class_path, String real_spout_class_name)'
  def initialize(base_class_path, real_spout_class_name)
    @real_spout = Object.module_eval(real_spout_class_name).new
  rescue NameError
    require base_class_path
    @real_spout = Object.module_eval(real_spout_class_name).new
  end

  # delegates to the real spout's open(conf, context, collector)
  java_signature 'void open(Map, TopologyContext, SpoutOutputCollector)'
  def open(conf, context, collector)
    @real_spout.open(conf, context, collector)
  end

  # close, activate, deactivate, ack and fail are optional on the real spout;
  # each is a no-op when the real spout does not define it

  java_signature 'void close()'
  def close
    @real_spout.close if @real_spout.respond_to?(:close)
  end

  java_signature 'void activate()'
  def activate
    @real_spout.activate if @real_spout.respond_to?(:activate)
  end

  java_signature 'void deactivate()'
  def deactivate
    @real_spout.deactivate if @real_spout.respond_to?(:deactivate)
  end

  # delegates to the real spout's next_tuple
  java_signature 'void nextTuple()'
  def nextTuple
    @real_spout.next_tuple
  end

  java_signature 'void ack(Object)'
  def ack(msg_id)
    @real_spout.ack(msg_id) if @real_spout.respond_to?(:ack)
  end

  java_signature 'void fail(Object)'
  def fail(msg_id)
    @real_spout.fail(msg_id) if @real_spout.respond_to?(:fail)
  end

  # delegates to the real spout's declare_output_fields(declarer)
  java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
  def declareOutputFields(declarer)
    @real_spout.declare_output_fields(declarer)
  end

  # get_component_configuration is treated as optional, like the callbacks
  # above: when the real spout does not define it this returns nil instead of
  # raising NoMethodError.
  # NOTE(review): assumes Storm accepts a null component configuration as
  # "no component-specific settings" - confirm for the Storm version in use.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_spout.get_component_configuration if @real_spout.respond_to?(:get_component_configuration)
  end

end
@@ -0,0 +1,135 @@
1
+ require 'java'
2
+ require 'red_storm/configurator'
3
+
4
module RedStorm

  # Base class for bolts written with the class-level DSL below. Subclasses
  # declare their output fields and callbacks; the instance methods in the
  # "Bolt proxy interface" section are the ones invoked on the bolt
  # (prepare, execute, cleanup, declare_output_fields,
  # get_component_configuration).
  #
  # on_receive options (defaults in parentheses):
  #   :emit   (true)  - emit the value(s) returned by the receive callback
  #   :ack    (false) - ack the incoming tuple after the callback ran
  #   :anchor (false) - anchor emitted tuples to the incoming tuple
  class SimpleBolt
    attr_reader :collector, :context, :config

    # DSL class methods

    # memoized log4j logger named after the bolt class
    def self.log
      @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
    end

    # declares the names of the output fields; names are converted with to_s
    def self.output_fields(*fields)
      @fields = fields.map(&:to_s)
    end

    # registers a block evaluated in the context of a Configurator when the
    # component configuration is requested
    def self.configure(&configure_block)
      @configure_block = block_given? ? configure_block : lambda {}
    end

    # Registers the receive callback: either a block, or the name of an
    # instance method (defaults to :on_receive). A trailing hash is merged into
    # the receive options.
    def self.on_receive(*args, &on_receive_block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      method_name = args.first

      self.receive_options.merge!(options)
      @on_receive_block = block_given? ? on_receive_block : lambda {|tuple| self.send(method_name || :on_receive, tuple)}
    end

    # registers the init callback: a block or an instance method name (default :on_init)
    def self.on_init(method_name = nil, &on_init_block)
      @on_init_block = block_given? ? on_init_block : lambda {self.send(method_name || :on_init)}
    end

    # registers the close callback: a block or an instance method name (default :on_close)
    def self.on_close(method_name = nil, &close_block)
      @close_block = block_given? ? close_block : lambda {self.send(method_name || :on_close)}
    end

    # DSL state readers. These are called with an explicit receiver
    # (self.class.xxx) from the instance methods below, so they are public
    # class methods.

    def self.fields
      @fields ||= []
    end

    def self.configure_block
      @configure_block ||= lambda {}
    end

    def self.on_receive_block
      @on_receive_block ||= lambda {|tuple| self.send(:on_receive, tuple)}
    end

    def self.on_init_block
      @on_init_block ||= lambda {self.send(:on_init)}
    end

    def self.close_block
      @close_block ||= lambda {self.send(:on_close)}
    end

    def self.receive_options
      @receive_options ||= {:emit => true, :ack => false, :anchor => false}
    end

    def self.emit?
      !!self.receive_options[:emit]
    end

    def self.ack?
      !!self.receive_options[:ack]
    end

    def self.anchor?
      !!self.receive_options[:anchor]
    end

    # DSL instance methods

    def log
      self.class.log
    end

    # emits values as one tuple without anchoring
    def unanchored_emit(*values)
      @collector.emit(Values.new(*values))
    end

    # emits values as one tuple anchored to the given tuple
    def anchored_emit(tuple, *values)
      @collector.emit(tuple, Values.new(*values))
    end

    def ack(tuple)
      @collector.ack(tuple)
    end

    def fail(tuple)
      @collector.fail(tuple)
    end

    # Bolt proxy interface

    # Runs the receive callback in the context of this instance, emits its
    # result when it is truthy and the :emit option is on, then acks the tuple
    # when the :ack option is on.
    #
    # The ack is deliberately independent of the emit step: a bolt declared
    # with :ack => true must ack every tuple it processed, including the ones
    # for which the callback returned nil/false or when :emit is off, otherwise
    # those tuples are never acked.
    def execute(tuple)
      output = instance_exec(tuple, &self.class.on_receive_block)
      if output && self.class.emit?
        anchored = self.class.anchor?
        values_list_for(output).each do |values|
          anchored ? anchored_emit(tuple, *values) : unanchored_emit(*values)
        end
      end
      @collector.ack(tuple) if self.class.ack?
    end

    # stores the runtime objects and runs the init callback
    def prepare(config, context, collector)
      @collector = collector
      @context = context
      @config = config
      instance_exec(&self.class.on_init_block)
    end

    # runs the close callback
    def cleanup
      instance_exec(&self.class.close_block)
    end

    def declare_output_fields(declarer)
      declarer.declare(Fields.new(self.class.fields))
    end

    # builds a fresh Configurator, evaluates the configure block in its
    # context and returns the resulting config object
    def get_component_configuration
      configurator = Configurator.new
      configurator.instance_exec(&self.class.configure_block)
      configurator.config
    end

    private

    # default noop optional dsl callbacks
    def on_init; end
    def on_close; end

    # Normalizes the receive callback result into a list of value lists:
    #   scalar            => [[scalar]]      one tuple with one value
    #   [a, b]            => [[a, b]]        one tuple with several values
    #   [[a, b], [c, d]]  => unchanged       several tuples
    def values_list_for(output)
      return [[output]] unless output.is_a?(Array)
      return [output] unless output.first.is_a?(Array)
      output
    end
  end
end