kb-redstorm 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/CHANGELOG.md +74 -0
  2. data/LICENSE.md +13 -0
  3. data/README.md +375 -0
  4. data/Rakefile +11 -0
  5. data/bin/redstorm +15 -0
  6. data/examples/native/Gemfile +2 -0
  7. data/examples/native/cluster_word_count_topology.rb +25 -0
  8. data/examples/native/exclamation_bolt.rb +21 -0
  9. data/examples/native/local_exclamation_topology.rb +31 -0
  10. data/examples/native/local_exclamation_topology2.rb +48 -0
  11. data/examples/native/local_redis_word_count_topology.rb +69 -0
  12. data/examples/native/local_word_count_topology.rb +27 -0
  13. data/examples/native/random_sentence_spout.rb +30 -0
  14. data/examples/native/split_sentence_bolt.rb +20 -0
  15. data/examples/native/word_count_bolt.rb +26 -0
  16. data/examples/shell/resources/splitsentence.py +9 -0
  17. data/examples/shell/resources/storm.py +206 -0
  18. data/examples/shell/shell_topology.rb +41 -0
  19. data/examples/simple/exclamation_bolt.rb +10 -0
  20. data/examples/simple/exclamation_topology.rb +45 -0
  21. data/examples/simple/exclamation_topology2.rb +45 -0
  22. data/examples/simple/kafka_topology.rb +55 -0
  23. data/examples/simple/random_sentence_spout.rb +21 -0
  24. data/examples/simple/redis_word_count_topology.rb +61 -0
  25. data/examples/simple/ruby_version_topology.rb +32 -0
  26. data/examples/simple/split_sentence_bolt.rb +33 -0
  27. data/examples/simple/word_count_bolt.rb +19 -0
  28. data/examples/simple/word_count_topology.rb +38 -0
  29. data/ivy/settings.xml +11 -0
  30. data/lib/red_storm.rb +9 -0
  31. data/lib/red_storm/application.rb +85 -0
  32. data/lib/red_storm/configuration.rb +16 -0
  33. data/lib/red_storm/configurator.rb +26 -0
  34. data/lib/red_storm/environment.rb +41 -0
  35. data/lib/red_storm/loggable.rb +15 -0
  36. data/lib/red_storm/proxy/batch_spout.rb +71 -0
  37. data/lib/red_storm/proxy/bolt.rb +63 -0
  38. data/lib/red_storm/proxy/proxy_function.rb +48 -0
  39. data/lib/red_storm/proxy/spout.rb +87 -0
  40. data/lib/red_storm/simple_bolt.rb +135 -0
  41. data/lib/red_storm/simple_drpc_topology.rb +87 -0
  42. data/lib/red_storm/simple_spout.rb +184 -0
  43. data/lib/red_storm/simple_topology.rb +209 -0
  44. data/lib/red_storm/topology_launcher.rb +54 -0
  45. data/lib/red_storm/version.rb +3 -0
  46. data/lib/tasks/red_storm.rake +272 -0
  47. data/src/main/redstorm/storm/jruby/JRubyBatchSpout.java +89 -0
  48. data/src/main/redstorm/storm/jruby/JRubyBolt.java +88 -0
  49. data/src/main/redstorm/storm/jruby/JRubyProxyFunction.java +59 -0
  50. data/src/main/redstorm/storm/jruby/JRubyShellBolt.java +26 -0
  51. data/src/main/redstorm/storm/jruby/JRubyShellSpout.java +26 -0
  52. data/src/main/redstorm/storm/jruby/JRubySpout.java +107 -0
  53. metadata +134 -0
@@ -0,0 +1,16 @@
1
module RedStorm
  # Holds the topology class in an instance variable of the object the
  # accessors are called on. Because of `extend self`, the accessors are
  # callable directly on the module: RedStorm::Configuration.topology_class.
  module Configuration
    extend self

    # No topology class is registered until the writer is called.
    @topology_class = nil

    # Defines topology_class and topology_class=(clazz), both backed by
    # @topology_class.
    attr_accessor :topology_class

  end
end
@@ -0,0 +1,26 @@
1
module RedStorm

  # Thin DSL wrapper around a Backtype::Config instance.
  #
  # Settings can be written either generically with #set, or through dynamic
  # methods: any unknown method `foo_bar(*args)` is forwarded to the wrapped
  # config as `setFooBar(*args)`.
  class Configurator
    # The wrapped Backtype::Config instance.
    attr_reader :config

    # Creates the wrapped config and seeds it with +defaults+.
    #
    # defaults - Hash of attribute => value; every key is stored as a String.
    def initialize(defaults = {})
      @config = Backtype::Config.new
      defaults.each { |attribute, value| @config.put(attribute.to_s, value) }
    end

    # Stores +value+ under the String form of +attribute+.
    def set(attribute, value)
      @config.put(attribute.to_s, value)
    end

    # Forwards `some_option(*args)` to `@config.setSomeOption(*args)`.
    # A setter the config does not know raises NoMethodError from the config.
    def method_missing(sym, *args)
      @config.send(self.class.setter_name(sym), *args)
    end

    # Keeps respond_to? consistent with method_missing: a name is answered
    # only when the wrapped config exposes the matching setter.
    def respond_to_missing?(sym, include_private = false)
      @config.respond_to?(self.class.setter_name(sym)) || super
    end

    # Name of the config setter that the dynamic method +sym+ maps to,
    # e.g. :num_workers => "setNumWorkers".
    def self.setter_name(sym)
      "set#{camel_case(sym)}"
    end

    # Converts a snake_case (or path/like) name to CamelCase.
    #
    # NOTE: this must stay public. It is called with an explicit receiver
    # (self.class.camel_case), and a bare `private` would not affect a
    # `def self.` method anyway.
    def self.camel_case(s)
      s.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
    end
  end
end
@@ -0,0 +1,41 @@
1
module RedStorm

  # Directory containing this file. When running from inside a jar, the
  # path ends with ".jar!/red_storm".
  LAUNCH_PATH = File.expand_path(File.dirname(__FILE__))
  JAR_CONTEXT = (LAUNCH_PATH =~ /\.jar!\/red_storm$/) ? true : false

  # Inside a jar everything is relative to the jar root; otherwise the project
  # is the current working directory and build artifacts live under target/.
  BASE_PATH = JAR_CONTEXT ? File.expand_path("#{LAUNCH_PATH}/..") : Dir.pwd
  REDSTORM_HOME = JAR_CONTEXT ? BASE_PATH : File.expand_path("#{LAUNCH_PATH}/../..")
  TARGET_PATH = JAR_CONTEXT ? BASE_PATH : "#{BASE_PATH}/target"

  # Point rubygems at the bundled gems directory, except when SPECS_CONTEXT
  # is defined.
  if !defined?(SPECS_CONTEXT)
    GEM_PATH = "#{TARGET_PATH}/gems/"
    %w[GEM_PATH GEM_HOME].each { |key| ENV[key] = GEM_PATH }
  end

  # Returns the "major.minor" part of RUBY_VERSION.
  # Raises RuntimeError unless it is "1.8" or "1.9".
  def current_ruby_mode
    mode = RUBY_VERSION[/(\d+\.\d+)(\.\d+)*/, 1]
    raise("unknown Ruby version #{mode}") unless ["1.8", "1.9"].include?(mode)
    mode
  end

  # Maps a version ("1.8", "1.9") or switch ("--1.8", "--1.9") to its
  # "RUBY1_x" token. An unrecognized or missing ruby_version falls back to
  # the token for the running interpreter (see current_ruby_mode).
  def jruby_mode_token(ruby_version = nil)
    tokens = {
      "1.8"   => "RUBY1_8",
      "--1.8" => "RUBY1_8",
      "1.9"   => "RUBY1_9",
      "--1.9" => "RUBY1_9"
    }
    tokens.fetch(ruby_version.to_s) { tokens[RedStorm.current_ruby_mode] }
  end

  module_function :current_ruby_mode, :jruby_mode_token

  # debugging aids:
  # puts("*** LAUNCH_PATH=#{LAUNCH_PATH}")
  # puts("*** JAR_CONTEXT=#{JAR_CONTEXT}")
  # puts("*** BASE_PATH=#{BASE_PATH}")
  # puts("*** REDSTORM_HOME=#{REDSTORM_HOME}")
  # puts("*** TARGET_PATH=#{TARGET_PATH}")
  # puts("*** GEM_PATH=#{GEM_PATH}") if defined?(GEM_PATH)
end
@@ -0,0 +1,15 @@
1
+ require 'java'
2
+
3
module RedStorm
  # Mixin providing a memoized log4j logger at both class and instance level.
  #
  #   class MyBolt
  #     include RedStorm::Loggable
  #   end
  #   MyBolt.log      # logger named "MyBolt"
  #   MyBolt.new.log  # same logger
  module Loggable

    # Class-level API added to every class that includes Loggable.
    module ClassMethods
      # Returns the logger named after the receiver, created on first use and
      # cached in the receiver's own @log instance variable.
      def log
        @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
      end
    end

    # Keeps RedStorm::Loggable.log available, as before.
    extend ClassMethods

    # The instance-level #log delegates to self.class.log, so the including
    # class must get a class-level log too. Without this hook the delegation
    # raised NoMethodError unless the class defined its own.
    def self.included(base)
      base.extend(ClassMethods)
    end

    # Returns the logger of the receiver's class.
    def log
      self.class.log
    end

  end
end
@@ -0,0 +1,71 @@
1
+ require 'java'
2
+
3
+ java_import 'storm.trident.operation.TridentCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'storm.trident.spout.IBatchSpout'
6
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
7
+ java_import 'backtype.storm.tuple.Tuple'
8
+ java_import 'backtype.storm.tuple.Fields'
9
+ java_import 'backtype.storm.tuple.Values'
10
+ java_import 'java.util.Map'
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
+ # the BatchSpout class is a proxy to the real batch spout to avoid having to deal with all the
18
+ # Java artifacts when creating a spout.
19
+ #
20
+ # The real batch spout class implementation must define these methods:
21
+ # - open(conf, context)
22
+ # - emit_batch(batch_id, collector)
23
+ # - get_output_fields
24
+ # - ack(batch_id)
25
+ #
26
+ # and optionally:
27
+ # - close
28
+ #
29
+
30
class BatchSpout
  java_implements IBatchSpout

  # Builds the wrapped Ruby batch spout from its class name. If resolving or
  # instantiating the class raises NameError, base_class_path is required and
  # the instantiation is attempted once more.
  java_signature 'IBatchSpout (String base_class_path, String real_spout_class_name)'
  def initialize(base_class_path, real_spout_class_name)
    begin
      @real_spout = Object.module_eval(real_spout_class_name).new
    rescue NameError
      require base_class_path
      spout_class = Object.module_eval(real_spout_class_name)
      @real_spout = spout_class.new
    end
  end

  # Delegates to the real spout's open(conf, context).
  java_signature 'void open(Map, TopologyContext)'
  def open(conf, context)
    @real_spout.open(conf, context)
  end

  # Optional callback: forwarded only when the real spout defines close.
  java_signature 'void close()'
  def close
    return unless @real_spout.respond_to?(:close)
    @real_spout.close
  end

  # Delegates to the real spout's emit_batch(batch_id, collector).
  java_signature 'void emitBatch(long, TridentCollector)'
  def emitBatch(batch_id, collector)
    @real_spout.emit_batch(batch_id, collector)
  end

  # Delegates to the real spout's ack(batch_id).
  java_signature 'void ack(long)'
  def ack(batch_id)
    @real_spout.ack(batch_id)
  end

  # Returns whatever the real spout's get_output_fields returns.
  java_signature 'Fields getOutputFields()'
  def getOutputFields
    @real_spout.get_output_fields
  end

  # Returns whatever the real spout's get_component_configuration returns.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_spout.get_component_configuration
  end

end
@@ -0,0 +1,63 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.task.OutputCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'backtype.storm.topology.IRichBolt'
6
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
7
+ java_import 'backtype.storm.tuple.Tuple'
8
+ java_import 'backtype.storm.tuple.Fields'
9
+ java_import 'backtype.storm.tuple.Values'
10
+ java_import 'java.util.Map'
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
+ # the Bolt class is a proxy to the real bolt to avoid having to deal with all the
18
+ # Java artifacts when creating a bolt.
19
+ #
20
+ # The real bolt class implementation must define these methods:
21
+ # - prepare(conf, context, collector)
22
+ # - execute(tuple)
23
+ # - declare_output_fields
24
+ #
25
+ # and optionally:
26
+ # - cleanup
27
+ #
28
class Bolt
  java_implements IRichBolt

  # Builds the wrapped Ruby bolt from its class name. If resolving or
  # instantiating the class raises NameError, base_class_path is required and
  # the instantiation is attempted once more.
  java_signature 'IRichBolt (String base_class_path, String real_bolt_class_name)'
  def initialize(base_class_path, real_bolt_class_name)
    begin
      @real_bolt = Object.module_eval(real_bolt_class_name).new
    rescue NameError
      require base_class_path
      bolt_class = Object.module_eval(real_bolt_class_name)
      @real_bolt = bolt_class.new
    end
  end

  # Delegates to the real bolt's prepare(conf, context, collector).
  java_signature 'void prepare(Map, TopologyContext, OutputCollector)'
  def prepare(conf, context, collector)
    @real_bolt.prepare(conf, context, collector)
  end

  # Delegates to the real bolt's execute(tuple).
  java_signature 'void execute(Tuple)'
  def execute(tuple)
    @real_bolt.execute(tuple)
  end

  # Optional callback: forwarded only when the real bolt defines cleanup.
  java_signature 'void cleanup()'
  def cleanup
    return unless @real_bolt.respond_to?(:cleanup)
    @real_bolt.cleanup
  end

  # Delegates to the real bolt's declare_output_fields(declarer).
  java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
  def declareOutputFields(declarer)
    @real_bolt.declare_output_fields(declarer)
  end

  # Returns whatever the real bolt's get_component_configuration returns.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_bolt.get_component_configuration
  end
end
@@ -0,0 +1,48 @@
1
+ require 'java'
2
+
3
+
4
+ java_import 'storm.trident.tuple.TridentTuple'
5
+
6
+ java_import 'storm.trident.operation.TridentCollector'
7
+
8
+ java_import 'java.util.Map'
9
+
10
+ java_import 'storm.trident.operation.TridentOperationContext'
11
+
12
+ java_import 'storm.trident.operation.Function'
13
+
14
+
15
+ module Backtype
16
+ java_import 'backtype.storm.Config'
17
+ end
18
+
19
+ java_package 'redstorm.proxy'
20
+
21
# The ProxyFunction class is a proxy to the real Ruby Trident function, to
# avoid having to deal with the Java artifacts when writing one.
#
# The real function class must define:
# - execute(tuple, collector)
#
# and optionally:
# - prepare(conf, context)
# - cleanup
class ProxyFunction
  java_implements Function

  # Builds the wrapped Ruby function from its class name. If resolving or
  # instantiating the class raises NameError, base_class_path is required and
  # the instantiation is attempted once more.
  java_signature 'Function (String base_class_path, String real_class_name)'
  def initialize(base_class_path, real_class_name)
    @real = Object.module_eval(real_class_name).new
  rescue NameError
    require base_class_path
    @real = Object.module_eval(real_class_name).new
  end

  # Delegates to the real function's execute(tuple, collector).
  java_signature 'void execute(TridentTuple, TridentCollector)'
  def execute(_trident_tuple, _trident_collector)
    @real.execute(_trident_tuple, _trident_collector)
  end

  # Optional callback, guarded like the cleanup/close callbacks of the
  # Bolt and Spout proxies: a real function without cleanup is a no-op
  # instead of a NoMethodError.
  java_signature 'void cleanup()'
  def cleanup
    @real.cleanup if @real.respond_to?(:cleanup)
  end

  # Optional callback: forwarded only when the real function defines prepare.
  java_signature 'void prepare(Map, TridentOperationContext)'
  def prepare(_map, _trident_operation_context)
    @real.prepare(_map, _trident_operation_context) if @real.respond_to?(:prepare)
  end

end
@@ -0,0 +1,87 @@
1
+ require 'java'
2
+
3
+ java_import 'backtype.storm.spout.SpoutOutputCollector'
4
+ java_import 'backtype.storm.task.TopologyContext'
5
+ java_import 'backtype.storm.topology.IRichSpout'
6
+ java_import 'backtype.storm.topology.OutputFieldsDeclarer'
7
+ java_import 'backtype.storm.tuple.Tuple'
8
+ java_import 'backtype.storm.tuple.Fields'
9
+ java_import 'backtype.storm.tuple.Values'
10
+ java_import 'java.util.Map'
11
+ module Backtype
12
+ java_import 'backtype.storm.Config'
13
+ end
14
+
15
+ java_package 'redstorm.proxy'
16
+
17
+ # the Spout class is a proxy to the real spout to avoid having to deal with all the
18
+ # Java artifacts when creating a spout.
19
+ #
20
+ # The real spout class implementation must define these methods:
21
+ # - open(conf, context, collector)
22
+ # - next_tuple
23
+ # - declare_output_fields
24
+ #
25
+ # and optionally:
26
+ # - ack(msg_id)
27
+ # - fail(msg_id)
28
+ # - close
29
+ #
30
+
31
class Spout
  java_implements IRichSpout

  # Builds the wrapped Ruby spout from its class name. If resolving or
  # instantiating the class raises NameError, base_class_path is required and
  # the instantiation is attempted once more.
  java_signature 'IRichSpout (String base_class_path, String real_spout_class_name)'
  def initialize(base_class_path, real_spout_class_name)
    begin
      @real_spout = Object.module_eval(real_spout_class_name).new
    rescue NameError
      require base_class_path
      spout_class = Object.module_eval(real_spout_class_name)
      @real_spout = spout_class.new
    end
  end

  # Delegates to the real spout's open(conf, context, collector).
  java_signature 'void open(Map, TopologyContext, SpoutOutputCollector)'
  def open(conf, context, collector)
    @real_spout.open(conf, context, collector)
  end

  # Optional callback: forwarded only when the real spout defines close.
  java_signature 'void close()'
  def close
    return unless @real_spout.respond_to?(:close)
    @real_spout.close
  end

  # Optional callback: forwarded only when the real spout defines activate.
  java_signature 'void activate()'
  def activate
    return unless @real_spout.respond_to?(:activate)
    @real_spout.activate
  end

  # Optional callback: forwarded only when the real spout defines deactivate.
  java_signature 'void deactivate()'
  def deactivate
    return unless @real_spout.respond_to?(:deactivate)
    @real_spout.deactivate
  end

  # Delegates to the real spout's next_tuple.
  java_signature 'void nextTuple()'
  def nextTuple
    @real_spout.next_tuple
  end

  # Optional callback: forwarded only when the real spout defines ack.
  java_signature 'void ack(Object)'
  def ack(msg_id)
    return unless @real_spout.respond_to?(:ack)
    @real_spout.ack(msg_id)
  end

  # Optional callback: forwarded only when the real spout defines fail.
  java_signature 'void fail(Object)'
  def fail(msg_id)
    return unless @real_spout.respond_to?(:fail)
    @real_spout.fail(msg_id)
  end

  # Delegates to the real spout's declare_output_fields(declarer).
  java_signature 'void declareOutputFields(OutputFieldsDeclarer)'
  def declareOutputFields(declarer)
    @real_spout.declare_output_fields(declarer)
  end

  # Returns whatever the real spout's get_component_configuration returns.
  java_signature 'Map<String, Object> getComponentConfiguration()'
  def getComponentConfiguration
    @real_spout.get_component_configuration
  end

end
@@ -0,0 +1,135 @@
1
+ require 'java'
2
+ require 'red_storm/configurator'
3
+
4
module RedStorm

  # Base class for bolts written with the RedStorm DSL.
  #
  # Subclasses describe themselves with the class-level DSL (output_fields,
  # configure, on_receive, on_init, on_close). The instance methods below the
  # "Bolt proxy interface" marker are the ones the bolt proxy calls.
  #
  # on_receive options (defaults): :emit => true, :ack => false, :anchor => false
  class SimpleBolt
    attr_reader :collector, :context, :config

    # DSL class methods

    # Logger named after the bolt class, created on first use.
    def self.log
      @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
    end

    # Declares the names of the fields this bolt emits (stored as Strings).
    def self.output_fields(*fields)
      @fields = fields.map(&:to_s)
    end

    # Registers a block evaluated against a Configurator to build the
    # component configuration.
    def self.configure(&configure_block)
      @configure_block = block_given? ? configure_block : lambda {}
    end

    # Registers the tuple handler.
    #
    #   on_receive(:ack => true) { |tuple| ... }   # block handler
    #   on_receive :my_method, :anchor => true     # named instance method
    #   on_receive                                 # instance method #on_receive
    #
    # A trailing Hash is merged into the receive options.
    def self.on_receive(*args, &on_receive_block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      method_name = args.first

      self.receive_options.merge!(options)
      @on_receive_block = block_given? ? on_receive_block : lambda {|tuple| self.send(method_name || :on_receive, tuple)}
    end

    # Registers the initialization callback (block or instance method name),
    # run at the end of #prepare.
    def self.on_init(method_name = nil, &on_init_block)
      @on_init_block = block_given? ? on_init_block : lambda {self.send(method_name || :on_init)}
    end

    # Registers the cleanup callback (block or instance method name),
    # run by #cleanup.
    def self.on_close(method_name = nil, &close_block)
      @close_block = block_given? ? close_block : lambda {self.send(method_name || :on_close)}
    end

    # Class-level accessors used by the instance methods. They are called
    # with an explicit receiver (self.class.xxx), so they must stay public.

    def self.fields
      @fields ||= []
    end

    def self.configure_block
      @configure_block ||= lambda {}
    end

    def self.on_receive_block
      @on_receive_block ||= lambda {|tuple| self.send(:on_receive, tuple)}
    end

    def self.on_init_block
      @on_init_block ||= lambda {self.send(:on_init)}
    end

    def self.close_block
      @close_block ||= lambda {self.send(:on_close)}
    end

    def self.receive_options
      @receive_options ||= {:emit => true, :ack => false, :anchor => false}
    end

    def self.emit?
      !!self.receive_options[:emit]
    end

    def self.ack?
      !!self.receive_options[:ack]
    end

    def self.anchor?
      !!self.receive_options[:anchor]
    end

    # DSL instance methods

    def log
      self.class.log
    end

    # Emits +values+ as one tuple, not anchored to any input tuple.
    def unanchored_emit(*values)
      @collector.emit(Values.new(*values))
    end

    # Emits +values+ as one tuple anchored to +tuple+.
    def anchored_emit(tuple, *values)
      @collector.emit(tuple, Values.new(*values))
    end

    def ack(tuple)
      @collector.ack(tuple)
    end

    def fail(tuple)
      @collector.fail(tuple)
    end

    # Bolt proxy interface

    # Runs the on_receive handler for +tuple+ in the context of this bolt.
    #
    # When the handler returns a truthy value and :emit is enabled, the value
    # is emitted: a non-Array becomes one single-value tuple, a flat Array one
    # tuple, and an Array of Arrays one tuple per inner Array.
    #
    # When :ack is enabled the input tuple is acked after every call,
    # including when the handler returns nil/false or :emit is disabled.
    # The ack used to be nested inside the emit branch, so such tuples were
    # never acked.
    def execute(tuple)
      output = instance_exec(tuple, &self.class.on_receive_block)

      if output && self.class.emit?
        values_list =
          if !output.is_a?(Array)
            [[output]]
          elsif !output.first.is_a?(Array)
            [output]
          else
            output
          end

        values_list.each do |values|
          self.class.anchor? ? anchored_emit(tuple, *values) : unanchored_emit(*values)
        end
      end

      @collector.ack(tuple) if self.class.ack?
    end

    # Stores the objects handed over by the proxy, then runs the on_init callback.
    def prepare(config, context, collector)
      @collector = collector
      @context = context
      @config = config
      instance_exec(&self.class.on_init_block)
    end

    # Runs the on_close callback.
    def cleanup
      instance_exec(&self.class.close_block)
    end

    # Declares the fields registered with output_fields on +declarer+.
    def declare_output_fields(declarer)
      declarer.declare(Fields.new(self.class.fields))
    end

    # Evaluates the configure block against a fresh Configurator and returns
    # the resulting config object.
    def get_component_configuration
      configurator = Configurator.new
      configurator.instance_exec(&self.class.configure_block)
      configurator.config
    end

    private

    # default noop optional dsl callbacks
    def on_init; end
    def on_close; end
  end
end