redstorm 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,14 +1,10 @@
1
- load 'lib/tasks/red_storm.rake'
1
+ require 'rubygems'
2
+ require 'rspec/core/rake_task'
2
3
 
3
- task :default => :spec
4
+ load 'lib/tasks/red_storm.rake'
4
5
 
5
- begin
6
- require 'rspec/core/rake_task'
7
- desc "run specs"
8
- task :spec do
9
- system("ruby -v")
10
- RSpec::Core::RakeTask.new
11
- end
12
- rescue NameError, LoadError => e
13
- puts e
6
+ RSpec::Core::RakeTask.new(:spec) do
7
+ system("ruby -v")
14
8
  end
9
+
10
+ task :default => :spec
data/TODO.md CHANGED
@@ -1,4 +1 @@
1
1
  # TODO
2
-
3
- - expose the log4j logger in Simple{Topology|Bolt|Spout} (Java::org.apache.log4j.Logger.getLogger(...))
4
- - see if using bundler would provide better gems integration and easier jar packaging
@@ -0,0 +1,2 @@
1
+ source :rubygems
2
+ gem 'redis'
@@ -7,21 +7,27 @@ require 'examples/simple/exclamation_bolt'
7
7
  module RedStorm
8
8
  module Examples
9
9
  class ExclamationTopology < RedStorm::SimpleTopology
10
- spout TestWordSpout, :parallelism => 10
10
+ spout TestWordSpout, :parallelism => 5 do
11
+ debug true
12
+ end
11
13
 
12
- bolt ExclamationBolt, :parallelism => 3 do
14
+ bolt ExclamationBolt, :parallelism => 2 do
13
15
  source TestWordSpout, :shuffle
16
+ # max_task_parallelism 1
14
17
  end
15
18
 
16
19
  bolt ExclamationBolt, :id => :ExclamationBolt2, :parallelism => 2 do
17
20
  source ExclamationBolt, :shuffle
21
+ # max_task_parallelism 1
22
+ debug true
18
23
  end
19
24
 
20
25
  configure do |env|
21
- debug true
26
+ debug false
27
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
22
28
  case env
23
29
  when :local
24
- max_task_parallelism 3
30
+ max_task_parallelism 40
25
31
  when :cluster
26
32
  num_workers 20
27
33
  max_spout_pending(1000);
@@ -24,6 +24,7 @@ module RedStorm
24
24
 
25
25
  configure do |env|
26
26
  debug true
27
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
27
28
  case env
28
29
  when :local
29
30
  max_task_parallelism 3
@@ -3,7 +3,6 @@ require 'red_storm'
3
3
  module RedStorm
4
4
  module Examples
5
5
  class RandomSentenceSpout < RedStorm::SimpleSpout
6
- set :is_distributed => true
7
6
  output_fields :word
8
7
 
9
8
  on_send {@sentences[rand(@sentences.length)]}
@@ -1,8 +1,9 @@
1
- require 'rubygems'
1
+ require 'rubygems' # required for remote cluster exec where TopologyLauncher + require rubygems is not called
2
+ require 'red_storm' # must be required before bundler for environment setup and after rubygems
3
+ require 'bundler/setup'
4
+
2
5
  require 'redis'
3
6
  require 'thread'
4
- require 'red_storm'
5
-
6
7
  require 'examples/simple/word_count_bolt'
7
8
 
8
9
  module RedStorm
@@ -46,6 +47,7 @@ module RedStorm
46
47
 
47
48
  configure do |env|
48
49
  debug true
50
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
49
51
  case env
50
52
  when :local
51
53
  max_task_parallelism 3
@@ -0,0 +1,32 @@
1
+ require 'red_storm'
2
+
3
+ # this example topology only prints the Ruby version string. No tuple is emitted.
4
+
5
+ module RedStorm
6
+ module Examples
7
+ class VersionSpout < RedStorm::SimpleSpout
8
+ output_fields :dummy
9
+ on_init {log.info("****** JRuby version #{RUBY_VERSION}")}
10
+ on_send {}
11
+ end
12
+
13
+ class RubyVersionTopology < RedStorm::SimpleTopology
14
+ spout VersionSpout
15
+
16
+ configure do |env|
17
+ debug true
18
+
19
+ # set the JRuby version property for this topology. this will only affect remote cluster execution
20
+ # for local execution use the --1.8|--1.9 switch when launching
21
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
22
+ end
23
+
24
+ on_submit do |env|
25
+ if env == :local
26
+ sleep(1)
27
+ cluster.shutdown
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -17,6 +17,7 @@ module RedStorm
17
17
 
18
18
  configure :word_count do |env|
19
19
  debug true
20
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
20
21
  case env
21
22
  when :local
22
23
  max_task_parallelism 3
data/lib/red_storm.rb CHANGED
@@ -1,10 +1,48 @@
1
+ # we depend on rubygems being loaded at this point for setting up gem/bundle environments
2
+ # explicitly requiring rubygems is required in a remote cluster environment
3
+ require 'rubygems'
4
+
1
5
  module RedStorm
2
- REDSTORM_HOME = File.expand_path(File.dirname(__FILE__) + '/..') unless defined?(REDSTORM_HOME)
6
+ LAUNCH_PATH = File.expand_path(File.dirname(__FILE__))
7
+ JAR_CONTEXT = !!(LAUNCH_PATH =~ /\.jar!$/)
8
+
9
+ if JAR_CONTEXT
10
+ REDSTORM_HOME = LAUNCH_PATH
11
+ TARGET_PATH = LAUNCH_PATH
12
+ BUNDLE_GEMFILE = "#{TARGET_PATH}/bundler/Gemfile"
13
+ BUNDLE_PATH = "#{TARGET_PATH}/bundler/#{Gem.ruby_engine}/#{Gem::ConfigMap[:ruby_version]}/"
14
+ GEM_PATH = "#{TARGET_PATH}/gems/"
15
+ else
16
+ REDSTORM_HOME = File.expand_path(LAUNCH_PATH + '/..')
17
+ TARGET_PATH = Dir.pwd
18
+ BUNDLE_GEMFILE = "#{TARGET_PATH}/target/gems/bundler/Gemfile"
19
+ BUNDLE_PATH = "#{TARGET_PATH}/target/gems/bundler/#{Gem.ruby_engine}/#{Gem::ConfigMap[:ruby_version]}/"
20
+ GEM_PATH = "#{TARGET_PATH}/target/gems/gems"
21
+ end
22
+
23
+ # setup bundler environment
24
+ ENV['BUNDLE_GEMFILE'] = RedStorm::BUNDLE_GEMFILE
25
+ ENV['BUNDLE_PATH'] = RedStorm::BUNDLE_PATH
26
+ ENV["GEM_PATH"] = RedStorm::GEM_PATH
27
+ ENV['BUNDLE_DISABLE_SHARED_GEMS'] = "1"
3
28
  end
4
29
 
30
+ $:.unshift RedStorm::TARGET_PATH
31
+
32
+
5
33
  require 'red_storm/version'
6
34
  require 'red_storm/configuration'
7
35
  require 'red_storm/application'
8
36
  require 'red_storm/simple_bolt'
9
37
  require 'red_storm/simple_spout'
10
38
  require 'red_storm/simple_topology'
39
+
40
+ # puts("************************ PWD=#{Dir.pwd}")
41
+ # puts("************************ RedStorm::JAR_CONTEXT=#{RedStorm::JAR_CONTEXT}")
42
+ # puts("************************ RedStorm::LAUNCH_PATH=#{RedStorm::LAUNCH_PATH}")
43
+ # puts("************************ RedStorm::REDSTORM_HOME=#{RedStorm::REDSTORM_HOME}")
44
+ # puts("************************ RedStorm::TARGET_PATH=#{RedStorm::TARGET_PATH}")
45
+ # puts("************************ RedStorm::GEM_PATH=#{RedStorm::GEM_PATH}")
46
+ # puts("************************ ENV['BUNDLE_GEMFILE']=#{ENV['BUNDLE_GEMFILE']}")
47
+ # puts("************************ ENV['BUNDLE_PATH']=#{ENV['BUNDLE_PATH']}")
48
+ # puts("************************ ENV['GEM_PATH']=#{ENV['GEM_PATH']}")
@@ -1,27 +1,36 @@
1
1
  module RedStorm
2
+
3
+ DEFAULT_RUBY_VERSION = "--1.8"
4
+ RUNTIME = {}
2
5
 
3
6
  class Application
4
7
  TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"
5
8
 
6
9
  def usage
7
- puts("usage: redstorm install|examples|jar <project_directory>|local <topology_class_file>")
10
+ puts("usage: redstorm [--1.8|--1.9] install | deps | build | examples | gems [--gemfile GEMFILE_PATH] | jar DIR1, DIR2, ... | local TOPOLOGY_CLASS_PATH")
8
11
  exit(1)
9
12
  end
10
13
 
11
14
  def run(args)
12
15
  if args.size > 0
13
- if ["install", "examples", "jar"].include?(args[0])
14
- load(TASKS_FILE)
15
- Rake::Task[args.shift].invoke(*args)
16
- elsif args.size == 2 && ["local"].include?(args[0]) && File.exist?(args[1])
16
+ version = args.delete("--1.8") || args.delete("--1.9") || DEFAULT_RUBY_VERSION
17
+ RUNTIME['RUBY_VERSION'] = version
18
+
19
+ if ["install", "examples", "jar", "gems", "deps", "build"].include?(args[0])
17
20
  load(TASKS_FILE)
18
- Rake::Task['launch'].invoke(*args)
19
- else
20
- usage
21
+ Rake::Task[args.shift].invoke(args.join(":"))
22
+ exit
23
+ elsif args.size >= 2 && args.include?("local")
24
+ args.delete("local")
25
+ if args.size == 1
26
+ file = args[0]
27
+ load(TASKS_FILE)
28
+ Rake::Task['launch'].invoke("local", file)
29
+ exit
30
+ end
21
31
  end
22
- else
23
- usage
24
32
  end
33
+ usage
25
34
  end
26
35
  end
27
36
  end
@@ -0,0 +1,25 @@
1
+ module RedStorm
2
+
3
+ class Configurator
4
+ attr_reader :config
5
+
6
+ def initialize
7
+ @config = Backtype::Config.new
8
+ end
9
+
10
+ def set(attribute, value)
11
+ @config.put(attribute, value)
12
+ end
13
+
14
+ def method_missing(sym, *args)
15
+ config_method = "set#{self.class.camel_case(sym)}"
16
+ @config.send(config_method, *args)
17
+ end
18
+
19
+ private
20
+
21
+ def self.camel_case(s)
22
+ s.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,13 @@
1
+ module RedStorm
2
+ module Loggable
3
+
4
+ def self.log
5
+ @log ||= Logger.getLogger(self.name)
6
+ end
7
+
8
+ def log
9
+ self.class.log
10
+ end
11
+
12
+ end
13
+ end
@@ -8,6 +8,10 @@ java_import 'backtype.storm.tuple.Tuple'
8
8
  java_import 'backtype.storm.tuple.Fields'
9
9
  java_import 'backtype.storm.tuple.Values'
10
10
  java_import 'java.util.Map'
11
+ java_import 'org.apache.log4j.Logger'
12
+ module Backtype
13
+ java_import 'backtype.storm.Config'
14
+ end
11
15
 
12
16
  java_package 'redstorm.proxy'
13
17
 
@@ -52,4 +56,9 @@ class Bolt
52
56
  def declareOutputFields(declarer)
53
57
  @real_bolt.declare_output_fields(declarer)
54
58
  end
59
+
60
+ java_signature 'Map<String, Object> getComponentConfiguration()'
61
+ def getComponentConfiguration
62
+ @real_bolt.get_component_configuration
63
+ end
55
64
  end
@@ -8,6 +8,10 @@ java_import 'backtype.storm.tuple.Tuple'
8
8
  java_import 'backtype.storm.tuple.Fields'
9
9
  java_import 'backtype.storm.tuple.Values'
10
10
  java_import 'java.util.Map'
11
+ java_import 'org.apache.log4j.Logger'
12
+ module Backtype
13
+ java_import 'backtype.storm.Config'
14
+ end
11
15
 
12
16
  java_package 'redstorm.proxy'
13
17
 
@@ -17,7 +21,6 @@ java_package 'redstorm.proxy'
17
21
  # The real spout class implementation must define these methods:
18
22
  # - open(conf, context, collector)
19
23
  # - next_tuple
20
- # - is_distributed
21
24
  # - declare_output_fields
22
25
  #
23
26
  # and optionally:
@@ -25,6 +28,7 @@ java_package 'redstorm.proxy'
25
28
  # - fail(msg_id)
26
29
  # - close
27
30
  #
31
+
28
32
  class Spout
29
33
  java_implements IRichSpout
30
34
 
@@ -36,11 +40,6 @@ class Spout
36
40
  @real_spout = Object.module_eval(real_spout_class_name).new
37
41
  end
38
42
 
39
- java_signature 'boolean isDistributed()'
40
- def isDistributed
41
- @real_spout.respond_to?(:is_distributed) ? @real_spout.is_distributed : false
42
- end
43
-
44
43
  java_signature 'void open(Map, TopologyContext, SpoutOutputCollector)'
45
44
  def open(conf, context, collector)
46
45
  @real_spout.open(conf, context, collector)
@@ -51,6 +50,16 @@ class Spout
51
50
  @real_spout.close if @real_spout.respond_to?(:close)
52
51
  end
53
52
 
53
+ java_signature 'void activate()'
54
+ def activate
55
+ @real_spout.activate if @real_spout.respond_to?(:activate)
56
+ end
57
+
58
+ java_signature 'void deactivate()'
59
+ def deactivate
60
+ @real_spout.deactivate if @real_spout.respond_to?(:deactivate)
61
+ end
62
+
54
63
  java_signature 'void nextTuple()'
55
64
  def nextTuple
56
65
  @real_spout.next_tuple
@@ -70,4 +79,10 @@ class Spout
70
79
  def declareOutputFields(declarer)
71
80
  @real_spout.declare_output_fields(declarer)
72
81
  end
82
+
83
+ java_signature 'Map<String, Object> getComponentConfiguration()'
84
+ def getComponentConfiguration
85
+ @real_spout.get_component_configuration
86
+ end
87
+
73
88
  end
@@ -1,3 +1,5 @@
1
+ require 'red_storm/configurator'
2
+
1
3
  module RedStorm
2
4
 
3
5
  class SimpleBolt
@@ -5,10 +7,18 @@ module RedStorm
5
7
 
6
8
  # DSL class methods
7
9
 
10
+ def self.log
11
+ @log ||= Logger.getLogger(self.name)
12
+ end
13
+
8
14
  def self.output_fields(*fields)
9
15
  @fields = fields.map(&:to_s)
10
16
  end
11
17
 
18
+ def self.configure(&configure_block)
19
+ @configure_block = block_given? ? configure_block : lambda {}
20
+ end
21
+
12
22
  def self.on_receive(*args, &on_receive_block)
13
23
  options = args.last.is_a?(Hash) ? args.pop : {}
14
24
  method_name = args.first
@@ -27,6 +37,10 @@ module RedStorm
27
37
 
28
38
  # DSL instance methods
29
39
 
40
+ def log
41
+ self.class.log
42
+ end
43
+
30
44
  def unanchored_emit(*values)
31
45
  @collector.emit(Values.new(*values))
32
46
  end
@@ -64,17 +78,26 @@ module RedStorm
64
78
  declarer.declare(Fields.new(self.class.fields))
65
79
  end
66
80
 
67
- # default optional dsl methods/callbacks
81
+ def get_component_configuration
82
+ configurator = Configurator.new
83
+ configurator.instance_exec(&self.class.configure_block)
84
+ configurator.config
85
+ end
86
+
87
+ private
68
88
 
89
+ # default noop optional dsl callbacks
69
90
  def on_init; end
70
91
  def on_close; end
71
92
 
72
- private
73
-
74
93
  def self.fields
75
94
  @fields ||= []
76
95
  end
77
96
 
97
+ def self.configure_block
98
+ @configure_block ||= lambda {}
99
+ end
100
+
78
101
  def self.on_receive_block
79
102
  @on_receive_block ||= lambda {|tuple| self.send(:on_receive, tuple)}
80
103
  end
@@ -1,3 +1,5 @@
1
+ require 'red_storm/configurator'
2
+
1
3
  module RedStorm
2
4
 
3
5
  class SimpleSpout
@@ -5,8 +7,12 @@ module RedStorm
5
7
 
6
8
  # DSL class methods
7
9
 
8
- def self.set(options = {})
9
- self.spout_options.merge!(options)
10
+ def self.configure(&configure_block)
11
+ @configure_block = block_given? ? configure_block : lambda {}
12
+ end
13
+
14
+ def self.log
15
+ @log ||= Logger.getLogger(self.name)
10
16
  end
11
17
 
12
18
  def self.output_fields(*fields)
@@ -29,6 +35,14 @@ module RedStorm
29
35
  @on_close_block = block_given? ? on_close_block : lambda {self.send(method_name || :on_close)}
30
36
  end
31
37
 
38
+ def self.on_activate(method_name = nil, &on_activate_block)
39
+ @on_activate_block = block_given? ? on_activate_block : lambda {self.send(method_name || :on_activate)}
40
+ end
41
+
42
+ def self.on_deactivate(method_name = nil, &on_deactivate_block)
43
+ @on_deactivate_block = block_given? ? on_deactivate_block : lambda {self.send(method_name || :on_deactivate)}
44
+ end
45
+
32
46
  def self.on_ack(method_name = nil, &on_ack_block)
33
47
  @on_ack_block = block_given? ? on_ack_block : lambda {|msg_id| self.send(method_name || :on_ack, msg_id)}
34
48
  end
@@ -43,6 +57,10 @@ module RedStorm
43
57
  @collector.emit(Values.new(*values))
44
58
  end
45
59
 
60
+ def log
61
+ self.class.log
62
+ end
63
+
46
64
  # Spout proxy interface
47
65
 
48
66
  def next_tuple
@@ -68,12 +86,16 @@ module RedStorm
68
86
  instance_exec(&self.class.on_close_block)
69
87
  end
70
88
 
71
- def declare_output_fields(declarer)
72
- declarer.declare(Fields.new(self.class.fields))
89
+ def activate
90
+ instance_exec(&self.class.on_activate_block)
73
91
  end
74
92
 
75
- def is_distributed
76
- self.class.is_distributed?
93
+ def deactivate
94
+ instance_exec(&self.class.on_deactivate_block)
95
+ end
96
+
97
+ def declare_output_fields(declarer)
98
+ declarer.declare(Fields.new(self.class.fields))
77
99
  end
78
100
 
79
101
  def ack(msg_id)
@@ -84,19 +106,30 @@ module RedStorm
84
106
  instance_exec(msg_id, &self.class.on_fail_block)
85
107
  end
86
108
 
87
- # default optional dsl methods/callbacks
109
+ def get_component_configuration
110
+ configurator = Configurator.new
111
+ configurator.instance_exec(&self.class.configure_block)
112
+ configurator.config
113
+ end
114
+
115
+ private
88
116
 
117
+ # default optional noop dsl methods/callbacks
89
118
  def on_init; end
90
119
  def on_close; end
120
+ def on_activate; end
121
+ def on_deactivate; end
91
122
  def on_ack(msg_id); end
92
123
  def on_fail(msg_id); end
93
124
 
94
- private
95
-
96
125
  def self.fields
97
126
  @fields ||= []
98
127
  end
99
128
 
129
+ def self.configure_block
130
+ @configure_block ||= lambda {}
131
+ end
132
+
100
133
  def self.on_send_block
101
134
  @on_send_block ||= lambda {self.send(:on_send)}
102
135
  end
@@ -109,6 +142,14 @@ module RedStorm
109
142
  @on_close_block ||= lambda {self.send(:on_close)}
110
143
  end
111
144
 
145
+ def self.on_activate_block
146
+ @on_activate_block ||= lambda {self.send(:on_activate)}
147
+ end
148
+
149
+ def self.on_deactivate_block
150
+ @on_deactivate_block ||= lambda {self.send(:on_deactivate)}
151
+ end
152
+
112
153
  def self.on_ack_block
113
154
  @on_ack_block ||= lambda {|msg_id| self.send(:on_ack, msg_id)}
114
155
  end
@@ -121,14 +162,6 @@ module RedStorm
121
162
  @send_options ||= {:emit => true}
122
163
  end
123
164
 
124
- def self.spout_options
125
- @spout_options ||= {:is_distributed => false}
126
- end
127
-
128
- def self.is_distributed?
129
- !!self.spout_options[:is_distributed]
130
- end
131
-
132
165
  def self.emit?
133
166
  !!self.send_options[:emit]
134
167
  end