redstorm 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,14 +1,10 @@
1
- load 'lib/tasks/red_storm.rake'
1
+ require 'rubygems'
2
+ require 'rspec/core/rake_task'
2
3
 
3
- task :default => :spec
4
+ load 'lib/tasks/red_storm.rake'
4
5
 
5
- begin
6
- require 'rspec/core/rake_task'
7
- desc "run specs"
8
- task :spec do
9
- system("ruby -v")
10
- RSpec::Core::RakeTask.new
11
- end
12
- rescue NameError, LoadError => e
13
- puts e
6
+ RSpec::Core::RakeTask.new(:spec) do
7
+ system("ruby -v")
14
8
  end
9
+
10
+ task :default => :spec
data/TODO.md CHANGED
@@ -1,4 +1 @@
1
1
  # TODO
2
-
3
- - expose the log4j logger in Simple{Topology|Bolt|Spout} (Java::org.apache.log4j.Logger.getLogger(...))
4
- - see if using bundler would provide better gems integration and easier jar packaging
@@ -0,0 +1,2 @@
1
+ source :rubygems
2
+ gem 'redis'
@@ -7,21 +7,27 @@ require 'examples/simple/exclamation_bolt'
7
7
  module RedStorm
8
8
  module Examples
9
9
  class ExclamationTopology < RedStorm::SimpleTopology
10
- spout TestWordSpout, :parallelism => 10
10
+ spout TestWordSpout, :parallelism => 5 do
11
+ debug true
12
+ end
11
13
 
12
- bolt ExclamationBolt, :parallelism => 3 do
14
+ bolt ExclamationBolt, :parallelism => 2 do
13
15
  source TestWordSpout, :shuffle
16
+ # max_task_parallelism 1
14
17
  end
15
18
 
16
19
  bolt ExclamationBolt, :id => :ExclamationBolt2, :parallelism => 2 do
17
20
  source ExclamationBolt, :shuffle
21
+ # max_task_parallelism 1
22
+ debug true
18
23
  end
19
24
 
20
25
  configure do |env|
21
- debug true
26
+ debug false
27
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
22
28
  case env
23
29
  when :local
24
- max_task_parallelism 3
30
+ max_task_parallelism 40
25
31
  when :cluster
26
32
  num_workers 20
27
33
  max_spout_pending(1000);
@@ -24,6 +24,7 @@ module RedStorm
24
24
 
25
25
  configure do |env|
26
26
  debug true
27
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
27
28
  case env
28
29
  when :local
29
30
  max_task_parallelism 3
@@ -3,7 +3,6 @@ require 'red_storm'
3
3
  module RedStorm
4
4
  module Examples
5
5
  class RandomSentenceSpout < RedStorm::SimpleSpout
6
- set :is_distributed => true
7
6
  output_fields :word
8
7
 
9
8
  on_send {@sentences[rand(@sentences.length)]}
@@ -1,8 +1,9 @@
1
- require 'rubygems'
1
+ require 'rubygems' # required for remote cluster exec where TopologyLauncher + require rubygems is not called
2
+ require 'red_storm' # must be required before bundler for environment setup and after rubygems
3
+ require 'bundler/setup'
4
+
2
5
  require 'redis'
3
6
  require 'thread'
4
- require 'red_storm'
5
-
6
7
  require 'examples/simple/word_count_bolt'
7
8
 
8
9
  module RedStorm
@@ -46,6 +47,7 @@ module RedStorm
46
47
 
47
48
  configure do |env|
48
49
  debug true
50
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
49
51
  case env
50
52
  when :local
51
53
  max_task_parallelism 3
@@ -0,0 +1,32 @@
1
+ require 'red_storm'
2
+
3
+ # this example topology only prints the Ruby version string. No tuple is emitted.
4
+
5
+ module RedStorm
6
+ module Examples
7
+ class VersionSpout < RedStorm::SimpleSpout
8
+ output_fields :dummy
9
+ on_init {log.info("****** JRuby version #{RUBY_VERSION}")}
10
+ on_send {}
11
+ end
12
+
13
+ class RubyVersionTopology < RedStorm::SimpleTopology
14
+ spout VersionSpout
15
+
16
+ configure do |env|
17
+ debug true
18
+
19
+ # set the JRuby version property for this topology. this will only affect remote cluster execution
20
+ # for local execution use the --1.8|--1.9 switch when launching
21
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
22
+ end
23
+
24
+ on_submit do |env|
25
+ if env == :local
26
+ sleep(1)
27
+ cluster.shutdown
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -17,6 +17,7 @@ module RedStorm
17
17
 
18
18
  configure :word_count do |env|
19
19
  debug true
20
+ set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
20
21
  case env
21
22
  when :local
22
23
  max_task_parallelism 3
data/lib/red_storm.rb CHANGED
@@ -1,10 +1,48 @@
1
+ # we depend on rubygems being loaded at this point for setting up gem/bundle environments
2
+ # explicitly requiring rubygems is required in remote cluster environment
3
+ require 'rubygems'
4
+
1
5
  module RedStorm
2
- REDSTORM_HOME = File.expand_path(File.dirname(__FILE__) + '/..') unless defined?(REDSTORM_HOME)
6
+ LAUNCH_PATH = File.expand_path(File.dirname(__FILE__))
7
+ JAR_CONTEXT = !!(LAUNCH_PATH =~ /\.jar!$/)
8
+
9
+ if JAR_CONTEXT
10
+ REDSTORM_HOME = LAUNCH_PATH
11
+ TARGET_PATH = LAUNCH_PATH
12
+ BUNDLE_GEMFILE = "#{TARGET_PATH}/bundler/Gemfile"
13
+ BUNDLE_PATH = "#{TARGET_PATH}/bundler/#{Gem.ruby_engine}/#{Gem::ConfigMap[:ruby_version]}/"
14
+ GEM_PATH = "#{TARGET_PATH}/gems/"
15
+ else
16
+ REDSTORM_HOME = File.expand_path(LAUNCH_PATH + '/..')
17
+ TARGET_PATH = Dir.pwd
18
+ BUNDLE_GEMFILE = "#{TARGET_PATH}/target/gems/bundler/Gemfile"
19
+ BUNDLE_PATH = "#{TARGET_PATH}/target/gems/bundler/#{Gem.ruby_engine}/#{Gem::ConfigMap[:ruby_version]}/"
20
+ GEM_PATH = "#{TARGET_PATH}/target/gems/gems"
21
+ end
22
+
23
+ # setup bundler environment
24
+ ENV['BUNDLE_GEMFILE'] = RedStorm::BUNDLE_GEMFILE
25
+ ENV['BUNDLE_PATH'] = RedStorm::BUNDLE_PATH
26
+ ENV["GEM_PATH"] = RedStorm::GEM_PATH
27
+ ENV['BUNDLE_DISABLE_SHARED_GEMS'] = "1"
3
28
  end
4
29
 
30
+ $:.unshift RedStorm::TARGET_PATH
31
+
32
+
5
33
  require 'red_storm/version'
6
34
  require 'red_storm/configuration'
7
35
  require 'red_storm/application'
8
36
  require 'red_storm/simple_bolt'
9
37
  require 'red_storm/simple_spout'
10
38
  require 'red_storm/simple_topology'
39
+
40
+ # puts("************************ PWD=#{Dir.pwd}")
41
+ # puts("************************ RedStorm::JAR_CONTEXT=#{RedStorm::JAR_CONTEXT}")
42
+ # puts("************************ RedStorm::LAUNCH_PATH=#{RedStorm::LAUNCH_PATH}")
43
+ # puts("************************ RedStorm::REDSTORM_HOME=#{RedStorm::REDSTORM_HOME}")
44
+ # puts("************************ RedStorm::TARGET_PATH=#{RedStorm::TARGET_PATH}")
45
+ # puts("************************ RedStorm::GEM_PATH=#{RedStorm::GEM_PATH}")
46
+ # puts("************************ ENV['BUNDLE_GEMFILE']=#{ENV['BUNDLE_GEMFILE']}")
47
+ # puts("************************ ENV['BUNDLE_PATH']=#{ENV['BUNDLE_PATH']}")
48
+ # puts("************************ ENV['GEM_PATH']=#{ENV['GEM_PATH']}")
@@ -1,27 +1,36 @@
1
1
  module RedStorm
2
+
3
+ DEFAULT_RUBY_VERSION = "--1.8"
4
+ RUNTIME = {}
2
5
 
3
6
  class Application
4
7
  TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"
5
8
 
6
9
  def usage
7
- puts("usage: redstorm install|examples|jar <project_directory>|local <topology_class_file>")
10
+ puts("usage: redstorm [--1.8|--1.9] install | deps | build | examples | gems [--gemfile GEMFILE_PATH] | jar DIR1, DIR2, ... | local TOPOLOGY_CLASS_PATH")
8
11
  exit(1)
9
12
  end
10
13
 
11
14
  def run(args)
12
15
  if args.size > 0
13
- if ["install", "examples", "jar"].include?(args[0])
14
- load(TASKS_FILE)
15
- Rake::Task[args.shift].invoke(*args)
16
- elsif args.size == 2 && ["local"].include?(args[0]) && File.exist?(args[1])
16
+ version = args.delete("--1.8") || args.delete("--1.9") || DEFAULT_RUBY_VERSION
17
+ RUNTIME['RUBY_VERSION'] = version
18
+
19
+ if ["install", "examples", "jar", "gems", "deps", "build"].include?(args[0])
17
20
  load(TASKS_FILE)
18
- Rake::Task['launch'].invoke(*args)
19
- else
20
- usage
21
+ Rake::Task[args.shift].invoke(args.join(":"))
22
+ exit
23
+ elsif args.size >= 2 && args.include?("local")
24
+ args.delete("local")
25
+ if args.size == 1
26
+ file = args[0]
27
+ load(TASKS_FILE)
28
+ Rake::Task['launch'].invoke("local", file)
29
+ exit
30
+ end
21
31
  end
22
- else
23
- usage
24
32
  end
33
+ usage
25
34
  end
26
35
  end
27
36
  end
@@ -0,0 +1,25 @@
1
+ module RedStorm
2
+
3
+ class Configurator
4
+ attr_reader :config
5
+
6
+ def initialize
7
+ @config = Backtype::Config.new
8
+ end
9
+
10
+ def set(attribute, value)
11
+ @config.put(attribute, value)
12
+ end
13
+
14
+ def method_missing(sym, *args)
15
+ config_method = "set#{self.class.camel_case(sym)}"
16
+ @config.send(config_method, *args)
17
+ end
18
+
19
+ private
20
+
21
+ def self.camel_case(s)
22
+ s.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,13 @@
1
+ module RedStorm
2
+ module Loggable
3
+
4
+ def self.log
5
+ @log ||= Logger.getLogger(self.name)
6
+ end
7
+
8
+ def log
9
+ self.class.log
10
+ end
11
+
12
+ end
13
+ end
@@ -8,6 +8,10 @@ java_import 'backtype.storm.tuple.Tuple'
8
8
  java_import 'backtype.storm.tuple.Fields'
9
9
  java_import 'backtype.storm.tuple.Values'
10
10
  java_import 'java.util.Map'
11
+ java_import 'org.apache.log4j.Logger'
12
+ module Backtype
13
+ java_import 'backtype.storm.Config'
14
+ end
11
15
 
12
16
  java_package 'redstorm.proxy'
13
17
 
@@ -52,4 +56,9 @@ class Bolt
52
56
  def declareOutputFields(declarer)
53
57
  @real_bolt.declare_output_fields(declarer)
54
58
  end
59
+
60
+ java_signature 'Map<String, Object> getComponentConfiguration()'
61
+ def getComponentConfiguration
62
+ @real_bolt.get_component_configuration
63
+ end
55
64
  end
@@ -8,6 +8,10 @@ java_import 'backtype.storm.tuple.Tuple'
8
8
  java_import 'backtype.storm.tuple.Fields'
9
9
  java_import 'backtype.storm.tuple.Values'
10
10
  java_import 'java.util.Map'
11
+ java_import 'org.apache.log4j.Logger'
12
+ module Backtype
13
+ java_import 'backtype.storm.Config'
14
+ end
11
15
 
12
16
  java_package 'redstorm.proxy'
13
17
 
@@ -17,7 +21,6 @@ java_package 'redstorm.proxy'
17
21
  # The real spout class implementation must define these methods:
18
22
  # - open(conf, context, collector)
19
23
  # - next_tuple
20
- # - is_distributed
21
24
  # - declare_output_fields
22
25
  #
23
26
  # and optionally:
@@ -25,6 +28,7 @@ java_package 'redstorm.proxy'
25
28
  # - fail(msg_id)
26
29
  # - close
27
30
  #
31
+
28
32
  class Spout
29
33
  java_implements IRichSpout
30
34
 
@@ -36,11 +40,6 @@ class Spout
36
40
  @real_spout = Object.module_eval(real_spout_class_name).new
37
41
  end
38
42
 
39
- java_signature 'boolean isDistributed()'
40
- def isDistributed
41
- @real_spout.respond_to?(:is_distributed) ? @real_spout.is_distributed : false
42
- end
43
-
44
43
  java_signature 'void open(Map, TopologyContext, SpoutOutputCollector)'
45
44
  def open(conf, context, collector)
46
45
  @real_spout.open(conf, context, collector)
@@ -51,6 +50,16 @@ class Spout
51
50
  @real_spout.close if @real_spout.respond_to?(:close)
52
51
  end
53
52
 
53
+ java_signature 'void activate()'
54
+ def activate
55
+ @real_spout.activate if @real_spout.respond_to?(:activate)
56
+ end
57
+
58
+ java_signature 'void deactivate()'
59
+ def deactivate
60
+ @real_spout.deactivate if @real_spout.respond_to?(:deactivate)
61
+ end
62
+
54
63
  java_signature 'void nextTuple()'
55
64
  def nextTuple
56
65
  @real_spout.next_tuple
@@ -70,4 +79,10 @@ class Spout
70
79
  def declareOutputFields(declarer)
71
80
  @real_spout.declare_output_fields(declarer)
72
81
  end
82
+
83
+ java_signature 'Map<String, Object> getComponentConfiguration()'
84
+ def getComponentConfiguration
85
+ @real_spout.get_component_configuration
86
+ end
87
+
73
88
  end
@@ -1,3 +1,5 @@
1
+ require 'red_storm/configurator'
2
+
1
3
  module RedStorm
2
4
 
3
5
  class SimpleBolt
@@ -5,10 +7,18 @@ module RedStorm
5
7
 
6
8
  # DSL class methods
7
9
 
10
+ def self.log
11
+ @log ||= Logger.getLogger(self.name)
12
+ end
13
+
8
14
  def self.output_fields(*fields)
9
15
  @fields = fields.map(&:to_s)
10
16
  end
11
17
 
18
+ def self.configure(&configure_block)
19
+ @configure_block = block_given? ? configure_block : lambda {}
20
+ end
21
+
12
22
  def self.on_receive(*args, &on_receive_block)
13
23
  options = args.last.is_a?(Hash) ? args.pop : {}
14
24
  method_name = args.first
@@ -27,6 +37,10 @@ module RedStorm
27
37
 
28
38
  # DSL instance methods
29
39
 
40
+ def log
41
+ self.class.log
42
+ end
43
+
30
44
  def unanchored_emit(*values)
31
45
  @collector.emit(Values.new(*values))
32
46
  end
@@ -64,17 +78,26 @@ module RedStorm
64
78
  declarer.declare(Fields.new(self.class.fields))
65
79
  end
66
80
 
67
- # default optional dsl methods/callbacks
81
+ def get_component_configuration
82
+ configurator = Configurator.new
83
+ configurator.instance_exec(&self.class.configure_block)
84
+ configurator.config
85
+ end
86
+
87
+ private
68
88
 
89
+ # default noop optional dsl callbacks
69
90
  def on_init; end
70
91
  def on_close; end
71
92
 
72
- private
73
-
74
93
  def self.fields
75
94
  @fields ||= []
76
95
  end
77
96
 
97
+ def self.configure_block
98
+ @configure_block ||= lambda {}
99
+ end
100
+
78
101
  def self.on_receive_block
79
102
  @on_receive_block ||= lambda {|tuple| self.send(:on_receive, tuple)}
80
103
  end
@@ -1,3 +1,5 @@
1
+ require 'red_storm/configurator'
2
+
1
3
  module RedStorm
2
4
 
3
5
  class SimpleSpout
@@ -5,8 +7,12 @@ module RedStorm
5
7
 
6
8
  # DSL class methods
7
9
 
8
- def self.set(options = {})
9
- self.spout_options.merge!(options)
10
+ def self.configure(&configure_block)
11
+ @configure_block = block_given? ? configure_block : lambda {}
12
+ end
13
+
14
+ def self.log
15
+ @log ||= Logger.getLogger(self.name)
10
16
  end
11
17
 
12
18
  def self.output_fields(*fields)
@@ -29,6 +35,14 @@ module RedStorm
29
35
  @on_close_block = block_given? ? on_close_block : lambda {self.send(method_name || :on_close)}
30
36
  end
31
37
 
38
+ def self.on_activate(method_name = nil, &on_activate_block)
39
+ @on_activate_block = block_given? ? on_activate_block : lambda {self.send(method_name || :on_activate)}
40
+ end
41
+
42
+ def self.on_deactivate(method_name = nil, &on_deactivate_block)
43
+ @on_deactivate_block = block_given? ? on_deactivate_block : lambda {self.send(method_name || :on_deactivate)}
44
+ end
45
+
32
46
  def self.on_ack(method_name = nil, &on_ack_block)
33
47
  @on_ack_block = block_given? ? on_ack_block : lambda {|msg_id| self.send(method_name || :on_ack, msg_id)}
34
48
  end
@@ -43,6 +57,10 @@ module RedStorm
43
57
  @collector.emit(Values.new(*values))
44
58
  end
45
59
 
60
+ def log
61
+ self.class.log
62
+ end
63
+
46
64
  # Spout proxy interface
47
65
 
48
66
  def next_tuple
@@ -68,12 +86,16 @@ module RedStorm
68
86
  instance_exec(&self.class.on_close_block)
69
87
  end
70
88
 
71
- def declare_output_fields(declarer)
72
- declarer.declare(Fields.new(self.class.fields))
89
+ def activate
90
+ instance_exec(&self.class.on_activate_block)
73
91
  end
74
92
 
75
- def is_distributed
76
- self.class.is_distributed?
93
+ def deactivate
94
+ instance_exec(&self.class.on_deactivate_block)
95
+ end
96
+
97
+ def declare_output_fields(declarer)
98
+ declarer.declare(Fields.new(self.class.fields))
77
99
  end
78
100
 
79
101
  def ack(msg_id)
@@ -84,19 +106,30 @@ module RedStorm
84
106
  instance_exec(msg_id, &self.class.on_fail_block)
85
107
  end
86
108
 
87
- # default optional dsl methods/callbacks
109
+ def get_component_configuration
110
+ configurator = Configurator.new
111
+ configurator.instance_exec(&self.class.configure_block)
112
+ configurator.config
113
+ end
114
+
115
+ private
88
116
 
117
+ # default optional noop dsl methods/callbacks
89
118
  def on_init; end
90
119
  def on_close; end
120
+ def on_activate; end
121
+ def on_deactivate; end
91
122
  def on_ack(msg_id); end
92
123
  def on_fail(msg_id); end
93
124
 
94
- private
95
-
96
125
  def self.fields
97
126
  @fields ||= []
98
127
  end
99
128
 
129
+ def self.configure_block
130
+ @configure_block ||= lambda {}
131
+ end
132
+
100
133
  def self.on_send_block
101
134
  @on_send_block ||= lambda {self.send(:on_send)}
102
135
  end
@@ -109,6 +142,14 @@ module RedStorm
109
142
  @on_close_block ||= lambda {self.send(:on_close)}
110
143
  end
111
144
 
145
+ def self.on_activate_block
146
+ @on_activate_block ||= lambda {self.send(:on_activate)}
147
+ end
148
+
149
+ def self.on_deactivate_block
150
+ @on_deactivate_block ||= lambda {self.send(:on_deactivate)}
151
+ end
152
+
112
153
  def self.on_ack_block
113
154
  @on_ack_block ||= lambda {|msg_id| self.send(:on_ack, msg_id)}
114
155
  end
@@ -121,14 +162,6 @@ module RedStorm
121
162
  @send_options ||= {:emit => true}
122
163
  end
123
164
 
124
- def self.spout_options
125
- @spout_options ||= {:is_distributed => false}
126
- end
127
-
128
- def self.is_distributed?
129
- !!self.spout_options[:is_distributed]
130
- end
131
-
132
165
  def self.emit?
133
166
  !!self.send_options[:emit]
134
167
  end