redstorm 0.6.5 → 0.6.6.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/CHANGELOG.md +12 -1
  2. data/README.md +66 -47
  3. data/Rakefile +1 -1
  4. data/examples/dsl/exclamation_bolt.rb +10 -0
  5. data/examples/{simple → dsl}/exclamation_topology.rb +5 -5
  6. data/examples/{simple → dsl}/exclamation_topology2.rb +5 -5
  7. data/examples/{simple → dsl}/hello_world_topology.rb +4 -4
  8. data/examples/{simple → dsl}/kafka_topology.rb +17 -18
  9. data/examples/{simple → dsl}/random_sentence_spout.rb +1 -1
  10. data/examples/{simple → dsl}/redis_word_count_topology.rb +6 -7
  11. data/examples/{simple → dsl}/ruby_version_topology.rb +9 -9
  12. data/examples/{simple → dsl}/split_sentence_bolt.rb +6 -6
  13. data/examples/{simple → dsl}/word_count_bolt.rb +2 -2
  14. data/examples/{simple → dsl}/word_count_topology.rb +6 -6
  15. data/examples/shell/shell_topology.rb +2 -2
  16. data/ivy/storm_dependencies.xml +2 -2
  17. data/ivy/topology_dependencies.xml +10 -2
  18. data/lib/red_storm.rb +6 -5
  19. data/lib/red_storm/application.rb +5 -5
  20. data/lib/red_storm/dsl/bolt.rb +155 -0
  21. data/lib/red_storm/dsl/drpc_topology.rb +92 -0
  22. data/lib/red_storm/dsl/spout.rb +194 -0
  23. data/lib/red_storm/dsl/topology.rb +227 -0
  24. data/lib/red_storm/dsl/tuple.rb +34 -0
  25. data/lib/red_storm/environment.rb +8 -8
  26. data/lib/red_storm/topology_launcher.rb +2 -2
  27. data/lib/red_storm/version.rb +1 -1
  28. data/lib/tasks/red_storm.rake +45 -27
  29. data/redstorm.gemspec +4 -4
  30. metadata +31 -34
  31. data/examples/simple/exclamation_bolt.rb +0 -10
  32. data/lib/red_storm/simple_bolt.rb +0 -135
  33. data/lib/red_storm/simple_drpc_topology.rb +0 -87
  34. data/lib/red_storm/simple_spout.rb +0 -184
  35. data/lib/red_storm/simple_topology.rb +0 -219
@@ -2,7 +2,7 @@ require 'red_storm'
2
2
 
3
3
  module RedStorm
4
4
  module Examples
5
- class RandomSentenceSpout < RedStorm::SimpleSpout
5
+ class RandomSentenceSpout < DSL::Spout
6
6
  output_fields :word
7
7
 
8
8
  on_send {@sentences[rand(@sentences.length)]}
@@ -1,15 +1,15 @@
1
1
  require 'red_storm'
2
- require 'examples/simple/word_count_bolt'
2
+ require 'examples/dsl/word_count_bolt'
3
3
  require 'redis'
4
4
  require 'thread'
5
5
 
6
6
  module RedStorm
7
7
  module Examples
8
8
 
9
- # RedisWordSpout reads the Redis queue "test" on localhost:6379
9
+ # RedisWordSpout reads the Redis queue "test" on localhost:6379
10
10
  # and emits each word item popped from the queue.
11
11
 
12
- class RedisWordSpout < RedStorm::SimpleSpout
12
+ class RedisWordSpout < DSL::Spout
13
13
  output_fields :word
14
14
 
15
15
  on_send {@q.pop.to_s if @q.size > 0}
@@ -18,7 +18,7 @@ module RedStorm
18
18
  @q = Queue.new
19
19
  @redis_reader = detach_redis_reader
20
20
  end
21
-
21
+
22
22
  private
23
23
 
24
24
  def detach_redis_reader
@@ -35,16 +35,15 @@ module RedStorm
35
35
  end
36
36
  end
37
37
 
38
- class RedisWordCountTopology < RedStorm::SimpleTopology
38
+ class RedisWordCountTopology < DSL::Topology
39
39
  spout RedisWordSpout
40
-
40
+
41
41
  bolt WordCountBolt, :parallelism => 3 do
42
42
  source RedisWordSpout, :fields => ["word"]
43
43
  end
44
44
 
45
45
  configure do |env|
46
46
  debug true
47
- # set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
48
47
  case env
49
48
  when :local
50
49
  max_task_parallelism 3
@@ -1,30 +1,30 @@
1
+ require 'java'
1
2
  require 'red_storm'
3
+ java_import 'java.lang.System'
4
+
2
5
 
3
6
  # this example topology only prints the Ruby version string. No tuple is emitted.
4
7
 
5
8
  module RedStorm
6
9
  module Examples
7
- class VersionSpout < RedStorm::SimpleSpout
10
+ class VersionSpout < DSL::Spout
8
11
  output_fields :dummy
9
12
  on_init do
13
+ log.info("***************** REDSTORM VERSION=#{VERSION}")
10
14
  log.info("***************** RUBY_VERSION=#{RUBY_VERSION}")
11
15
  log.info("***************** JRUBY_VERSION=#{JRUBY_VERSION}")
12
- log.info("***************** VERSION=#{VERSION}")
13
16
  log.info("***************** RUBY_ENGINE=#{RUBY_ENGINE}")
14
17
  log.info("***************** RUBY_PLATFORM=#{RUBY_PLATFORM}")
18
+ log.info("***************** JAVA VERSION=#{System.properties["java.runtime.version"]}")
15
19
  end
16
20
  on_send {}
17
21
  end
18
22
 
19
- class RubyVersionTopology < RedStorm::SimpleTopology
23
+ class RubyVersionTopology < DSL::Topology
20
24
  spout VersionSpout
21
-
22
- configure do |env|
23
- debug true
24
25
 
25
- # force the JRuby version property for this topology. this will only affect remote cluster execution
26
- # for local execution use the --1.8|--1.9 switch when launching
27
- # set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
26
+ configure do |env|
27
+ debug false
28
28
  end
29
29
 
30
30
  on_submit do |env|
@@ -2,31 +2,31 @@ require 'red_storm'
2
2
 
3
3
  module RedStorm
4
4
  module Examples
5
- class SplitSentenceBolt < RedStorm::SimpleBolt
5
+ class SplitSentenceBolt < DSL::Bolt
6
6
  output_fields :word
7
7
 
8
8
  # block declaration style using auto-emit (default)
9
9
  #
10
- on_receive {|tuple| tuple.getString(0).split(' ').map{|w| [w]}}
10
+ on_receive {|tuple| tuple[0].split(' ').map{|w| [w]}}
11
11
 
12
12
  # block declaration style no auto-emit
13
13
  #
14
- # on_receive :emit => false do |tuple|
15
- # tuple.getString(0).split(' ').each{|w| unanchored_emit(w)}
14
+ # on_receive :emit => false do |tuple|
15
+ # tuple[0].split(' ').each{|w| unanchored_emit(w)}
16
16
  # end
17
17
 
18
18
  # alternate declaration style using on_receive method
19
19
  #
20
20
  # on_receive :emit => true
21
21
  # def on_receive(tuple)
22
- # tuple.getString(0).split(' ').map{|w| [w]}
22
+ # tuple[0].split(' ').map{|w| [w]}
23
23
  # end
24
24
 
25
25
  # alternate declaration style using any specific method
26
26
  #
27
27
  # on_receive :my_method, :emit => true
28
28
  # def my_method(tuple)
29
- # tuple.getString(0).split(' ').map{|w| [w]}
29
+ # tuple[0].split(' ').map{|w| [w]}
30
30
  # end
31
31
  end
32
32
  end
@@ -2,14 +2,14 @@ require 'red_storm'
2
2
 
3
3
  module RedStorm
4
4
  module Examples
5
- class WordCountBolt < RedStorm::SimpleBolt
5
+ class WordCountBolt < DSL::Bolt
6
6
  output_fields :word, :count
7
7
  on_init {@counts = Hash.new{|h, k| h[k] = 0}}
8
8
 
9
9
  # block declaration style using auto-emit (default)
10
10
  #
11
11
  on_receive do |tuple|
12
- word = tuple.getValue(0).to_s
12
+ word = tuple[0].to_s
13
13
  @counts[word] += 1
14
14
 
15
15
  [word, @counts[word]]
@@ -1,17 +1,17 @@
1
1
  require 'red_storm'
2
- require 'examples/simple/random_sentence_spout'
3
- require 'examples/simple/split_sentence_bolt'
4
- require 'examples/simple/word_count_bolt'
2
+ require 'examples/dsl/random_sentence_spout'
3
+ require 'examples/dsl/split_sentence_bolt'
4
+ require 'examples/dsl/word_count_bolt'
5
5
 
6
6
  module RedStorm
7
7
  module Examples
8
- class WordCountTopology < SimpleTopology
8
+ class WordCountTopology < DSL::Topology
9
9
  spout RandomSentenceSpout, :parallelism => 2
10
-
10
+
11
11
  bolt SplitSentenceBolt, :parallelism => 2 do
12
12
  source RandomSentenceSpout, :shuffle
13
13
  end
14
-
14
+
15
15
  bolt WordCountBolt, :parallelism => 2 do
16
16
  source SplitSentenceBolt, :fields => ["word"]
17
17
  end
@@ -3,7 +3,7 @@ require 'thread'
3
3
 
4
4
  java_import 'redstorm.storm.jruby.JRubyShellBolt'
5
5
 
6
- class SimpleSpout < RedStorm::SimpleSpout
6
+ class SimpleSpout < RedStorm::DSL::Spout
7
7
  on_init do
8
8
  @q = Queue.new
9
9
  @q << "the quick red fox"
@@ -16,7 +16,7 @@ class SimpleSpout < RedStorm::SimpleSpout
16
16
  end
17
17
  end
18
18
 
19
- class ShellTopology < RedStorm::SimpleTopology
19
+ class ShellTopology < RedStorm::DSL::Topology
20
20
  spout SimpleSpout do
21
21
  output_fields :string
22
22
  end
@@ -1,8 +1,8 @@
1
1
  <?xml version="1.0"?>
2
- <ivy-module version="2.0">
2
+ <ivy-module version="2.0" xmlns:m="http://ant.apache.org/ivy/maven">
3
3
  <info organisation="redstorm" module="storm-deps"/>
4
4
  <dependencies>
5
- <dependency org="storm" name="storm" rev="0.8.2" conf="default" transitive="true" />
5
+ <dependency org="storm" name="storm" rev="0.9.0-wip16" conf="default" transitive="true" />
6
6
  <override org="org.slf4j" module="slf4j-log4j12" rev="1.6.3"/>
7
7
  </dependencies>
8
8
  </ivy-module>
@@ -1,7 +1,15 @@
1
1
  <?xml version="1.0"?>
2
- <ivy-module version="2.0">
2
+ <ivy-module version="2.0" xmlns:m="http://ant.apache.org/ivy/maven">
3
3
  <info organisation="redstorm" module="topology-deps"/>
4
4
  <dependencies>
5
- <dependency org="org.jruby" name="jruby-core" rev="1.7.3" conf="default" transitive="true"/>
5
+ <dependency org="org.jruby" name="jruby-core" rev="1.7.4" conf="default" transitive="true"/>
6
+
7
+ <!-- explicitly specify jffi to also fetch the native jar. make sure the jffi version matches the jruby-core version -->
8
+ <!-- this is the only way I found using Ivy to fetch the native jar -->
9
+ <dependency org="com.github.jnr" name="jffi" rev="1.2.5" conf="default" transitive="true">
10
+ <artifact name="jffi" type="jar" />
11
+ <artifact name="jffi" type="jar" m:classifier="native"/>
12
+ </dependency>
13
+
6
14
  </dependencies>
7
15
  </ivy-module>
data/lib/red_storm.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  require 'rubygems'
2
2
 
3
- require 'red_storm/environment'
4
3
  require 'red_storm/version'
4
+ require 'red_storm/environment'
5
5
  require 'red_storm/configuration'
6
- require 'red_storm/simple_bolt'
7
- require 'red_storm/simple_spout'
8
- require 'red_storm/simple_topology'
9
- require 'red_storm/simple_drpc_topology'
6
+ require 'red_storm/dsl/bolt'
7
+ require 'red_storm/dsl/spout'
8
+ require 'red_storm/dsl/topology'
9
+ require 'red_storm/dsl/drpc_topology'
10
+ require 'red_storm/dsl/tuple'
@@ -29,8 +29,8 @@ CUSTOM_IVY_TOPOLOGY_DEPENDENCIES = "#{DST_IVY_DIR}/topology_dependencies.xml"
29
29
 
30
30
  module RedStorm
31
31
 
32
- class Application
33
- TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"
32
+ class Application
33
+ TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"
34
34
 
35
35
  def self.local_storm_command(class_file, ruby_mode = nil)
36
36
  src_dir = File.expand_path(File.dirname(class_file))
@@ -43,9 +43,9 @@ module RedStorm
43
43
 
44
44
  def self.usage
45
45
  puts("usage: redstorm version")
46
- puts(" redstorm install")
46
+ puts(" redstorm install [--JVM_VERSION] (ex.: --1.6 or --1.7) default is current JVM version")
47
47
  puts(" redstorm deps")
48
- puts(" redstorm build")
48
+ puts(" redstorm build [--JVM_VERSION] (ex.: --1.6 or --1.7) default is current JVM version")
49
49
  puts(" redstorm examples")
50
50
  puts(" redstorm bundle [BUNDLER_GROUP]")
51
51
  puts(" redstorm jar DIR1, [DIR2, ...]")
@@ -59,7 +59,7 @@ module RedStorm
59
59
  if args[0] == "version"
60
60
  puts("RedStorm v#{VERSION}")
61
61
  exit
62
- elsif ["install", "examples", "jar", "bundle", "deps", "build"].include?(args[0])
62
+ elsif ["examples", "jar", "bundle", "deps", "install", "build"].include?(args[0])
63
63
  load(TASKS_FILE)
64
64
  Rake::Task[args.shift].invoke(args.join(":"))
65
65
  exit
@@ -0,0 +1,155 @@
1
+ require 'java'
2
+ require 'red_storm/configurator'
3
+ require 'red_storm/environment'
4
+ require 'pathname'
5
+
6
+ module RedStorm
7
+ module DSL
8
+
9
# Raised when a bolt class definition cannot be processed, e.g. when the
# source file of a subclass cannot be determined in Bolt.inherited.
class BoltError < StandardError; end

# Base class for bolts written with the RedStorm DSL.
#
# Subclasses describe themselves with the class-level macros below
# (output_fields, configure, on_receive, on_init, on_close). The instance
# methods execute / prepare / cleanup / declare_output_fields /
# get_component_configuration form the interface invoked through the bolt
# proxy and delegate to the callbacks installed by those macros.
class Bolt
  attr_reader :collector, :context, :config

  # DSL class methods

  # Logger shared by all instances of this bolt class, named after the class.
  def self.log
    @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
  end

  # Declares the names of the fields emitted by this bolt.
  # Names are stored as strings.
  def self.output_fields(*fields)
    @fields = fields.map(&:to_s)
  end

  # Registers the block evaluated against a Configurator in
  # #get_component_configuration. Without a block a no-op is registered.
  def self.configure(&configure_block)
    @configure_block = block_given? ? configure_block : lambda {}
  end

  # Defines the tuple handler.
  #
  # Accepted forms:
  #   on_receive(options) { |tuple| ... }   # block is the handler
  #   on_receive :my_method, options        # dispatch to #my_method
  #   on_receive options                    # dispatch to an #on_receive
  #                                         # method defined afterwards
  #
  # options is an optional trailing Hash merged into receive_options
  # (:emit, :ack, :anchor).
  def self.on_receive(*args, &on_receive_block)
    options = args.last.is_a?(Hash) ? args.pop : {}
    method_name = args.first

    self.receive_options.merge!(options)

    # going through a lambda postpones the method lookup until the handler is
    # actually invoked, so the target method may be defined after this macro;
    # the performance penalty is negligible
    body = block_given? ? on_receive_block : lambda{|tuple| self.send((method_name || :on_receive).to_sym, tuple)}
    define_method(:on_receive, body)
  end

  # Defines the initialization callback, either from the block or by
  # dispatching to method_name (default :on_init). Called from #prepare.
  def self.on_init(method_name = nil, &on_init_block)
    body = block_given? ? on_init_block : lambda {self.send((method_name || :on_init).to_sym)}
    define_method(:on_init, body)
  end

  # Defines the shutdown callback, either from the block or by dispatching
  # to method_name (default :on_close). Called from #cleanup.
  def self.on_close(method_name = nil, &on_close_block)
    body = block_given? ? on_close_block : lambda {self.send((method_name || :on_close).to_sym)}
    define_method(:on_close, body)
  end

  # Class-level state accessors.
  #
  # These must stay public: instance methods reach them through
  # self.class.<name>, i.e. with an explicit receiver. (A bare `private`
  # keyword would not affect `def self.` methods anyway.)

  # Declared output field names, empty when output_fields was never called.
  def self.fields
    @fields ||= []
  end

  # Block registered with configure, or a no-op lambda.
  def self.configure_block
    @configure_block ||= lambda {}
  end

  # Options controlling #execute; stored per class.
  def self.receive_options
    @receive_options ||= {:emit => true, :ack => false, :anchor => false}
  end

  # true when the value returned by the handler must be emitted
  def self.emit?
    !!self.receive_options[:emit]
  end

  # true when processed tuples must be acked automatically
  def self.ack?
    !!self.receive_options[:ack]
  end

  # true when emitted values must be anchored on the input tuple
  def self.anchor?
    !!self.receive_options[:anchor]
  end

  # below non-dry see Spout class
  #
  # Records, for every subclass, the path of the file that defines it,
  # relative to RedStorm::BASE_PATH. The path is parsed out of the first
  # caller frame, which is the location of the subclass definition.
  #
  # Raises BoltError when that frame does not look like "path:line...".
  def self.inherited(subclass)
    super
    path = (caller.first.to_s =~ /^(.+):\d+.*$/) ? $1 : raise(BoltError, "unable to extract base topology class path from #{caller.first.inspect}")
    subclass.base_class_path = Pathname.new(path).relative_path_from(Pathname.new(RedStorm::BASE_PATH)).to_s
  end

  def self.base_class_path=(path)
    @base_class_path = path
  end

  def self.base_class_path
    @base_class_path
  end

  # DSL instance methods

  def log
    self.class.log
  end

  # Emits values without anchoring them on an input tuple.
  def unanchored_emit(*values)
    @collector.emit(Values.new(*values))
  end

  # Emits values anchored on the given input tuple.
  def anchored_emit(tuple, *values)
    @collector.emit(tuple, Values.new(*values))
  end

  def ack(tuple)
    @collector.ack(tuple)
  end

  # NOTE: within bolt instances this shadows Kernel#fail.
  def fail(tuple)
    @collector.fail(tuple)
  end

  # Bolt proxy interface

  # Runs the on_receive handler on the tuple, emits its result when the
  # :emit option is set and acks the tuple when the :ack option is set.
  #
  # The handler result is normalized into a list of value lists:
  #   "a"              => [["a"]]          one tuple, one value
  #   ["a", 1]         => [["a", 1]]       one tuple, two values
  #   [["a"], ["b"]]   => [["a"], ["b"]]   two tuples
  def execute(tuple)
    output = on_receive(tuple)
    if output && self.class.emit?
      # NOTE(review): an empty array is treated as a single tuple without
      # values ([] => [[]]) and is emitted as such -- confirm this is intended.
      values_list = !output.is_a?(Array) ? [[output]] : !output.first.is_a?(Array) ? [output] : output
      values_list.each{|values| self.class.anchor? ? anchored_emit(tuple, *values) : unanchored_emit(*values)}
    end

    # ack independently of the emit branch: a tuple that produced no output,
    # or a bolt declared with :emit => false, has still been processed and
    # would otherwise never be acked.
    @collector.ack(tuple) if self.class.ack?
  end

  # Stores the collector, context and config, then runs the on_init callback.
  def prepare(config, context, collector)
    @collector = collector
    @context = context
    @config = config

    on_init
  end

  # Runs the on_close callback.
  def cleanup
    on_close
  end

  # Declares the fields registered with output_fields on the declarer.
  def declare_output_fields(declarer)
    declarer.declare(Fields.new(self.class.fields))
  end

  # Evaluates the configure block against a fresh Configurator and returns
  # the resulting config.
  def get_component_configuration
    configurator = Configurator.new
    configurator.instance_exec(&self.class.configure_block)
    configurator.config
  end

  private

  # default noop optional dsl callbacks
  def on_init; end
  def on_close; end

end
150
+ end
151
+
152
+ # for backward compatibility
153
+ SimpleBolt = DSL::Bolt
154
+
155
+ end
@@ -0,0 +1,92 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+ module RedStorm
6
+ module DSL
7
+
8
# Definition of the input bolt of a DRPC topology. Unlike a regular bolt
# definition it carries a single grouping (default :none) which is applied
# to the declarer in #define_grouping.
class InputBoltDefinition < Topology::BoltDefinition
  # only the writer is generated: the reader name is taken by the
  # one-argument DSL method #grouping below
  attr_writer :grouping

  def initialize(*args)
    super
    @grouping = :none
  end

  # DSL-style setter, meant to be called as `grouping :shuffle` inside the
  # input_bolt definition block.
  def grouping(grouping)
    @grouping = grouping
  end

  # Applies the configured grouping to the given declarer.
  #
  # Raises RuntimeError when the grouping is not one of :fields, :global,
  # :shuffle, :local_or_shuffle, :none, :all or :direct.
  def define_grouping(declarer)
    case @grouping
    when :fields
      # NOTE(review): `params` is not defined in this class; presumably it is
      # provided by Topology::BoltDefinition -- confirm, otherwise the
      # :fields grouping raises NameError.
      declarer.fieldsGrouping(Fields.new(*([params].flatten.map(&:to_s))))
    when :global
      declarer.globalGrouping()
    when :shuffle
      declarer.shuffleGrouping()
    when :local_or_shuffle
      declarer.localOrShuffleGrouping()
    when :none
      declarer.noneGrouping()
    when :all
      declarer.allGrouping()
    when :direct
      declarer.directGrouping()
    else
      # report the offending value; the previous code interpolated an
      # undefined local and failed with NameError instead
      raise("unknown grouping=#{@grouping.inspect}")
    end
  end
end
41
+
42
# Topology built with Storm's LinearDRPCTopologyBuilder. The DRPC spout is
# supplied by the builder, so declaring a spout is rejected; the first bolt
# is declared with input_bolt.
class DRPCTopology < Topology

  # A DRPC topology cannot declare its own spout.
  #
  # Arguments and block are accepted (and ignored) so that a regular
  # `spout SomeSpout, ...` declaration fails with the intended
  # TopologyDefinitionError instead of an ArgumentError.
  def self.spout(*args, &block)
    raise TopologyDefinitionError, "DRPC spout is already defined"
  end

  # Builds the linear DRPC topology from the declared bolts, submits it and
  # finally runs the submit block.
  #
  # base_class_path - passed to each bolt definition's new_instance
  # env             - :local submits to a LocalCluster through a LocalDRPC,
  #                   any other value submits through StormSubmitter
  def start(base_class_path, env)
    builder = Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder.new(self.class.topology_name)

    self.class.bolts.each do |bolt|
      declarer = builder.addBolt(bolt.new_instance(base_class_path), bolt.parallelism.to_java)
      declarer.addConfigurations(bolt.config)
      bolt.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

    configurator = Configurator.new(defaults)
    configurator.instance_exec(env, &self.class.configure_block)

    # drpc stays nil for remote submission; the submit block receives it as is
    drpc = nil
    if env == :local
      drpc = LocalDRPC.new
      submitter = @cluster = LocalCluster.new
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createLocalTopology(drpc))
    else
      submitter = StormSubmitter
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createRemoteTopology)
    end
    instance_exec(env, drpc, &self.class.submit_block)
  end

  # Declares the input bolt of the DRPC topology.
  #
  # bolt_class - the bolt class
  # args       - optional constructor arguments, followed by an optional
  #              options Hash (:id, :parallelism)
  # bolt_block - required definition body, evaluated in the context of the
  #              InputBoltDefinition (e.g. to set the grouping)
  #
  # Raises TopologyDefinitionError when no block is given.
  def self.input_bolt(bolt_class, *args, &bolt_block)
    set_topology_class!
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = args.empty? ? [] : args.pop
    bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

    bolt = InputBoltDefinition.new(bolt_class, constructor_args, bolt_options[:id], bolt_options[:parallelism])
    raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end
end
87
+ end
88
+
89
+ # for backward compatibility
90
+ SimpleDRPCTopology = DSL::DRPCTopology
91
+
92
+ end