redstorm 0.6.5 → 0.6.6.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. data/CHANGELOG.md +12 -1
  2. data/README.md +66 -47
  3. data/Rakefile +1 -1
  4. data/examples/dsl/exclamation_bolt.rb +10 -0
  5. data/examples/{simple → dsl}/exclamation_topology.rb +5 -5
  6. data/examples/{simple → dsl}/exclamation_topology2.rb +5 -5
  7. data/examples/{simple → dsl}/hello_world_topology.rb +4 -4
  8. data/examples/{simple → dsl}/kafka_topology.rb +17 -18
  9. data/examples/{simple → dsl}/random_sentence_spout.rb +1 -1
  10. data/examples/{simple → dsl}/redis_word_count_topology.rb +6 -7
  11. data/examples/{simple → dsl}/ruby_version_topology.rb +9 -9
  12. data/examples/{simple → dsl}/split_sentence_bolt.rb +6 -6
  13. data/examples/{simple → dsl}/word_count_bolt.rb +2 -2
  14. data/examples/{simple → dsl}/word_count_topology.rb +6 -6
  15. data/examples/shell/shell_topology.rb +2 -2
  16. data/ivy/storm_dependencies.xml +2 -2
  17. data/ivy/topology_dependencies.xml +10 -2
  18. data/lib/red_storm.rb +6 -5
  19. data/lib/red_storm/application.rb +5 -5
  20. data/lib/red_storm/dsl/bolt.rb +155 -0
  21. data/lib/red_storm/dsl/drpc_topology.rb +92 -0
  22. data/lib/red_storm/dsl/spout.rb +194 -0
  23. data/lib/red_storm/dsl/topology.rb +227 -0
  24. data/lib/red_storm/dsl/tuple.rb +34 -0
  25. data/lib/red_storm/environment.rb +8 -8
  26. data/lib/red_storm/topology_launcher.rb +2 -2
  27. data/lib/red_storm/version.rb +1 -1
  28. data/lib/tasks/red_storm.rake +45 -27
  29. data/redstorm.gemspec +4 -4
  30. metadata +31 -34
  31. data/examples/simple/exclamation_bolt.rb +0 -10
  32. data/lib/red_storm/simple_bolt.rb +0 -135
  33. data/lib/red_storm/simple_drpc_topology.rb +0 -87
  34. data/lib/red_storm/simple_spout.rb +0 -184
  35. data/lib/red_storm/simple_topology.rb +0 -219
@@ -2,7 +2,7 @@ require 'red_storm'
2
2
 
3
3
  module RedStorm
4
4
  module Examples
5
- class RandomSentenceSpout < RedStorm::SimpleSpout
5
+ class RandomSentenceSpout < DSL::Spout
6
6
  output_fields :word
7
7
 
8
8
  on_send {@sentences[rand(@sentences.length)]}
@@ -1,15 +1,15 @@
1
1
  require 'red_storm'
2
- require 'examples/simple/word_count_bolt'
2
+ require 'examples/dsl/word_count_bolt'
3
3
  require 'redis'
4
4
  require 'thread'
5
5
 
6
6
  module RedStorm
7
7
  module Examples
8
8
 
9
- # RedisWordSpout reads the Redis queue "test" on localhost:6379
9
+ # RedisWordSpout reads the Redis queue "test" on localhost:6379
10
10
  # and emits each word item popped from the queue.
11
11
 
12
- class RedisWordSpout < RedStorm::SimpleSpout
12
+ class RedisWordSpout < DSL::Spout
13
13
  output_fields :word
14
14
 
15
15
  on_send {@q.pop.to_s if @q.size > 0}
@@ -18,7 +18,7 @@ module RedStorm
18
18
  @q = Queue.new
19
19
  @redis_reader = detach_redis_reader
20
20
  end
21
-
21
+
22
22
  private
23
23
 
24
24
  def detach_redis_reader
@@ -35,16 +35,15 @@ module RedStorm
35
35
  end
36
36
  end
37
37
 
38
- class RedisWordCountTopology < RedStorm::SimpleTopology
38
+ class RedisWordCountTopology < DSL::Topology
39
39
  spout RedisWordSpout
40
-
40
+
41
41
  bolt WordCountBolt, :parallelism => 3 do
42
42
  source RedisWordSpout, :fields => ["word"]
43
43
  end
44
44
 
45
45
  configure do |env|
46
46
  debug true
47
- # set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
48
47
  case env
49
48
  when :local
50
49
  max_task_parallelism 3
@@ -1,30 +1,30 @@
1
+ require 'java'
1
2
  require 'red_storm'
3
+ java_import 'java.lang.System'
4
+
2
5
 
3
6
  # this example topology only prints the Ruby version string. No tuple is emitted.
4
7
 
5
8
  module RedStorm
6
9
  module Examples
7
- class VersionSpout < RedStorm::SimpleSpout
10
+ class VersionSpout < DSL::Spout
8
11
  output_fields :dummy
9
12
  on_init do
13
+ log.info("***************** REDSTORM VERSION=#{VERSION}")
10
14
  log.info("***************** RUBY_VERSION=#{RUBY_VERSION}")
11
15
  log.info("***************** JRUBY_VERSION=#{JRUBY_VERSION}")
12
- log.info("***************** VERSION=#{VERSION}")
13
16
  log.info("***************** RUBY_ENGINE=#{RUBY_ENGINE}")
14
17
  log.info("***************** RUBY_PLATFORM=#{RUBY_PLATFORM}")
18
+ log.info("***************** JAVA VERSION=#{System.properties["java.runtime.version"]}")
15
19
  end
16
20
  on_send {}
17
21
  end
18
22
 
19
- class RubyVersionTopology < RedStorm::SimpleTopology
23
+ class RubyVersionTopology < DSL::Topology
20
24
  spout VersionSpout
21
-
22
- configure do |env|
23
- debug true
24
25
 
25
- # force the JRuby version property for this topology. this will only affect remote cluster execution
26
- # for local execution use the --1.8|--1.9 switch when launching
27
- # set "topology.worker.childopts", "-Djruby.compat.version=RUBY1_9"
26
+ configure do |env|
27
+ debug false
28
28
  end
29
29
 
30
30
  on_submit do |env|
@@ -2,31 +2,31 @@ require 'red_storm'
2
2
 
3
3
  module RedStorm
4
4
  module Examples
5
- class SplitSentenceBolt < RedStorm::SimpleBolt
5
+ class SplitSentenceBolt < DSL::Bolt
6
6
  output_fields :word
7
7
 
8
8
  # block declaration style using auto-emit (default)
9
9
  #
10
- on_receive {|tuple| tuple.getString(0).split(' ').map{|w| [w]}}
10
+ on_receive {|tuple| tuple[0].split(' ').map{|w| [w]}}
11
11
 
12
12
  # block declaration style no auto-emit
13
13
  #
14
- # on_receive :emit => false do |tuple|
15
- # tuple.getString(0).split(' ').each{|w| unanchored_emit(w)}
14
+ # on_receive :emit => false do |tuple|
15
+ # tuple[0].split(' ').each{|w| unanchored_emit(w)}
16
16
  # end
17
17
 
18
18
  # alternate declaration style using on_receive method
19
19
  #
20
20
  # on_receive :emit => true
21
21
  # def on_receive(tuple)
22
- # tuple.getString(0).split(' ').map{|w| [w]}
22
+ # tuple[0].split(' ').map{|w| [w]}
23
23
  # end
24
24
 
25
25
  # alternate declaration style using any specific method
26
26
  #
27
27
  # on_receive :my_method, :emit => true
28
28
  # def my_method(tuple)
29
- # tuple.getString(0).split(' ').map{|w| [w]}
29
+ # tuple[0].split(' ').map{|w| [w]}
30
30
  # end
31
31
  end
32
32
  end
@@ -2,14 +2,14 @@ require 'red_storm'
2
2
 
3
3
  module RedStorm
4
4
  module Examples
5
- class WordCountBolt < RedStorm::SimpleBolt
5
+ class WordCountBolt < DSL::Bolt
6
6
  output_fields :word, :count
7
7
  on_init {@counts = Hash.new{|h, k| h[k] = 0}}
8
8
 
9
9
  # block declaration style using auto-emit (default)
10
10
  #
11
11
  on_receive do |tuple|
12
- word = tuple.getValue(0).to_s
12
+ word = tuple[0].to_s
13
13
  @counts[word] += 1
14
14
 
15
15
  [word, @counts[word]]
@@ -1,17 +1,17 @@
1
1
  require 'red_storm'
2
- require 'examples/simple/random_sentence_spout'
3
- require 'examples/simple/split_sentence_bolt'
4
- require 'examples/simple/word_count_bolt'
2
+ require 'examples/dsl/random_sentence_spout'
3
+ require 'examples/dsl/split_sentence_bolt'
4
+ require 'examples/dsl/word_count_bolt'
5
5
 
6
6
  module RedStorm
7
7
  module Examples
8
- class WordCountTopology < SimpleTopology
8
+ class WordCountTopology < DSL::Topology
9
9
  spout RandomSentenceSpout, :parallelism => 2
10
-
10
+
11
11
  bolt SplitSentenceBolt, :parallelism => 2 do
12
12
  source RandomSentenceSpout, :shuffle
13
13
  end
14
-
14
+
15
15
  bolt WordCountBolt, :parallelism => 2 do
16
16
  source SplitSentenceBolt, :fields => ["word"]
17
17
  end
@@ -3,7 +3,7 @@ require 'thread'
3
3
 
4
4
  java_import 'redstorm.storm.jruby.JRubyShellBolt'
5
5
 
6
- class SimpleSpout < RedStorm::SimpleSpout
6
+ class SimpleSpout < RedStorm::DSL::Spout
7
7
  on_init do
8
8
  @q = Queue.new
9
9
  @q << "the quick red fox"
@@ -16,7 +16,7 @@ class SimpleSpout < RedStorm::SimpleSpout
16
16
  end
17
17
  end
18
18
 
19
- class ShellTopology < RedStorm::SimpleTopology
19
+ class ShellTopology < RedStorm::DSL::Topology
20
20
  spout SimpleSpout do
21
21
  output_fields :string
22
22
  end
@@ -1,8 +1,8 @@
1
1
  <?xml version="1.0"?>
2
- <ivy-module version="2.0">
2
+ <ivy-module version="2.0" xmlns:m="http://ant.apache.org/ivy/maven">
3
3
  <info organisation="redstorm" module="storm-deps"/>
4
4
  <dependencies>
5
- <dependency org="storm" name="storm" rev="0.8.2" conf="default" transitive="true" />
5
+ <dependency org="storm" name="storm" rev="0.9.0-wip16" conf="default" transitive="true" />
6
6
  <override org="org.slf4j" module="slf4j-log4j12" rev="1.6.3"/>
7
7
  </dependencies>
8
8
  </ivy-module>
@@ -1,7 +1,15 @@
1
1
  <?xml version="1.0"?>
2
- <ivy-module version="2.0">
2
+ <ivy-module version="2.0" xmlns:m="http://ant.apache.org/ivy/maven">
3
3
  <info organisation="redstorm" module="topology-deps"/>
4
4
  <dependencies>
5
- <dependency org="org.jruby" name="jruby-core" rev="1.7.3" conf="default" transitive="true"/>
5
+ <dependency org="org.jruby" name="jruby-core" rev="1.7.4" conf="default" transitive="true"/>
6
+
7
+ <!-- explicitly specify jffi to also fetch the native jar. make sure to update the jffi version to match the jruby-core version -->
8
+ <!-- this is the only way I found using Ivy to fetch the native jar -->
9
+ <dependency org="com.github.jnr" name="jffi" rev="1.2.5" conf="default" transitive="true">
10
+ <artifact name="jffi" type="jar" />
11
+ <artifact name="jffi" type="jar" m:classifier="native"/>
12
+ </dependency>
13
+
6
14
  </dependencies>
7
15
  </ivy-module>
data/lib/red_storm.rb CHANGED
@@ -1,9 +1,10 @@
1
1
  require 'rubygems'
2
2
 
3
- require 'red_storm/environment'
4
3
  require 'red_storm/version'
4
+ require 'red_storm/environment'
5
5
  require 'red_storm/configuration'
6
- require 'red_storm/simple_bolt'
7
- require 'red_storm/simple_spout'
8
- require 'red_storm/simple_topology'
9
- require 'red_storm/simple_drpc_topology'
6
+ require 'red_storm/dsl/bolt'
7
+ require 'red_storm/dsl/spout'
8
+ require 'red_storm/dsl/topology'
9
+ require 'red_storm/dsl/drpc_topology'
10
+ require 'red_storm/dsl/tuple'
@@ -29,8 +29,8 @@ CUSTOM_IVY_TOPOLOGY_DEPENDENCIES = "#{DST_IVY_DIR}/topology_dependencies.xml"
29
29
 
30
30
  module RedStorm
31
31
 
32
- class Application
33
- TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"
32
+ class Application
33
+ TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"
34
34
 
35
35
  def self.local_storm_command(class_file, ruby_mode = nil)
36
36
  src_dir = File.expand_path(File.dirname(class_file))
@@ -43,9 +43,9 @@ module RedStorm
43
43
 
44
44
  def self.usage
45
45
  puts("usage: redstorm version")
46
- puts(" redstorm install")
46
+ puts(" redstorm install [--JVM_VERSION] (ex.: --1.6 or --1.7) default is current JVM version")
47
47
  puts(" redstorm deps")
48
- puts(" redstorm build")
48
+ puts(" redstorm build [--JVM_VERSION] (ex.: --1.6 or --1.7) default is current JVM version")
49
49
  puts(" redstorm examples")
50
50
  puts(" redstorm bundle [BUNDLER_GROUP]")
51
51
  puts(" redstorm jar DIR1, [DIR2, ...]")
@@ -59,7 +59,7 @@ module RedStorm
59
59
  if args[0] == "version"
60
60
  puts("RedStorm v#{VERSION}")
61
61
  exit
62
- elsif ["install", "examples", "jar", "bundle", "deps", "build"].include?(args[0])
62
+ elsif ["examples", "jar", "bundle", "deps", "install", "build"].include?(args[0])
63
63
  load(TASKS_FILE)
64
64
  Rake::Task[args.shift].invoke(args.join(":"))
65
65
  exit
@@ -0,0 +1,155 @@
1
+ require 'java'
2
+ require 'red_storm/configurator'
3
+ require 'red_storm/environment'
4
+ require 'pathname'
5
+
6
+ module RedStorm
7
+ module DSL
8
+
9
+ class BoltError < StandardError; end
10
+
11
# Base class for bolts written with the RedStorm DSL.
#
# Subclasses describe their behavior with the class-level macros below
# (output_fields, configure, on_receive, on_init, on_close). The instance
# methods prepare, execute, cleanup, declare_output_fields and
# get_component_configuration form the bolt proxy interface and delegate to
# whatever the subclass declared.
class Bolt
  attr_reader :collector, :context, :config

  # DSL class methods

  # Returns the log4j logger named after the concrete bolt class.
  # The logger is created on first use and memoized per class.
  def self.log
    @log ||= Java::OrgApacheLog4j::Logger.getLogger(self.name)
  end

  # Declares the output field names of this bolt; names are stored as strings.
  def self.output_fields(*fields)
    @fields = fields.map(&:to_s)
  end

  # Registers the block that get_component_configuration later evaluates
  # against a Configurator instance. Without a block a noop is stored.
  def self.configure(&configure_block)
    @configure_block = block_given? ? configure_block : lambda {}
  end

  # Declares how incoming tuples are handled.
  #
  # Accepts an optional method name followed by an optional options hash
  # (:emit, :ack, :anchor) which is merged into receive_options. The handler is
  # either the given block, the named method, or a method called on_receive
  # that the subclass is expected to define afterwards.
  def self.on_receive(*args, &on_receive_block)
    options = args.last.is_a?(Hash) ? args.pop : {}
    method_name = args.first

    self.receive_options.merge!(options)

    # indirecting through a lambda defers the method lookup to invocation time
    # and the performance penalty is negligible
    body = block_given? ? on_receive_block : lambda { |tuple| self.send((method_name || :on_receive).to_sym, tuple) }
    define_method(:on_receive, body)
  end

  # Declares the initialization callback invoked at the end of prepare,
  # given either as a block or as the name of a method to call.
  def self.on_init(method_name = nil, &on_init_block)
    body = block_given? ? on_init_block : lambda { self.send((method_name || :on_init).to_sym) }
    define_method(:on_init, body)
  end

  # Declares the callback invoked from cleanup, given either as a block
  # or as the name of a method to call.
  def self.on_close(method_name = nil, &on_close_block)
    body = block_given? ? on_close_block : lambda { self.send((method_name || :on_close).to_sym) }
    define_method(:on_close, body)
  end

  # Class-level state readers.
  #
  # These must stay public: instance methods reach them through `self.class`,
  # and a bare `private` keyword has no effect on `def self.` methods anyway.

  # Declared output field names, empty when output_fields was never called.
  def self.fields
    @fields ||= []
  end

  # Block registered with configure, or a noop lambda.
  def self.configure_block
    @configure_block ||= lambda {}
  end

  # Per-class receive options with their defaults.
  def self.receive_options
    @receive_options ||= {:emit => true, :ack => false, :anchor => false}
  end

  # True when the value returned by on_receive should be emitted by execute.
  def self.emit?
    !!self.receive_options[:emit]
  end

  # True when execute should ack the input tuple after emitting.
  def self.ack?
    !!self.receive_options[:ack]
  end

  # True when emitted values should be anchored to the input tuple.
  def self.anchor?
    !!self.receive_options[:anchor]
  end

  # below non-dry see Spout class
  #
  # Records, for every subclass, the path of the file that defines it,
  # expressed relative to RedStorm::BASE_PATH. The path is extracted from the
  # call stack entry of the `class ... < Bolt` statement.
  #
  # Raises BoltError when that stack entry does not look like "path:line...".
  def self.inherited(subclass)
    super # keep any other inherited hooks in the ancestry working

    location = /^(.+):\d+.*$/.match(caller.first.to_s)
    raise(BoltError, "unable to extract base topology class path from #{caller.first.inspect}") unless location

    subclass.base_class_path = Pathname.new(location[1]).relative_path_from(Pathname.new(RedStorm::BASE_PATH)).to_s
  end

  def self.base_class_path=(path)
    @base_class_path = path
  end

  def self.base_class_path
    @base_class_path
  end

  # DSL instance methods

  # Shortcut to the class-level logger.
  def log
    self.class.log
  end

  # Emits the given values without anchoring them to an input tuple.
  def unanchored_emit(*values)
    @collector.emit(Values.new(*values))
  end

  # Emits the given values anchored to the given input tuple.
  def anchored_emit(tuple, *values)
    @collector.emit(tuple, Values.new(*values))
  end

  # Acks the given tuple on the collector.
  def ack(tuple)
    @collector.ack(tuple)
  end

  # Fails the given tuple on the collector.
  # Note that inside a bolt this shadows Kernel#fail.
  def fail(tuple)
    @collector.fail(tuple)
  end

  # Bolt proxy interface

  # Passes the tuple to on_receive and, when auto-emit is enabled and a
  # non-nil/non-false result is returned, emits that result.
  #
  # The result is normalized to a list of value lists:
  #   scalar             => one tuple holding one value
  #   flat array         => one tuple holding these values
  #   array of arrays    => one tuple per inner array
  def execute(tuple)
    output = on_receive(tuple)
    if output && self.class.emit?
      values_list =
        if !output.is_a?(Array)
          [[output]]
        elsif !output.first.is_a?(Array)
          [output]
        else
          output
        end

      values_list.each do |values|
        self.class.anchor? ? anchored_emit(tuple, *values) : unanchored_emit(*values)
      end

      # NOTE(review): the automatic ack only happens on this emitting path; with
      # :ack => true a tuple is left un-acked when on_receive returns nil/false or
      # when :emit => false. Confirm whether that is intended before relying on it.
      @collector.ack(tuple) if self.class.ack?
    end
  end

  # Stores the collaborators handed over at preparation time, then runs on_init.
  def prepare(config, context, collector)
    @collector = collector
    @context = context
    @config = config

    on_init
  end

  # Runs the on_close callback.
  def cleanup
    on_close
  end

  # Declares this bolt's output fields on the given declarer.
  def declare_output_fields(declarer)
    declarer.declare(Fields.new(self.class.fields))
  end

  # Evaluates the configure block against a fresh Configurator and
  # returns the resulting config.
  def get_component_configuration
    configurator = Configurator.new
    configurator.instance_exec(&self.class.configure_block)
    configurator.config
  end

  private

  # default noop optional dsl callbacks
  def on_init; end
  def on_close; end

end
150
+ end
151
+
152
+ # for backward compatibility
153
+ SimpleBolt = DSL::Bolt
154
+
155
+ end
@@ -0,0 +1,92 @@
1
+ require 'java'
2
+ require 'red_storm/configuration'
3
+ require 'red_storm/configurator'
4
+
5
+ module RedStorm
6
+ module DSL
7
+
8
# Bolt definition used by DRPCTopology.input_bolt. On top of what
# Topology::BoltDefinition provides it carries a single grouping, chosen in the
# bolt definition block with `grouping :shuffle` (etc.) and defaulting to :none.
class InputBoltDefinition < Topology::BoltDefinition
  # Only the writer is generated: the reader slot is taken by the DSL-style
  # `grouping(value)` setter below, which would overwrite a generated getter.
  attr_writer :grouping

  def initialize(*args)
    super
    @grouping = :none
  end

  # DSL setter, meant to be called inside the bolt definition block.
  def grouping(grouping)
    @grouping = grouping
  end

  # Applies the selected grouping to the given declarer.
  #
  # Raises RuntimeError when the grouping is not one of :fields, :global,
  # :shuffle, :local_or_shuffle, :none, :all or :direct.
  def define_grouping(declarer)
    case @grouping
    when :fields
      # NOTE(review): `params` is not defined in this class; presumably it is
      # provided by Topology::BoltDefinition - confirm.
      declarer.fieldsGrouping(Fields.new(*([params].flatten.map(&:to_s))))
    when :global
      declarer.globalGrouping()
    when :shuffle
      declarer.shuffleGrouping()
    when :local_or_shuffle
      declarer.localOrShuffleGrouping()
    when :none
      declarer.noneGrouping()
    when :all
      declarer.allGrouping()
    when :direct
      declarer.directGrouping()
    else
      # report the offending value; the previous message interpolated an
      # undefined name and therefore raised NameError instead
      raise("unknown grouping=#{@grouping.inspect}")
    end
  end
end
41
+
42
# Topology variant built with Storm's LinearDRPCTopologyBuilder. Bolts are
# declared with the regular Topology DSL plus `input_bolt`; declaring a spout
# is rejected.
class DRPCTopology < Topology

  # Spouts cannot be declared on a DRPC topology. Any arguments or block are
  # accepted so that a regular `spout SomeSpout, ...` declaration reaches the
  # intended TopologyDefinitionError instead of failing on arity.
  def self.spout(*_args, &_block)
    raise TopologyDefinitionError, "DRPC spout is already defined"
  end

  # Builds the linear DRPC topology from the declared bolts and submits it.
  #
  # base_class_path - passed to each bolt definition's new_instance
  # env             - :local submits to a new LocalCluster with a LocalDRPC,
  #                   anything else submits through StormSubmitter
  #
  # The submit block is finally evaluated with env and the LocalDRPC instance
  # (nil when not running locally).
  def start(base_class_path, env)
    builder = Java::BacktypeStormDrpc::LinearDRPCTopologyBuilder.new(self.class.topology_name)

    # NOTE(review): bolts are read from `bolts` here while input_bolt appends to
    # `components`; both come from Topology, which is not visible here - confirm
    # they refer to the same collection.
    self.class.bolts.each do |bolt|
      declarer = builder.addBolt(bolt.new_instance(base_class_path), bolt.parallelism.to_java)
      declarer.addConfigurations(bolt.config)
      bolt.define_grouping(declarer)
    end

    # set the JRuby compatibility mode option for Storm workers, default to current JRuby mode
    defaults = {"topology.worker.childopts" => "-Djruby.compat.version=#{RedStorm.jruby_mode_token}"}

    configurator = Configurator.new(defaults)
    configurator.instance_exec(env, &self.class.configure_block)

    drpc = nil
    if env == :local
      drpc = LocalDRPC.new
      submitter = @cluster = LocalCluster.new
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createLocalTopology(drpc))
    else
      submitter = StormSubmitter
      submitter.submitTopology(self.class.topology_name, configurator.config, builder.createRemoteTopology)
    end
    instance_exec(env, drpc, &self.class.submit_block)
  end

  # Declares the input bolt of the DRPC topology.
  #
  # bolt_class - the bolt class
  # args       - optional constructor arguments, optionally followed by an
  #              options hash (:id, :parallelism)
  # bolt_block - required definition body, evaluated against the new
  #              InputBoltDefinition (e.g. to select its grouping)
  #
  # Raises TopologyDefinitionError when no block is given.
  def self.input_bolt(bolt_class, *args, &bolt_block)
    set_topology_class!
    options = args.last.is_a?(Hash) ? args.pop : {}
    constructor_args = !args.empty? ? args.pop : []
    bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)

    bolt = InputBoltDefinition.new(bolt_class, constructor_args, bolt_options[:id], bolt_options[:parallelism])
    raise(TopologyDefinitionError, "#{bolt.clazz.name}, #{bolt.id}, bolt definition body required") unless block_given?
    bolt.instance_exec(&bolt_block)
    self.components << bolt
  end
end
87
+ end
88
+
89
+ # for backward compatibility
90
+ SimpleDRPCTopology = DSL::DRPCTopology
91
+
92
+ end