redstorm 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. data/CHANGELOG.md +7 -0
  2. data/README.md +363 -32
  3. data/Rakefile +10 -125
  4. data/bin/redstorm +1 -0
  5. data/examples/{cluster_word_count_topology.rb → native/cluster_word_count_topology.rb} +4 -4
  6. data/examples/{exclamation_bolt.rb → native/exclamation_bolt.rb} +0 -0
  7. data/examples/{local_exclamation_topology.rb → native/local_exclamation_topology.rb} +2 -2
  8. data/examples/{local_exclamation_topology2.rb → native/local_exclamation_topology2.rb} +1 -1
  9. data/examples/{local_redis_word_count_topology.rb → native/local_redis_word_count_topology.rb} +2 -2
  10. data/examples/{local_word_count_topology.rb → native/local_word_count_topology.rb} +4 -4
  11. data/examples/{random_sentence_spout.rb → native/random_sentence_spout.rb} +0 -0
  12. data/examples/{split_sentence_bolt.rb → native/split_sentence_bolt.rb} +0 -0
  13. data/examples/{word_count_bolt.rb → native/word_count_bolt.rb} +0 -0
  14. data/examples/simple/exclamation_bolt.rb +6 -0
  15. data/examples/simple/exclamation_topology.rb +36 -0
  16. data/examples/simple/exclamation_topology2.rb +41 -0
  17. data/examples/simple/random_sentence_spout.rb +18 -0
  18. data/examples/simple/redis_word_count_topology.rb +54 -0
  19. data/examples/simple/split_sentence_bolt.rb +29 -0
  20. data/examples/simple/word_count_bolt.rb +15 -0
  21. data/examples/simple/word_count_topology.rb +34 -0
  22. data/lib/red_storm.rb +3 -0
  23. data/lib/red_storm/application.rb +20 -13
  24. data/lib/red_storm/simple_bolt.rb +106 -0
  25. data/lib/red_storm/simple_spout.rb +136 -0
  26. data/lib/red_storm/simple_topology.rb +191 -0
  27. data/lib/red_storm/topology_launcher.rb +10 -7
  28. data/lib/red_storm/version.rb +1 -1
  29. data/lib/tasks/red_storm.rake +151 -0
  30. data/pom.xml +1 -1
  31. metadata +24 -12
@@ -4,3 +4,6 @@ end
4
4
 
5
5
  require 'red_storm/version'
6
6
  require 'red_storm/application'
7
+ require 'red_storm/simple_bolt'
8
+ require 'red_storm/simple_spout'
9
+ require 'red_storm/simple_topology'
@@ -1,19 +1,26 @@
1
- require 'rake'
1
+ module RedStorm
2
+
3
# Command line front-end for the redstorm executable.
#
# Maps the first command line argument onto a rake task defined in
# TASKS_FILE and invokes it with the remaining arguments.
class Application
  TASKS_FILE = "#{RedStorm::REDSTORM_HOME}/lib/tasks/red_storm.rake"

  # Prints the command synopsis and terminates the process with status 1.
  def usage
    puts("usage: redstorm install|examples|jar <project_directory>|local <topology_class_file>")
    exit(1)
  end

  # Dispatches on the first argument:
  # * install / examples / jar: invokes the rake task of the same name,
  #   passing the remaining arguments.
  # * local <file>: invokes the 'launch' rake task with both arguments,
  #   only when exactly two arguments are given and the file exists.
  # Anything else (including no arguments) prints the usage and exits.
  def run(args)
    command = args[0]

    if args.size > 0 && ["install", "examples", "jar"].include?(command)
      load(TASKS_FILE)
      # shift drops the task name so only its parameters are forwarded
      Rake::Task[args.shift].invoke(*args)
    elsif args.size == 2 && ["local"].include?(command) && File.exist?(args[1])
      load(TASKS_FILE)
      Rake::Task['launch'].invoke(*args)
    else
      usage
    end
  end
end
@@ -0,0 +1,106 @@
1
module RedStorm

  # Base class for bolts declared through a small class-level DSL.
  #
  # A subclass registers callbacks with on_receive / on_init / on_close and
  # names its output fields with output_fields. The methods in the
  # "Bolt proxy interface" section run those callbacks with the bolt
  # instance as self.
  #
  # Values and Fields are referenced here but not defined in this file; they
  # are expected to be provided by the runtime environment.
  class SimpleBolt
    attr_reader :collector, :context, :config

    # DSL class methods

    # Declares the output field names; they are stored as strings.
    def self.output_fields(*fields)
      @fields = fields.map(&:to_s)
    end

    # Registers the tuple callback.
    #
    # args may hold an optional method name followed by an optional options
    # hash (:emit, :ack, :anchor) which is merged into receive_options.
    # A given block takes precedence; otherwise the named method (default
    # :on_receive) is called on the bolt instance with the tuple.
    def self.on_receive(*args, &on_receive_block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      method_name = args.first

      self.receive_options.merge!(options)
      @on_receive_block = block_given? ? on_receive_block : lambda {|tuple| self.send(method_name || :on_receive, tuple)}
    end

    # Registers the callback run by prepare: a block, or the named instance
    # method (default :on_init).
    def self.on_init(method_name = nil, &on_init_block)
      @on_init_block = block_given? ? on_init_block : lambda {self.send(method_name || :on_init)}
    end

    # Registers the callback run by cleanup: a block, or the named instance
    # method (default :on_close).
    def self.on_close(method_name = nil, &close_block)
      @close_block = block_given? ? close_block : lambda {self.send(method_name || :on_close)}
    end

    # DSL state readers
    #
    # These are deliberately public: instances reach them via self.class.
    # (A bare `private` would not apply to `def self.` methods anyway.)

    # Declared output field names, [] when none were declared.
    def self.fields
      @fields ||= []
    end

    def self.on_receive_block
      @on_receive_block ||= lambda {|tuple| self.send(:on_receive, tuple)}
    end

    def self.on_init_block
      @on_init_block ||= lambda {self.send(:on_init)}
    end

    def self.close_block
      @close_block ||= lambda {self.send(:on_close)}
    end

    # Options controlling execute; defaults: emit, no ack, no anchoring.
    def self.receive_options
      @receive_options ||= {:emit => true, :ack => false, :anchor => false}
    end

    def self.emit?
      !!self.receive_options[:emit]
    end

    def self.ack?
      !!self.receive_options[:ack]
    end

    def self.anchor?
      !!self.receive_options[:anchor]
    end

    # DSL instance methods

    # Emits the values without an anchor tuple.
    def unanchored_emit(*values)
      @collector.emit(Values.new(*values))
    end

    # Emits the values, passing tuple to the collector as first argument.
    def anchored_emit(tuple, *values)
      @collector.emit(tuple, Values.new(*values))
    end

    def ack(tuple)
      @collector.ack(tuple)
    end

    # Bolt proxy interface

    # Runs the on_receive callback for tuple.
    #
    # When the :emit option is on and the callback returns a truthy value,
    # that value is emitted (see tuples_from for the accepted shapes),
    # anchored or not according to the :anchor option.
    #
    # When the :ack option is on the tuple is always acked, including when
    # the callback returned nil/false or :emit is off; acking only after an
    # emit would leave such tuples un-acked forever.
    def execute(tuple)
      output = instance_exec(tuple, &self.class.on_receive_block)

      if output && self.class.emit?
        tuples_from(output).each do |values|
          self.class.anchor? ? anchored_emit(tuple, *values) : unanchored_emit(*values)
        end
      end

      @collector.ack(tuple) if self.class.ack?
    end

    # Stores the collaborators handed over by the caller, then runs the
    # on_init callback.
    def prepare(config, context, collector)
      @collector = collector
      @context = context
      @config = config
      instance_exec(&self.class.on_init_block)
    end

    # Runs the on_close callback.
    def cleanup
      instance_exec(&self.class.close_block)
    end

    # Declares the output fields registered with output_fields.
    def declare_output_fields(declarer)
      declarer.declare(Fields.new(self.class.fields))
    end

    # default optional dsl methods/callbacks

    def on_init; end
    def on_close; end

    private

    # Normalizes a callback result into a list of value lists:
    # * a non-array is a single tuple with a single value
    # * a flat array is a single tuple with several values
    # * an array whose first element is an array is a list of tuples
    def tuples_from(output)
      return [[output]] unless output.is_a?(Array)
      output.first.is_a?(Array) ? output : [output]
    end
  end
end
@@ -0,0 +1,136 @@
1
module RedStorm

  # Base class for spouts declared through a small class-level DSL.
  #
  # A subclass registers callbacks with on_send / on_init / on_close /
  # on_ack / on_fail, names its output fields with output_fields and sets
  # spout options with set. The methods in the "Spout proxy interface"
  # section run those callbacks with the spout instance as self.
  #
  # Values and Fields are referenced here but not defined in this file; they
  # are expected to be provided by the runtime environment.
  class SimpleSpout
    attr_reader :config, :context, :collector

    # DSL class methods

    # Merges options (e.g. :is_distributed) into the spout options.
    def self.set(options = {})
      self.spout_options.merge!(options)
    end

    # Declares the output field names; they are stored as strings.
    def self.output_fields(*fields)
      @fields = fields.map(&:to_s)
    end

    # Registers the callback run by next_tuple.
    #
    # args may hold an optional method name followed by an optional options
    # hash (:emit) which is merged into send_options. A given block takes
    # precedence; otherwise the named method (default :on_send) is called
    # on the spout instance.
    def self.on_send(*args, &on_send_block)
      options = args.last.is_a?(Hash) ? args.pop : {}
      method_name = args.first

      self.send_options.merge!(options)
      @on_send_block = block_given? ? on_send_block : lambda {self.send(method_name || :on_send)}
    end

    # Registers the callback run by open: a block, or the named instance
    # method (default :on_init).
    def self.on_init(method_name = nil, &on_init_block)
      @on_init_block = block_given? ? on_init_block : lambda {self.send(method_name || :on_init)}
    end

    # Registers the callback run by close: a block, or the named instance
    # method (default :on_close).
    def self.on_close(method_name = nil, &on_close_block)
      @on_close_block = block_given? ? on_close_block : lambda {self.send(method_name || :on_close)}
    end

    # Registers the callback run by ack; it receives the message id.
    def self.on_ack(method_name = nil, &on_ack_block)
      @on_ack_block = block_given? ? on_ack_block : lambda {|msg_id| self.send(method_name || :on_ack, msg_id)}
    end

    # Registers the callback run by fail; it receives the message id.
    def self.on_fail(method_name = nil, &on_fail_block)
      @on_fail_block = block_given? ? on_fail_block : lambda {|msg_id| self.send(method_name || :on_fail, msg_id)}
    end

    # DSL state readers
    #
    # These are deliberately public: instances reach them via self.class.
    # (A bare `private` would not apply to `def self.` methods anyway.)

    # Declared output field names, [] when none were declared.
    def self.fields
      @fields ||= []
    end

    def self.on_send_block
      @on_send_block ||= lambda {self.send(:on_send)}
    end

    def self.on_init_block
      @on_init_block ||= lambda {self.send(:on_init)}
    end

    def self.on_close_block
      @on_close_block ||= lambda {self.send(:on_close)}
    end

    def self.on_ack_block
      @on_ack_block ||= lambda {|msg_id| self.send(:on_ack, msg_id)}
    end

    def self.on_fail_block
      @on_fail_block ||= lambda {|msg_id| self.send(:on_fail, msg_id)}
    end

    # Options controlling next_tuple; emitting is on by default.
    def self.send_options
      @send_options ||= {:emit => true}
    end

    # Spout-wide options; not distributed by default.
    def self.spout_options
      @spout_options ||= {:is_distributed => false}
    end

    def self.is_distributed?
      !!self.spout_options[:is_distributed]
    end

    def self.emit?
      !!self.send_options[:emit]
    end

    # DSL instance methods

    # Emits the given values through the collector.
    def emit(*values)
      @collector.emit(Values.new(*values))
    end

    # Spout proxy interface

    # Runs the on_send callback. When the :emit option is on, a truthy
    # result is flattened into a single list of values and emitted; a
    # nil/false result makes the call sleep for 0.1 second instead.
    # When :emit is off the result is ignored.
    def next_tuple
      output = instance_exec(&self.class.on_send_block)
      return unless self.class.emit?

      if output
        values = [output].flatten
        @collector.emit(Values.new(*values))
      else
        sleep(0.1)
      end
    end

    # Stores the collaborators handed over by the caller, then runs the
    # on_init callback.
    def open(config, context, collector)
      @collector = collector
      @context = context
      @config = config
      instance_exec(&self.class.on_init_block)
    end

    # Runs the on_close callback.
    def close
      instance_exec(&self.class.on_close_block)
    end

    # Declares the output fields registered with output_fields.
    def declare_output_fields(declarer)
      declarer.declare(Fields.new(self.class.fields))
    end

    # Reports the :is_distributed spout option as a boolean.
    def is_distributed
      self.class.is_distributed?
    end

    # Runs the on_ack callback with msg_id.
    def ack(msg_id)
      instance_exec(msg_id, &self.class.on_ack_block)
    end

    # Runs the on_fail callback with msg_id.
    def fail(msg_id)
      instance_exec(msg_id, &self.class.on_fail_block)
    end

    # default optional dsl methods/callbacks

    def on_init; end
    def on_close; end
    def on_ack(msg_id); end
    def on_fail(msg_id); end
  end
end
@@ -0,0 +1,191 @@
1
module RedStorm

  # Base class for topologies declared through a small class-level DSL.
  #
  # A subclass lists its components with spout / bolt, optionally names and
  # configures itself with configure, and may register an on_submit
  # callback. #start wires everything together and submits the topology.
  #
  # TopologyBuilder, JRubySpout, JRubyBolt, Config, Fields, LocalCluster and
  # StormSubmitter are referenced here but not defined in this file; they
  # are expected to be provided by the runtime environment.
  class SimpleTopology
    attr_reader :cluster # LocalCluster reference usable in on_submit block, for example

    DEFAULT_SPOUT_PARALLELISM = 1
    DEFAULT_BOLT_PARALLELISM = 1

    # Common description of a topology component: its class, its id
    # (symbolic until resolve_ids! replaces it by a number) and its
    # parallelism.
    class ComponentDefinition
      attr_reader :clazz, :parallelism
      attr_accessor :id

      def initialize(component_class, id, parallelism)
        @clazz = component_class
        @id = id
        @parallelism = parallelism
      end
    end

    class SpoutDefinition < ComponentDefinition; end

    # A bolt additionally records the sources it reads from, each with the
    # grouping to apply.
    class BoltDefinition < ComponentDefinition
      attr_accessor :sources

      def initialize(*args)
        super
        @sources = []
      end

      # Adds a source. source_id may be a class (converted to its
      # underscored name) or an explicit id. grouping is either a hash
      # {grouper => params} or a bare grouper, stored as {grouper => nil}.
      def source(source_id, grouping)
        resolved_id = source_id.is_a?(Class) ? SimpleTopology.underscore(source_id) : source_id
        grouping_spec = grouping.is_a?(Hash) ? grouping : {grouping => nil}
        @sources << [resolved_id, grouping_spec]
      end

      # Applies every recorded source/grouping to declarer.
      # Raises RuntimeError on a grouper outside the supported list.
      def define_grouping(declarer)
        @sources.each do |source_id, grouping|
          # only the first pair of the grouping hash is considered
          grouper, params = grouping.first

          case grouper
          when :fields
            declarer.fieldsGrouping(source_id, Fields.new(*params))
          when :global
            declarer.globalGrouping(source_id)
          when :shuffle
            declarer.shuffleGrouping(source_id)
          when :none
            declarer.noneGrouping(source_id)
          when :all
            declarer.allGrouping(source_id)
          when :direct
            declarer.directGrouping(source_id)
          else
            raise("unknown grouper=#{grouper.inspect}")
          end
        end
      end
    end

    # Receiver of the configure block: any call such as
    # `max_task_parallelism 3` is forwarded to the wrapped Config object as
    # `setMaxTaskParallelism(3)`.
    class Configurator
      attr_reader :config

      def initialize
        @config = Config.new
      end

      def method_missing(sym, *args)
        config_method = "set#{self.class.camel_case(sym)}"
        @config.send(config_method, *args)
      end

      # Keeps respond_to? consistent with method_missing.
      def respond_to_missing?(sym, include_private = false)
        @config.respond_to?("set#{self.class.camel_case(sym)}") || super
      end

      # Converts an underscored name to CamelCase ("a/b" becomes "A::B").
      def self.camel_case(s)
        s.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
      end
    end

    # DSL class methods

    # Registers a spout. Options: :id (default: underscored class name) and
    # :parallelism (default: DEFAULT_SPOUT_PARALLELISM).
    def self.spout(spout_class, options = {})
      spout_options = {:id => self.underscore(spout_class), :parallelism => DEFAULT_SPOUT_PARALLELISM}.merge(options)
      spout = SpoutDefinition.new(spout_class, spout_options[:id], spout_options[:parallelism])
      self.components << spout
    end

    # Registers a bolt. Options: :id (default: underscored class name) and
    # :parallelism (default: DEFAULT_BOLT_PARALLELISM). The optional block
    # runs in the context of the BoltDefinition, typically to call source.
    def self.bolt(bolt_class, options = {}, &bolt_block)
      bolt_options = {:id => self.underscore(bolt_class), :parallelism => DEFAULT_BOLT_PARALLELISM}.merge(options)
      bolt = BoltDefinition.new(bolt_class, bolt_options[:id], bolt_options[:parallelism])
      # a bolt may be declared without a block; instance_exec needs one
      bolt.instance_exec(&bolt_block) if block_given?
      self.components << bolt
    end

    # Sets the topology name and/or the configuration block; whichever is
    # omitted keeps its previous value.
    def self.configure(name = nil, &configure_block)
      @topology_name = name if name
      @configure_block = configure_block if block_given?
    end

    # Registers the callback run at the end of start: a block, or the named
    # instance method. With neither, the no-op default is restored instead
    # of storing a callback that could only fail.
    def self.on_submit(method_name = nil, &submit_block)
      @submit_block =
        if block_given?
          submit_block
        elsif method_name
          lambda {|env| self.send(method_name, env)}
        end
    end

    # topology proxy interface

    # Builds and submits the topology.
    #
    # base_class_path is handed to JRubySpout/JRubyBolt together with the
    # component class name. env must be :local (submits to a new
    # LocalCluster kept in @cluster) or :cluster (submits through
    # StormSubmitter); anything else raises RuntimeError. The configure
    # block runs before submission, the on_submit callback after it.
    def start(base_class_path, env)
      self.class.resolve_ids!(self.class.components)

      builder = TopologyBuilder.new
      self.class.spouts.each do |spout|
        # classes whose name starts with Java:: are instantiated directly
        is_java = spout.clazz.name.split('::').first == 'Java'
        builder.setSpout(spout.id, is_java ? spout.clazz.new : JRubySpout.new(base_class_path, spout.clazz.name), spout.parallelism)
      end
      self.class.bolts.each do |bolt|
        is_java = bolt.clazz.name.split('::').first == 'Java'
        declarer = builder.setBolt(bolt.id, is_java ? bolt.clazz.new : JRubyBolt.new(base_class_path, bolt.clazz.name), bolt.parallelism)
        bolt.define_grouping(declarer)
      end

      configurator = Configurator.new
      configurator.instance_exec(env, &self.class.configure_block)

      case env
      when :local
        @cluster = LocalCluster.new
        @cluster.submitTopology(self.class.topology_name, configurator.config, builder.createTopology)
      when :cluster
        StormSubmitter.submitTopology(self.class.topology_name, configurator.config, builder.createTopology)
      else
        raise("unsupported env=#{env.inspect}, expecting :local or :cluster")
      end

      instance_exec(env, &self.class.submit_block)
    end

    # Class-level helpers
    #
    # These are deliberately public: instances and nested classes reach them
    # via self.class / SimpleTopology. (A bare `private` would not apply to
    # `def self.` methods anyway.)

    # Replaces, in place, every non-integer component id and source id by an
    # integer. Symbolic ids get the lowest integers, starting at 1, that are
    # not already used as explicit integer ids.
    #
    # Raises RuntimeError on a duplicate symbolic id or on an id that cannot
    # be resolved.
    def self.resolve_ids!(components)
      next_numeric_id = 1
      resolved_names = {}

      # Integer rather than Fixnum: Fixnum is gone from recent Rubies and
      # every Fixnum is an Integer on older ones
      numeric_components, symbolic_components = components.partition{|c| c.id.is_a?(Integer)}
      numeric_ids = numeric_components.map(&:id)

      # assign numeric ids to symbolic ids
      symbolic_components.each do |component|
        id = component.id.to_s
        raise("duplicate symbolic id in #{component.clazz.name} on id=#{id}") if resolved_names.has_key?(id)
        next_numeric_id += 1 while numeric_ids.include?(next_numeric_id)
        numeric_ids << next_numeric_id
        resolved_names[id] = next_numeric_id
      end

      # reassign numeric ids to all components
      components.each do |component|
        unless component.id.is_a?(Integer)
          component.id = resolved_names[component.id.to_s] || raise("cannot resolve #{component.clazz.name} id=#{component.id.to_s}")
        end
        if component.respond_to?(:sources)
          component.sources.map! do |source_id, grouping|
            id = source_id.is_a?(Integer) ? source_id : resolved_names[source_id.to_s] || raise("cannot resolve #{component.clazz.name} source id=#{source_id.to_s}")
            [id, grouping]
          end
        end
      end
    end

    def self.spouts
      self.components.select{|c| c.is_a?(SpoutDefinition)}
    end

    def self.bolts
      self.components.select{|c| c.is_a?(BoltDefinition)}
    end

    # Components registered on this class, in declaration order.
    def self.components
      @components ||= []
    end

    # Explicit name given to configure, else the underscored class name.
    def self.topology_name
      @topology_name ||= self.underscore(self.name)
    end

    def self.configure_block
      @configure_block ||= lambda{|env|}
    end

    def self.submit_block
      @submit_block ||= lambda{|env|}
    end

    # Converts the last segment of a (possibly namespaced) CamelCase name to
    # an underscored lowercase string.
    def self.underscore(camel_case)
      # downcase, not downcase!: the bang form returns nil when the string
      # is already lowercase
      camel_case.to_s.split('::').last.gsub(/(.)([A-Z])/,'\1_\2').downcase
    end
  end
end