zillabyte 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  class Zillabyte::Harness::Aggregate
2
- attr_accessor :_name, :_type, :_emits, :_start, :_aggregate, :_complete
2
+ attr_accessor :_name, :_type, :_consumes, :_group_by, :_emits, :_begin_group, :_aggregate, :_end_group
3
3
 
4
4
  def initialize()
5
5
  @_name = "aggregate_"+Zillabyte::Harness::Counter.get()
@@ -10,20 +10,28 @@ class Zillabyte::Harness::Aggregate
10
10
  @_name = v
11
11
  end
12
12
 
13
+ def consumes(v)
14
+ @_consumes = v
15
+ end
16
+
17
+ def group_by(v)
18
+ @_group_by = v
19
+ end
20
+
13
21
  def emits(v)
14
22
  @_emits = v
15
23
  end
16
24
 
17
- def start(&block)
18
- @_start = block
25
+ def begin_group(&block)
26
+ @_begin_group = block
19
27
  end
20
28
 
21
29
  def aggregate(&block)
22
30
  @_aggregate = block
23
31
  end
24
32
 
25
- def complete(&block)
26
- @_complete = block
33
+ def end_group(&block)
34
+ @_end_group = block
27
35
  end
28
36
 
29
37
  end
@@ -50,7 +50,7 @@ class Zillabyte::Harness::Helper
50
50
 
51
51
  def self.check_emits(operation, emits, streams)
52
52
  ee = "Error in \"#{operation}\" at \"emits\": \n\t "
53
- if(operation == "simple_function" or operation == "simple_spout")
53
+ if(operation.include?("simple"))
54
54
  pp = @@_print_check_simple_function_emits
55
55
  nn = "relation"
56
56
  else
@@ -93,8 +93,8 @@ class Zillabyte::Harness::Helper
93
93
  Zillabyte::Harness::Helper.print_error(msg)
94
94
  end
95
95
 
96
- if(operation == "simple_function" or operation == "simple_spout")
97
- Zillabyte::Harness::Helper.check_simple_function_emits(e)
96
+ if(operation.include?("simple"))
97
+ Zillabyte::Harness::Helper.check_simple_function_emits(operation, e)
98
98
  else
99
99
  if(!e[1].instance_of?(Array))
100
100
  msg = "#{ee}Field names must be an ARRAY of STRINGS in stream #{e[0]}. #{pp}"
@@ -117,17 +117,18 @@ class Zillabyte::Harness::Helper
117
117
  end
118
118
 
119
119
  def self.check_consumes(h, streams)
120
+ ee = "Error in \"#{h._type}\" at \"consumes\": \n\t "
120
121
  if(h._type == "each")
121
- ee = "Error in \"each\" at \"consumes\": \n\t "
122
122
  pp = @@_print_check_each_consumes
123
+ elsif(h._type == "aggregate")
124
+ pp = @@_print_check_aggregate_consumes
123
125
  elsif(h._type == "sink")
124
- ee = "Error in \"sink\" at \"consumes\": \n\t "
125
126
  pp = @@_print_check_sink
126
127
  end
127
128
 
128
129
  consumes = h._consumes
129
130
  if(!consumes)
130
- msg = "#{ee}\"Consumes\" must be specified since a preceding \"each\" or \"spout\" emitted multiple streams. #{pp}"
131
+ msg = "#{ee}\"Consumes\" must be specified since a preceding \"each\", \"spout\" or \"aggregate\" emitted multiple streams. #{pp}"
131
132
  Zillabyte::Harness::Helper.print_error(msg)
132
133
  end
133
134
  if(!consumes.instance_of?(String) or consumes == "")
@@ -144,6 +145,40 @@ class Zillabyte::Harness::Helper
144
145
  end
145
146
  end
146
147
 
148
+ def self.check_group_by(operation, h, nodes, streams)
149
+ ee = "Error in \"#{operation}\" at \"group_by\": \n\t "
150
+ pp = ""
151
+ if(operation == "aggregate")
152
+ pp = @@_print_check_group_by
153
+ end
154
+
155
+ group_by = h._group_by
156
+ if(!group_by.instance_of?(Array) or group_by == [])
157
+ msg = "#{ee}Group_by must be an ARRAY with at least one element! #{pp}"
158
+ Zillabyte::Harness::Helper.print_error(msg)
159
+ end
160
+
161
+ if(operation == "aggregate")
162
+ # Can't check group_by fields for simple_aggregate because "matches" does not specify emitted fields from spout!
163
+ group_by.each do |f|
164
+ if(h._consumes)
165
+ fields = streams[h._consumes]
166
+ if(!fields.include?(f))
167
+ msg = "#{ee}Group_by field not in consumed stream \"#{h._consumes}\". #{pp}"
168
+ Zillabyte::Harness::Helper.print_error(msg)
169
+ end
170
+ else
171
+ previous_emits = nodes.last._emits[0] #if no "consumes", then operation consumes the sole stream from the previous operation
172
+ if(!previous_emits[1].include?(f))
173
+ msg = "#{ee}Group_by field not in \"emits\" of previous relation \"#{previous_emits[0]}\". #{pp}"
174
+ Zillabyte::Harness::Helper.print_error(msg)
175
+ end
176
+ end
177
+ end
178
+ end
179
+
180
+ end
181
+
147
182
  def self.check_sink(sink, nodes)
148
183
  ee = "Error in \"sink\": \n\t "
149
184
  pp = @@_print_check_sink
@@ -187,8 +222,8 @@ class Zillabyte::Harness::Helper
187
222
  if(operation == "sink")
188
223
  ee = "Error in \"sink\" at \"column\": \n\t "
189
224
  pp = @@_print_check_sink
190
- elsif(operation == "simple_function")
191
- ee = "Error in \"simple_function\" at \"emits\": \n\t "
225
+ elsif(operation.include?("simple"))
226
+ ee = "Error in \"#{operation}\" at \"emits\": \n\t "
192
227
  pp = @@_print_check_simple_function_emits
193
228
  end
194
229
 
@@ -228,8 +263,8 @@ class Zillabyte::Harness::Helper
228
263
  end
229
264
  end
230
265
 
231
- def self.check_simple_function_emits(emits)
232
- ee = "Error in \"simple_function\" at \"emits\": \n\t "
266
+ def self.check_simple_function_emits(operation, emits)
267
+ ee = "Error in \"#{operation}\" at \"emits\": \n\t "
233
268
  pp = @@_print_check_simple_function_emits
234
269
 
235
270
  name = emits[0]
@@ -250,7 +285,7 @@ class Zillabyte::Harness::Helper
250
285
  end
251
286
  colkey = colkeys[0]
252
287
  colval = col[colkey]
253
- Zillabyte::Harness::Helper.check_sink_column_format("simple_function",colkey,colval,name)
288
+ Zillabyte::Harness::Helper.check_sink_column_format(operation,colkey,colval,name)
254
289
  end
255
290
  end
256
291
 
@@ -277,6 +312,12 @@ class Zillabyte::Harness::Helper
277
312
  \t [ \"stream_2\", [ \"field_21\", \"field_22\", ... ] ] ] .
278
313
  - Stream and field names must all be non-empty STRINGS."
279
314
 
315
+ @@_print_check_group_by = "\n
316
+ \"Group_by\" Syntax:
317
+ - \"Group_by\" must be a non-empty ARRAY.
318
+ - Each element of \"group_by\" must be a STRING corresponding to a field emitted by the previous operation or the stream
319
+ consumed by the aggregation function."
320
+
280
321
  @@_print_check_simple_function_emits = "\n
281
322
  \"Emits\" Syntax:
282
323
  - \"Emits\" must be a non-empty ARRAY.
@@ -294,15 +335,15 @@ class Zillabyte::Harness::Helper
294
335
  Single stream:
295
336
  \t flow.sink do |h|
296
337
  \t \t h.name \"name_of_relation\"
297
- \t \t h.columns \"field_1\" :type_1
298
- \t \t h.columns \"field_2\" :type_2 ...
338
+ \t \t h.column \"field_1\" :type_1
339
+ \t \t h.column \"field_2\" :type_2 ...
299
340
  \t end
300
341
  Multiple streams:
301
342
  \t flow.sink do |h|
302
343
  \t \t h.name \"relation_name\"
303
344
  \t \t h.consumes \"stream_consumed\"
304
- \t \t h.columns \"field_1\" :type_1
305
- \t \t h.columns \"field_2\" :type_2 ...
345
+ \t \t h.column \"field_1\" :type_1
346
+ \t \t h.column \"field_2\" :type_2 ...
306
347
  \t end
307
348
  - \"Sink\" relation \"name\" must be specified as a non-empty STRING!
308
349
  - Field names must be non-empty STRINGS.
@@ -321,6 +362,20 @@ class Zillabyte::Harness::Helper
321
362
  \t \t h.execute ...
322
363
  \t end
323
364
  - If there are multiple streams, \"consumes\" must be specified as a non-empty STRING!
324
- * \"Consumes\" is the name of a stream emitted by a preceding \"each\" or \"spout\" which the current \"each\" operates on."
365
+ * \"Consumes\" is the name of a stream emitted by a preceding \"each\", \"spout\" or \"aggregate\" which the current \"each\" operates on."
366
+
367
+ @@_print_check_aggregate_consumes = "\n
368
+ \"Aggregate\" Syntax for multiple streams:
369
+ \t flow.aggregate do |h|
370
+ \t \t h.name \"name\", => optional
371
+ \t \t h.group_by [\"field_1\", \"field_2\", ...],
372
+ \t \t h.emits emits,
373
+ \t \t h.consumes \"consumed_stream\"
374
+ \t \t h.begin_group ...
375
+ \t \t h.aggregate ...
376
+ \t \t h.end_group ...
377
+ \t end
378
+ - If there are multiple streams, \"consumes\" must be specified as a non-empty STRING!
379
+ * \"Consumes\" is the name of a stream emitted by a preceding \"each\", \"spout\" or \"aggregate\" which the current \"aggregate\" operates on."
325
380
 
326
381
  end
@@ -190,102 +190,32 @@ module Zillabyte
190
190
  end
191
191
  end
192
192
 
193
- class AggregateStart
193
+ class Aggregate
194
194
  include Storm::Protocol
195
195
 
196
- def start(tuple); end
197
-
198
- def run(pipe_name)
199
- Storm::Protocol.mode = 'agg_start'
200
- Storm::Protocol.pipe_name = pipe_name
201
- setup_pipes
202
- handshake
203
-
204
- begin
205
- while true
206
- t = nil
207
- begin
208
- t = Tuple.from_hash(read_command)
209
- if(!t)
210
- next
211
- end
212
- start t
213
- rescue Exception => e
214
- fail 'Exception in bolt (A): ' + e.message + ' - ' + e.backtrace.join('\n')
215
- ensure
216
- done()
217
- end
218
- end
219
- rescue Exception => e
220
- fail 'Exception in bolt (B): ' + e.message + ' - ' + e.backtrace.join('\n')
221
- end
222
- end
223
- end
224
-
225
- class AggregateAggregate
226
- include Storm::Protocol
227
-
228
- def aggregate(tuple1, tuple2); end
229
-
230
196
  def run(pipe_name)
231
- Storm::Protocol.mode = 'agg_aggregate'
197
+ Storm::Protocol.mode = 'aggregation'
232
198
  Storm::Protocol.pipe_name = pipe_name
233
199
  setup_pipes
234
200
  handshake
235
201
 
236
202
  begin
237
203
  while true
238
- t = nil
239
- begin
240
- t1 = Tuple.from_hash(read_command)
241
- if(!t1)
242
- next
243
- end
244
- t2 = Tuple.from_hash(read_command)
245
- if(!t2)
246
- next
247
- end
248
- aggregate t1, t2
249
- rescue Exception => e
250
- fail 'Exception in bolt (A): ' + e.message + ' - ' + e.backtrace.join('\n')
251
- ensure
252
- done()
253
- end
254
- end
255
- rescue Exception => e
256
- fail 'Exception in bolt (B): ' + e.message + ' - ' + e.backtrace.join('\n')
257
- end
258
- end
259
- end
260
-
261
- class AggregateComplete
262
- include Storm::Protocol
263
-
264
- def complete(tuple); end
265
-
266
- def run(pipe_name)
267
- Storm::Protocol.mode = 'agg_complete'
268
- Storm::Protocol.pipe_name = pipe_name
269
- setup_pipes
270
- handshake
271
-
272
- begin
273
- while true
274
- t = nil
275
- begin
276
- t = Tuple.from_hash(read_command)
277
- if(!t)
278
- next
279
- end
280
- complete t
281
- rescue Exception => e
282
- fail 'Exception in bolt (A): ' + e.message + ' - ' + e.backtrace.join('\n')
283
- ensure
284
- done()
204
+ msg = read_command
205
+ case msg['command']
206
+ when 'begin_group'
207
+ t = Tuple.from_hash(msg)
208
+ begin_group t
209
+ when 'aggregate'
210
+ t = Tuple.from_hash(msg)
211
+ aggregate t
212
+ when 'end_group'
213
+ end_group
285
214
  end
215
+ done
286
216
  end
287
217
  rescue Exception => e
288
- fail 'Exception in bolt (B): ' + e.message + ' - ' + e.backtrace.join('\n')
218
+ fail "Exception in #{msg["command"]}: " + e.message + ' - ' + e.backtrace.join('\n')
289
219
  end
290
220
  end
291
221
  end
@@ -327,43 +257,26 @@ module Zillabyte
327
257
 
328
258
  end
329
259
 
330
- class AggregateStartController < Storm::AggregateStart
260
+ class AggregateController < Storm::Aggregate
331
261
 
332
262
  def initialize(harness, progress)
333
263
  @harness = harness
334
264
  @progress = progress
335
265
  end
336
266
 
337
- def start(*args)
338
- @harness._start.call(self, *args)
339
- end
340
-
341
- end
342
-
343
- class AggregateAggregateController < Storm::AggregateAggregate
344
-
345
- def initialize(harness, progress)
346
- @harness = harness
347
- @progress = progress
267
+ def begin_group(*args)
268
+ @harness._begin_group.call(*args)
348
269
  end
349
270
 
350
271
  def aggregate(*args)
351
- @harness._aggregate.call(self, *args)
352
- end
353
-
354
- end
355
-
356
- class AggregateCompleteController < Storm::AggregateComplete
357
-
358
- def initialize(harness, progress)
359
- @harness = harness
360
- @progress = progress
272
+ @harness._aggregate.call(*args)
361
273
  end
362
274
 
363
- def complete(*args)
364
- @harness._complete.call(self, *args)
275
+ def end_group(*args)
276
+ @harness._end_group.call(self, *args)
365
277
  end
366
278
 
367
279
  end
280
+
368
281
  end
369
282
  end
@@ -0,0 +1,142 @@
1
+ require 'optparse'
2
+
3
+ class Zillabyte::Harness::SimpleAggregate
4
+ attr_accessor :_nodes, :_relation, :_matches, :_group_by, :_emits, :_begin_group, :_aggregate, :_end_group, :_name, :_info_file, :_options
5
+
6
+ def self.build()
7
+ h = Zillabyte::Harness::SimpleAggregate.new()
8
+ yield(h)
9
+ h._name = h._name || Dir.pwd.split("/")[-1]
10
+ Zillabyte::Harness::Helper.check_name("simple_aggregate", h._name, {})
11
+ Zillabyte::Harness::Helper.check_emits("simple_aggregate", h._emits, {})
12
+ generic_emits = h.get_generic_emits
13
+
14
+ h._nodes = []
15
+ h._options = Zillabyte::Harness::Helper.opt_parser()
16
+
17
+ if(h._options[:command] == :info)
18
+ h._info_file = File.open(h._options[:file],"w+")
19
+ hash = {"language" => "ruby", "name" => h._name}
20
+ Zillabyte::Harness::Helper.write_hash_to_file(hash, h._info_file)
21
+ end
22
+
23
+ h.build_spout()
24
+ fn = h.build_aggregate(generic_emits)
25
+ h.build_sink()
26
+
27
+ if(h._options[:command] == :execute and h._options[:name] == h._name)
28
+ pipe_name = h._options[:pipe]
29
+ c = Zillabyte::Harness::AggregateController.new(fn, progress = Zillabyte::Common::Progress.new)
30
+ c.run(pipe_name)
31
+ end
32
+
33
+ h
34
+ end
35
+
36
+ def name(v)
37
+ @_name = v
38
+ end
39
+
40
+ def matches(v, options = {})
41
+ case v
42
+ when String
43
+ @_relation = { :query => v, :options => options.is_a?(Hash) ? options : {} }
44
+ when Array
45
+ @_matches = v
46
+ end
47
+ end
48
+
49
+ def group_by(v)
50
+ @_group_by = v
51
+ end
52
+
53
+ def emits(v)
54
+ @_emits = v
55
+ end
56
+
57
+ def begin_group(&block)
58
+ @_begin_group = block
59
+ end
60
+
61
+ def aggregate(&block)
62
+ @_aggregate = block
63
+ end
64
+
65
+ def end_group(&block)
66
+ @_end_group = block
67
+ end
68
+
69
+ def get_generic_emits()
70
+ generic_emits = []
71
+ @_emits.each do |relation|
72
+ temit = []
73
+ relation[1].each do |column|
74
+ column.each do |col, type|
75
+ temit << col
76
+ end
77
+ end
78
+ generic_emits << [relation[0], temit]
79
+ end
80
+ generic_emits
81
+ end
82
+
83
+ def build_spout()
84
+ h = Zillabyte::Harness::Spout.new(false)
85
+ h._matches = @_matches if @_matches
86
+ h._relation = @_relation if @_relation
87
+ @_nodes << h
88
+ if(@_options[:command] == :info)
89
+ info_hash = {"name" => h._name, "type" => h._type}
90
+ if(h._relation)
91
+ info_hash["relation"] = h._relation
92
+ elsif(h._matches)
93
+ info_hash["matches"] = h._matches
94
+ end
95
+ Zillabyte::Harness::Helper.write_hash_to_file(info_hash, @_info_file)
96
+ end
97
+ end
98
+
99
+ def build_aggregate(generic_emits)
100
+ h = Zillabyte::Harness::Aggregate.new()
101
+ h._name = @_name
102
+ h._emits = generic_emits
103
+ h._group_by = @_group_by
104
+ h._begin_group = @_begin_group
105
+ h._aggregate = @_aggregate
106
+ h._end_group = @_end_group
107
+ Zillabyte::Harness::Helper.check_group_by("simple_aggregate", h, @_nodes, {})
108
+ @_nodes << h
109
+ if(@_options[:command] == :info)
110
+ info_hash = {"name" => h._name, "type" => h._type, "emits" => h._emits, "group_by" => h._group_by}
111
+ Zillabyte::Harness::Helper.write_hash_to_file(info_hash, @_info_file)
112
+ end
113
+ h
114
+ end
115
+
116
+ def build_sink()
117
+ # Construct the sink...
118
+ n_sinks = @_emits.length
119
+ @_emits.each do |emit|
120
+ h = Zillabyte::Harness::Sink.new()
121
+ h._name = emit[0]
122
+ columns = emit[1]
123
+ columns.each do |col|
124
+ col.each do |cname, ctype|
125
+ h.column(cname, ctype)
126
+ end
127
+ end
128
+ if(n_sinks > 1)
129
+ h._consumes = h._name
130
+ end
131
+ @_nodes << h
132
+ if(@_options[:command] == :info)
133
+ info_hash = {"name" => h._name, "type" => h._type, "columns" => h._columns}
134
+ if(h._consumes)
135
+ info_hash["consumes"] = h._consumes
136
+ end
137
+ Zillabyte::Harness::Helper.write_hash_to_file(info_hash, @_info_file)
138
+ end
139
+ end
140
+ end
141
+
142
+ end
@@ -78,31 +78,30 @@ class Zillabyte::Harness::Topology
78
78
  end
79
79
  end
80
80
 
81
- def group_by(fields)
82
- h = Zillabyte::Harness::GroupBy.new(fields)
83
- @_nodes << h
84
- end
85
-
86
81
  def aggregate
87
82
  h = Zillabyte::Harness::Aggregate.new()
88
83
  yield(h)
89
84
  Zillabyte::Harness::Helper.check_name("aggregate", h._name, @_names)
85
+ if(@_branched)
86
+ Zillabyte::Harness::Helper.check_consumes(h, @_streams)
87
+ else
88
+ if(h._consumes)
89
+ h._consumes = nil
90
+ end
91
+ end
92
+ Zillabyte::Harness::Helper.check_group_by("aggregate", h, @_nodes, @_streams)
90
93
  @_branched = Zillabyte::Harness::Helper.check_emits("aggregate", h._emits, @_streams) || @_branched
91
94
  @_nodes << h
92
95
  if(@_options[:command] == :info)
93
- info_hash = {"name" => h._name, "type" => h._type, "emits" => h._emits}
96
+ info_hash = {"name" => h._name, "type" => h._type, "group_by" => h._group_by, "emits" => h._emits}
94
97
  if(h._consumes)
95
98
  info_hash["consumes"] = h._consumes
96
99
  end
97
100
  Zillabyte::Harness::Helper.write_hash_to_file(info_hash, @_info_file)
98
101
  elsif(@_options[:command] == :execute)
99
102
  pipe_name = @_options[:pipe]
100
- if(@_options[:name] == h._name+"_start")
101
- c = Zillabyte::Harness::AggregateStartController.new(h, Zillabyte::Common::Progress.new)
102
- elsif(@_options[:name] == h._name+"_aggregate")
103
- c = Zillabyte::Harness::AggregateAggregateController.new(h, Zillabyte::Common::Progress.new)
104
- elsif(@_options[:name] == h._name+"_complete")
105
- c = Zillabyte::Harness::AggregateCompleteController.new(h, Zillabyte::Common::Progress.new)
103
+ if(@_options[:name] == h._name)
104
+ c = Zillabyte::Harness::AggregateController.new(h, Zillabyte::Common::Progress.new)
106
105
  end
107
106
  c.run(pipe_name)
108
107
  end
@@ -7,6 +7,7 @@ class Tuple
7
7
 
8
8
  def [](name)
9
9
  maybe_build_alias_hash()
10
+ p name
10
11
  values[ @_alias_hash[name] || name ]
11
12
  end
12
13
 
@@ -1,3 +1,3 @@
1
1
  module Zillabyte
2
- VERSION = "0.0.11" unless defined?(VERSION)
2
+ VERSION = "0.0.12" unless defined?(VERSION)
3
3
  end
@@ -0,0 +1,3 @@
1
+ module Zillabyte
2
+ VERSION = "0.0.11" unless defined?(VERSION)
3
+ end
@@ -16,5 +16,8 @@ module Zillabyte
16
16
  Zillabyte::Harness::SimpleSpout.build(*args, &block)
17
17
  end
18
18
 
19
+ def self.simple_aggregate(*args, &block)
20
+ Zillabyte::Harness::SimpleAggregate.build(*args, &block)
21
+ end
19
22
 
20
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zillabyte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-15 00:00:00.000000000 Z
12
+ date: 2014-02-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -73,6 +73,7 @@ files:
73
73
  - ruby/lib/zillabyte/harness/groupby.rb
74
74
  - ruby/lib/zillabyte/harness/helper.rb
75
75
  - ruby/lib/zillabyte/harness/live_delegator.rb
76
+ - ruby/lib/zillabyte/harness/simple_aggregate.rb
76
77
  - ruby/lib/zillabyte/harness/simple_function.rb
77
78
  - ruby/lib/zillabyte/harness/simple_spout.rb
78
79
  - ruby/lib/zillabyte/harness/sink.rb
@@ -81,6 +82,7 @@ files:
81
82
  - ruby/lib/zillabyte/harness/tuple.rb
82
83
  - ruby/lib/zillabyte/harness.rb
83
84
  - ruby/lib/zillabyte/version.rb
85
+ - ruby/lib/zillabyte/version.rb~
84
86
  - ruby/lib/zillabyte.rb
85
87
  - ruby/README.md
86
88
  homepage: http://www.zillabyte.com