zillabyte 0.0.11 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  class Zillabyte::Harness::Aggregate
2
- attr_accessor :_name, :_type, :_emits, :_start, :_aggregate, :_complete
2
+ attr_accessor :_name, :_type, :_consumes, :_group_by, :_emits, :_begin_group, :_aggregate, :_end_group
3
3
 
4
4
  def initialize()
5
5
  @_name = "aggregate_"+Zillabyte::Harness::Counter.get()
@@ -10,20 +10,28 @@ class Zillabyte::Harness::Aggregate
10
10
  @_name = v
11
11
  end
12
12
 
13
+ def consumes(v)
14
+ @_consumes = v
15
+ end
16
+
17
+ def group_by(v)
18
+ @_group_by = v
19
+ end
20
+
13
21
  def emits(v)
14
22
  @_emits = v
15
23
  end
16
24
 
17
- def start(&block)
18
- @_start = block
25
+ def begin_group(&block)
26
+ @_begin_group = block
19
27
  end
20
28
 
21
29
  def aggregate(&block)
22
30
  @_aggregate = block
23
31
  end
24
32
 
25
- def complete(&block)
26
- @_complete = block
33
+ def end_group(&block)
34
+ @_end_group = block
27
35
  end
28
36
 
29
37
  end
@@ -50,7 +50,7 @@ class Zillabyte::Harness::Helper
50
50
 
51
51
  def self.check_emits(operation, emits, streams)
52
52
  ee = "Error in \"#{operation}\" at \"emits\": \n\t "
53
- if(operation == "simple_function" or operation == "simple_spout")
53
+ if(operation.include?("simple"))
54
54
  pp = @@_print_check_simple_function_emits
55
55
  nn = "relation"
56
56
  else
@@ -93,8 +93,8 @@ class Zillabyte::Harness::Helper
93
93
  Zillabyte::Harness::Helper.print_error(msg)
94
94
  end
95
95
 
96
- if(operation == "simple_function" or operation == "simple_spout")
97
- Zillabyte::Harness::Helper.check_simple_function_emits(e)
96
+ if(operation.include?("simple"))
97
+ Zillabyte::Harness::Helper.check_simple_function_emits(operation, e)
98
98
  else
99
99
  if(!e[1].instance_of?(Array))
100
100
  msg = "#{ee}Field names must be an ARRAY of STRINGS in stream #{e[0]}. #{pp}"
@@ -117,17 +117,18 @@ class Zillabyte::Harness::Helper
117
117
  end
118
118
 
119
119
  def self.check_consumes(h, streams)
120
+ ee = "Error in \"#{h._type}\" at \"consumes\": \n\t "
120
121
  if(h._type == "each")
121
- ee = "Error in \"each\" at \"consumes\": \n\t "
122
122
  pp = @@_print_check_each_consumes
123
+ elsif(h._type == "aggregate")
124
+ pp = @@_print_check_aggregate_consumes
123
125
  elsif(h._type == "sink")
124
- ee = "Error in \"sink\" at \"consumes\": \n\t "
125
126
  pp = @@_print_check_sink
126
127
  end
127
128
 
128
129
  consumes = h._consumes
129
130
  if(!consumes)
130
- msg = "#{ee}\"Consumes\" must be specified since a preceding \"each\" or \"spout\" emitted multiple streams. #{pp}"
131
+ msg = "#{ee}\"Consumes\" must be specified since a preceding \"each\", \"spout\" or \"aggregate\" emitted multiple streams. #{pp}"
131
132
  Zillabyte::Harness::Helper.print_error(msg)
132
133
  end
133
134
  if(!consumes.instance_of?(String) or consumes == "")
@@ -144,6 +145,40 @@ class Zillabyte::Harness::Helper
144
145
  end
145
146
  end
146
147
 
148
+ def self.check_group_by(operation, h, nodes, streams)
149
+ ee = "Error in \"#{operation}\" at \"group_by\": \n\t "
150
+ pp = ""
151
+ if(operation == "aggregate")
152
+ pp = @@_print_check_group_by
153
+ end
154
+
155
+ group_by = h._group_by
156
+ if(!group_by.instance_of?(Array) or group_by == [])
157
+ msg = "#{ee}Group_by must be an ARRAY with at least one element! #{pp}"
158
+ Zillabyte::Harness::Helper.print_error(msg)
159
+ end
160
+
161
+ if(operation == "aggregate")
162
+ # Can't check group_by fields for simple_aggregate because "matches" does not specify emitted fields from spout!
163
+ group_by.each do |f|
164
+ if(h._consumes)
165
+ fields = streams[h._consumes]
166
+ if(!fields.include?(f))
167
+ msg = "#{ee}Group_by field not in consumed stream \"#{h._consumes}\". #{pp}"
168
+ Zillabyte::Harness::Helper.print_error(msg)
169
+ end
170
+ else
171
+ previous_emits = nodes.last._emits[0] #if no "consumes", then operation consumes the sole stream from the previous operation
172
+ if(!previous_emits[1].include?(f))
173
+ msg = "#{ee}Group_by field not in \"emits\" of previous relation \"#{previous_emits[0]}\". #{pp}"
174
+ Zillabyte::Harness::Helper.print_error(msg)
175
+ end
176
+ end
177
+ end
178
+ end
179
+
180
+ end
181
+
147
182
  def self.check_sink(sink, nodes)
148
183
  ee = "Error in \"sink\": \n\t "
149
184
  pp = @@_print_check_sink
@@ -187,8 +222,8 @@ class Zillabyte::Harness::Helper
187
222
  if(operation == "sink")
188
223
  ee = "Error in \"sink\" at \"column\": \n\t "
189
224
  pp = @@_print_check_sink
190
- elsif(operation == "simple_function")
191
- ee = "Error in \"simple_function\" at \"emits\": \n\t "
225
+ elsif(operation.include?("simple"))
226
+ ee = "Error in \"#{operation}\" at \"emits\": \n\t "
192
227
  pp = @@_print_check_simple_function_emits
193
228
  end
194
229
 
@@ -228,8 +263,8 @@ class Zillabyte::Harness::Helper
228
263
  end
229
264
  end
230
265
 
231
- def self.check_simple_function_emits(emits)
232
- ee = "Error in \"simple_function\" at \"emits\": \n\t "
266
+ def self.check_simple_function_emits(operation, emits)
267
+ ee = "Error in \"#{operation}\" at \"emits\": \n\t "
233
268
  pp = @@_print_check_simple_function_emits
234
269
 
235
270
  name = emits[0]
@@ -250,7 +285,7 @@ class Zillabyte::Harness::Helper
250
285
  end
251
286
  colkey = colkeys[0]
252
287
  colval = col[colkey]
253
- Zillabyte::Harness::Helper.check_sink_column_format("simple_function",colkey,colval,name)
288
+ Zillabyte::Harness::Helper.check_sink_column_format(operation,colkey,colval,name)
254
289
  end
255
290
  end
256
291
 
@@ -277,6 +312,12 @@ class Zillabyte::Harness::Helper
277
312
  \t [ \"stream_2\", [ \"field_21\", \"field_22\", ... ] ] ] .
278
313
  - Stream and field names must all be non-empty STRINGS."
279
314
 
315
+ @@_print_check_group_by = "\n
316
+ \"Group_by\" Syntax:
317
+ - \"Group_by\" must be a non-empty ARRAY.
318
+ - Each element of \"group_by\" must be a STRING corresponding to a field emitted by the previous operation or the stream
319
+ consumed by the aggregation function."
320
+
280
321
  @@_print_check_simple_function_emits = "\n
281
322
  \"Emits\" Syntax:
282
323
  - \"Emits\" must be a non-empty ARRAY.
@@ -294,15 +335,15 @@ class Zillabyte::Harness::Helper
294
335
  Single stream:
295
336
  \t flow.sink do |h|
296
337
  \t \t h.name \"name_of_relation\"
297
- \t \t h.columns \"field_1\" :type_1
298
- \t \t h.columns \"field_2\" :type_2 ...
338
+ \t \t h.column \"field_1\" :type_1
339
+ \t \t h.column \"field_2\" :type_2 ...
299
340
  \t end
300
341
  Multiple streams:
301
342
  \t flow.sink do |h|
302
343
  \t \t h.name \"relation_name\"
303
344
  \t \t h.consumes \"stream_consumed\"
304
- \t \t h.columns \"field_1\" :type_1
305
- \t \t h.columns \"field_2\" :type_2 ...
345
+ \t \t h.column \"field_1\" :type_1
346
+ \t \t h.column \"field_2\" :type_2 ...
306
347
  \t end
307
348
  - \"Sink\" relation \"name\" must be specified as a non-empty STRING!
308
349
  - Field names must be non-empty STRINGS.
@@ -321,6 +362,20 @@ class Zillabyte::Harness::Helper
321
362
  \t \t h.execute ...
322
363
  \t end
323
364
  - If there are multiple streams, \"consumes\" must be specified as a non-empty STRING!
324
- * \"Consumes\" is the name of a stream emitted by a preceding \"each\" or \"spout\" which the current \"each\" operates on."
365
+ * \"Consumes\" is the name of a stream emitted by a preceding \"each\", \"spout\" or \"aggregate\" which the current \"each\" operates on."
366
+
367
+ @@_print_check_aggregate_consumes = "\n
368
+ \"Aggregate\" Syntax for multiple streams:
369
+ \t flow.aggregate do |h|
370
+ \t \t h.name \"name\", => optional
371
+ \t \t h.group_by [\"field_1\", \"field_2\", ...],
372
+ \t \t h.emits emits,
373
+ \t \t h.consumes \"consumed_stream\"
374
+ \t \t h.begin_group ...
375
+ \t \t h.aggregate ...
376
+ \t \t h.end_group ...
377
+ \t end
378
+ - If there are multiple streams, \"consumes\" must be specified as a non-empty STRING!
379
+ * \"Consumes\" is the name of a stream emitted by a preceding \"each\", \"spout\" or \"aggregate\" which the current \"aggregate\" operates on."
325
380
 
326
381
  end
@@ -190,102 +190,32 @@ module Zillabyte
190
190
  end
191
191
  end
192
192
 
193
- class AggregateStart
193
+ class Aggregate
194
194
  include Storm::Protocol
195
195
 
196
- def start(tuple); end
197
-
198
- def run(pipe_name)
199
- Storm::Protocol.mode = 'agg_start'
200
- Storm::Protocol.pipe_name = pipe_name
201
- setup_pipes
202
- handshake
203
-
204
- begin
205
- while true
206
- t = nil
207
- begin
208
- t = Tuple.from_hash(read_command)
209
- if(!t)
210
- next
211
- end
212
- start t
213
- rescue Exception => e
214
- fail 'Exception in bolt (A): ' + e.message + ' - ' + e.backtrace.join('\n')
215
- ensure
216
- done()
217
- end
218
- end
219
- rescue Exception => e
220
- fail 'Exception in bolt (B): ' + e.message + ' - ' + e.backtrace.join('\n')
221
- end
222
- end
223
- end
224
-
225
- class AggregateAggregate
226
- include Storm::Protocol
227
-
228
- def aggregate(tuple1, tuple2); end
229
-
230
196
  def run(pipe_name)
231
- Storm::Protocol.mode = 'agg_aggregate'
197
+ Storm::Protocol.mode = 'aggregation'
232
198
  Storm::Protocol.pipe_name = pipe_name
233
199
  setup_pipes
234
200
  handshake
235
201
 
236
202
  begin
237
203
  while true
238
- t = nil
239
- begin
240
- t1 = Tuple.from_hash(read_command)
241
- if(!t1)
242
- next
243
- end
244
- t2 = Tuple.from_hash(read_command)
245
- if(!t2)
246
- next
247
- end
248
- aggregate t1, t2
249
- rescue Exception => e
250
- fail 'Exception in bolt (A): ' + e.message + ' - ' + e.backtrace.join('\n')
251
- ensure
252
- done()
253
- end
254
- end
255
- rescue Exception => e
256
- fail 'Exception in bolt (B): ' + e.message + ' - ' + e.backtrace.join('\n')
257
- end
258
- end
259
- end
260
-
261
- class AggregateComplete
262
- include Storm::Protocol
263
-
264
- def complete(tuple); end
265
-
266
- def run(pipe_name)
267
- Storm::Protocol.mode = 'agg_complete'
268
- Storm::Protocol.pipe_name = pipe_name
269
- setup_pipes
270
- handshake
271
-
272
- begin
273
- while true
274
- t = nil
275
- begin
276
- t = Tuple.from_hash(read_command)
277
- if(!t)
278
- next
279
- end
280
- complete t
281
- rescue Exception => e
282
- fail 'Exception in bolt (A): ' + e.message + ' - ' + e.backtrace.join('\n')
283
- ensure
284
- done()
204
+ msg = read_command
205
+ case msg['command']
206
+ when 'begin_group'
207
+ t = Tuple.from_hash(msg)
208
+ begin_group t
209
+ when 'aggregate'
210
+ t = Tuple.from_hash(msg)
211
+ aggregate t
212
+ when 'end_group'
213
+ end_group
285
214
  end
215
+ done
286
216
  end
287
217
  rescue Exception => e
288
- fail 'Exception in bolt (B): ' + e.message + ' - ' + e.backtrace.join('\n')
218
+ fail "Exception in #{msg["command"]}: " + e.message + ' - ' + e.backtrace.join('\n')
289
219
  end
290
220
  end
291
221
  end
@@ -327,43 +257,26 @@ module Zillabyte
327
257
 
328
258
  end
329
259
 
330
- class AggregateStartController < Storm::AggregateStart
260
+ class AggregateController < Storm::Aggregate
331
261
 
332
262
  def initialize(harness, progress)
333
263
  @harness = harness
334
264
  @progress = progress
335
265
  end
336
266
 
337
- def start(*args)
338
- @harness._start.call(self, *args)
339
- end
340
-
341
- end
342
-
343
- class AggregateAggregateController < Storm::AggregateAggregate
344
-
345
- def initialize(harness, progress)
346
- @harness = harness
347
- @progress = progress
267
+ def begin_group(*args)
268
+ @harness._begin_group.call(*args)
348
269
  end
349
270
 
350
271
  def aggregate(*args)
351
- @harness._aggregate.call(self, *args)
352
- end
353
-
354
- end
355
-
356
- class AggregateCompleteController < Storm::AggregateComplete
357
-
358
- def initialize(harness, progress)
359
- @harness = harness
360
- @progress = progress
272
+ @harness._aggregate.call(*args)
361
273
  end
362
274
 
363
- def complete(*args)
364
- @harness._complete.call(self, *args)
275
+ def end_group(*args)
276
+ @harness._end_group.call(self, *args)
365
277
  end
366
278
 
367
279
  end
280
+
368
281
  end
369
282
  end
@@ -0,0 +1,142 @@
1
+ require 'optparse'
2
+
3
+ class Zillabyte::Harness::SimpleAggregate
4
+ attr_accessor :_nodes, :_relation, :_matches, :_group_by, :_emits, :_begin_group, :_aggregate, :_end_group, :_name, :_info_file, :_options
5
+
6
+ def self.build()
7
+ h = Zillabyte::Harness::SimpleAggregate.new()
8
+ yield(h)
9
+ h._name = h._name || Dir.pwd.split("/")[-1]
10
+ Zillabyte::Harness::Helper.check_name("simple_aggregate", h._name, {})
11
+ Zillabyte::Harness::Helper.check_emits("simple_aggregate", h._emits, {})
12
+ generic_emits = h.get_generic_emits
13
+
14
+ h._nodes = []
15
+ h._options = Zillabyte::Harness::Helper.opt_parser()
16
+
17
+ if(h._options[:command] == :info)
18
+ h._info_file = File.open(h._options[:file],"w+")
19
+ hash = {"language" => "ruby", "name" => h._name}
20
+ Zillabyte::Harness::Helper.write_hash_to_file(hash, h._info_file)
21
+ end
22
+
23
+ h.build_spout()
24
+ fn = h.build_aggregate(generic_emits)
25
+ h.build_sink()
26
+
27
+ if(h._options[:command] == :execute and h._options[:name] == h._name)
28
+ pipe_name = h._options[:pipe]
29
+ c = Zillabyte::Harness::AggregateController.new(fn, progress = Zillabyte::Common::Progress.new)
30
+ c.run(pipe_name)
31
+ end
32
+
33
+ h
34
+ end
35
+
36
+ def name(v)
37
+ @_name = v
38
+ end
39
+
40
+ def matches(v, options = {})
41
+ case v
42
+ when String
43
+ @_relation = { :query => v, :options => options.is_a?(Hash) ? options : {} }
44
+ when Array
45
+ @_matches = v
46
+ end
47
+ end
48
+
49
+ def group_by(v)
50
+ @_group_by = v
51
+ end
52
+
53
+ def emits(v)
54
+ @_emits = v
55
+ end
56
+
57
+ def begin_group(&block)
58
+ @_begin_group = block
59
+ end
60
+
61
+ def aggregate(&block)
62
+ @_aggregate = block
63
+ end
64
+
65
+ def end_group(&block)
66
+ @_end_group = block
67
+ end
68
+
69
+ def get_generic_emits()
70
+ generic_emits = []
71
+ @_emits.each do |relation|
72
+ temit = []
73
+ relation[1].each do |column|
74
+ column.each do |col, type|
75
+ temit << col
76
+ end
77
+ end
78
+ generic_emits << [relation[0], temit]
79
+ end
80
+ generic_emits
81
+ end
82
+
83
+ def build_spout()
84
+ h = Zillabyte::Harness::Spout.new(false)
85
+ h._matches = @_matches if @_matches
86
+ h._relation = @_relation if @_relation
87
+ @_nodes << h
88
+ if(@_options[:command] == :info)
89
+ info_hash = {"name" => h._name, "type" => h._type}
90
+ if(h._relation)
91
+ info_hash["relation"] = h._relation
92
+ elsif(h._matches)
93
+ info_hash["matches"] = h._matches
94
+ end
95
+ Zillabyte::Harness::Helper.write_hash_to_file(info_hash, @_info_file)
96
+ end
97
+ end
98
+
99
+ def build_aggregate(generic_emits)
100
+ h = Zillabyte::Harness::Aggregate.new()
101
+ h._name = @_name
102
+ h._emits = generic_emits
103
+ h._group_by = @_group_by
104
+ h._begin_group = @_begin_group
105
+ h._aggregate = @_aggregate
106
+ h._end_group = @_end_group
107
+ Zillabyte::Harness::Helper.check_group_by("simple_aggregate", h, @_nodes, {})
108
+ @_nodes << h
109
+ if(@_options[:command] == :info)
110
+ info_hash = {"name" => h._name, "type" => h._type, "emits" => h._emits, "group_by" => h._group_by}
111
+ Zillabyte::Harness::Helper.write_hash_to_file(info_hash, @_info_file)
112
+ end
113
+ h
114
+ end
115
+
116
+ def build_sink()
117
+ # Construct the sink...
118
+ n_sinks = @_emits.length
119
+ @_emits.each do |emit|
120
+ h = Zillabyte::Harness::Sink.new()
121
+ h._name = emit[0]
122
+ columns = emit[1]
123
+ columns.each do |col|
124
+ col.each do |cname, ctype|
125
+ h.column(cname, ctype)
126
+ end
127
+ end
128
+ if(n_sinks > 1)
129
+ h._consumes = h._name
130
+ end
131
+ @_nodes << h
132
+ if(@_options[:command] == :info)
133
+ info_hash = {"name" => h._name, "type" => h._type, "columns" => h._columns}
134
+ if(h._consumes)
135
+ info_hash["consumes"] = h._consumes
136
+ end
137
+ Zillabyte::Harness::Helper.write_hash_to_file(info_hash, @_info_file)
138
+ end
139
+ end
140
+ end
141
+
142
+ end
@@ -78,31 +78,30 @@ class Zillabyte::Harness::Topology
78
78
  end
79
79
  end
80
80
 
81
- def group_by(fields)
82
- h = Zillabyte::Harness::GroupBy.new(fields)
83
- @_nodes << h
84
- end
85
-
86
81
  def aggregate
87
82
  h = Zillabyte::Harness::Aggregate.new()
88
83
  yield(h)
89
84
  Zillabyte::Harness::Helper.check_name("aggregate", h._name, @_names)
85
+ if(@_branched)
86
+ Zillabyte::Harness::Helper.check_consumes(h, @_streams)
87
+ else
88
+ if(h._consumes)
89
+ h._consumes = nil
90
+ end
91
+ end
92
+ Zillabyte::Harness::Helper.check_group_by("aggregate", h, @_nodes, @_streams)
90
93
  @_branched = Zillabyte::Harness::Helper.check_emits("aggregate", h._emits, @_streams) || @_branched
91
94
  @_nodes << h
92
95
  if(@_options[:command] == :info)
93
- info_hash = {"name" => h._name, "type" => h._type, "emits" => h._emits}
96
+ info_hash = {"name" => h._name, "type" => h._type, "group_by" => h._group_by, "emits" => h._emits}
94
97
  if(h._consumes)
95
98
  info_hash["consumes"] = h._consumes
96
99
  end
97
100
  Zillabyte::Harness::Helper.write_hash_to_file(info_hash, @_info_file)
98
101
  elsif(@_options[:command] == :execute)
99
102
  pipe_name = @_options[:pipe]
100
- if(@_options[:name] == h._name+"_start")
101
- c = Zillabyte::Harness::AggregateStartController.new(h, Zillabyte::Common::Progress.new)
102
- elsif(@_options[:name] == h._name+"_aggregate")
103
- c = Zillabyte::Harness::AggregateAggregateController.new(h, Zillabyte::Common::Progress.new)
104
- elsif(@_options[:name] == h._name+"_complete")
105
- c = Zillabyte::Harness::AggregateCompleteController.new(h, Zillabyte::Common::Progress.new)
103
+ if(@_options[:name] == h._name)
104
+ c = Zillabyte::Harness::AggregateController.new(h, Zillabyte::Common::Progress.new)
106
105
  end
107
106
  c.run(pipe_name)
108
107
  end
@@ -7,6 +7,7 @@ class Tuple
7
7
 
8
8
  def [](name)
9
9
  maybe_build_alias_hash()
10
+ p name
10
11
  values[ @_alias_hash[name] || name ]
11
12
  end
12
13
 
@@ -1,3 +1,3 @@
1
1
  module Zillabyte
2
- VERSION = "0.0.11" unless defined?(VERSION)
2
+ VERSION = "0.0.12" unless defined?(VERSION)
3
3
  end
@@ -0,0 +1,3 @@
1
+ module Zillabyte
2
+ VERSION = "0.0.11" unless defined?(VERSION)
3
+ end
@@ -16,5 +16,8 @@ module Zillabyte
16
16
  Zillabyte::Harness::SimpleSpout.build(*args, &block)
17
17
  end
18
18
 
19
+ def self.simple_aggregate(*args, &block)
20
+ Zillabyte::Harness::SimpleAggregate.build(*args, &block)
21
+ end
19
22
 
20
23
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zillabyte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-15 00:00:00.000000000 Z
12
+ date: 2014-02-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -73,6 +73,7 @@ files:
73
73
  - ruby/lib/zillabyte/harness/groupby.rb
74
74
  - ruby/lib/zillabyte/harness/helper.rb
75
75
  - ruby/lib/zillabyte/harness/live_delegator.rb
76
+ - ruby/lib/zillabyte/harness/simple_aggregate.rb
76
77
  - ruby/lib/zillabyte/harness/simple_function.rb
77
78
  - ruby/lib/zillabyte/harness/simple_spout.rb
78
79
  - ruby/lib/zillabyte/harness/sink.rb
@@ -81,6 +82,7 @@ files:
81
82
  - ruby/lib/zillabyte/harness/tuple.rb
82
83
  - ruby/lib/zillabyte/harness.rb
83
84
  - ruby/lib/zillabyte/version.rb
85
+ - ruby/lib/zillabyte/version.rb~
84
86
  - ruby/lib/zillabyte.rb
85
87
  - ruby/README.md
86
88
  homepage: http://www.zillabyte.com