zillabyte 0.1.43 → 0.1.44

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,476 +13,137 @@ class Zillabyte::Harness::Stream
13
13
  @_name
14
14
  end
15
15
 
16
- def each(options = {}, &block)
17
- h = Zillabyte::Harness::Each.new()
18
- # Does the block take 0 arguments? If so it's not just an execute block.
19
- if(block.arity == 0)
20
- h.instance_eval(&block)
21
- Zillabyte::Harness::Helper.check_name("each", h._name, @_app._names)
22
- if h._emits
23
- Zillabyte::Harness::Helper.check_emits("each", h._emits, @_app._streams)
24
- else
25
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
26
- end
27
- # Takes more than 0? Then it takes |tuple| and is an execute block. Give it a generated stream name.
28
- else
29
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
30
- h._execute = block
31
- end
32
-
33
- Zillabyte::Harness::Helper.check_each(h)
34
- @_app._nodes << h
35
-
36
- if(@_app._options[:command] == :info)
37
-
38
- node_hash = {
39
- :name => h._name,
40
- :type => h._type,
41
- :output_format => h._output_format,
42
- :config => options
43
- }
44
- if (h._parallelism)
45
- node_hash["parallelism"] = h._parallelism
46
- end
47
-
48
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
49
- arc_hash = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name}
50
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
51
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
52
- pipe_name = @_app._options[:pipe]
53
- c = Zillabyte::Harness::EachController.new(h, progress = Zillabyte::Common::Progress.new)
54
- c.run(pipe_name)
55
- end
56
-
57
- output_streams = []
58
- h._emits.each do |stream|
59
- output_streams << self.class.new(stream, @_app, h._name)
60
- end
61
- output_streams = output_streams[0] if output_streams.size == 1
62
- output_streams
16
+ def each(*args, &block)
17
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
18
+ op.build_multilang_operation("each", *args, &block)
19
+ .add_operation_properties_to_info(:name, :type, :output_format)
20
+ .add_optional_operation_properties_to_info(:parallelism)
21
+ .create_arc_info_from_stream(self)
22
+ .handle_operation
23
+ .get_output_streams
63
24
  end
64
25
 
65
-
66
- def filter(options = {}, &block)
67
- h = Zillabyte::Harness::Filter.new()
68
- # Does the block take 0 arguments? If so it's not just a keep block.
69
- if(block.arity == 0)
70
- h.instance_eval(&block)
71
- Zillabyte::Harness::Helper.check_name("filter", h._name, @_app._names)
72
- if h._emits
73
- Zillabyte::Harness::Helper.check_emits("filter", h._emits, @_app._streams)
74
- else
75
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
76
- end
77
- else
78
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
79
- h._keep = block
80
- end
81
-
82
- Zillabyte::Harness::Helper.check_filter(h)
83
- @_app._nodes << h
84
-
85
- if(@_app._options[:command] == :info)
86
-
87
- node_hash = {"name" => h._name, "type" => h._type}
88
- if (h._parallelism)
89
- node_hash["parallelism"] = h._parallelism
90
- end
91
-
92
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
93
- arc_hash = {
94
- :name => @_name,
95
- :origin => @_previous_node_name,
96
- :dest => h._name,
97
- :config => options
98
- }
99
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
100
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
101
- pipe_name = @_app._options[:pipe]
102
- c = Zillabyte::Harness::FilterController.new(h, progress = Zillabyte::Common::Progress.new)
103
- c.run(pipe_name)
104
- end
105
-
106
- output_stream = self.class.new(h._emits[0], @_app, h._name) # filters only have one output stream
107
- output_stream
26
+ def filter(*args, &block)
27
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
28
+ op.build_multilang_operation("filter", *args, &block)
29
+ .add_operation_properties_to_info(:name, :type)
30
+ .add_optional_operation_properties_to_info(:parallelism)
31
+ .create_arc_info_from_stream(self)
32
+ .handle_operation
33
+ .get_output_streams
108
34
  end
109
35
 
110
36
 
111
37
  def group_by(*args, &block)
112
-
113
- # Init
114
- group_args = Zillabyte::Harness::Helper.get_non_option_args(args)
115
- options = Zillabyte::Harness::Helper.get_options(args)
116
- h = Zillabyte::Harness::GroupBy.new(*group_args)
117
-
118
- # Yield
119
- h.instance_eval(&block)
120
- Zillabyte::Harness::Helper.check_name("group_by", h._name, @_app._names)
121
-
122
- # Multiple emits?
123
- if h._emits
124
- Zillabyte::Harness::Helper.check_emits("group_by", h._emits, @_app._streams)
125
- else
126
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
127
- end
128
-
129
- Zillabyte::Harness::Helper.check_group_by(h)
130
- @_app._nodes << h
131
-
132
- # Handle incoming command..
133
- if(@_app._options[:command] == :info)
134
-
135
- # Info..
136
- node_hash = {
137
- :name => h._name,
138
- :type => h._type,
139
- :group_by => h._group_by,
140
- :config => options
141
- }
142
- if(h._parallelism)
143
- node_hash["parallelism"] = h._parallelism
144
- end
145
-
146
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
147
- arc_hash = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name}
148
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
149
-
150
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
151
-
152
- # Execute..
153
- pipe_name = @_app._options[:pipe]
154
- c = Zillabyte::Harness::GroupByController.new(h, Zillabyte::Common::Progress.new)
155
- c.run(pipe_name)
156
-
157
- end
158
-
159
- # Return the stream
160
- output_streams = []
161
- h._emits.each do |stream|
162
- output_streams << self.class.new(stream, @_app, h._name)
163
- end
164
- output_streams = output_streams[0] if output_streams.size == 1
165
- output_streams
166
-
38
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
39
+ op.build_multilang_operation("group_by", *args, &block)
40
+ .add_operation_properties_to_info(:name, :type, :group_by)
41
+ .add_optional_operation_properties_to_info(:parallelism)
42
+ .create_arc_info_from_stream(self)
43
+ .handle_operation
44
+ .get_output_streams
167
45
  end
168
46
 
169
-
170
- def join_with(rhs_stream, options = {}, &block)
171
-
172
- # Init
173
- h = Zillabyte::Harness::Join.new(options)
174
-
175
- # Yield
176
- throw "programmable joins are not supported at this time" unless block.nil?
177
- # h.instance_eval(&block)
178
-
179
- # Sanity
180
- Zillabyte::Harness::Helper.check_name("join", h._name, @_app._names)
181
-
182
- # Build the node
183
- if h._emits
184
- Zillabyte::Harness::Helper.check_emits("join", h._emits, @_app._streams)
185
- else
186
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
187
- end
188
-
189
- Zillabyte::Harness::Helper.check_join(h)
190
- @_app._nodes << h
191
-
192
- # Handle incoming command..
193
- if(@_app._options[:command] == :info)
194
- # Info..
195
- node_hash = {
196
- :name => h._name,
197
- :type => h._type,
198
- :lhs_fields => h._lhs_fields,
199
- :rhs_fields => h._rhs_fields,
200
- :join_type => h._join_type,
201
- :config => options
202
- }
203
-
204
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
205
- arc_hash1 = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name, "left" => 1}
206
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash1, @_app._info_file)
207
- arc_hash2 = {"name" => rhs_stream._name, "origin" => rhs_stream._previous_node_name, "dest" => h._name, "right" => 1}
208
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash2, @_app._info_file)
209
-
210
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
211
-
212
- # There's a problem, because joins should be not be executed in multilang
213
- throw "unsupported: joins should be executed here"
214
-
215
- end
216
-
217
- # Return the stream
218
- output_stream = self.class.new(h._emits[0], @_app, h._name)
219
- return output_stream
220
-
47
+ def join_with(*args)
48
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
49
+ op = op.build_multilang_operation("join", self, *args)
50
+ .add_operation_properties_to_info(:name, :type, :lhs_fields, :rhs_fields, :join_type)
51
+ node = op.node
52
+ op.create_arc_info_from_stream(node._lhs_stream, :left)
53
+ .create_arc_info_from_stream(node._rhs_stream, :right)
54
+ .handle_operation
55
+ .get_output_streams
221
56
  end
222
57
 
223
- def call_component(comp_id = nil, options = {}, &block)
224
-
225
- h = Zillabyte::Harness::InjectedComponent.new()
226
- h.instance_eval(&block) if block_given?
227
- h._id ||= comp_id
228
- h._emits ||= ["stream_"+Zillabyte::Harness::Counter.get()] # We default to single-stream when called like this..
229
-
230
- Zillabyte::Harness::Helper.check_name("component", h._name, @_app._names)
231
- Zillabyte::Harness::Helper.check_call_component(h)
232
- Zillabyte::Harness::Helper.check_emits("component", h._emits, @_app._streams)
233
- @_app._nodes << h
234
-
235
- if(@_app._options[:command] == :info)
236
- node_hash = {
237
- :name => h._name,
238
- :type => h._type,
239
- :id => h._id,
240
- :output_format => h._output_format,
241
- :config => options
242
- }
243
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
244
- arc_hash = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name}
245
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
246
- h._consumes.each do |stream|
247
- arc_hash = {"name" => stream._name, "origin" => stream._previous_node_name, "dest" => h._name}
248
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
249
- end
58
+ def call_component(*args, &block)
59
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
60
+ op = op.build_multilang_operation("component", self, *args, &block)
61
+ .add_operation_properties_to_info(:name, :type, :id, :output_format)
62
+ node = op.node
63
+ node._consumes.each do |stream|
64
+ op = op.create_arc_info_from_stream(stream)
250
65
  end
251
-
252
- # Return the stream
253
- output_streams = []
254
- h._emits.each do |stream|
255
- output_streams << self.class.new(stream, @_app, h._name)
256
- end
257
- output_streams = output_streams[0] if output_streams.size == 1
258
- output_streams
66
+ op.handle_operation
67
+ .get_output_streams
259
68
  end
260
69
  alias_method :executes, :call_component
261
70
  alias_method :execute, :call_component
262
-
263
-
264
71
 
265
- def sink(&block)
266
- h = Zillabyte::Harness::Sink.new()
267
- h.instance_eval(&block)
268
- Zillabyte::Harness::Helper.check_sink(h, @_app._nodes)
269
- @_app._nodes << h
270
- if(@_app._options[:command] == :info)
271
- node_hash = {"name" => h._name, "type" => h._type, "columns" => h._columns, "relation" => h._relation || h._name, "scope" => h._scope}
272
-
273
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
274
- arc_hash = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name}
275
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
276
- end
72
+ def sink(*args, &block)
73
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
74
+ op.build_multilang_operation("sink", *args, &block)
75
+ .add_operation_properties_to_info(:name, :type, :columns, :scope)
76
+ .add_optional_operation_properties_to_info(:relation)
77
+ .create_arc_info_from_stream(self)
78
+ .handle_operation
277
79
  end
278
80
 
279
81
 
280
82
 
281
83
  # Makes a stream unique. Executed on backend
282
84
  def unique(*args)
283
-
284
- # INIT
285
- group_fields = Zillabyte::Harness::Helper.get_non_option_args(args)
286
- options = Zillabyte::Harness::Helper.get_options(args)
287
-
288
- if(@_app._options[:command] == :info)
289
-
290
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
291
- node_name = "unique_#{Zillabyte::Harness::Counter.get()}"
292
- node_hash = {
293
- :name => node_name,
294
- :type => "unique",
295
- :group_fields => group_fields,
296
- :config => options
297
- }
298
- arc_hash = {
299
- :name => @_name,
300
- :origin => @_previous_node_name,
301
- :dest => node_name
302
- }
303
-
304
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
305
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
306
- return self.class.new(emits, @_app, node_name)
307
-
308
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
309
- throw "should not execute in multilang"
310
- end
311
-
85
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
86
+ op.build_jvm_operation("unique", *args)
87
+ .add_operation_properties_to_info(:name, :type)
88
+ .add_input_args_to_info_as(:group_fields)
89
+ .create_arc_info_from_stream(self)
90
+ .handle_operation
91
+ .get_output_streams
312
92
  end
313
93
 
314
-
315
-
316
94
  # Counts fields. Executed on backend
317
95
  def count(*args)
318
-
319
- # Init
320
- group_fields = Zillabyte::Harness::Helper.get_non_option_args(args)
321
- options = Zillabyte::Harness::Helper.get_options(args)
322
-
323
- if(@_app._options[:command] == :info)
324
-
325
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
326
- node_name = "count_#{Zillabyte::Harness::Counter.get()}"
327
- node_hash = {
328
- :name => node_name,
329
- :type => "count",
330
- :group_fields => group_fields,
331
- :config => options
332
- }
333
- arc_hash = {
334
- :name => @_name,
335
- :origin => @_previous_node_name,
336
- :dest => node_name
337
- }
338
-
339
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
340
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
341
- return self.class.new(emits, @_app, node_name)
342
-
343
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
344
- throw "should not execute in multilang"
345
- end
346
-
96
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
97
+ op.build_jvm_operation("count", *args)
98
+ .add_operation_properties_to_info(:name, :type)
99
+ .add_input_args_to_info_as(:group_fields)
100
+ .create_arc_info_from_stream(self)
101
+ .handle_operation
102
+ .get_output_streams
347
103
  end
348
104
 
349
-
350
-
351
-
352
105
  # Removes fields from the stream.
353
106
  def remove(*args)
354
-
355
- # Init
356
- remove_fields = Zillabyte::Harness::Helper.get_non_option_args(args)
357
- options = Zillabyte::Harness::Helper.get_options(args)
358
-
359
- if(@_app._options[:command] == :info)
360
-
361
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
362
- node_name = "remove_#{Zillabyte::Harness::Counter.get()}"
363
- node_hash = {
364
- :name => node_name,
365
- :type => "remove",
366
- :remove => remove_fields,
367
- :config => options
368
- }
369
- arc_hash = {
370
- :name => @_name,
371
- :origin => @_previous_node_name,
372
- :dest => node_name
373
- }
374
-
375
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
376
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
377
- return self.class.new(emits, @_app, node_name)
378
-
379
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
380
- throw "should not execute in multilang"
381
- end
382
-
107
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
108
+ op.build_jvm_operation("remove", *args)
109
+ .add_operation_properties_to_info(:name, :type)
110
+ .add_input_args_to_info_as(:remove)
111
+ .create_arc_info_from_stream(self)
112
+ .handle_operation
113
+ .get_output_streams
383
114
  end
384
115
 
385
-
386
-
387
116
  # Retains fields from the stream and removes everything else
388
117
  def retain(*args)
389
-
390
- # Init
391
- retain_fields = Zillabyte::Harness::Helper.get_non_option_args(args)
392
- options = Zillabyte::Harness::Helper.get_options(args)
393
-
394
- if(@_app._options[:command] == :info)
395
-
396
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
397
- node_name = "retain_#{Zillabyte::Harness::Counter.get()}"
398
- node_hash = {
399
- :name => node_name,
400
- :type => "retain",
401
- :retain => retain_fields,
402
- :config => options
403
- }
404
- arc_hash = {
405
- :name => @_name,
406
- :origin => @_previous_node_name,
407
- :dest => node_name
408
- }
409
-
410
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
411
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
412
- return self.class.new(emits, @_app, node_name)
413
-
414
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
415
- throw "should not execute in multilang"
416
- end
417
-
118
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
119
+ op.build_jvm_operation("retain", *args)
120
+ .add_operation_properties_to_info(:name, :type)
121
+ .add_input_args_to_info_as(:retain)
122
+ .create_arc_info_from_stream(self)
123
+ .handle_operation
124
+ .get_output_streams
418
125
  end
419
126
 
420
-
421
-
422
-
423
-
424
127
  # Groups together N arbitrary tuples (useful for mini-batch processing)
425
- def clump(options = {})
426
-
427
- if(@_app._options[:command] == :info)
428
-
429
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
430
- node_name = "clump_#{Zillabyte::Harness::Counter.get()}"
431
- node_hash = {
432
- :name => node_name,
433
- :type => "clump",
434
- :config => options
435
- }
436
- arc_hash = {
437
- :name => @_name,
438
- :origin => @_previous_node_name,
439
- :dest => node_name
440
- }
441
-
442
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
443
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
444
- return self.class.new(emits, @_app, node_name)
445
-
446
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
447
- throw "should not execute in multilang"
448
- end
449
-
128
+ def clump(*args)
129
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
130
+ op.build_jvm_operation("clump", *args)
131
+ .add_operation_properties_to_info(:name, :type)
132
+ .add_input_args_to_info_as(:clump)
133
+ .create_arc_info_from_stream(self)
134
+ .handle_operation
135
+ .get_output_streams
450
136
  end
451
137
 
452
-
453
-
454
-
455
-
456
138
  # Renames fields..
457
- def rename(rename_map)
458
-
459
- if(@_app._options[:command] == :info)
460
-
461
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
462
- node_name = "rename_#{Zillabyte::Harness::Counter.get()}"
463
- node_hash = {
464
- :name => node_name,
465
- :type => "rename",
466
- :rename => rename_map
467
- }
468
- arc_hash = {
469
- :name => @_name,
470
- :origin => @_previous_node_name,
471
- :dest => node_name
472
- }
473
-
474
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
475
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
476
- return self.class.new(emits, @_app, node_name)
477
-
478
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
479
- throw "should not execute in multilang"
480
- end
481
-
139
+ def rename(*args)
140
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
141
+ op.build_jvm_operation("rename", *args)
142
+ .add_operation_properties_to_info(:name, :type)
143
+ .add_input_args_to_info_as(:rename, 0)
144
+ .create_arc_info_from_stream(self)
145
+ .handle_operation
146
+ .get_output_streams
482
147
  end
483
148
 
484
-
485
-
486
-
487
-
488
149
  end