zillabyte 0.1.43 → 0.1.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,476 +13,137 @@ class Zillabyte::Harness::Stream
13
13
  @_name
14
14
  end
15
15
 
16
- def each(options = {}, &block)
17
- h = Zillabyte::Harness::Each.new()
18
- # Does the block take 0 arguments? If so it's not just an execute block.
19
- if(block.arity == 0)
20
- h.instance_eval(&block)
21
- Zillabyte::Harness::Helper.check_name("each", h._name, @_app._names)
22
- if h._emits
23
- Zillabyte::Harness::Helper.check_emits("each", h._emits, @_app._streams)
24
- else
25
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
26
- end
27
- # Takes more than 0? Then it takes |tuple| and is an execute block. Give it a generated stream name.
28
- else
29
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
30
- h._execute = block
31
- end
32
-
33
- Zillabyte::Harness::Helper.check_each(h)
34
- @_app._nodes << h
35
-
36
- if(@_app._options[:command] == :info)
37
-
38
- node_hash = {
39
- :name => h._name,
40
- :type => h._type,
41
- :output_format => h._output_format,
42
- :config => options
43
- }
44
- if (h._parallelism)
45
- node_hash["parallelism"] = h._parallelism
46
- end
47
-
48
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
49
- arc_hash = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name}
50
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
51
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
52
- pipe_name = @_app._options[:pipe]
53
- c = Zillabyte::Harness::EachController.new(h, progress = Zillabyte::Common::Progress.new)
54
- c.run(pipe_name)
55
- end
56
-
57
- output_streams = []
58
- h._emits.each do |stream|
59
- output_streams << self.class.new(stream, @_app, h._name)
60
- end
61
- output_streams = output_streams[0] if output_streams.size == 1
62
- output_streams
16
+ def each(*args, &block)
17
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
18
+ op.build_multilang_operation("each", *args, &block)
19
+ .add_operation_properties_to_info(:name, :type, :output_format)
20
+ .add_optional_operation_properties_to_info(:parallelism)
21
+ .create_arc_info_from_stream(self)
22
+ .handle_operation
23
+ .get_output_streams
63
24
  end
64
25
 
65
-
66
- def filter(options = {}, &block)
67
- h = Zillabyte::Harness::Filter.new()
68
- # Does the block take 0 arguments? If so it's not just a keep block.
69
- if(block.arity == 0)
70
- h.instance_eval(&block)
71
- Zillabyte::Harness::Helper.check_name("filter", h._name, @_app._names)
72
- if h._emits
73
- Zillabyte::Harness::Helper.check_emits("filter", h._emits, @_app._streams)
74
- else
75
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
76
- end
77
- else
78
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
79
- h._keep = block
80
- end
81
-
82
- Zillabyte::Harness::Helper.check_filter(h)
83
- @_app._nodes << h
84
-
85
- if(@_app._options[:command] == :info)
86
-
87
- node_hash = {"name" => h._name, "type" => h._type}
88
- if (h._parallelism)
89
- node_hash["parallelism"] = h._parallelism
90
- end
91
-
92
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
93
- arc_hash = {
94
- :name => @_name,
95
- :origin => @_previous_node_name,
96
- :dest => h._name,
97
- :config => options
98
- }
99
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
100
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
101
- pipe_name = @_app._options[:pipe]
102
- c = Zillabyte::Harness::FilterController.new(h, progress = Zillabyte::Common::Progress.new)
103
- c.run(pipe_name)
104
- end
105
-
106
- output_stream = self.class.new(h._emits[0], @_app, h._name) # filters only have one output stream
107
- output_stream
26
+ def filter(*args, &block)
27
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
28
+ op.build_multilang_operation("filter", *args, &block)
29
+ .add_operation_properties_to_info(:name, :type)
30
+ .add_optional_operation_properties_to_info(:parallelism)
31
+ .create_arc_info_from_stream(self)
32
+ .handle_operation
33
+ .get_output_streams
108
34
  end
109
35
 
110
36
 
111
37
  def group_by(*args, &block)
112
-
113
- # Init
114
- group_args = Zillabyte::Harness::Helper.get_non_option_args(args)
115
- options = Zillabyte::Harness::Helper.get_options(args)
116
- h = Zillabyte::Harness::GroupBy.new(*group_args)
117
-
118
- # Yield
119
- h.instance_eval(&block)
120
- Zillabyte::Harness::Helper.check_name("group_by", h._name, @_app._names)
121
-
122
- # Multiple emits?
123
- if h._emits
124
- Zillabyte::Harness::Helper.check_emits("group_by", h._emits, @_app._streams)
125
- else
126
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
127
- end
128
-
129
- Zillabyte::Harness::Helper.check_group_by(h)
130
- @_app._nodes << h
131
-
132
- # Handle incoming command..
133
- if(@_app._options[:command] == :info)
134
-
135
- # Info..
136
- node_hash = {
137
- :name => h._name,
138
- :type => h._type,
139
- :group_by => h._group_by,
140
- :config => options
141
- }
142
- if(h._parallelism)
143
- node_hash["parallelism"] = h._parallelism
144
- end
145
-
146
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
147
- arc_hash = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name}
148
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
149
-
150
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
151
-
152
- # Execute..
153
- pipe_name = @_app._options[:pipe]
154
- c = Zillabyte::Harness::GroupByController.new(h, Zillabyte::Common::Progress.new)
155
- c.run(pipe_name)
156
-
157
- end
158
-
159
- # Return the stream
160
- output_streams = []
161
- h._emits.each do |stream|
162
- output_streams << self.class.new(stream, @_app, h._name)
163
- end
164
- output_streams = output_streams[0] if output_streams.size == 1
165
- output_streams
166
-
38
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
39
+ op.build_multilang_operation("group_by", *args, &block)
40
+ .add_operation_properties_to_info(:name, :type, :group_by)
41
+ .add_optional_operation_properties_to_info(:parallelism)
42
+ .create_arc_info_from_stream(self)
43
+ .handle_operation
44
+ .get_output_streams
167
45
  end
168
46
 
169
-
170
- def join_with(rhs_stream, options = {}, &block)
171
-
172
- # Init
173
- h = Zillabyte::Harness::Join.new(options)
174
-
175
- # Yield
176
- throw "programmable joins are not supported at this time" unless block.nil?
177
- # h.instance_eval(&block)
178
-
179
- # Sanity
180
- Zillabyte::Harness::Helper.check_name("join", h._name, @_app._names)
181
-
182
- # Build the node
183
- if h._emits
184
- Zillabyte::Harness::Helper.check_emits("join", h._emits, @_app._streams)
185
- else
186
- h._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
187
- end
188
-
189
- Zillabyte::Harness::Helper.check_join(h)
190
- @_app._nodes << h
191
-
192
- # Handle incoming command..
193
- if(@_app._options[:command] == :info)
194
- # Info..
195
- node_hash = {
196
- :name => h._name,
197
- :type => h._type,
198
- :lhs_fields => h._lhs_fields,
199
- :rhs_fields => h._rhs_fields,
200
- :join_type => h._join_type,
201
- :config => options
202
- }
203
-
204
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
205
- arc_hash1 = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name, "left" => 1}
206
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash1, @_app._info_file)
207
- arc_hash2 = {"name" => rhs_stream._name, "origin" => rhs_stream._previous_node_name, "dest" => h._name, "right" => 1}
208
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash2, @_app._info_file)
209
-
210
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
211
-
212
- # There's a problem, because joins should be not be executed in multilang
213
- throw "unsupported: joins should be executed here"
214
-
215
- end
216
-
217
- # Return the stream
218
- output_stream = self.class.new(h._emits[0], @_app, h._name)
219
- return output_stream
220
-
47
+ def join_with(*args)
48
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
49
+ op = op.build_multilang_operation("join", self, *args)
50
+ .add_operation_properties_to_info(:name, :type, :lhs_fields, :rhs_fields, :join_type)
51
+ node = op.node
52
+ op.create_arc_info_from_stream(node._lhs_stream, :left)
53
+ .create_arc_info_from_stream(node._rhs_stream, :right)
54
+ .handle_operation
55
+ .get_output_streams
221
56
  end
222
57
 
223
- def call_component(comp_id = nil, options = {}, &block)
224
-
225
- h = Zillabyte::Harness::InjectedComponent.new()
226
- h.instance_eval(&block) if block_given?
227
- h._id ||= comp_id
228
- h._emits ||= ["stream_"+Zillabyte::Harness::Counter.get()] # We default to single-stream when called like this..
229
-
230
- Zillabyte::Harness::Helper.check_name("component", h._name, @_app._names)
231
- Zillabyte::Harness::Helper.check_call_component(h)
232
- Zillabyte::Harness::Helper.check_emits("component", h._emits, @_app._streams)
233
- @_app._nodes << h
234
-
235
- if(@_app._options[:command] == :info)
236
- node_hash = {
237
- :name => h._name,
238
- :type => h._type,
239
- :id => h._id,
240
- :output_format => h._output_format,
241
- :config => options
242
- }
243
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
244
- arc_hash = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name}
245
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
246
- h._consumes.each do |stream|
247
- arc_hash = {"name" => stream._name, "origin" => stream._previous_node_name, "dest" => h._name}
248
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
249
- end
58
+ def call_component(*args, &block)
59
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
60
+ op = op.build_multilang_operation("component", self, *args, &block)
61
+ .add_operation_properties_to_info(:name, :type, :id, :output_format)
62
+ node = op.node
63
+ node._consumes.each do |stream|
64
+ op = op.create_arc_info_from_stream(stream)
250
65
  end
251
-
252
- # Return the stream
253
- output_streams = []
254
- h._emits.each do |stream|
255
- output_streams << self.class.new(stream, @_app, h._name)
256
- end
257
- output_streams = output_streams[0] if output_streams.size == 1
258
- output_streams
66
+ op.handle_operation
67
+ .get_output_streams
259
68
  end
260
69
  alias_method :executes, :call_component
261
70
  alias_method :execute, :call_component
262
-
263
-
264
71
 
265
- def sink(&block)
266
- h = Zillabyte::Harness::Sink.new()
267
- h.instance_eval(&block)
268
- Zillabyte::Harness::Helper.check_sink(h, @_app._nodes)
269
- @_app._nodes << h
270
- if(@_app._options[:command] == :info)
271
- node_hash = {"name" => h._name, "type" => h._type, "columns" => h._columns, "relation" => h._relation || h._name, "scope" => h._scope}
272
-
273
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
274
- arc_hash = {"name" => @_name, "origin" => @_previous_node_name, "dest" => h._name}
275
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
276
- end
72
+ def sink(*args, &block)
73
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
74
+ op.build_multilang_operation("sink", *args, &block)
75
+ .add_operation_properties_to_info(:name, :type, :columns, :scope)
76
+ .add_optional_operation_properties_to_info(:relation)
77
+ .create_arc_info_from_stream(self)
78
+ .handle_operation
277
79
  end
278
80
 
279
81
 
280
82
 
281
83
  # Unique's a stream. Executed on backend
282
84
  def unique(*args)
283
-
284
- # INIT
285
- group_fields = Zillabyte::Harness::Helper.get_non_option_args(args)
286
- options = Zillabyte::Harness::Helper.get_options(args)
287
-
288
- if(@_app._options[:command] == :info)
289
-
290
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
291
- node_name = "unique_#{Zillabyte::Harness::Counter.get()}"
292
- node_hash = {
293
- :name => node_name,
294
- :type => "unique",
295
- :group_fields => group_fields,
296
- :config => options
297
- }
298
- arc_hash = {
299
- :name => @_name,
300
- :origin => @_previous_node_name,
301
- :dest => node_name
302
- }
303
-
304
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
305
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
306
- return self.class.new(emits, @_app, node_name)
307
-
308
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
309
- throw "should not execute in multilang"
310
- end
311
-
85
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
86
+ op.build_jvm_operation("unique", *args)
87
+ .add_operation_properties_to_info(:name, :type)
88
+ .add_input_args_to_info_as(:group_fields)
89
+ .create_arc_info_from_stream(self)
90
+ .handle_operation
91
+ .get_output_streams
312
92
  end
313
93
 
314
-
315
-
316
94
  # Counts fields. Executed on backend
317
95
  def count(*args)
318
-
319
- # Init
320
- group_fields = Zillabyte::Harness::Helper.get_non_option_args(args)
321
- options = Zillabyte::Harness::Helper.get_options(args)
322
-
323
- if(@_app._options[:command] == :info)
324
-
325
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
326
- node_name = "count_#{Zillabyte::Harness::Counter.get()}"
327
- node_hash = {
328
- :name => node_name,
329
- :type => "count",
330
- :group_fields => group_fields,
331
- :config => options
332
- }
333
- arc_hash = {
334
- :name => @_name,
335
- :origin => @_previous_node_name,
336
- :dest => node_name
337
- }
338
-
339
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
340
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
341
- return self.class.new(emits, @_app, node_name)
342
-
343
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
344
- throw "should not execute in multilang"
345
- end
346
-
96
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
97
+ op.build_jvm_operation("count", *args)
98
+ .add_operation_properties_to_info(:name, :type)
99
+ .add_input_args_to_info_as(:group_fields)
100
+ .create_arc_info_from_stream(self)
101
+ .handle_operation
102
+ .get_output_streams
347
103
  end
348
104
 
349
-
350
-
351
-
352
105
  # Removes fields from the stream.
353
106
  def remove(*args)
354
-
355
- # Init
356
- remove_fields = Zillabyte::Harness::Helper.get_non_option_args(args)
357
- options = Zillabyte::Harness::Helper.get_options(args)
358
-
359
- if(@_app._options[:command] == :info)
360
-
361
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
362
- node_name = "remove_#{Zillabyte::Harness::Counter.get()}"
363
- node_hash = {
364
- :name => node_name,
365
- :type => "remove",
366
- :remove => remove_fields,
367
- :config => options
368
- }
369
- arc_hash = {
370
- :name => @_name,
371
- :origin => @_previous_node_name,
372
- :dest => node_name
373
- }
374
-
375
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
376
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
377
- return self.class.new(emits, @_app, node_name)
378
-
379
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
380
- throw "should not execute in multilang"
381
- end
382
-
107
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
108
+ op.build_jvm_operation("remove", *args)
109
+ .add_operation_properties_to_info(:name, :type)
110
+ .add_input_args_to_info_as(:remove)
111
+ .create_arc_info_from_stream(self)
112
+ .handle_operation
113
+ .get_output_streams
383
114
  end
384
115
 
385
-
386
-
387
116
  # Retains fields from the stream and removes everything else
388
117
  def retain(*args)
389
-
390
- # Init
391
- retain_fields = Zillabyte::Harness::Helper.get_non_option_args(args)
392
- options = Zillabyte::Harness::Helper.get_options(args)
393
-
394
- if(@_app._options[:command] == :info)
395
-
396
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
397
- node_name = "retain_#{Zillabyte::Harness::Counter.get()}"
398
- node_hash = {
399
- :name => node_name,
400
- :type => "retain",
401
- :retain => retain_fields,
402
- :config => options
403
- }
404
- arc_hash = {
405
- :name => @_name,
406
- :origin => @_previous_node_name,
407
- :dest => node_name
408
- }
409
-
410
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
411
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
412
- return self.class.new(emits, @_app, node_name)
413
-
414
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
415
- throw "should not execute in multilang"
416
- end
417
-
118
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
119
+ op.build_jvm_operation("retain", *args)
120
+ .add_operation_properties_to_info(:name, :type)
121
+ .add_input_args_to_info_as(:retain)
122
+ .create_arc_info_from_stream(self)
123
+ .handle_operation
124
+ .get_output_streams
418
125
  end
419
126
 
420
-
421
-
422
-
423
-
424
127
  # Groups together N arbitrary tuples (usefull for mini-batch processing)
425
- def clump(options = {})
426
-
427
- if(@_app._options[:command] == :info)
428
-
429
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
430
- node_name = "clump_#{Zillabyte::Harness::Counter.get()}"
431
- node_hash = {
432
- :name => node_name,
433
- :type => "clump",
434
- :config => options
435
- }
436
- arc_hash = {
437
- :name => @_name,
438
- :origin => @_previous_node_name,
439
- :dest => node_name
440
- }
441
-
442
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
443
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
444
- return self.class.new(emits, @_app, node_name)
445
-
446
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
447
- throw "should not execute in multilang"
448
- end
449
-
128
+ def clump(*args)
129
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
130
+ op.build_jvm_operation("clump", *args)
131
+ .add_operation_properties_to_info(:name, :type)
132
+ .add_input_args_to_info_as(:clump)
133
+ .create_arc_info_from_stream(self)
134
+ .handle_operation
135
+ .get_output_streams
450
136
  end
451
137
 
452
-
453
-
454
-
455
-
456
138
  # Renames fields..
457
- def rename(rename_map)
458
-
459
- if(@_app._options[:command] == :info)
460
-
461
- emits = ["stream_"+Zillabyte::Harness::Counter.get()]
462
- node_name = "rename_#{Zillabyte::Harness::Counter.get()}"
463
- node_hash = {
464
- :name => node_name,
465
- :type => "rename",
466
- :rename => rename_map
467
- }
468
- arc_hash = {
469
- :name => @_name,
470
- :origin => @_previous_node_name,
471
- :dest => node_name
472
- }
473
-
474
- Zillabyte::Harness::Helper.write_node_to_file(node_hash, @_app._info_file)
475
- Zillabyte::Harness::Helper.write_arc_to_file(arc_hash, @_app._info_file)
476
- return self.class.new(emits, @_app, node_name)
477
-
478
- elsif(@_app._options[:command] == :execute and @_app._options[:name] == h._name)
479
- throw "should not execute in multilang"
480
- end
481
-
139
+ def rename(*args)
140
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
141
+ op.build_jvm_operation("rename", *args)
142
+ .add_operation_properties_to_info(:name, :type)
143
+ .add_input_args_to_info_as(:rename, 0)
144
+ .create_arc_info_from_stream(self)
145
+ .handle_operation
146
+ .get_output_streams
482
147
  end
483
148
 
484
-
485
-
486
-
487
-
488
149
  end