zillabyte-cli 0.0.24 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +6 -14
  2. data/lib/#zillabyte-cli.rb# +5 -0
  3. data/lib/zillabyte/api/apps.rb +16 -132
  4. data/lib/zillabyte/api/components.rb +115 -0
  5. data/lib/zillabyte/api/flows.rb +121 -0
  6. data/lib/zillabyte/api/keys.rb +70 -0
  7. data/lib/zillabyte/api.rb +15 -2
  8. data/lib/zillabyte/auth.rb +43 -16
  9. data/lib/zillabyte/cli/#logs.rb# +12 -0
  10. data/lib/zillabyte/cli/#repl.rb# +43 -0
  11. data/lib/zillabyte/cli/apps.rb +52 -893
  12. data/lib/zillabyte/cli/auth.rb +3 -8
  13. data/lib/zillabyte/cli/base.rb +28 -7
  14. data/lib/zillabyte/cli/components.rb +245 -0
  15. data/lib/zillabyte/cli/flows.rb +549 -0
  16. data/lib/zillabyte/cli/git.rb +38 -0
  17. data/lib/zillabyte/cli/help.rb +11 -4
  18. data/lib/zillabyte/cli/keys.rb +177 -0
  19. data/lib/zillabyte/cli/query.rb +0 -1
  20. data/lib/zillabyte/cli/relations.rb +2 -1
  21. data/lib/zillabyte/cli/templates/{js → apps/js}/simple_function.js +0 -0
  22. data/lib/zillabyte/cli/templates/{js → apps/js}/zillabyte.conf.yaml +0 -0
  23. data/lib/zillabyte/cli/templates/apps/python/app.py +17 -0
  24. data/lib/zillabyte/cli/templates/{python → apps/python}/requirements.txt +0 -0
  25. data/lib/zillabyte/cli/templates/{python → apps/python}/zillabyte.conf.yaml +1 -1
  26. data/lib/zillabyte/cli/templates/{ruby → apps/ruby}/Gemfile +0 -0
  27. data/lib/zillabyte/cli/templates/{ruby → apps/ruby}/app.rb +1 -1
  28. data/lib/zillabyte/cli/templates/{ruby → apps/ruby}/zillabyte.conf.yaml +0 -0
  29. data/lib/zillabyte/cli/templates/python/{simple_function.py → #simple_function.py#} +3 -6
  30. data/lib/zillabyte/common/session.rb +3 -1
  31. data/lib/zillabyte/helpers.rb +64 -1
  32. data/lib/zillabyte/runner/app_runner.rb +226 -0
  33. data/lib/zillabyte/runner/component_operation.rb +529 -0
  34. data/lib/zillabyte/runner/component_runner.rb +244 -0
  35. data/lib/zillabyte/runner/multilang_operation.rb +1133 -0
  36. data/lib/zillabyte/runner/operation.rb +11 -0
  37. data/lib/zillabyte/runner.rb +6 -0
  38. data/lib/zillabyte-cli/version.rb +1 -1
  39. data/zillabyte-cli.gemspec +1 -0
  40. metadata +83 -52
@@ -0,0 +1,1133 @@
1
+ require 'json'
2
+ require 'mkfifo'
3
+ require "zillabyte/runner/component_operation"
4
+
5
+
6
+ # Emulate a multilang operation
7
+ class Zillabyte::Runner::MultilangOperation
8
+
9
# Wire-protocol messages exchanged with the multilang child process and with
# neighbouring operations. Each is a single JSON document; write_message
# strips it and appends ENDMARKER before it goes onto a pipe.
HANDSHAKE_MESSAGE = "{\"pidDir\": \"/tmp\"}\n".freeze
DONE_MESSAGE = "{\"command\": \"done\"}\n".freeze
NEXT_MESSAGE = "{\"command\": \"next\"}\n".freeze
BEGIN_CYCLE_MESSAGE = "{\"command\": \"begin_cycle\"}\n".freeze
END_CYCLE_MESSAGE = "{\"command\": \"end_cycle\"}\n".freeze

# A pong reply is PONG_PREFIX + <timestamp> + PONG_SUFFIX.
PONG_PREFIX = "{\"pong\": \"".freeze
PONG_SUFFIX = "\"}\n".freeze

# Terminator that frames every message on a pipe (see read_message).
ENDMARKER = "\nend\n".freeze
17
+
18
# Entry point: stores the operation's context in class-level instance
# variables, builds one emit queue per (stream, consumer) pair and dispatches
# to the runner matching the node's "type".
#
# @param node [Hash] operation description; "name" and "type" are read here
# @param dir [String] application directory (used by `command`)
# @param consumee [Hash] pipes to the upstream operation (:rd_child, :wr_child)
# @param consumer_pipes [Hash] stream => { consumer => pipes }
# @param tester [Object] object providing `cdisplay` and `query_agnostic`
# @param meta [Hash] application meta information ("script", "language")
# @param options [Hash] runner options (:interactive, :output, :output_type, ...)
#
# Any StandardError raised by the selected runner is displayed (message and
# backtrace) rather than propagated.
def self.run(node, dir, consumee, consumer_pipes, tester, meta, options = {})
  @__node = node
  @__name = node["name"]
  @__type = node["type"]
  @__dir = dir
  @__consumee = consumee
  @__consumer_pipes = consumer_pipes
  @__tester = tester

  @__meta = meta
  @__options = options
  @__output_type = options[:output_type]

  # Each consumer of a stream gets its own queue and message passing
  @__emit_queues = {}
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      @__emit_queues[stream] ||= {}
      @__emit_queues[stream][consumer] = {:write_queue => [], :ready => true}
    end
  end

  begin
    case @__type
    when "source"
      run_source
    when "group_by"
      run_group_by
    when "each"
      run_each
    when "filter"
      run_filter
    when "component"
      # Forward the caller's options. The previous `options = {}` argument
      # reassigned the local and always handed the component an empty hash.
      Zillabyte::Runner::ComponentOperation.run(node, dir, consumee, consumer_pipes, tester, meta, options)
    when "sink"
      run_sink
    else
      cdisplay("invalid operation type #{@__type}")
    end
  rescue => e
    cdisplay e.message
    cdisplay e.backtrace
  end
end
63
+
64
+
65
# Runs a "source" operation. Three modes, chosen in this order:
#
# 1. Interactive (`@__options[:interactive]`): each line read from the
#    consumee pipe is parsed as a JSON tuple and sent to every consumer.
# 2. Relation (`@__node['matches']` or `@__node['relation']`): rows are
#    fetched through the tester, queued for every consumer and handed out one
#    tuple per "next" request; a consumer whose queue is empty receives
#    end_cycle followed by done.
# 3. Custom: the user's script is spawned as a multilang child process and
#    its emits are relayed to the consumers of the emitted stream.
#
# Fixes relative to the previous revision:
# * the ping reply was written to an undefined `to_ml`; it now goes to the
#   child's stdin;
# * a null emit (nil tuple) signals end of cycle instead of falling through to
#   build_tuple_json(nil);
# * EOF on the interactive input or on the child's stdout no longer raises;
# * the relation loop keeps serving until every consumer has been ended
#   (it used to return as soon as one select round emitted nothing);
# * File.exists? (removed in Ruby 3.2) is replaced by File.exist?;
# * cleanup tolerates a FIFO that was never opened and a child that has
#   already exited;
# * the `rescue PTY::ChildExited` clause was dropped: nothing here raises it
#   and evaluating the constant fails when PTY is not loaded.
#
# NOTE(review): the child's stderr is never read; a very chatty script could
# presumably fill that pipe and stall - confirm.
def self.run_source
  # Interactive source
  if @__options[:interactive]
    loop do
      msg = @__consumee[:rd_child].gets
      break if msg.nil? # input closed

      # Build tuple
      begin
        tuple = JSON.parse(msg)
      rescue JSON::ParserError
        cdisplay "Error: invalid JSON"
        next
      end

      send_to_consumers(build_tuple_json(tuple))
    end

  # Source from relation
  elsif @__node['matches'] || @__node["relation"]

    # Query API for rows
    matches = @__node['matches'] || @__node["relation"]["query"]
    cdisplay("Fetching remote data...")
    res = @__tester.query_agnostic(matches)
    rows = res["rows"]
    if rows.nil? || rows.length == 0
      cdisplay("Could not find data that matches your 'matches' clause")
      exit(-1)
    end
    cdisplay("Received #{rows.length} rows!")

    # Enqueue rows for sending to consumers
    column_aliases = res["column_aliases"]
    rows.each do |tuple|
      tuple_json = build_tuple_json(tuple, nil, column_aliases)
      @__emit_queues.each_value do |consumers|
        consumers.each_value { |emitter| emitter[:write_queue] << tuple_json }
      end
    end

    # Index streams and consumers by their pipes for lookup
    consumer_hash = {}
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
        consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
      end
    end

    # Send first tuple
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        tuple_json = get_consumer_tuple(stream, consumer)
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end

    # Send tuples to consumers as they ask for them. A consumer is removed
    # from the wait set once its cycle has been ended; we are done when the
    # set is empty.
    until consumer_hash.empty?
      select_read_streams(consumer_hash.keys).each do |r|

        # Read from consumer
        msg = read_message(r)
        next if msg.nil? || msg["command"] != "next"

        stream = consumer_hash[r][:stream]
        consumer = consumer_hash[r][:consumer]

        # Consumer is ready for next message
        @__emit_queues[stream][consumer][:ready] = true
        tuple_json = get_consumer_tuple(stream, consumer)

        if tuple_json.nil?
          # All messages have been sent to this consumer: end its cycle
          write_stream = get_write_stream(stream, consumer)
          cdisplay "ending cycle for #{consumer}"
          write_message(write_stream, END_CYCLE_MESSAGE)
          write_message(write_stream, DONE_MESSAGE)
          consumer_hash.delete(r)
        else
          # Emit tuple to consumer
          emit_consumer_tuple(stream, consumer, tuple_json)
        end
      end
    end

  # Custom source
  else

    # Index streams and consumers by their pipes for lookup
    consumer_hash = {}
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
        consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
      end
    end

    # Keep track of how many consumers to handle before exiting
    consumers_running = consumer_hash.keys.length

    # Setup multilang pipe
    ml_pipe = "#{@__name}_pipe"
    fifo_path = "#{ml_pipe}.in"
    File.delete(fifo_path) if File.exist?(fifo_path)
    File.mkfifo(fifo_path)

    # Spawn multilang process
    cmd = command("--execute_live --name #{@__name} --pipe #{ml_pipe}")
    Open3.popen3(cmd) do |ml_input, stdout, stderr, wait_thread|
      ml_output = nil
      begin

        # Multilang output tuples
        ml_output = File.open(fifo_path, "r")

        # Setup streams from consumers and multilang
        read_streams = consumer_hash.keys.concat [stdout, ml_output]

        # Handshake
        handshake(ml_input, ml_output)

        # Begin cycle
        begin_cycle(ml_input, ml_output)
        emitted = false
        write_message(ml_input, NEXT_MESSAGE)
        multilang_queue = []
        # NOTE(review): the policy is taken from the runner options; the
        # node's own "end_cycle_policy" was read earlier in the old code but
        # always overwritten by this value before use - confirm which one is
        # meant to win.
        end_cycle_policy = @__options[:end_cycle_policy]
        end_cycle_received = false

        # Receive and handle messages
        loop do

          # Read from a stream
          rs = select_read_streams(read_streams)
          rs.each do |r|

            # Read stdout straight to user
            if r == stdout && consumers_running > 0
              line = r.gets
              if line.nil?
                # Child closed its stdout: stop polling it
                read_streams.delete(r)
              else
                cdisplay("log: #{line.sub(/\n/, "")}")
              end
              next
            end

            obj = read_message(r)
            next if obj.nil?

            if obj["command"]
              case obj["command"]

              # Multilang emitted a tuple
              when "emit"
                stream = obj['stream']

                # Check for null emit: a nil tuple, or a tuple with a nil
                # value, ends the cycle unless the policy is "explicit"
                if end_cycle_policy != "explicit"
                  if obj['tuple'].nil?
                    end_cycle_received = true
                  else
                    end_cycle_received = obj['tuple'].values.any? { |v| v.nil? }
                  end
                  next if end_cycle_received
                end

                # Valid emit
                emitted = true

                # Send or enqueue tuple for each consumer
                tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
                @__emit_queues[stream].each_pair do |consumer, emitter|
                  if emitter[:ready]
                    emit_consumer_tuple(stream, consumer, tuple_json)
                  else
                    emitter[:write_queue] << tuple_json
                  end
                end

              # Consumer is ready for a message
              when "next"
                stream = consumer_hash[r][:stream]
                consumer = consumer_hash[r][:consumer]
                @__emit_queues[stream][consumer][:ready] = true
                tuple_json = get_consumer_tuple(stream, consumer)

                # End cycle for consumer if it has processed all tuples
                if tuple_json.nil? && end_cycle_received
                  write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
                  consumers_running -= 1
                  break if consumers_running == 0
                elsif !tuple_json.nil?
                  # Emit tuple to consumer
                  emit_consumer_tuple(stream, consumer, tuple_json)
                  emitted = true
                end

              # Multilang is done emitting a group of tuples
              when "done"
                # End cycle if no tuples were emitted
                if !emitted && end_cycle_policy == "null_emit"
                  end_cycle_received = true
                else
                  emitted = false
                end

                if !multilang_queue.empty?
                  # Send the next tuple to multilang
                  write_message(ml_input, multilang_queue.shift)
                elsif !end_cycle_received
                  # Request next tuple from multilang
                  write_message(ml_input, NEXT_MESSAGE)
                else
                  # No more messages to send: end cycle for ready consumers
                  @__emit_queues.each_pair do |queue_stream, consumers|
                    consumers.each_pair do |consumer, emitter|
                      next unless emitter[:ready]
                      write_message(get_write_stream(queue_stream, consumer), END_CYCLE_MESSAGE)
                      consumers_running -= 1
                      break if consumers_running == 0
                    end
                  end
                end

              # Multilang sent an error message
              when "fail"
                cdisplay("ERROR : #{obj['msg']}")

              # Multilang sent a log message
              when "log"
                cdisplay "LOG: #{obj['msg']}"

              # Multilang sent signal to end the cycle
              when "end_cycle"
                if end_cycle_policy != "explicit"
                  cdisplay "received end_cycle command for non explicit policy"
                  next
                end
                end_cycle_received = true
              end

            # Multilang sent a ping
            elsif obj['ping']
              write_message(ml_input, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
            end
          end

          # Exit after ending consumer cycles
          return if consumers_running == 0
        end
      rescue Errno::EIO
        cdisplay("Errno:EIO error")
      ensure
        # cleanup
        pid = wait_thread[:pid]
        ml_input.close
        ml_output.close if ml_output
        File.delete(fifo_path) if File.exist?(fifo_path)
        stdout.close
        stderr.close
        begin
          Process.kill('INT', pid)
        rescue Errno::ESRCH
          # the child has already exited
        end
      end
    end
  end
end
392
+
393
+
394
# Runs an "each" operation: tuples received from the consumee are passed to
# the user's script (spawned as a multilang child process), and every tuple
# the script emits is relayed to the consumers of the emitted stream. When
# the consumee signals end_cycle and the script has answered "done" for every
# tuple it was given, the consumers' cycles are ended.
#
# Fixes relative to the previous revision:
# * the ping reply was written to an undefined `to_ml`; it now goes to the
#   child's stdin;
# * an unparsable message (read_message returning nil) and EOF on the child's
#   stdout no longer raise NoMethodError;
# * File.exists? (removed in Ruby 3.2) is replaced by File.exist?;
# * cleanup tolerates a FIFO that was never opened and a child that has
#   already exited;
# * the `rescue PTY::ChildExited` clause was dropped: nothing here raises it
#   and evaluating the constant fails when PTY is not loaded.
#
# NOTE(review): multilang_queue is only appended to when it is already
# non-empty, so in practice every tuple is written to the child immediately -
# confirm whether real queueing was intended.
def self.run_each
  # Index streams and consumers by their pipes for lookup
  consumer_hash = {}
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
      consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
    end
  end

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Setup multilang pipe
  ml_pipe = "#{@__name}_pipe"
  fifo_path = "#{ml_pipe}.in"
  File.delete(fifo_path) if File.exist?(fifo_path)
  File.mkfifo(fifo_path)

  # Sends end_cycle to every consumer that is currently ready and returns the
  # updated count of running consumers.
  end_ready_consumers = lambda do |running|
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_pair do |consumer, emitter|
        next unless emitter[:ready]
        write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
        running -= 1
        break if running == 0
      end
    end
    running
  end

  cmd = command("--execute_live --name #{@__name} --pipe #{ml_pipe}")

  # Start the operation...
  Open3.popen3(cmd) do |ml_input, stdout, stderr, wait_thread|
    ml_output = nil
    begin

      # Multilang output tuples
      ml_output = File.open(fifo_path, "r+")

      # Setup streams from consumers, multilang, and the consumee
      read_streams = consumer_hash.keys.concat [@__consumee[:rd_child], ml_output, stdout]

      # Handshake
      handshake(ml_input, ml_output)

      # Begin cycle
      multilang_queue = []
      multilang_count = 0 # tuples handed to multilang that are not "done" yet
      end_cycle_received = false

      # Receive and handle messages
      loop do

        # Read from a stream
        rs = select_read_streams(read_streams)
        rs.each do |r|

          # Read STDOUT from program straight to user
          if r == stdout
            line = r.gets
            if line.nil?
              # Child closed its stdout: stop polling it
              read_streams.delete(r)
            else
              cdisplay("log: #{line.sub(/\n/, "")}")
            end
            next
          end

          # Receive an object
          obj = read_message(r)
          next if obj.nil?

          if obj["command"]
            case obj["command"]

            # Multilang emitted a tuple
            when "emit"
              stream = obj["stream"]

              # Send or enqueue tuple for each consumer
              tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
              @__emit_queues[stream].each_pair do |consumer, emitter|
                if emitter[:ready]
                  emit_consumer_tuple(stream, consumer, tuple_json)
                else
                  emitter[:write_queue] << tuple_json
                end
              end

            # Consumer is ready for a message
            when "next"
              stream = consumer_hash[r][:stream]
              consumer = consumer_hash[r][:consumer]

              @__emit_queues[stream][consumer][:ready] = true
              tuple_json = get_consumer_tuple(stream, consumer)

              # End cycle for consumer if it has processed all tuples
              if tuple_json.nil? && end_cycle_received
                write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
                consumers_running -= 1
                break if consumers_running == 0
              elsif !tuple_json.nil?
                # Emit tuple to consumer
                emit_consumer_tuple(stream, consumer, tuple_json)
              end

            # Multilang is done emitting a group of tuples
            when "done"
              multilang_count -= 1

              if !multilang_queue.empty?
                # Send the next tuple to multilang
                write_message(ml_input, multilang_queue.shift)
              elsif !end_cycle_received
                # Request next tuple from consumee
                write_message(@__consumee[:wr_child], NEXT_MESSAGE)
              elsif multilang_count == 0
                # No more messages to send: end cycle for ready consumers
                consumers_running = end_ready_consumers.call(consumers_running)
              end

            # Multilang sent an error message
            when "fail"
              cdisplay("ERROR : #{obj['msg']}")

            # Multilang sent a log message
            when "log"
              cdisplay "LOG: #{obj['msg']}"

            # Consumee operation sent signal to end_cycle
            when "end_cycle"
              end_cycle_received = true
              if multilang_count == 0
                consumers_running = end_ready_consumers.call(consumers_running)
              end
            end

          # Received a tuple from consumee
          elsif obj['tuple']

            # Send or enqueue to multilang
            multilang_count += 1
            if multilang_queue.empty?
              write_message(ml_input, obj.to_json)
            else
              multilang_queue << obj.to_json
            end

          # Multilang sent a ping
          elsif obj['ping']
            write_message(ml_input, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
          end
        end

        # Exit after ending consumer cycles
        break if consumers_running == 0
      end
    rescue Errno::EIO
      cdisplay("Errno:EIO error")
    ensure
      # cleanup
      pid = wait_thread[:pid]
      ml_input.close
      ml_output.close if ml_output
      File.delete(fifo_path) if File.exist?(fifo_path)
      stdout.close
      stderr.close
      begin
        Process.kill('INT', pid)
      rescue Errno::ESRCH
        # the child has already exited
      end
    end
  end
end
601
+
602
+
603
# Runs a "group_by" operation in three phases:
#
# 1. Collect: every tuple from the consumee is bucketed by the values of the
#    node's 'group_by' fields (column aliases are resolved to concrete names),
#    and the next tuple is requested.
# 2. Aggregate: once the consumee sends end_cycle, each bucket is replayed to
#    the multilang child as begin_group / aggregate... / end_group, one
#    message at a time; the tuples the child emits are queued for every
#    consumer.
# 3. Drain: after one emit per group has been received, queued tuples are
#    handed to the consumers one per "next" request, and each consumer's
#    cycle is ended when its queue is empty.
#
# Fixes relative to the previous revision:
# * with no grouped tuples the method waited forever for an emit; it now goes
#   straight to ending the consumers' cycles;
# * the drain loop returned as soon as one select round emitted nothing, which
#   could abandon other consumers; it now runs until every consumer is ended;
# * an unparsable message and EOF on the child's stdout no longer raise
#   NoMethodError;
# * File.exists? (removed in Ruby 3.2) is replaced by File.exist?;
# * cleanup tolerates a FIFO that was never opened and a child that has
#   already exited;
# * the `rescue PTY::ChildExited` clause was dropped: nothing here raises it
#   and evaluating the constant fails when PTY is not loaded.
#
# NOTE(review): emitted tuples are queued for the consumers of every stream,
# not only obj['stream'] as run_each does - behavior kept, confirm intent.
def self.run_group_by
  # Index streams and consumers by their pipes for lookup
  consumer_hash = {}
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
      consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
    end
  end

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Setup groups
  group_by = @__node['group_by']
  group_tuples = {}
  emit_count = 0 # how many emits are still expected once groups are sent to multilang
  tuple_queue = [] # messages waiting to be written to multilang

  # Setup multilang pipe
  ml_pipe = "#{@__name}_pipe"
  fifo_path = "#{ml_pipe}.in"
  File.delete(fifo_path) if File.exist?(fifo_path)
  File.mkfifo(fifo_path)

  cmd = command("--execute_live --name #{@__name} --pipe #{ml_pipe}")

  # Start the operation...
  Open3.popen3(cmd) do |ml_input, stdout, stderr, wait_thread|
    ml_output = nil
    begin

      # Multilang output tuples
      ml_output = File.open(fifo_path, "r+")

      # Setup streams from consumers, multilang, and the consumee
      read_streams = consumer_hash.keys.concat [stdout, ml_output, @__consumee[:rd_child]]

      # Handshake
      handshake(ml_input, ml_output)

      # Begin cycle
      end_cycle_received = false
      finished_emitting = false

      loop do

        # Read from a stream
        rs = select_read_streams(read_streams)
        rs.each do |r|

          # Read STDOUT from program straight to user
          if r == stdout
            line = r.gets
            if line.nil?
              # Child closed its stdout: stop polling it
              read_streams.delete(r)
            else
              cdisplay("log: #{line.sub(/\n/, "")}")
            end
            next
          end

          # Receive an object
          obj = read_message(r)
          next if obj.nil?

          if obj["command"]
            case obj["command"]

            # Multilang handled the last message: feed it the next one
            when "done"
              if end_cycle_received
                tuple_json = tuple_queue.shift
                write_message(ml_input, tuple_json) unless tuple_json.nil?
              end
              next

            # Begin aggregation
            when "end_cycle"
              end_cycle_received = true
              read_streams = [ml_output]

              group_tuples.each do |group_tuple, tuples|
                tuple_queue << "{\"command\": \"begin_group\", \"tuple\": #{group_tuple.to_json}, \"meta\":{}}\n"
                tuples.each do |t|
                  tuple_queue << "{\"command\": \"aggregate\", #{t}}\n"
                end
                tuple_queue << "{\"command\": \"end_group\"}\n"

                # keep track of how many emits are expected
                emit_count += 1
              end

              if tuple_queue.empty?
                # Nothing was grouped, so multilang will never emit: go
                # straight to ending the consumers' cycles.
                finished_emitting = true
                break
              end
              write_message(ml_input, tuple_queue.shift)

            # Multilang has emitted a grouped tuple
            when "emit"
              emit_count -= 1

              # Enqueue for consumers
              tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
              @__emit_queues.each_value do |consumers|
                consumers.each_value { |emitter| emitter[:write_queue] << tuple_json }
              end

              # End cycle when done emitting
              if end_cycle_received && emit_count == 0
                finished_emitting = true
                break
              elsif end_cycle_received
                tuple_json = tuple_queue.shift
                write_message(ml_input, tuple_json) unless tuple_json.nil?
              end
            end

          # Received a tuple from operation
          elsif obj["tuple"]
            tuple = obj["tuple"].to_json
            meta = obj["meta"].to_json
            column_aliases = obj["column_aliases"] || {}
            aliases = Hash[column_aliases.map { |h| [h["alias"], h["concrete_name"]] }]
            gt = {}

            # Get the column names to group on
            group_by.each do |field|
              field_name = aliases[field] || field
              gt[field] = obj["tuple"][field_name]
            end

            msg_no_brackets = "\"tuple\": #{tuple}, \"meta\": #{meta}, \"column_aliases\": #{column_aliases.to_json}"

            # Group tuple into existing group or create new group
            (group_tuples[gt] ||= []) << msg_no_brackets

            # Ask operation for next tuple
            write_message(@__consumee[:wr_child], NEXT_MESSAGE)
          end
        end

        # Send tuples to consumers
        if finished_emitting && consumers_running > 0

          # Read streams of the consumers whose cycle is still open
          pending = consumer_hash.keys

          # Send first tuple, or end the cycle right away when there is none
          @__emit_queues.each_pair do |stream, consumers|
            consumers.each_key do |consumer|
              tuple_json = get_consumer_tuple(stream, consumer)
              if tuple_json.nil?
                write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
                consumers_running -= 1
                pending.delete(@__consumer_pipes[stream][consumer][:rd_parent])
              else
                emit_consumer_tuple(stream, consumer, tuple_json)
              end
            end
          end

          # Send tuples to consumers as they ask for them
          until pending.empty?
            select_read_streams(pending).each do |r|

              # Read from consumer
              msg = read_message(r)
              next if msg.nil? || msg["command"] != "next"

              consumer = consumer_hash[r][:consumer]
              stream = consumer_hash[r][:stream]

              # Consumer is ready for next message
              @__emit_queues[stream][consumer][:ready] = true
              tuple_json = get_consumer_tuple(stream, consumer)

              if tuple_json.nil?
                # All messages have been sent to this consumer: end its cycle
                write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
                consumers_running -= 1
                pending.delete(r)
              else
                # Emit tuple to consumer
                emit_consumer_tuple(stream, consumer, tuple_json)
              end
            end
          end
          return

        # Exit after ending all consumer cycles
        elsif consumers_running == 0
          break
        end
      end
    rescue Errno::EIO
      cdisplay("Errno:EIO error")
    ensure
      # cleanup
      pid = wait_thread[:pid]
      ml_input.close
      ml_output.close if ml_output
      File.delete(fifo_path) if File.exist?(fifo_path)
      stdout.close
      stderr.close
      begin
        Process.kill('INT', pid)
      rescue Errno::ESRCH
        # the child has already exited
      end
    end
  end
end
836
+
837
+
838
# Runs a "filter" operation. Filters use exactly the same message loop as
# "each" operations, so this simply delegates to run_each.
def self.run_filter
  run_each
end
841
+
842
+
843
+ # Collect tuples from the consumee until end_cycle, then display them as a table (and optionally write a CSV file)
844
# Runs a "sink" operation: reads tuples from the consumee (requesting the
# next one after each) until an end_cycle command arrives, then displays the
# collected rows as a table titled with the operation name. When
# `@__options[:output]` is set, the rows are also written to "<output>.csv".
#
# The header row is taken from the keys of the first tuple and its meta.
#
# Fixes relative to the previous revision:
# * an unparsable message (read_message returning nil) is skipped instead of
#   raising NoMethodError;
# * row values are concatenated rather than flattened, so an array-valued
#   cell no longer spills into neighbouring columns;
# * per-row errors are rescued as StandardError rather than Exception, so
#   interrupts are not swallowed;
# * the CSV file is written through a block so the handle is always closed.
def self.run_sink
  output = @__options[:output]
  messages = []
  loop do
    # Read messages
    obj = read_message(@__consumee[:rd_child])
    next if obj.nil?

    # Add row
    if obj['tuple']
      messages << obj
      if @__options[:interactive]
        display_json = Hash[obj['tuple'].map { |k, v| [truncate_message(k), truncate_message(v)] }].to_json
        cdisplay "received #{display_json}"
      end
      write_message(@__consumee[:wr_child], NEXT_MESSAGE)

    # End cycle
    elsif obj['command'] == "end_cycle"
      break
    end
  end

  if messages.empty?
    cdisplay "empty relation"
    return
  end

  # Build table
  table = Terminal::Table.new :title => @__name
  csv_str = CSV.generate do |csv|
    header_written = false
    messages.each do |message|
      begin
        t = message['tuple']
        m = message['meta'] || {}
        next unless t

        unless header_written
          keys = t.keys + m.keys
          csv << keys
          table << keys
          table << :separator
          header_written = true
        end

        vals = t.values + m.values
        csv << vals
        # Cells are cut to 101 characters for display only
        table << vals.map { |v| "#{v}"[0..100] }
      rescue StandardError => e
        cdisplay e
      end
    end
  end

  # Output table
  cdisplay("\n#{table.to_s}")
  cdisplay ""

  # Write file
  if output
    filename = "#{output}.csv"
    File.open(filename, "w") { |f| f.write(csv_str) }
    cdisplay("output written to #{filename}")
  end
end
917
+
918
+
919
# NOTE: a bare `private` only affects instance methods defined with plain
# `def`; the `def self.` helpers below remain publicly callable.
private

# Number of bytes requested per sysread in read_message.
BUFSIZE = 8192

# Each reading pipe has a read buffer (raw bytes not yet forming a complete
# message) and a queue of parsed messages, both keyed by the IO object.
@__read_buffers = {}
@__read_buffered_messages = {}
926
+
927
+
928
+ # Return available reading streams
929
# Returns the streams that can be read without blocking.
#
# Streams that already have parsed messages queued by read_message are
# returned immediately (IO.select cannot see those). Only when no stream has a
# queued message does this block in IO.select until at least one of
# `read_streams` becomes readable.
#
# @param read_streams [Array<IO>] streams to wait on
# @return [Array<IO>] the subset that is ready
def self.select_read_streams(read_streams)
  @__read_buffered_messages ||= {}

  with_queued_messages = read_streams.select do |read_stream|
    queue = (@__read_buffered_messages[read_stream] ||= [])
    !queue.empty?
  end
  return with_queued_messages unless with_queued_messages.empty?

  readable, = IO.select(read_streams, [], [])
  readable
end
943
+
944
+
945
+ # Read a JSON message
946
# Reads one JSON message from `read_stream`.
#
# Messages on a pipe are separated by ENDMARKER. Bytes are accumulated in a
# per-stream buffer until at least one complete message is present; every
# complete message in the buffer is parsed and queued, the incomplete tail is
# kept for the next call, and the first queued message is returned. Calls
# that find a message already queued return it without touching the stream.
#
# A segment that is not valid JSON (including an empty one) is reported via
# cdisplay and skipped. The previous split/scan bookkeeping could hand nil to
# JSON.parse for empty or back-to-back markers and raise TypeError.
#
# @param read_stream [IO] stream to read from (sysread is used)
# @return [Object, nil] the parsed message, or nil when every complete
#   message just read was invalid JSON
# @raise [EOFError] when the stream ends before a complete message arrives
def self.read_message(read_stream)
  @__read_buffers ||= {}
  @__read_buffered_messages ||= {}

  queue = (@__read_buffered_messages[read_stream] ||= [])
  return queue.shift unless queue.empty?

  # Read until the buffer holds at least one complete message
  buffer = (@__read_buffers[read_stream] ||= String.new)
  buffer << read_stream.sysread(BUFSIZE) until buffer.include?(ENDMARKER)

  # With a negative limit, split keeps the trailing piece, so the last
  # element is always the (possibly empty) incomplete tail.
  *complete, remainder = buffer.split(ENDMARKER, -1)
  complete.each do |raw|
    begin
      queue << JSON.parse(raw)
    rescue JSON::ParserError
      cdisplay "READMESSAGE: invalid JSON #{raw}"
    end
  end

  @__read_buffers[read_stream] = remainder
  queue.shift
end
998
+
999
+
1000
+ # Write JSON message
1001
# Writes one message to `write_stream`: surrounding whitespace is stripped
# from `msg`, ENDMARKER is appended as the frame terminator, and the stream
# is flushed so the reader sees the message immediately.
#
# @param write_stream [IO] destination stream
# @param msg [String] JSON text of the message
def self.write_message(write_stream, msg)
  framed = msg.strip + ENDMARKER
  write_stream.write(framed)
  write_stream.flush
end
1006
+
1007
+ # Format a message for display
1008
# Shortens a string for display. Strings longer than 50 characters are cut
# and suffixed with "..." so that the result is exactly 50 characters long;
# shorter strings and non-String values are returned unchanged.
#
# The previous slice `msg[0..t_length-3]` kept 48 characters, producing
# 51-character results.
#
# @param msg [Object] value to format
# @return [Object] the truncated string, or `msg` itself
def self.truncate_message(msg)
  return msg unless msg.instance_of?(String)

  t_length = 50 # truncates entries to this length
  ellipsis = "..."
  return msg if msg.length <= t_length

  msg[0, t_length - ellipsis.length] + ellipsis
end
1015
+
1016
+
1017
+ # Handshake connection to multilang
1018
# Performs the initial handshake with the multilang process: sends
# HANDSHAKE_MESSAGE and waits for one reply.
#
# Errors are reported through cdisplay and then re-raised. Only StandardError
# is intercepted, so signals and exit requests pass through untouched.
#
# @param write_stream [IO] stream the handshake is written to
# @param read_stream [IO] stream the reply is read from
# @return [Object, nil] the reply as returned by read_message
def self.handshake(write_stream, read_stream)
  write_message(write_stream, HANDSHAKE_MESSAGE)
  read_message(read_stream)
rescue => e
  cdisplay(e)
  cdisplay("Error handshaking node")
  raise
end
1028
+
1029
+
1030
+ # Instruct multilang to begin cycle
1031
# Instructs multilang to begin a cycle: sends BEGIN_CYCLE_MESSAGE and expects
# a {"command": "done"} reply.
#
# This is best-effort: any StandardError (including an unexpected or missing
# reply) is shown through cdisplay and not propagated. Signals and exit
# requests are no longer swallowed, since only StandardError is rescued.
#
# @param write_stream [IO] stream the command is written to
# @param read_stream [IO] stream the reply is read from
def self.begin_cycle(write_stream, read_stream)
  write_message(write_stream, BEGIN_CYCLE_MESSAGE)
  msg = read_message(read_stream)
  unless msg.is_a?(Hash) && msg["command"] == "done"
    raise "Invalid response from multilang #{msg}"
  end
rescue => e
  cdisplay(e)
end
1043
+
1044
+
1045
+ # Send object to every consumer of the operation, regardless of stream
1046
# Writes `json_obj` to the parent-side write pipe of every consumer of every
# stream, and reports each delivery through cdisplay.
#
# @param json_obj [String] JSON text to send
def self.send_to_consumers(json_obj)
  @__consumer_pipes.each_value do |consumers|
    consumers.each_pair do |consumer, pipe|
      write_message(pipe[:wr_parent], json_obj)
      cdisplay "emitted #{json_obj} to #{consumer}"
    end
  end
end
1054
+
1055
+
1056
+ # Get the write pipe of the stream consumer
1057
# Looks up the parent-side write pipe (:wr_parent) for `consumer` on `stream`.
#
# @param stream [Object] stream key in the consumer pipe table
# @param consumer [Object] consumer key within that stream
# @return [IO] the pipe used to write to that consumer
def self.get_write_stream(stream, consumer)
  pipes = @__consumer_pipes[stream][consumer]
  pipes[:wr_parent]
end
1060
+
1061
+
1062
+ # Get tuple for sending to consumer of stream
1063
# Removes and returns the oldest tuple queued for `consumer` on `stream`.
#
# @param stream [Object] stream key in the emit queue table
# @param consumer [Object] consumer key within that stream
# @return [String, nil] the queued tuple JSON, or nil when the queue is empty
def self.get_consumer_tuple(stream, consumer)
  queue = @__emit_queues[stream][consumer][:write_queue]
  queue.shift
end
1066
+
1067
+
1068
+ # Emit tuple_json to the consumer of a stream
1069
# Sends `tuple_json` to `consumer` on `stream`, marks that consumer as not
# ready (it must answer "next" before it gets another tuple) and reports the
# emission with a truncated rendering of the tuple.
#
# If `tuple_json` is not valid JSON, "Error: invalid JSON" is displayed, the
# text is still written to the consumer, and the report shows an empty tuple.
#
# @param stream [Object] stream key
# @param consumer [Object] consumer key within that stream
# @param tuple_json [String] JSON text with a "tuple" object
def self.emit_consumer_tuple(stream, consumer, tuple_json)
  display_json = nil
  begin
    shortened = JSON.parse(tuple_json)["tuple"].map { |k, v| [truncate_message(k), truncate_message(v)] }
    display_json = Hash[shortened].to_json
  rescue JSON::ParserError
    cdisplay "Error: invalid JSON"
  end

  write_message(get_write_stream(stream, consumer), tuple_json)
  @__emit_queues[stream][consumer][:ready] = false
  cdisplay "emitted tuple #{display_json} to #{consumer} "
end
1080
+
1081
+
1082
+ # Build a tuple and format into JSON
1083
# Builds the JSON text of a tuple message.
#
# Fields of `tuple` are split as follows: "id" is dropped; "confidence",
# "since" and "source" are moved into the meta hash; everything else becomes
# a tuple value.
#
# The caller's `meta` hash is copied before the meta fields are merged in;
# the previous implementation wrote into the hash that was passed in.
#
# @param tuple [Hash] field name => value
# @param meta [Hash, nil] extra meta information (defaults to {})
# @param column_aliases [Object, nil] passed through unchanged (defaults to {})
# @return [String] JSON with "tuple", "meta" and "column_aliases" keys
def self.build_tuple_json(tuple, meta = nil, column_aliases = nil)
  merged_meta = meta ? meta.dup : {}
  column_aliases ||= {}
  values = {}

  tuple.each do |key, value|
    case key
    when "id"
      next
    when "confidence", "since", "source"
      merged_meta[key] = value
    else
      values[key] = value
    end
  end

  {"tuple" => values, "meta" => merged_meta, "column_aliases" => column_aliases}.to_json
end
1099
+
1100
+
1101
+ # Construct a multilang command
1102
# Builds the shell command line that runs the user's script for the current
# operation, according to `@__meta["language"]` ("ruby", "python" or "js").
#
# For python, the interpreter from "<dir>/vEnv/bin/python" is used when that
# directory exists. The PYTHONPATH / NODE_PATH values are hard-coded
# "~/zb1/multilang/..." paths.
#
# When the meta information is missing or the language is not recognised, a
# message is displayed and nil is returned. Missing meta used to display the
# message and then raise NoMethodError on nil.
#
# NOTE(review): `arg` and the directory are interpolated into a shell string
# with plain double quotes only; names containing quotes or `$` are not
# escaped.
#
# @param arg [String] arguments appended after the script path
# @param ignore_stderr [Boolean] when true, append "2> /dev/null"
# @return [String, nil] the command line, or nil when it cannot be built
def self.command(arg, ignore_stderr = false)
  if @__meta.nil?
    cdisplay("could not extract meta information. missing zillabyte.conf.yml?")
    return nil
  end

  full_script = File.join(@__dir, @__meta["script"])
  stderr_opt = "2> /dev/null" if ignore_stderr
  enter_dir = "cd \"#{@__dir}\";"
  run_script = "\"#{full_script}\" #{arg} #{stderr_opt}"

  case @__meta["language"]
  when "ruby"
    # Execute in the bundler context
    "#{enter_dir} unset BUNDLE_GEMFILE; ZILLABYTE_HARNESS=1 bundle exec ruby #{run_script}"
  when "python"
    # Prefer the application's virtualenv interpreter when there is one
    python = File.directory?("#{@__dir}/vEnv") ? "\"#{@__dir}/vEnv/bin/python\"" : "python"
    "#{enter_dir} PYTHONPATH=~/zb1/multilang/python/Zillabyte #{python} #{run_script}"
  when "js"
    "#{enter_dir} NODE_PATH=~/zb1/multilang/js/src/lib #{Zillabyte::API::NODEJS_BIN} #{run_script}"
  else
    cdisplay("no language specified")
    nil
  end
end
1125
+
1126
+
1127
+ # Display a colored, formatted message
1128
# Displays `msg` on behalf of this operation by delegating to the tester's
# cdisplay, passing the operation name so the output can be attributed.
#
# @param msg [Object] message to display
def self.cdisplay(msg)
  @__tester.cdisplay(@__name, msg)
end
1131
+
1132
+
1133
+ end