zillabyte-cli 0.0.24 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +6 -14
  2. data/lib/#zillabyte-cli.rb# +5 -0
  3. data/lib/zillabyte/api/apps.rb +16 -132
  4. data/lib/zillabyte/api/components.rb +115 -0
  5. data/lib/zillabyte/api/flows.rb +121 -0
  6. data/lib/zillabyte/api/keys.rb +70 -0
  7. data/lib/zillabyte/api.rb +15 -2
  8. data/lib/zillabyte/auth.rb +43 -16
  9. data/lib/zillabyte/cli/#logs.rb# +12 -0
  10. data/lib/zillabyte/cli/#repl.rb# +43 -0
  11. data/lib/zillabyte/cli/apps.rb +52 -893
  12. data/lib/zillabyte/cli/auth.rb +3 -8
  13. data/lib/zillabyte/cli/base.rb +28 -7
  14. data/lib/zillabyte/cli/components.rb +245 -0
  15. data/lib/zillabyte/cli/flows.rb +549 -0
  16. data/lib/zillabyte/cli/git.rb +38 -0
  17. data/lib/zillabyte/cli/help.rb +11 -4
  18. data/lib/zillabyte/cli/keys.rb +177 -0
  19. data/lib/zillabyte/cli/query.rb +0 -1
  20. data/lib/zillabyte/cli/relations.rb +2 -1
  21. data/lib/zillabyte/cli/templates/{js → apps/js}/simple_function.js +0 -0
  22. data/lib/zillabyte/cli/templates/{js → apps/js}/zillabyte.conf.yaml +0 -0
  23. data/lib/zillabyte/cli/templates/apps/python/app.py +17 -0
  24. data/lib/zillabyte/cli/templates/{python → apps/python}/requirements.txt +0 -0
  25. data/lib/zillabyte/cli/templates/{python → apps/python}/zillabyte.conf.yaml +1 -1
  26. data/lib/zillabyte/cli/templates/{ruby → apps/ruby}/Gemfile +0 -0
  27. data/lib/zillabyte/cli/templates/{ruby → apps/ruby}/app.rb +1 -1
  28. data/lib/zillabyte/cli/templates/{ruby → apps/ruby}/zillabyte.conf.yaml +0 -0
  29. data/lib/zillabyte/cli/templates/python/{simple_function.py → #simple_function.py#} +3 -6
  30. data/lib/zillabyte/common/session.rb +3 -1
  31. data/lib/zillabyte/helpers.rb +64 -1
  32. data/lib/zillabyte/runner/app_runner.rb +226 -0
  33. data/lib/zillabyte/runner/component_operation.rb +529 -0
  34. data/lib/zillabyte/runner/component_runner.rb +244 -0
  35. data/lib/zillabyte/runner/multilang_operation.rb +1133 -0
  36. data/lib/zillabyte/runner/operation.rb +11 -0
  37. data/lib/zillabyte/runner.rb +6 -0
  38. data/lib/zillabyte-cli/version.rb +1 -1
  39. data/zillabyte-cli.gemspec +1 -0
  40. metadata +83 -52
@@ -0,0 +1,1133 @@
1
+ require 'json'
2
+ require 'mkfifo'
3
+ require "zillabyte/runner/component_operation"
4
+
5
+
6
+ # Emulate a multilang operation
7
+ class Zillabyte::Runner::MultilangOperation
8
+
9
# Wire-protocol messages exchanged with multilang processes and with the
# neighbouring operations. They are frozen so that no caller can mutate the
# shared string objects by accident.
HANDSHAKE_MESSAGE = "{\"pidDir\": \"/tmp\"}\n".freeze
DONE_MESSAGE = "{\"command\": \"done\"}\n".freeze
NEXT_MESSAGE = "{\"command\": \"next\"}\n".freeze
BEGIN_CYCLE_MESSAGE = "{\"command\": \"begin_cycle\"}\n".freeze
END_CYCLE_MESSAGE = "{\"command\": \"end_cycle\"}\n".freeze
# A pong reply is PONG_PREFIX + timestamp + PONG_SUFFIX
PONG_PREFIX = "{\"pong\": \"".freeze
PONG_SUFFIX = "\"}\n".freeze
# Frame delimiter: write_message appends it, read_message splits on it
ENDMARKER = "\nend\n".freeze
17
+
18
# Entry point for emulating one operation of a flow.
#
# Stores the arguments in class-level instance variables (the helper methods
# read them from there), creates one emit queue per consumer of every output
# stream, and dispatches on the node's "type".
#
# node           - Hash describing the operation; "name" and "type" are read here.
# dir            - directory of the user's code (used when building the command).
# consumee       - pipes towards the upstream operation.
# consumer_pipes - { stream => { consumer => pipes } } for downstream operations.
# tester         - object used for display (and for queries in source mode).
# meta           - meta information (script, language) used to build the command.
# options        - Hash of runner options; :output_type is read here.
#
# Any StandardError raised by the operation is displayed (message and
# backtrace) instead of being propagated.
def self.run(node, dir, consumee, consumer_pipes, tester, meta, options = {})
  @__node = node
  @__name = node["name"]
  @__type = node["type"]
  @__dir = dir
  @__consumee = consumee
  @__consumer_pipes = consumer_pipes
  @__tester = tester

  @__meta = meta
  @__options = options
  @__output_type = options[:output_type]

  # Each consumer of a stream gets its own queue and "ready" flag
  @__emit_queues = {}
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      @__emit_queues[stream] ||= {}
      @__emit_queues[stream][consumer] = {:write_queue => [], :ready => true}
    end
  end

  begin
    case @__type
    when "source"
      run_source
    when "group_by"
      run_group_by
    when "each"
      run_each
    when "filter"
      run_filter
    when "component"
      # Forward the caller's options. The previous `options = {}` argument
      # was an assignment expression that always passed an empty hash.
      Zillabyte::Runner::ComponentOperation.run(node, dir, consumee, consumer_pipes, tester, meta, options)
    when "sink"
      run_sink
    else
      cdisplay("invalid operation type #{@__type}")
    end
  rescue => e
    cdisplay e.message
    cdisplay e.backtrace
  end
end
63
+
64
+
65
# Run a "source" operation. One of three modes is chosen, in this order:
#
#   1. interactive (@__options[:interactive]) - JSON tuples are read line by
#      line from the consumee pipe and forwarded to every consumer;
#   2. relation (@__node['matches'] or @__node['relation']) - rows are fetched
#      through the tester and handed out one tuple per consumer "next" request;
#   3. custom - the user's multilang process is spawned and its emits are
#      relayed to the consumers of the emitted stream.
def self.run_source()

  # Interactive source
  if @__options[:interactive]
    loop do
      msg = @__consumee[:rd_child].gets

      # gets returns nil at end of input; stop instead of feeding nil to
      # JSON.parse (which raises TypeError, not JSON::ParserError)
      break if msg.nil?

      begin
        tuple = JSON.parse(msg)
      rescue JSON::ParserError
        cdisplay "Error: invalid JSON"
        next
      end

      send_to_consumers(build_tuple_json(tuple))
    end

  # Source from relation
  elsif @__node['matches'] || @__node["relation"]

    # Query API for rows
    matches = @__node['matches'] || (@__node["relation"]["query"])
    cdisplay("Fetching remote data...")
    res = @__tester.query_agnostic(matches)
    rows = res["rows"]
    if rows.nil? || rows.length == 0
      cdisplay("Could not find data that matches your 'matches' clause")
      exit(-1)
    end
    cdisplay("Received #{rows.length} rows!")

    # Enqueue every row for every consumer
    column_aliases = res["column_aliases"]
    rows.each do |tuple|
      tuple_json = build_tuple_json(tuple, nil, column_aliases)
      @__emit_queues.each_value do |consumers|
        consumers.each_value do |emitter|
          emitter[:write_queue] << tuple_json
        end
      end
    end

    # Index streams and consumers by their read pipes for lookup
    consumer_hash = {}
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
        consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
      end
    end

    # Send the first tuple to each consumer without waiting for a request
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        tuple_json = get_consumer_tuple(stream, consumer)
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end

    # Send the remaining tuples as consumers ask for them
    loop do
      rs, _ws, _es = IO.select(consumer_hash.keys, [], [])

      emitted = false
      rs.each do |r|
        msg = read_message(r)

        stream = consumer_hash[r][:stream]
        consumer = consumer_hash[r][:consumer]

        # Consumer is ready for next message
        if msg["command"] && msg["command"] == "next"
          @__emit_queues[stream][consumer][:ready] = true
          tuple_json = get_consumer_tuple(stream, consumer)

          if tuple_json.nil?
            # Queue drained: end the consumer's cycle
            write_stream = get_write_stream(stream, consumer)
            cdisplay "ending cycle for #{consumer}"
            write_message(write_stream, END_CYCLE_MESSAGE)
            write_message(write_stream, DONE_MESSAGE)
          else
            emit_consumer_tuple(stream, consumer, tuple_json)
            emitted = true
          end
        end
      end

      # Exit once a round produced no emit
      return if !emitted
    end

  # Custom source
  else

    # Index streams and consumers by their read pipes for lookup
    consumer_hash = {}
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
        consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
      end
    end

    # Keep track of how many consumers to handle before exiting
    consumers_running = consumer_hash.keys.length

    # Setup multilang pipe (File.exists? was removed in Ruby 3.2)
    ml_pipe = "#{@__name}_pipe"
    File.delete("#{ml_pipe}.in") if File.exist?("#{ml_pipe}.in")
    File.mkfifo("#{ml_pipe}.in")

    # Spawn multilang process
    cmd = command("--execute_live --name #{@__name} --pipe #{ml_pipe}")
    begin
      Open3.popen3(cmd) do |ml_input, stdout, stderr, wait_thread|
        begin
          # Multilang writes its protocol messages to the FIFO
          ml_output = File.open("#{ml_pipe}.in", "r")

          # Streams to poll: consumers, the process' stdout and the FIFO
          read_streams = consumer_hash.keys.concat [stdout, ml_output]

          handshake(ml_input, ml_output)

          # Begin cycle and request the first tuple
          begin_cycle(ml_input, ml_output)
          emitted = false
          write_message(ml_input, NEXT_MESSAGE)
          multilang_queue = []
          # NOTE(review): the policy is taken from the runner options; the
          # node's own "end_cycle_policy" entry was read by the original code
          # but never used - confirm which one is authoritative.
          end_cycle_policy = @__options[:end_cycle_policy]
          end_cycle_received = false

          # Receive and handle messages
          loop do
            rs = select_read_streams(read_streams)
            rs.each do |r|

              # The process' plain stdout goes straight to the user
              if r == stdout && consumers_running > 0
                msg = r.gets
                if msg.nil?
                  # stdout reached EOF: stop polling it
                  read_streams.delete(r)
                  next
                end
                msg = msg.sub(/\n/, "")
                cdisplay("log: #{msg}")
                next
              end

              obj = read_message(r)
              next if obj.nil?

              if obj["command"]
                case obj["command"]

                # Multilang emitted a tuple
                when "emit"
                  stream = obj['stream']

                  # Unless the policy is explicit, a nil tuple or a tuple
                  # containing a nil value signals the end of the cycle
                  if end_cycle_policy != "explicit"
                    if obj['tuple'].nil?
                      end_cycle_received = true
                      # Nothing to forward; falling through would call
                      # build_tuple_json(nil) and crash
                      next
                    end
                    end_cycle_received = obj['tuple'].values.any? { |v| v.nil? }
                    next if end_cycle_received
                  end

                  # Valid emit
                  emitted = true

                  # Send or enqueue the tuple for each consumer of the stream
                  tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
                  @__emit_queues[stream].each_pair do |consumer, emitter|
                    if emitter[:ready]
                      emit_consumer_tuple(stream, consumer, tuple_json)
                    else
                      emitter[:write_queue] << tuple_json
                    end
                  end

                # Consumer is ready for a message
                when "next"
                  stream = consumer_hash[r][:stream]
                  consumer = consumer_hash[r][:consumer]
                  @__emit_queues[stream][consumer][:ready] = true
                  tuple_json = get_consumer_tuple(stream, consumer)

                  if tuple_json.nil? && end_cycle_received
                    # Consumer has processed everything: end its cycle
                    write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
                    consumers_running -= 1
                    break if consumers_running == 0
                  elsif !tuple_json.nil?
                    emit_consumer_tuple(stream, consumer, tuple_json)
                    emitted = true
                  end

                # Multilang is done emitting a group of tuples
                when "done"
                  # Under the null_emit policy a round without emits ends the cycle
                  if !emitted && end_cycle_policy == "null_emit"
                    end_cycle_received = true
                  else
                    emitted = false
                  end

                  if !multilang_queue.empty?
                    # Send the next queued message to multilang
                    write_message(ml_input, multilang_queue.shift)
                  elsif !end_cycle_received
                    # Request the next tuple from multilang
                    write_message(ml_input, NEXT_MESSAGE)
                  else
                    # Cycle is over: end it for every consumer that is waiting
                    @__emit_queues.each_pair do |out_stream, consumers|
                      consumers.each_pair do |consumer, emitter|
                        if emitter[:ready]
                          write_message(get_write_stream(out_stream, consumer), END_CYCLE_MESSAGE)
                          consumers_running -= 1
                          break if consumers_running == 0
                        end
                      end
                    end
                  end

                # Multilang sent an error message
                when "fail"
                  cdisplay("ERROR : #{obj['msg']}")

                # Multilang sent a log message
                when "log"
                  cdisplay "LOG: #{obj['msg']}"

                # Multilang sent signal to end the cycle
                when "end_cycle"
                  if end_cycle_policy != "explicit"
                    cdisplay "received end_cycle command for non explicit policy"
                    next
                  end
                  end_cycle_received = true
                end

              # Multilang sent a ping; answer on its input stream
              elsif obj['ping']
                write_message(ml_input, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
              end
            end

            # Exit after ending consumer cycles
            return if consumers_running == 0
          end
        rescue Errno::EIO
          cdisplay("Errno:EIO error")
        ensure
          # cleanup; ml_output is nil when opening the FIFO itself failed
          pid = wait_thread[:pid]
          ml_input.close
          ml_output.close if ml_output
          File.delete("#{ml_pipe}.in")
          stdout.close
          stderr.close
          Process.kill('INT', pid)
        end
      end
    rescue PTY::ChildExited
      cdisplay("The child process exited!")
    end
  end
end
392
+
393
+
394
# Run an "each" operation: tuples received from the consumee are passed to the
# spawned multilang process, and whatever that process emits is forwarded to
# the consumers of the emitted stream. A new tuple is requested from the
# consumee each time multilang reports "done". Once the consumee has sent
# "end_cycle" and multilang has no tuple in flight, the consumers' cycles are
# ended and the method returns.
def self.run_each()

  # Index streams and consumers by their read pipes for lookup
  consumer_hash = {}
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
      consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
    end
  end

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Ends the cycle of every consumer that is currently waiting for a tuple
  end_ready_consumers = lambda do
    @__emit_queues.each_pair do |out_stream, consumers|
      consumers.each_pair do |consumer, emitter|
        if emitter[:ready]
          write_message(get_write_stream(out_stream, consumer), END_CYCLE_MESSAGE)
          consumers_running -= 1
          break if consumers_running == 0
        end
      end
    end
  end

  # Setup multilang pipe (File.exists? was removed in Ruby 3.2)
  ml_pipe = "#{@__name}_pipe"
  File.delete("#{ml_pipe}.in") if File.exist?("#{ml_pipe}.in")
  File.mkfifo("#{ml_pipe}.in")

  cmd = command("--execute_live --name #{@__name} --pipe #{ml_pipe}")
  begin
    # Start the operation...
    Open3.popen3(cmd) do |ml_input, stdout, stderr, wait_thread|
      begin
        # Multilang writes its protocol messages to the FIFO
        ml_output = File.open("#{ml_pipe}.in", "r+")

        # Streams to poll: consumers, the consumee, the FIFO and stdout
        read_streams = consumer_hash.keys.concat [@__consumee[:rd_child], ml_output, stdout]

        handshake(ml_input, ml_output)

        multilang_queue = []
        multilang_count = 0 # tuples handed to multilang and not yet "done"
        end_cycle_received = false

        # Receive and handle messages
        loop do
          rs = select_read_streams(read_streams)
          rs.each do |r|

            # The process' plain stdout goes straight to the user
            if r == stdout
              msg = r.gets
              if msg.nil?
                # stdout reached EOF: stop polling it
                read_streams.delete(r)
                next
              end
              msg = msg.sub(/\n/, "")
              cdisplay("log: #{msg}")
              next
            end

            # read_message yields nil when nothing parseable arrived
            obj = read_message(r)
            next if obj.nil?

            if obj["command"]
              case obj["command"]

              # Multilang emitted a tuple
              when "emit"
                stream = obj["stream"]

                # Send or enqueue the tuple for each consumer of the stream
                tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
                @__emit_queues[stream].each_pair do |consumer, emitter|
                  if emitter[:ready]
                    emit_consumer_tuple(stream, consumer, tuple_json)
                  else
                    emitter[:write_queue] << tuple_json
                  end
                end

              # Consumer is ready for a message
              when "next"
                stream = consumer_hash[r][:stream]
                consumer = consumer_hash[r][:consumer]

                @__emit_queues[stream][consumer][:ready] = true
                tuple_json = get_consumer_tuple(stream, consumer)

                if tuple_json.nil? && end_cycle_received
                  # Consumer has processed everything: end its cycle
                  write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
                  consumers_running -= 1
                  break if consumers_running == 0
                elsif !tuple_json.nil?
                  emit_consumer_tuple(stream, consumer, tuple_json)
                end

              # Multilang is done with the tuple it was given
              when "done"
                multilang_count -= 1

                if !multilang_queue.empty?
                  # Send the next queued tuple to multilang
                  write_message(ml_input, multilang_queue.shift)
                elsif !end_cycle_received
                  # Request next tuple from consumee
                  write_message(@__consumee[:wr_child], NEXT_MESSAGE)
                elsif multilang_count == 0
                  # Nothing left to process: end cycle for ready consumers
                  end_ready_consumers.call
                end

              # Multilang sent an error message
              when "fail"
                cdisplay("ERROR : #{obj['msg']}")

              # Multilang sent a log message
              when "log"
                cdisplay "LOG: #{obj['msg']}"

              # Consumee operation sent signal to end_cycle
              when "end_cycle"
                end_cycle_received = true
                end_ready_consumers.call if multilang_count == 0
              end

            # Received a tuple from consumee
            elsif obj['tuple']
              multilang_count += 1
              # NOTE(review): a tuple is only queued when the queue already
              # holds something, and nothing else fills the queue - confirm
              # whether "multilang busy" was the intended condition.
              if multilang_queue.empty?
                write_message(ml_input, obj.to_json)
              else
                multilang_queue << obj.to_json
              end

            # Multilang sent a ping; answer on its input stream
            elsif obj['ping']
              write_message(ml_input, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
            end
          end

          # Exit after ending consumer cycles
          break if consumers_running == 0
        end
      rescue Errno::EIO
        cdisplay("Errno:EIO error")
      ensure
        # cleanup; ml_output is nil when opening the FIFO itself failed
        pid = wait_thread[:pid]
        ml_input.close
        ml_output.close if ml_output
        File.delete("#{ml_pipe}.in")
        stdout.close
        stderr.close
        Process.kill('INT', pid)
      end
    end
  rescue PTY::ChildExited
    cdisplay("The child process exited!")
  end
end
601
+
602
+
603
# Run a "group_by" operation in three phases:
#
#   1. collect - every tuple from the consumee is filed under the values of
#      the @__node['group_by'] fields, and the next tuple is requested;
#   2. aggregate - on "end_cycle" each group is replayed to the multilang
#      process as begin_group / aggregate... / end_group, one message at a
#      time, and the tuples multilang emits are queued for the consumers;
#   3. deliver - queued tuples are handed to consumers one per "next" request,
#      ending each consumer's cycle when its queue is empty.
def self.run_group_by()

  # Index streams and consumers by their read pipes for lookup
  consumer_hash = {}
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
      consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
    end
  end

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Setup groups
  group_by = @__node['group_by']
  group_tuples = {}
  emit_count = 0 # emits still expected once groups are sent to multilang
  tuple_queue = []

  # Setup multilang pipe (File.exists? was removed in Ruby 3.2)
  ml_pipe = "#{@__name}_pipe"
  File.delete("#{ml_pipe}.in") if File.exist?("#{ml_pipe}.in")
  File.mkfifo("#{ml_pipe}.in")

  cmd = command("--execute_live --name #{@__name} --pipe #{ml_pipe}")
  begin
    # Start the operation...
    Open3.popen3(cmd) do |ml_input, stdout, stderr, wait_thread|
      begin
        # Multilang writes its protocol messages to the FIFO
        ml_output = File.open("#{ml_pipe}.in", "r+")
        # Streams to poll: consumers, stdout, the FIFO and the consumee
        read_streams = consumer_hash.keys.concat [stdout, ml_output, @__consumee[:rd_child]]

        handshake(ml_input, ml_output)

        end_cycle_received = false
        finished_emitting = false

        loop do
          rs = select_read_streams(read_streams)
          rs.each do |r|

            # The process' plain stdout goes straight to the user
            if r == stdout
              msg = r.gets
              if msg.nil?
                # stdout reached EOF: stop polling it
                read_streams.delete(r)
                next
              end
              msg = msg.sub(/\n/, "")
              cdisplay("log: #{msg}")
              next
            end

            # read_message yields nil when nothing parseable arrived
            obj = read_message(r)
            next if obj.nil?

            if obj["command"]
              case obj["command"]

              # Multilang handled a message; feed it the next queued one
              when "done"
                if end_cycle_received
                  tuple_json = tuple_queue.shift
                  write_message(ml_input, tuple_json) if !tuple_json.nil?
                end
                next

              # Begin aggregation
              when "end_cycle"
                end_cycle_received = true
                # From here on only multilang is listened to
                read_streams = [ml_output]

                if group_tuples.empty?
                  # No tuple was ever received, so nothing will be written to
                  # multilang and it will never emit: end every consumer's
                  # cycle now instead of blocking on the FIFO forever.
                  @__emit_queues.each_pair do |out_stream, consumers|
                    consumers.each_key do |consumer|
                      write_message(get_write_stream(out_stream, consumer), END_CYCLE_MESSAGE)
                    end
                  end
                  consumers_running = 0
                  break
                end

                group_tuples.each do |group_tuple, tuples|
                  tuple_queue << "{\"command\": \"begin_group\", \"tuple\": #{group_tuple.to_json}, \"meta\":{}}\n"
                  tuples.each do |t|
                    tuple_queue << "{\"command\": \"aggregate\", #{t}}\n"
                  end
                  tuple_queue << "{\"command\": \"end_group\"}\n"

                  # one emit is expected per group
                  emit_count += 1
                end

                tuple_json = tuple_queue.shift
                write_message(ml_input, tuple_json) if !tuple_json.nil?

              # Multilang has emitted a grouped tuple
              when "emit"
                emit_count -= 1

                # Enqueue for consumers.
                # NOTE(review): obj['stream'] is not consulted; the tuple is
                # queued for the consumers of every stream (the original
                # assigned it to a local that the block parameter shadowed).
                # Confirm whether per-stream routing was intended.
                tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
                @__emit_queues.each_pair do |out_stream, consumers|
                  consumers.each_key do |consumer|
                    @__emit_queues[out_stream][consumer][:write_queue] << tuple_json
                  end
                end

                if end_cycle_received && emit_count == 0
                  # Every group has been answered
                  finished_emitting = true
                  break
                elsif end_cycle_received
                  tuple_json = tuple_queue.shift
                  write_message(ml_input, tuple_json) if !tuple_json.nil?
                end
              end

            # Received a tuple from the consumee
            elsif obj["tuple"]
              tuple = obj["tuple"].to_json
              meta = obj["meta"].to_json
              column_aliases = obj["column_aliases"] || {}
              aliases = Hash[column_aliases.map{|h| [h["alias"],h["concrete_name"]]}]
              gt = {}

              # Resolve aliases and collect the values to group on
              group_by.each do |field|
                field_name = aliases[field] || field
                gt[field] = obj["tuple"][field_name]
              end

              # Stored without the outer braces so the aggregate command can
              # be prepended when the group is replayed
              msg_no_brackets = "\"tuple\": #{tuple}, \"meta\": #{meta}, \"column_aliases\": #{column_aliases.to_json}"

              # File the tuple under its group, creating the group if needed
              (group_tuples[gt] ||= []) << msg_no_brackets

              # Ask operation for next tuple
              write_message(@__consumee[:wr_child], NEXT_MESSAGE)
            end
          end

          # Send tuples to consumers
          if finished_emitting && consumers_running > 0

            # Send the first tuple to each consumer without waiting for a request
            @__emit_queues.each_pair do |out_stream, consumers|
              consumers.each_key do |consumer|
                tuple_json = get_consumer_tuple(out_stream, consumer)
                emit_consumer_tuple(out_stream, consumer, tuple_json)
              end
            end

            # Send the remaining tuples as consumers ask for them
            loop do
              rs, _ws, _es = IO.select(consumer_hash.keys, [], [])

              emitted = false
              rs.each do |r|
                msg = read_message(r)
                consumer = consumer_hash[r][:consumer]
                stream = consumer_hash[r][:stream]

                # Consumer is ready for next message
                if msg["command"] && msg["command"] == "next"
                  @__emit_queues[stream][consumer][:ready] = true
                  tuple_json = get_consumer_tuple(stream, consumer)

                  if tuple_json.nil?
                    # Queue drained: end the consumer's cycle
                    write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
                    consumers_running -= 1
                    break if consumers_running == 0
                  else
                    emit_consumer_tuple(stream, consumer, tuple_json)
                    emitted = true
                  end
                end
              end

              # Exit once a round produced no emit
              return if !emitted
            end
            break

          # Exit after ending all consumer cycles
          elsif consumers_running == 0
            break
          end
        end
      rescue Errno::EIO
        cdisplay("Errno:EIO error")
      ensure
        # cleanup; ml_output is nil when opening the FIFO itself failed
        pid = wait_thread[:pid]
        ml_input.close
        ml_output.close if ml_output
        File.delete("#{ml_pipe}.in")
        stdout.close
        stderr.close
        Process.kill('INT', pid)
      end
    end
  rescue PTY::ChildExited
    cdisplay("The child process exited!")
  end
end
836
+
837
+
838
# A "filter" operation is executed exactly like an "each" operation.
def self.run_filter()
  run_each
end
841
+
842
+
843
# Run a "sink" operation: collect every tuple received from the consumee
# (requesting the next one after each) until "end_cycle" arrives, then display
# the rows as a table and, when @__options[:output] is set, write them to
# "<output>.csv".
def self.run_sink()
  output = @__options[:output]
  messages = []
  loop do
    obj = read_message(@__consumee[:rd_child])

    # Add row
    if obj['tuple']
      messages << obj
      if @__options[:interactive]
        display_json = Hash[obj['tuple'].map{|k, v| [truncate_message(k), truncate_message(v)]}].to_json
        cdisplay "received #{display_json}"
      end
      write_message(@__consumee[:wr_child], NEXT_MESSAGE)

    # End cycle
    elsif obj['command'] && obj['command'] == "end_cycle"
      break
    end
  end

  if messages.empty?
    cdisplay "empty relation"
    return
  end

  # Build the table and the CSV text side by side
  table = Terminal::Table.new :title => @__name
  csv_str = CSV.generate do |csv|
    header_written = false
    messages.each do |message|
      begin
        t = message['tuple']
        m = message['meta'] || {}

        if t
          # The header is taken from the first row
          if !header_written
            keys = t.keys + m.keys
            csv << keys
            table << keys
            table << :separator
            header_written = true
          end

          # One cell per key: plain concatenation keeps an Array value in its
          # own column, where a deep flatten would spill it into extra columns
          vals = t.values + m.values
          csv << vals
          # Table cells are cut to their first 101 characters
          table << vals.map { |v| "#{v}"[0..100] }
        end
      rescue => e
        # A malformed row is reported and skipped; signals and exits are not
        # intercepted (the original rescued Exception)
        cdisplay e
      end
    end
  end

  # Output table
  cdisplay("\n#{table.to_s}")
  cdisplay ""

  # Write file; the block form closes the handle even if the write raises
  if output
    filename = "#{output}.csv"
    File.open(filename, "w") { |f| f.write(csv_str) }
    cdisplay("output written to #{filename}")
  end
end
917
+
918
+
919
+ private
920
+
921
+ BUFSIZE = 8192
922
+
923
+ # Each reading pipe has a read buffer and message queue
924
+ @__read_buffers = {}
925
+ @__read_buffered_messages = {}
926
+
927
+
928
# Return the streams that can be read right now.
#
# Streams that still hold already-parsed messages (buffered by read_message)
# are returned first, without touching the OS. Only when no stream has
# buffered messages does the call block in IO.select until at least one of
# read_streams becomes readable. A message list is created for every stream
# that does not have one yet.
def self.select_read_streams(read_streams)
  with_buffered = read_streams.select do |stream|
    pending = (@__read_buffered_messages[stream] ||= [])
    !pending.empty?
  end
  return with_buffered unless with_buffered.empty?

  readable, _writable, _errored = IO.select(read_streams, [], [])
  readable
end
943
+
944
+
945
# Read one JSON message from read_stream and return it parsed.
#
# Messages are framed by ENDMARKER. One read may deliver several messages, or
# only part of one, so each stream has a raw text buffer (@__read_buffers) and
# a list of parsed messages not yet handed out (@__read_buffered_messages).
#
# Returns the oldest pending message; reads (blocking, BUFSIZE bytes at a time)
# only when none is pending. Frames that are not valid JSON are reported via
# cdisplay and skipped, so nil is returned when every frame read was invalid.
# Exceptions raised by sysread (e.g. EOFError) are not rescued here.
def self.read_message(read_stream)
  @__read_buffers[read_stream] ||= ""
  pending = (@__read_buffered_messages[read_stream] ||= [])
  return pending.shift unless pending.empty?

  # Read until the buffer holds at least one complete frame
  buffer = @__read_buffers[read_stream]
  buffer << read_stream.sysread(BUFSIZE) until buffer.include?(ENDMARKER)

  # Cut complete frames off the front one by one. Whatever follows the last
  # marker is an incomplete message and stays buffered for the next call.
  # Unlike split/scan bookkeeping this also copes with an empty frame.
  while (marker_at = buffer.index(ENDMARKER))
    raw = buffer.slice!(0, marker_at + ENDMARKER.length)[0, marker_at]
    begin
      pending << JSON.parse(raw)
    rescue JSON::ParserError
      cdisplay "READMESSAGE: invalid JSON #{raw}"
    end
  end

  pending.shift
end
998
+
999
+
1000
# Write one framed message: msg with surrounding whitespace stripped, followed
# by ENDMARKER. The stream is flushed immediately afterwards.
def self.write_message(write_stream, msg)
  framed = "#{msg.strip}#{ENDMARKER}"
  write_stream.write(framed)
  write_stream.flush
end
1006
+
1007
# Shorten a value for display.
#
# Strings longer than 50 characters are cut so that the result, including the
# trailing "...", is exactly 50 characters long (the previous slice kept one
# character too many and produced 51). Shorter strings and anything that is
# not exactly a String are returned unchanged.
def self.truncate_message(msg)
  return msg unless msg.instance_of?(String)

  max_length = 50
  return msg if msg.length <= max_length

  msg[0, max_length - 3] + "..."
end
1015
+
1016
+
1017
# Perform the handshake with a multilang process: send HANDSHAKE_MESSAGE on
# write_stream and wait for one reply on read_stream, which is returned.
# Any exception is displayed, followed by "Error handshaking node", and then
# re-raised.
def self.handshake(write_stream, read_stream)
  write_message(write_stream, HANDSHAKE_MESSAGE)
  read_message(read_stream)
rescue Exception => error
  cdisplay(error)
  cdisplay("Error handshaking node")
  raise error
end
1028
+
1029
+
1030
# Tell a multilang process to begin its cycle: send BEGIN_CYCLE_MESSAGE on
# write_stream and expect a {"command": "done"} reply on read_stream.
#
# An unexpected or missing reply, or any other StandardError, is displayed
# rather than raised. Interrupts and exit requests are no longer swallowed
# (the original rescued Exception).
def self.begin_cycle(write_stream, read_stream)
  write_message(write_stream, BEGIN_CYCLE_MESSAGE)
  msg = read_message(read_stream)
  unless msg.is_a?(Hash) && msg["command"] == "done"
    raise "Invalid response from multilang #{msg}"
  end
rescue => e
  cdisplay(e)
end
1043
+
1044
+
1045
# Write json_obj to the :wr_parent pipe of every consumer on every stream and
# display one "emitted ..." line per consumer.
def self.send_to_consumers(json_obj)
  @__consumer_pipes.each_value do |consumers|
    consumers.each do |consumer, pipe|
      write_message(pipe[:wr_parent], json_obj)
      cdisplay "emitted #{json_obj} to #{consumer}"
    end
  end
end
1054
+
1055
+
1056
# Look up the pipe (:wr_parent) used to write to the given consumer of stream.
def self.get_write_stream(stream, consumer)
  pipes = @__consumer_pipes[stream][consumer]
  pipes[:wr_parent]
end
1060
+
1061
+
1062
# Remove and return the oldest tuple queued for the given consumer of stream;
# nil when that consumer's queue is empty.
def self.get_consumer_tuple(stream, consumer)
  emitter = @__emit_queues[stream][consumer]
  emitter[:write_queue].shift
end
1066
+
1067
+
1068
# Write tuple_json to one consumer of a stream and mark that consumer as not
# ready until it asks for the next tuple. A display form of the tuple (keys
# and values passed through truncate_message) is shown to the user; when
# tuple_json is not valid JSON an error is shown and the display form is left
# empty, but the message is still written.
def self.emit_consumer_tuple(stream, consumer, tuple_json)
  display_json =
    begin
      fields = JSON.parse(tuple_json)["tuple"]
      shortened = fields.map { |key, value| [truncate_message(key), truncate_message(value)] }
      Hash[shortened].to_json
    rescue JSON::ParserError
      cdisplay "Error: invalid JSON"
      nil
    end

  write_message(get_write_stream(stream, consumer), tuple_json)
  @__emit_queues[stream][consumer][:ready] = false
  cdisplay "emitted tuple #{display_json} to #{consumer} "
end
1080
+
1081
+
1082
# Build the JSON text of a tuple message.
#
# tuple          - key/value pairs of a row. "id" is dropped; "confidence",
#                  "since" and "source" are moved into the meta section;
#                  every other pair becomes a tuple value.
# meta           - optional Hash of meta values to start from. It is copied,
#                  so the caller's hash is no longer modified.
# column_aliases - optional alias description, passed through unchanged
#                  (defaults to {}).
#
# Returns a JSON string with the keys "tuple", "meta" and "column_aliases".
def self.build_tuple_json(tuple, meta = nil, column_aliases = nil)
  meta = meta ? meta.dup : {}
  column_aliases ||= {}
  values = {}
  tuple.each do |k, v|
    case k
    when "id"
      next
    when "confidence", "since", "source"
      meta[k] = v
    else
      values[k] = v
    end
  end
  {"tuple" => values, "meta" => meta, "column_aliases" => column_aliases}.to_json
end
1099
+
1100
+
1101
# Build the shell command line that starts the user's multilang script.
#
# arg           - argument string appended after the script path.
# ignore_stderr - when true, "2> /dev/null" is appended.
#
# The interpreter is chosen from @__meta["language"] ("ruby", "python" or
# "js"). For python, the interpreter inside "<dir>/vEnv" is used when that
# directory exists. Returns the command string, or nil (after displaying
# "no language specified") for any other language.
#
# NOTE(review): when @__meta is nil the message below is displayed but
# execution continues and fails on @__meta["script"]; this is unchanged.
def self.command(arg, ignore_stderr=false)
  cdisplay("could not extract meta information. missing zillabyte.conf.yml?") if @__meta.nil?

  full_script = File.join(@__dir, @__meta["script"])
  stderr_opt = "2> /dev/null" if ignore_stderr

  case @__meta["language"]
  when "ruby"
    # Execute in the bundler context
    "cd \"#{@__dir}\"; unset BUNDLE_GEMFILE; ZILLABYTE_HARNESS=1 bundle exec ruby \"#{full_script}\" #{arg} #{stderr_opt}"
  when "python"
    # The virtualenv interpreter path is quoted like every other path, so a
    # project directory containing spaces no longer breaks the command
    python = File.directory?("#{@__dir}/vEnv") ? "\"#{@__dir}/vEnv/bin/python\"" : "python"
    "cd \"#{@__dir}\"; PYTHONPATH=~/zb1/multilang/python/Zillabyte #{python} \"#{full_script}\" #{arg} #{stderr_opt}"
  when "js"
    "cd \"#{@__dir}\"; NODE_PATH=~/zb1/multilang/js/src/lib #{Zillabyte::API::NODEJS_BIN} \"#{full_script}\" #{arg} #{stderr_opt}"
  else
    cdisplay("no language specified")
    nil
  end
end
1125
+
1126
+
1127
# Show msg to the user through the tester, labelled with this operation's name.
def self.cdisplay(msg)
  tester = @__tester
  tester.cdisplay(@__name, msg)
end
1131
+
1132
+
1133
+ end