zillabyte-cli 0.1.18 → 0.1.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,10 @@ class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
25
25
 
26
26
  # Setup streams
27
27
  @nodes = meta["nodes"]
28
+ @node_map = {}
29
+ @nodes.each do |n|
30
+ @node_map[n["name"]] = n
31
+ end
28
32
 
29
33
  # Index stream consummers and emitters by stream name
30
34
  @arcs = meta["arcs"]
@@ -45,18 +49,28 @@ class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
45
49
  if n["type"] == "source"
46
50
  fields = n["fields"]
47
51
  end
48
-
49
- # Create two new pipes in the parent.
50
- rd_child, wr_parent = IO.pipe()
51
- rd_parent, wr_child = IO.pipe()
52
52
 
53
+ # Create two new pipes in the parent.
54
+ rd_child_1, wr_parent_1 = IO.pipe()
55
+ rd_parent_1, wr_child_1 = IO.pipe()
56
+
53
57
  @operation_pipes[name] = {
54
- :node => n,
55
- :rd_child => rd_child,
56
- :wr_child => wr_child,
57
- :rd_parent => rd_parent,
58
- :wr_parent => wr_parent
58
+ "rd_child_1" => rd_child_1,
59
+ "wr_child_1" => wr_child_1,
60
+ "rd_parent_1" => rd_parent_1,
61
+ "wr_parent_1" => wr_parent_1
59
62
  }
63
+
64
+ # Add a second (right-hand side) set of pipes for joins
65
+ if type == "join"
66
+ # Create two new pipes in the parent.
67
+ rd_child_2, wr_parent_2 = IO.pipe()
68
+ rd_parent_2, wr_child_2 = IO.pipe()
69
+ @operation_pipes[name]["rd_child_2"] = rd_child_2
70
+ @operation_pipes[name]["wr_child_2"] = wr_child_2
71
+ @operation_pipes[name]["rd_parent_2"] = rd_parent_2
72
+ @operation_pipes[name]["wr_parent_2"] = wr_parent_2
73
+ end
60
74
  end
61
75
 
62
76
  # Maps origin => {stream => [destinations]}
@@ -79,56 +93,115 @@ class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
79
93
  emits = n["emits"]
80
94
 
81
95
  pipes = @operation_pipes[name]
82
- rd_child = pipes[:rd_child]
83
- wr_child = pipes[:wr_child]
84
- rd_parent = pipes[:rd_parent]
85
- wr_parent = pipes[:wr_parent]
86
96
 
87
97
  # Fork.
88
98
  pid = fork()
89
99
  if pid # In parent
90
- # Close the reading end of the child so we can write to the child.
91
- rd_child.close()
92
- # Close the writing end of the child so we can read from the child.
93
- wr_child.close()
94
-
100
+ # Close the reading end of the first child so we can write to the child.
101
+ pipes["rd_child_1"].close()
102
+ # Close the writing end of the first child so we can read from the child.
103
+ pipes["wr_child_1"].close()
104
+
105
+ if type == "join"
106
+ # Close the reading end of the second child so we can write to the child.
107
+ pipes["rd_child_2"].close()
108
+ # Close the writing end of the second child so we can read from the child.
109
+ pipes["wr_child_2"].close()
110
+ end
95
111
  else # in child
96
- # Close the writing end of the parent so we can read from the parent.
97
- wr_parent.close()
98
- # Close the reading end of the parent so we can write to the parent.
99
- rd_parent.close()
100
- begin
112
+ # Close the writing end of the first parent so we can read from the parent.
113
+ pipes["wr_parent_1"].close()
114
+ # Close the reading end of the first parent so we can write to the parent.
115
+ pipes["rd_parent_1"].close()
116
+
117
+ if type == "join"
118
+ # Close the reading end of the second parent so we can write to the parent.
119
+ pipes["rd_parent_2"].close()
120
+ # Close the writing end of the second parent so we can read from the parent.
121
+ pipes["wr_parent_2"].close()
122
+ end
101
123
 
124
+ begin
102
125
  # Setup reading and writing pipes for communicating with consumee component
103
- in_pipe = {:rd_child => @operation_pipes[name][:rd_child], :wr_child => @operation_pipes[name][:wr_child]}
126
+ if type != "join"
127
+ in_pipes = {"rd_child_1" => @operation_pipes[name]["rd_child_1"], "wr_child_1" => @operation_pipes[name]["wr_child_1"]}
128
+
129
+ # Add join specific options
130
+ else
131
+ options[:join_options] = {}
132
+ in_pipes = {}
133
+ @arcs.each do |a|
134
+
135
+ if (a["dest"] == name)
136
+ # Left Side
137
+ if (a["left"] == 1)
138
+ options[:join_options][:lhs] = a["origin"]
139
+ in_pipes["rd_child_1"] = @operation_pipes[name]["rd_child_1"]
140
+ in_pipes["wr_child_1"] = @operation_pipes[name]["wr_child_1"]
141
+ # Right Side
142
+ elsif (a["right"] == 1)
143
+ options[:join_options][:rhs] = a["origin"]
144
+ in_pipes["rd_child_2"] = @operation_pipes[name]["rd_child_2"]
145
+ in_pipes["wr_child_2"] = @operation_pipes[name]["wr_child_2"]
146
+ end
147
+ end
148
+ end
149
+ end
104
150
 
105
- # Index consumer pipes by consumer_name
151
+ # Index consumer pipes by stream name, consumer_name
106
152
  out_pipes = {}
107
153
 
154
+ # Check if you are the consumee for a downstream join in order to select the correct pipe
108
155
  if type != "sink"
109
156
  @arc_map[name].each_pair do |stream, destinations|
110
157
  out_pipes[stream] ||= {}
158
+
111
159
  destinations.each do |dest|
112
- out_pipes[stream][dest] = {:wr_parent => @operation_pipes[dest][:wr_parent], :rd_parent => @operation_pipes[dest][:rd_parent] }
160
+ out_pipes[stream][dest] ||= {}
161
+
162
+ # Check for a join at the destination
163
+ if (@node_map[dest]["type"] == "join")
164
+ @arcs.each do |a|
165
+ if (a["dest"] == dest && a["origin"] == name)
166
+ # Left Side
167
+ if (a["left"] == 1)
168
+ out_pipes[stream][dest]["wr_parent_1"] = @operation_pipes[dest]["wr_parent_1"]
169
+ out_pipes[stream][dest]["rd_parent_1"] = @operation_pipes[dest]["rd_parent_1"]
170
+ break
171
+ elsif (a["right"] == 1)
172
+ out_pipes[stream][dest]["wr_parent_2"] = @operation_pipes[dest]["wr_parent_2"]
173
+ out_pipes[stream][dest]["rd_parent_2"] = @operation_pipes[dest]["rd_parent_2"]
174
+ break
175
+ end
176
+ end
177
+ end
178
+ else
179
+ out_pipes[stream][dest]["wr_parent_1"] = @operation_pipes[dest]["wr_parent_1"]
180
+ out_pipes[stream][dest]["rd_parent_1"] = @operation_pipes[dest]["rd_parent_1"]
181
+ end
113
182
  end
114
183
  end
115
184
  end
116
185
 
117
186
  # Run the child process
118
- Zillabyte::Runner::ComponentOperation.run(n, dir, in_pipe, out_pipes, self, meta, options)
119
-
120
- rescue => e
121
- display e
187
+ Zillabyte::Runner::ComponentOperation.run(n, dir, in_pipes, out_pipes, self, meta, options)
122
188
 
189
+ rescue => e
190
+ display e.message
191
+ display e.backtrace
123
192
  ensure
124
193
  # Close the reading end of the child
125
- rd_child.close()
194
+ pipes["rd_child_1"].close()
126
195
  # Close the writing end of the child
127
- wr_child.close()
128
- exit!(-1)
129
- end
196
+ pipes["wr_child_1"].close()
130
197
 
131
- end #end child
198
+ # Close secondary join child
199
+ pipes["rd_child_2"].close() if pipes["rd_child_2"]
200
+ pipes["wr_child_2"].close() if pipes["wr_child_2"]
201
+
202
+ exit!(-1)
203
+ end #end child
204
+ end
132
205
  end
133
206
 
134
207
 
@@ -164,20 +237,26 @@ class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
164
237
  display "Enter an input tuple in JSON format i.e.{ \"url\" : \"foo.com\", \"html\" : \"bar.html\" }"
165
238
  msg = ask
166
239
  # Send tuple to source
167
- @operation_pipes[source][:wr_parent].puts msg
240
+ @operation_pipes[source]["wr_parent_1"].puts msg
168
241
  end
169
242
  end
243
+ rescue => e
244
+ display e.message
245
+ display e.backtrace
170
246
 
171
247
  ensure
172
248
  Process.waitall()
173
249
  @operation_pipes.each do |name, pipes|
174
250
  #Close the writing end of the parent
175
- pipes[:wr_parent].close()
251
+ pipes["wr_parent_1"].close()
176
252
  # Close the reading end of the parent
177
- pipes[:rd_parent].close()
253
+ pipes["rd_parent_1"].close()
254
+
255
+ # Close secondary join parent
256
+ pipes["wr_parent_2"].close() if pipes["wr_parent_2"]
257
+ pipes["rd_parent_2"].close() if pipes["rd_parent_2"]
178
258
  end
179
259
  end
180
-
181
260
  end
182
261
 
183
262
  def session
@@ -185,7 +264,7 @@ class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
185
264
  end
186
265
 
187
266
 
188
- def cdisplay(name, message, useName=true)
267
+ def cdisplay(name, message, useName=true)
189
268
  color = @colors[name] || :default
190
269
  if message == ""
191
270
  display ""
@@ -205,7 +284,6 @@ class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
205
284
  end
206
285
  end
207
286
 
208
-
209
287
  def display(message, newline = true)
210
288
  @session.display(message, newline)
211
289
  end
@@ -241,8 +319,4 @@ class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
241
319
  end
242
320
  end
243
321
 
244
-
245
-
246
-
247
-
248
322
  end
@@ -11,24 +11,25 @@ module Zillabyte; module Runner; class MultilangOperation
11
11
  PONG_SUFFIX = "\"}\n"
12
12
  ENDMARKER = "\nend\n"
13
13
 
14
- def self.run(node, dir, consumee, consumer_pipes, tester, meta, options = {})
14
+ def self.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options = {})
15
15
  require("mkfifo")
16
16
  require("zillabyte/runner/component_operation")
17
17
 
18
18
  require("pty")
19
19
  require("open3")
20
+
20
21
  @__node = node
21
22
  @__name = node["name"]
23
+ @__tester = tester
24
+
22
25
  @__type = node["type"]
23
26
  @__dir = dir
24
- @__consumee = consumee
25
- @__consumer_pipes = consumer_pipes
26
- @__tester = tester
27
27
 
28
+ @__consumee_pipes = consumee_pipes
29
+ @__consumer_pipes = consumer_pipes
28
30
  @__meta = meta
29
31
  @__options = options
30
32
  @__output_type = options[:output_type]
31
-
32
33
  # Each consumer of a stream gets its own queue and message passing
33
34
  @__emit_queues = {}
34
35
  @__consumer_pipes.each_pair do |stream, consumers|
@@ -44,12 +45,14 @@ module Zillabyte; module Runner; class MultilangOperation
44
45
  self.run_source()
45
46
  when "group_by"
46
47
  self.run_group_by()
48
+ when "join"
49
+ self.run_join()
47
50
  when "each"
48
51
  self.run_each()
49
52
  when "filter"
50
53
  self.run_filter()
51
54
  when "component"
52
- Zillabyte::Runner::ComponentOperation.run(node, dir, consumee, consumer_pipes, tester, meta, options = {})
55
+ Zillabyte::Runner::ComponentOperation.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options = {})
53
56
  when "sink"
54
57
  self.run_sink()
55
58
  else
@@ -70,7 +73,7 @@ module Zillabyte; module Runner; class MultilangOperation
70
73
  if @__options[:interactive]
71
74
  loop do
72
75
 
73
- msg = @__consumee[:rd_child].gets
76
+ msg = @__consumee_pipes["rd_child_1"].gets
74
77
 
75
78
  # Build tuple
76
79
  begin
@@ -97,13 +100,10 @@ module Zillabyte; module Runner; class MultilangOperation
97
100
  cdisplay("Could not find data that matches your 'matches' clause")
98
101
  exit(-1)
99
102
  end
100
- cdisplay("Received #{rows.length} rows!")
101
-
102
103
  # Enqueue rows for sending to consumers
103
- column_aliases = res["column_aliases"]
104
+ column_aliases = res['column_aliases']
104
105
  rows.each do |tuple|
105
106
  tuple_json = build_tuple_json(tuple, nil, column_aliases)
106
-
107
107
  @__emit_queues.each_pair do |stream, consumers|
108
108
  consumers.each_pair do |consumer, emitter|
109
109
  emitter[:write_queue] << tuple_json
@@ -112,13 +112,7 @@ module Zillabyte; module Runner; class MultilangOperation
112
112
  end
113
113
 
114
114
  # Index streams and consumers by their pipes for lookup
115
- consumer_hash = {}
116
- @__emit_queues.each_pair do |stream, consumers|
117
- consumers.each_key do |consumer|
118
- read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
119
- consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
120
- end
121
- end
115
+ consumer_hash = build_consumer_hash()
122
116
 
123
117
 
124
118
  # Send first tuple
@@ -155,9 +149,8 @@ module Zillabyte; module Runner; class MultilangOperation
155
149
  if tuple_json.nil?
156
150
  write_stream = get_write_stream(stream, consumer)
157
151
  cdisplay "ending cycle for #{consumer}"
158
- write_message(write_stream, END_CYCLE_MESSAGE)
159
- write_message(write_stream, DONE_MESSAGE)
160
-
152
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
153
+ send_command_tuple(stream, consumer, DONE_MESSAGE)
161
154
  else
162
155
  # Emit tuple to consumer
163
156
  emit_consumer_tuple(stream, consumer, tuple_json)
@@ -175,15 +168,9 @@ module Zillabyte; module Runner; class MultilangOperation
175
168
  # Custom source
176
169
  else
177
170
 
178
- # Index streams and consumers by their pipes for lookup
179
171
 
180
- consumer_hash ={}
181
- @__emit_queues.each_pair do |stream, consumers|
182
- consumers.each_key do |consumer|
183
- read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
184
- consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
185
- end
186
- end
172
+ # Index streams and consumers by their pipes for lookup
173
+ consumer_hash = build_consumer_hash()
187
174
 
188
175
  # Keep track of how many consumers to handle before exiting
189
176
  consumers_running = consumer_hash.keys.length
@@ -209,6 +196,7 @@ module Zillabyte; module Runner; class MultilangOperation
209
196
  # Setup streams from consumers and multilang
210
197
  read_streams = consumer_hash.keys.concat [stdout, ml_output]
211
198
 
199
+
212
200
  # Handshake
213
201
  handshake(ml_input, ml_output)
214
202
 
@@ -242,11 +230,11 @@ module Zillabyte; module Runner; class MultilangOperation
242
230
  end
243
231
 
244
232
  if obj["command"]
233
+
245
234
  case obj["command"]
246
235
 
247
236
  # Multilang emitted a tuple
248
237
  when "emit"
249
-
250
238
  stream = obj['stream']
251
239
  # Check for null emit
252
240
  if end_cycle_policy != "explicit"
@@ -272,10 +260,10 @@ module Zillabyte; module Runner; class MultilangOperation
272
260
  # Send or enqueue tuple for each consumer
273
261
  tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
274
262
 
275
-
276
263
  @__emit_queues[stream].each_pair do |consumer, emitter|
277
264
  if emitter[:ready]
278
265
  emit_consumer_tuple(stream, consumer, tuple_json)
266
+
279
267
  else
280
268
  @__emit_queues[stream][consumer][:write_queue] << tuple_json
281
269
  end
@@ -291,8 +279,7 @@ module Zillabyte; module Runner; class MultilangOperation
291
279
  # End cycle for consumer if it has processed all tuples
292
280
  if tuple_json.nil? && end_cycle_received
293
281
 
294
- write_stream = get_write_stream(stream, consumer)
295
- write_message(write_stream, END_CYCLE_MESSAGE)
282
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
296
283
  consumers_running -= 1
297
284
  if consumers_running == 0
298
285
  break
@@ -329,8 +316,7 @@ module Zillabyte; module Runner; class MultilangOperation
329
316
  @__emit_queues.each_pair do |stream, consumers|
330
317
  consumers.each_pair do |consumer, emitter|
331
318
  if emitter[:ready]
332
- write_stream = get_write_stream(stream, consumer)
333
- write_message(write_stream, END_CYCLE_MESSAGE)
319
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
334
320
  consumers_running -= 1
335
321
  if consumers_running == 0
336
322
  break
@@ -393,15 +379,9 @@ module Zillabyte; module Runner; class MultilangOperation
393
379
 
394
380
  def self.run_each()
395
381
 
396
- # Index streams and consumers by their pipes for lookup
397
- consumer_hash = {}
398
- @__emit_queues.each_pair do |stream, consumers|
399
- consumers.each_key do |consumer|
400
- read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
401
- consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
402
- end
403
- end
404
382
 
383
+ # Index streams and consumers by their pipes for lookup
384
+ consumer_hash = build_consumer_hash()
405
385
 
406
386
  # Keep track of how many consumers to handle before exiting
407
387
  consumers_running = consumer_hash.keys.length
@@ -423,7 +403,7 @@ module Zillabyte; module Runner; class MultilangOperation
423
403
  ml_output = File.open("#{ml_pipe}.in", "r+")
424
404
 
425
405
  # Setup streams from consumers, multilang, and the consumee
426
- read_streams = consumer_hash.keys.concat [@__consumee[:rd_child], ml_output, stdout]
406
+ read_streams = consumer_hash.keys.concat [@__consumee_pipes["rd_child_1"], ml_output, stdout]
427
407
 
428
408
  # Handshake
429
409
  handshake(ml_input, ml_output)
@@ -432,6 +412,7 @@ module Zillabyte; module Runner; class MultilangOperation
432
412
  multilang_queue = []
433
413
  mutlilang_count = 0
434
414
  end_cycle_received = false
415
+ column_aliases = nil
435
416
 
436
417
 
437
418
  # Receive and handle messages
@@ -461,7 +442,7 @@ module Zillabyte; module Runner; class MultilangOperation
461
442
  stream = obj["stream"]
462
443
 
463
444
  # Send or enqueue tuple for each consumer
464
- tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
445
+ tuple_json = build_tuple_json(obj['tuple'], obj['meta'], column_aliases)
465
446
 
466
447
  @__emit_queues[stream].each_pair do |consumer, emitter|
467
448
  if emitter[:ready]
@@ -473,17 +454,16 @@ module Zillabyte; module Runner; class MultilangOperation
473
454
 
474
455
  # Consumer is ready for a message
475
456
  when "next"
476
-
477
457
  stream = consumer_hash[r][:stream]
478
458
  consumer = consumer_hash[r][:consumer]
479
459
 
460
+
480
461
  @__emit_queues[stream][consumer][:ready] = true
481
462
  tuple_json = get_consumer_tuple(stream, consumer)
482
463
 
483
464
  # End cycle for consumer if it has processed all tuples
484
465
  if tuple_json.nil? && end_cycle_received
485
- write_stream = get_write_stream(stream, consumer)
486
- write_message(write_stream, END_CYCLE_MESSAGE)
466
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
487
467
  consumers_running -= 1
488
468
  if consumers_running == 0
489
469
  break
@@ -507,8 +487,7 @@ module Zillabyte; module Runner; class MultilangOperation
507
487
 
508
488
  # Request next tuple from consumee
509
489
  elsif !end_cycle_received
510
- write_message(@__consumee[:wr_child], NEXT_MESSAGE)
511
-
490
+ write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)
512
491
 
513
492
  # If there are no more messages to send, we are done
514
493
  elsif end_cycle_received && mutlilang_count == 0
@@ -518,8 +497,7 @@ module Zillabyte; module Runner; class MultilangOperation
518
497
  @__emit_queues.each_pair do |stream, consumers|
519
498
  consumers.each_pair do |consumer, emitter|
520
499
  if emitter[:ready]
521
- write_stream = get_write_stream(stream, consumer)
522
- write_message(write_stream, END_CYCLE_MESSAGE)
500
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
523
501
  consumers_running -= 1
524
502
  if consumers_running == 0
525
503
  break
@@ -540,13 +518,11 @@ module Zillabyte; module Runner; class MultilangOperation
540
518
  # Consumee operation sent signal to end_cycle
541
519
  when "end_cycle"
542
520
  end_cycle_received = true
543
-
544
521
  if mutlilang_count == 0
545
522
  @__emit_queues.each_pair do |stream, consumers|
546
523
  consumers.each_pair do |consumer, emitter|
547
524
  if emitter[:ready]
548
- write_stream = get_write_stream(stream, consumer)
549
- write_message(write_stream, END_CYCLE_MESSAGE)
525
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
550
526
  consumers_running -= 1
551
527
  if consumers_running == 0
552
528
  break
@@ -561,7 +537,7 @@ module Zillabyte; module Runner; class MultilangOperation
561
537
 
562
538
  # Received a tuple from consumee
563
539
  elsif obj['tuple']
564
-
540
+ column_aliases = obj['column_aliases']
565
541
  # Send or enqueue to multilang
566
542
  mutlilang_count += 1
567
543
  if multilang_queue.empty?
@@ -604,13 +580,7 @@ module Zillabyte; module Runner; class MultilangOperation
604
580
  def self.run_group_by()
605
581
 
606
582
  # Index streams and consumers by their pipes for lookup
607
- consumer_hash = {}
608
- @__emit_queues.each_pair do |stream, consumers|
609
- consumers.each_key do |consumer|
610
- read_stream = @__consumer_pipes[stream][consumer][:rd_parent]
611
- consumer_hash[read_stream] = {:stream => stream, :consumer => consumer}
612
- end
613
- end
583
+ consumer_hash = build_consumer_hash
614
584
 
615
585
  # Keep track of how many consumers to handle before exiting
616
586
  consumers_running = consumer_hash.keys.length
@@ -638,7 +608,7 @@ module Zillabyte; module Runner; class MultilangOperation
638
608
  # Multilang output tuples
639
609
  ml_output = File.open("#{ml_pipe}.in", "r+")
640
610
  # Setup streams from consumers, multilang, and the consumee
641
- read_streams = consumer_hash.keys.concat [stdout, ml_output, @__consumee[:rd_child]]
611
+ read_streams = consumer_hash.keys.concat [stdout, ml_output, @__consumee_pipes["rd_child_1"]]
642
612
 
643
613
  # Handshake
644
614
  handshake(ml_input, ml_output)
@@ -750,7 +720,7 @@ module Zillabyte; module Runner; class MultilangOperation
750
720
  end
751
721
 
752
722
  # Ask operation for next tuple
753
- write_message(@__consumee[:wr_child], NEXT_MESSAGE)
723
+ write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)
754
724
 
755
725
  # Multilang sent a ping
756
726
  elsif obj['ping']
@@ -839,6 +809,330 @@ module Zillabyte; module Runner; class MultilangOperation
839
809
  end
840
810
 
841
811
 
812
+ def self.run_join()
813
+
814
+ lhs_fields = @__node["lhs_fields"]
815
+ rhs_fields = @__node["rhs_fields"]
816
+ join_type = @__node["join_type"]
817
+
818
+
819
+ # Sanity check
820
+ if lhs_fields.nil? || rhs_fields.nil? || join_type.nil?
821
+ return
822
+ end
823
+
824
+
825
+ # Index streams and consumers by their pipes for lookup
826
+ consumer_hash = build_consumer_hash()
827
+
828
+ # Read all tuples from the left-hand and right-hand streams
829
+ read_streams = [@__consumee_pipes["rd_child_1"], @__consumee_pipes["rd_child_2"]]
830
+
831
+ # Index left and right hand consumees by their streams for lookup
832
+ consumee_hash = {}
833
+ # Left side
834
+ lhs = @__options[:join_options][:lhs]
835
+ consumee_hash[@__consumee_pipes["rd_child_1"]] = lhs
836
+ # Right side
837
+ rhs = @__options[:join_options][:rhs]
838
+ consumee_hash[@__consumee_pipes["rd_child_2"]] = rhs
839
+
840
+ # Keep track of how many consumers to handle before exiting
841
+ consumers_running = 1
842
+
843
+ # Index the incoming tuples by their join key
844
+ lhs_tuples = {}
845
+ rhs_tuples = {}
846
+
847
+ tuple_queue = []
848
+
849
+ # Begin cycle
850
+ left_end_cycle_received = false
851
+ right_end_cycle_received = false
852
+
853
+ # Receive and handle messages
854
+ loop do
855
+
856
+ # Read from a stream
857
+ rs = select_read_streams(read_streams)
858
+ rs.each do |r|
859
+
860
+ # Receive an object
861
+ obj = read_message(r)
862
+
863
+ if obj["command"]
864
+ case obj["command"]
865
+
866
+ # A consumee is done emitting
867
+ when "end_cycle"
868
+
869
+ if consumee_hash[r] == lhs
870
+ left_end_cycle_received = true
871
+ elsif consumee_hash[r] == rhs
872
+ right_end_cycle_received = true
873
+ end
874
+
875
+ # We are done receiving from streams
876
+ if left_end_cycle_received && right_end_cycle_received
877
+ break
878
+ end
879
+ end
880
+
881
+ # Received a tuple from consumee
882
+ elsif obj['tuple']
883
+ tuple = obj['tuple']
884
+ if consumee_hash[r] == lhs
885
+ lhs_tuples[tuple[lhs_fields]] ||= []
886
+ lhs_tuples[tuple[lhs_fields]] << tuple
887
+ elsif consumee_hash[r] == rhs
888
+ rhs_tuples[tuple[rhs_fields]] ||= []
889
+ rhs_tuples[tuple[rhs_fields]] << tuple
890
+ end
891
+ end
892
+
893
+ #Ask operation for next tuple
894
+ if consumee_hash[r] == lhs
895
+ write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)
896
+ elsif consumee_hash[r] == rhs
897
+ write_message(@__consumee_pipes["wr_child_2"], NEXT_MESSAGE)
898
+ end
899
+ end
900
+
901
+ # Break out if emits ended
902
+ if left_end_cycle_received && right_end_cycle_received
903
+ break
904
+ end
905
+ end
906
+ # Build the joined tuples
907
+ joined_tuples = []
908
+
909
+ # If no joined tuples, end the cycle
910
+ if lhs_tuples.empty? && rhs_fields.empty?
911
+ @__emit_queues.each_pair do |stream, consumers|
912
+ consumers.each_pair do |consumer, emitter|
913
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
914
+ end
915
+ end
916
+ return
917
+ end
918
+ joined_fields = (lhs_tuples.values[0][0].keys.concat rhs_tuples.values[0][0].keys).uniq
919
+ case join_type
920
+ when "inner"
921
+ lhs_tuples.each_pair do |key, lhs_tuples|
922
+ lhs_tuples.each do |lhs_tuple|
923
+
924
+ tuple = {}
925
+ if !rhs_tuples[key].nil?
926
+ rhs_tuples[key].each do |rhs_tuple|
927
+ # Check for a valid join
928
+ if !lhs_tuple[lhs_fields].nil? && !rhs_tuple[rhs_fields].nil?
929
+ # Add the fields
930
+ joined_fields.each do |field|
931
+ if lhs_tuple.has_key? field
932
+ tuple[field] = lhs_tuple[field]
933
+ else
934
+ tuple[field] = rhs_tuple[field]
935
+ end
936
+ end
937
+
938
+ # Add the tuple
939
+ joined_tuples << tuple
940
+ end
941
+ end
942
+ end
943
+ end
944
+ end
945
+ when "left"
946
+ lhs_tuples.each_pair do |key, lhs_tuples|
947
+ lhs_tuples.each do |lhs_tuple|
948
+
949
+ joined = false
950
+ if rhs_tuples.has_key? key
951
+ rhs_tuples[key].each do |rhs_tuple|
952
+ # Check for a valid join
953
+ if !lhs_tuple[lhs_fields].nil? && !rhs_tuple[rhs_fields].nil?
954
+ tuple = {}
955
+
956
+ # Add the fields
957
+ joined_fields.each do |field|
958
+ if lhs_tuple.has_key? field
959
+ tuple[field] = lhs_tuple[field]
960
+ else
961
+ tuple[field] = rhs_tuple[field]
962
+ end
963
+ end
964
+ joined_tuples << tuple
965
+ joined = true
966
+ end
967
+ end
968
+ end
969
+ if !joined
970
+ tuple = {}
971
+ lhs_tuples.each.each do |hash|
972
+ hash.each do |key, value|
973
+ tuple[key] = value
974
+ end
975
+ end
976
+
977
+ joined_fields.each do |field|
978
+ if !tuple.has_key? field
979
+ tuple[field] = nil
980
+ end
981
+ end
982
+ # Add the tuple
983
+ joined_tuples << tuple
984
+ end
985
+ end
986
+ end
987
+ when "right"
988
+ rhs_tuples.each_pair do |key, rhs_tuples|
989
+ rhs_tuples.each do |rhs_tuple|
990
+
991
+ joined = false
992
+ if lhs_tuples.has_key? key
993
+ lhs_tuples[key].each do |lhs_tuple|
994
+ # Check for a valid join
995
+ if !rhs_tuple[lhs_fields].nil? && !lhs_tuple[rhs_fields].nil?
996
+ tuple = {}
997
+
998
+ # Add the fields
999
+ joined_fields.each do |field|
1000
+ if rhs_tuple.has_key? field
1001
+ tuple[field] = rhs_tuple[field]
1002
+ else
1003
+ tuple[field] = lhs_tuple[field]
1004
+ end
1005
+ end
1006
+ joined_tuples << tuple
1007
+ joined = true
1008
+ end
1009
+ end
1010
+ end
1011
+ if !joined
1012
+ tuple = {}
1013
+ rhs_tuples.each.each do |hash|
1014
+ hash.each do |key, value|
1015
+ tuple[key] = value
1016
+ end
1017
+ end
1018
+
1019
+ joined_fields.each do |field|
1020
+ if !tuple.has_key? field
1021
+ tuple[field] = nil
1022
+ end
1023
+ end
1024
+ # Add the tuple
1025
+ joined_tuples << tuple
1026
+ end
1027
+ end
1028
+ end
1029
+ when "outer"
1030
+ lhs_tuples.each_pair do |key, lhs_tuples|
1031
+ lhs_tuples.each do |lhs_tuple|
1032
+
1033
+ joined = false
1034
+ if rhs_tuples.has_key? key
1035
+ rhs_tuples[key].each do |rhs_tuple|
1036
+ # Check for a valid join
1037
+ if !lhs_tuple[lhs_fields].nil? && !rhs_tuple[rhs_fields].nil?
1038
+ tuple = {}
1039
+
1040
+ # Add the fields
1041
+ joined_fields.each do |field|
1042
+ if lhs_tuple.has_key? field
1043
+ tuple[field] = lhs_tuple[field]
1044
+ else
1045
+ tuple[field] = rhs_tuple[field]
1046
+ end
1047
+ end
1048
+ joined_tuples << tuple
1049
+ joined = true
1050
+ end
1051
+ end
1052
+ end
1053
+ if !joined
1054
+ tuple = {}
1055
+ lhs_tuples.each.each do |hash|
1056
+ hash.each do |key, value|
1057
+ tuple[key] = value
1058
+ end
1059
+ end
1060
+
1061
+ joined_fields.each do |field|
1062
+ if !tuple.has_key? field
1063
+ tuple[field] = nil
1064
+ end
1065
+ end
1066
+ # Add the tuple
1067
+ joined_tuples << tuple
1068
+ end
1069
+ end
1070
+ end
1071
+ end
1072
+
1073
+ # Setup output queues
1074
+ joined_tuples.each do |tuple|
1075
+ tuple_json = build_tuple_json(tuple)
1076
+ @__emit_queues.each_pair do |stream, consumers|
1077
+ consumers.each_key do |consumer|
1078
+ @__emit_queues[stream][consumer][:write_queue] << tuple_json
1079
+ end
1080
+ end
1081
+ end
1082
+
1083
+ # Send first tuple
1084
+ @__emit_queues.each_pair do |stream, consumers|
1085
+ consumers.each_key do |consumer|
1086
+
1087
+ tuple_json = get_consumer_tuple(stream, consumer)
1088
+ if tuple_json.nil?
1089
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
1090
+ consumers_running -= 1
1091
+ if consumers_running == 0
1092
+ return
1093
+ end
1094
+ else
1095
+ emit_consumer_tuple(stream, consumer, tuple_json)
1096
+ end
1097
+ end
1098
+ end
1099
+
1100
+ # Send tuples to consumers as appropriate
1101
+ loop do
1102
+
1103
+ # Retrieve messages from consumers
1104
+ rs, ws, es = IO.select(consumer_hash.keys, [], [])
1105
+
1106
+ # Emit tuples to consumers
1107
+ rs.each do |r|
1108
+
1109
+ # Read from consumer
1110
+ msg = read_message(r)
1111
+ consumer = consumer_hash[r][:consumer]
1112
+ stream = consumer_hash[r][:stream]
1113
+
1114
+ # Consumer is ready for next message
1115
+ if msg["command"] && msg["command"] == "next"
1116
+
1117
+ @__emit_queues[stream][consumer][:ready] = true
1118
+ tuple_json = get_consumer_tuple(stream, consumer)
1119
+
1120
+ # If all messages have been sent to a consumer, end its cycle
1121
+ if tuple_json.nil?
1122
+ send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
1123
+ consumers_running -= 1
1124
+ if consumers_running == 0
1125
+ return
1126
+ end
1127
+ else
1128
+ # Emit tuple to consumer
1129
+ emit_consumer_tuple(stream, consumer, tuple_json)
1130
+ end
1131
+ end
1132
+ end
1133
+ end
1134
+ end
1135
+
842
1136
  def self.run_filter()
843
1137
  self.run_each()
844
1138
  end
@@ -861,14 +1155,12 @@ module Zillabyte; module Runner; class MultilangOperation
861
1155
  col_map[key] = type_map[type]
862
1156
  end
863
1157
 
864
-
865
-
866
1158
  messages = []
867
1159
 
868
1160
  output = @__options["output"]
869
1161
  loop do
870
1162
  # Read messages
871
- msg = read_message(@__consumee[:rd_child])
1163
+ msg = read_message(@__consumee_pipes["rd_child_1"])
872
1164
  obj = msg
873
1165
 
874
1166
  # Add row
@@ -882,14 +1174,20 @@ module Zillabyte; module Runner; class MultilangOperation
882
1174
  cdisplay("\n \nPress Ctrl-C to exit", false)
883
1175
  return
884
1176
  end
885
-
886
1177
  # Check tuple columns for valid entry
887
1178
  columns_to_check = col_map.length
888
- tuple.each_pair do |col,value|
1179
+ tuple.keys.each do |col|
1180
+ value = tuple[col]
889
1181
  types = type_map.each_value.map {|t| value.is_a? t}
890
- if col_map.include? col
891
- if value.is_a? col_map[col]
1182
+ matched_column = check_tuple_for_alias(col, obj['column_aliases'], col_map.keys)
1183
+ if !matched_column.nil?
1184
+ if value.nil? || (value.is_a? col_map[matched_column])
892
1185
  columns_to_check -= 1
1186
+ # Set the proper column name
1187
+ if col != matched_column
1188
+ tuple[matched_column] = value
1189
+ tuple.delete(col)
1190
+ end
893
1191
  else
894
1192
  break
895
1193
  end
@@ -901,12 +1199,11 @@ module Zillabyte; module Runner; class MultilangOperation
901
1199
  cdisplay("\n \nPress Ctrl-C to exit", false)
902
1200
  return
903
1201
  end
904
-
905
1202
  messages << msg
906
1203
  if @__options[:interactive]
907
1204
  cdisplay "received #{display_json}"
908
1205
  end
909
- write_message(@__consumee[:wr_child], NEXT_MESSAGE)
1206
+ write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)
910
1207
 
911
1208
  # End cycle
912
1209
  elsif obj['command'] && obj['command'] == "end_cycle"
@@ -943,7 +1240,7 @@ module Zillabyte; module Runner; class MultilangOperation
943
1240
 
944
1241
  vals = [t.values, m.values].flatten
945
1242
  csv << vals
946
- table << vals.flat_map{|v| "#{v}"[0..100]}
1243
+ table << vals.flat_map{|v| "#{v.to_s}"[0..100]}
947
1244
  end
948
1245
  rescue JSON::ParserError
949
1246
  cdisplay("invalid JSON")
@@ -990,7 +1287,6 @@ module Zillabyte; module Runner; class MultilangOperation
990
1287
  end
991
1288
  end
992
1289
  return rs unless rs.empty?
993
-
994
1290
  rs, ws, es = IO.select(read_streams, [], [])
995
1291
  return rs
996
1292
  end
@@ -999,18 +1295,17 @@ module Zillabyte; module Runner; class MultilangOperation
999
1295
  # Read a JSON message
1000
1296
  def self.read_message(read_stream)
1001
1297
 
1298
+
1002
1299
  @__read_buffers[read_stream] ||= ""
1003
1300
  @__read_buffered_messages[read_stream] ||= []
1004
1301
  if !@__read_buffered_messages[read_stream].empty?
1005
1302
  obj = @__read_buffered_messages[read_stream].shift
1006
1303
  return obj
1007
1304
  end
1008
-
1009
1305
  # read message from stream
1010
1306
  loop do
1011
1307
 
1012
1308
  while !@__read_buffers[read_stream].include? ENDMARKER
1013
-
1014
1309
  segment = read_stream.sysread(BUFSIZE)
1015
1310
  @__read_buffers[read_stream] << segment
1016
1311
  end
@@ -1096,12 +1391,42 @@ module Zillabyte; module Runner; class MultilangOperation
1096
1391
  end
1097
1392
  end
1098
1393
 
1394
# Build a lookup from each consumer's parent-side read pipe to the stream and
# consumer it belongs to, so a response arriving on a pipe can be traced back.
#
# Walks every (stream, consumer) pair in @__emit_queues and reads the matching
# pipe set from @__consumer_pipes.
#
# Returns a Hash of read pipe => {:stream => ..., :consumer => ...}.
def self.build_consumer_hash()
  lookup = {}

  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      pipes = @__consumer_pipes[stream][consumer]

      # "rd_parent_1" wins when present; "rd_parent_2" is only registered
      # when the first pipe is absent. Consumers with neither are skipped.
      key = ["rd_parent_1", "rd_parent_2"].find { |candidate| pipes.has_key?(candidate) }
      next if key.nil?

      lookup[pipes[key]] = {:stream => stream, :consumer => consumer}
    end
  end

  lookup
end
1414
+
1099
1415
 
1100
1416
  # Send object to every consumer of the operation, regardless of stream
1101
1417
  def self.send_to_consumers(json_obj)
1102
1418
  @__consumer_pipes.each_pair do |stream, consumers|
1103
1419
  consumers.each_pair do |consumer, pipe|
1104
- write_message(pipe[:wr_parent], json_obj)
1420
+
1421
+ # Single or Left hand pipe
1422
+ if (pipe.has_key? "wr_parent_1")
1423
+ write_stream = get_write_stream(stream, consumer, 1)
1424
+ write_message(write_stream, json_obj)
1425
+ elsif (pipe.has_key? "wr_parent_2")
1426
+ write_stream = get_write_stream(stream, consumer, 2)
1427
+ write_message(write_stream, json_obj)
1428
+ end
1429
+
1105
1430
  cdisplay "emitted #{json_obj} to #{consumer}"
1106
1431
  end
1107
1432
  end
@@ -1109,8 +1434,9 @@ module Zillabyte; module Runner; class MultilangOperation
1109
1434
 
1110
1435
 
1111
1436
  # Get the write pipe of the stream consumer
1112
- def self.get_write_stream(stream, consumer)
1113
- @__consumer_pipes[stream][consumer][:wr_parent]
1437
+ def self.get_write_stream(stream, consumer, number=1)
1438
+ wr_pipe = "wr_parent_" + number.to_s
1439
+ @__consumer_pipes[stream][consumer][wr_pipe]
1114
1440
  end
1115
1441
 
1116
1442
 
@@ -1120,6 +1446,23 @@ module Zillabyte; module Runner; class MultilangOperation
1120
1446
  end
1121
1447
 
1122
1448
 
1449
# Deliver a command message to one consumer of a stream, then mark that
# consumer as not ready in @__emit_queues.
#
# stream   - stream name used to index @__consumer_pipes / @__emit_queues
# consumer - consumer name within that stream
# json_obj - message handed to write_message unchanged
def self.send_command_tuple(stream, consumer, json_obj)
  pipes = @__consumer_pipes[stream][consumer]

  # Use the single/left-hand pipe when present, otherwise the right-hand one.
  # If neither key exists nothing is written.
  side = [1, 2].find { |n| pipes.has_key?("wr_parent_#{n}") }
  unless side.nil?
    target = get_write_stream(stream, consumer, side)
    write_message(target, json_obj)
  end

  # The ready flag is cleared whether or not a pipe was found.
  @__emit_queues[stream][consumer][:ready] = false
end
1464
+
1465
+
1123
1466
  # Emit tuple_json to the consumer of a stream
1124
1467
  def self.emit_consumer_tuple(stream, consumer, tuple_json)
1125
1468
  begin
@@ -1127,13 +1470,43 @@ module Zillabyte; module Runner; class MultilangOperation
1127
1470
  rescue JSON::ParserError
1128
1471
  cdisplay "Error: invalid JSON"
1129
1472
  end
1130
- write_stream = get_write_stream(stream, consumer)
1131
- write_message(write_stream, tuple_json)
1473
+
1474
+ pipe = @__consumer_pipes[stream][consumer]
1475
+ # Single or Left hand pipe
1476
+ if (pipe.has_key? "wr_parent_1")
1477
+ write_stream = get_write_stream(stream, consumer, 1)
1478
+ write_message(write_stream, tuple_json)
1479
+
1480
+ # Right hand pipe
1481
+ elsif (pipe.has_key? "wr_parent_2")
1482
+ write_stream = get_write_stream(stream, consumer, 2)
1483
+ write_message(write_stream, tuple_json)
1484
+ end
1485
+
1132
1486
  @__emit_queues[stream][consumer][:ready] = false
1133
1487
  cdisplay "emitted tuple #{display_json} to #{consumer} "
1134
1488
  end
1135
1489
 
1136
1490
 
1491
# Resolve a tuple column name to one of the expected column names.
#
# column         - column name found on the incoming tuple
# column_aliases - list of hashes with "concrete_name" and "alias" keys; may
#                  be nil when the emitter supplied no aliases
# valid_columns  - expected column names, checked in order
#
# Returns the first expected column that either equals `column` or is the
# alias registered for `column` as a concrete name; nil when nothing matches.
def self.check_tuple_for_alias(column, column_aliases, valid_columns)
  # build_tuple_json serializes "column_aliases" => nil when no aliases were
  # given, so treat a missing list as empty instead of calling #each on nil.
  aliases = column_aliases || []

  valid_columns.each do |expected|
    return expected if column == expected

    # A column that equals the alias itself was already caught by the direct
    # comparison above, so only the concrete-name mapping is checked here.
    renamed = aliases.any? do |entry|
      entry["alias"] == expected && entry["concrete_name"] == column
    end
    return expected if renamed
  end

  nil
end
1509
+
1137
1510
  # Build a tuple and format into JSON
1138
1511
  def self.build_tuple_json(tuple, meta = nil, column_aliases = nil)
1139
1512
  meta ||= {}
@@ -1144,11 +1517,12 @@ module Zillabyte; module Runner; class MultilangOperation
1144
1517
  next
1145
1518
  elsif(k == "confidence" or k == "since" or k == "source")
1146
1519
  meta[k] = v
1147
- else
1520
+ else
1148
1521
  values[k] = v
1149
1522
  end
1150
1523
  end
1151
- tuple_json = {"tuple" => values, "meta" => meta, "column_aliases" => column_aliases}.to_json
1524
+ tuple_json = {"op" => @__name, "tuple" => values, "meta" => meta, "column_aliases" => column_aliases}.to_json
1525
+
1152
1526
  return tuple_json
1153
1527
  end
1154
1528