zillabyte-cli 0.9.20 → 0.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,337 +0,0 @@
1
- require "zillabyte/runner/multilang_operation"
2
- require "zillabyte/runner/component_operation"
3
-
4
- # HIDDEN:
5
# Runs a component locally: one forked child process per node in the
# component's metadata, wired to the parent through IO pipes. When no input
# file is given, tuples are read interactively from the user and written to
# the component's single input operation.
class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
  include Zillabyte::Helpers

  # Newline-terminated JSON command written to the input operation's pipe
  # when the user types 'end'.
  KILL_CYCLE_MESSAGE = "{\"command\": \"kill_cycle\"}\n"

  # Palette cycled through (by node index) when describing operations.
  NODE_COLORS = [:green, :yellow, :magenta, :cyan, :white, :blue, :light_yellow,
                 :light_blue, :red, :light_magenta, :light_cyan].freeze

  # Describes the component, spawns one child process per node and, when
  # options[:input] is nil, feeds user-entered tuples to the input operation.
  #
  # Does nothing when +meta+ or +session+ is nil.
  #
  # @param meta [Hash] component metadata; "nodes" and "arcs" are read here,
  #   "name" and "language" by describe_component
  # @param dir [String] passed through to ComponentOperation.run
  # @param session [Object] must respond to display(message, newline)
  # @param options [Hash] :input is read here; the whole hash is passed
  #   through to ComponentOperation.run
  def run(meta, dir = Dir.pwd, session = nil, options = {})
    return if meta.nil? || session.nil?

    @session = session
    @colors = {}

    input = options[:input]

    # Show the user what we know about their app...
    display "inferring your app details..."
    describe_component(meta)

    # Index nodes by name
    @nodes = meta["nodes"]
    @node_map = {}
    @nodes.each { |n| @node_map[n["name"]] = n }

    @arcs = meta["arcs"]

    @operations = {}
    @operation_pipes = {}

    # Resolve the interactive source BEFORE forking anything: bailing out
    # after the children exist would leave the cleanup below waiting on
    # processes that were never sent any input.
    # TODO handle inputs
    source = nil
    if input.nil?
      source = interactive_source
      return if source.nil?
    end

    begin
      create_operation_pipes
      build_arc_map
      @nodes.each { |n| spawn_operation(n, dir, meta, options) }
      read_tuples_interactively(source) if input.nil?
    rescue => e
      display e.message
      display e.backtrace
    ensure
      Process.waitall
      close_parent_pipes
    end
  end

  # @return [Object] the session given to #run
  def session
    @session
  end

  # Displays +message+ in the color assigned to +name+, one line at a time.
  # Continuation lines are indented to line up under the first one.
  #
  # @param name [String] operation name; used as prefix and color lookup key
  # @param message [String, Array<String>, nil] text (split on newlines) or
  #   pre-split lines; nil, "" and messages with no lines print nothing
  # @param useName [Boolean] whether to prefix the first line with "name - "
  def cdisplay(name, message, useName=true)
    return if message.nil? || message == ""

    color = @colors[name] || :default
    lines = message.is_a?(Array) ? message : message.split("\n")
    # "\n".split("\n") and [] are both empty; [][1..-1] would be nil.
    return if lines.empty?

    prefix = useName ? "#{name} - " : ""
    display "#{prefix}#{lines.first}".colorize(color)
    lines.drop(1).each do |line|
      display "#{' '*prefix.size}#{line}".colorize(color)
    end
  end

  # Forwards to the session's display, shadowing whatever display the
  # included helpers provide.
  def display(message, newline = true)
    @session.display(message, newline)
  end

  # Prints the component name, language and a per-operation summary, and
  # assigns each operation the color later used by #cdisplay.
  #
  # @param meta [Hash] component metadata ("name", "language", "nodes")
  def describe_component(meta)
    require("colorize")
    require("indentation")
    @colors ||= {}
    rjust = 20

    display "#{'component name'.rjust(rjust)}: #{meta['name']}"
    display "#{'component language'.rjust(rjust)}: #{meta['language']}"
    meta['nodes'].each_with_index do |node, index|
      color = @colors[node['name']] ||= NODE_COLORS[index % NODE_COLORS.length]
      display(("="*rjust + " operation ##{index}").colorize(color))
      display "#{"name".rjust(rjust)}: #{node['name'].to_s.colorize(color)}"
      display "#{"type".rjust(rjust)}: #{node['type'].to_s.colorize(color)}"

      case node['type']
      when "input"
        display "#{"fields".rjust(rjust)}: #{node['fields'].to_s.colorize(color)}"
        display "#{"matches".rjust(rjust)}: #{JSON.pretty_generate(node['matches']).indent(rjust+2).lstrip.colorize(color)}" if node['matches']
      when "output"
        display "#{"columns".rjust(rjust)}: #{node['columns'].to_s.colorize(color)}"
      end
    end
  end

  private

  # Returns the name of the single "input" node, or nil (after telling the
  # user why) when there is not exactly one.
  def interactive_source
    inputs = @nodes.select { |n| n["type"] == "input" }.map { |n| n["name"] }
    if inputs.length > 1
      display "Error: Cannot run component with multiple input sources without input files"
      return nil
    end
    if inputs.empty?
      display "Error: Cannot run component without an input operation"
      return nil
    end
    inputs.first
  end

  # Creates, per node, one pipe in each direction between parent and child;
  # join nodes get a second ("_2") pair for their right-hand side.
  def create_operation_pipes
    @nodes.each do |n|
      rd_child_1, wr_parent_1 = IO.pipe
      rd_parent_1, wr_child_1 = IO.pipe
      pipes = {
        "rd_child_1" => rd_child_1,
        "wr_child_1" => wr_child_1,
        "rd_parent_1" => rd_parent_1,
        "wr_parent_1" => wr_parent_1
      }

      if n["type"] == "join"
        rd_child_2, wr_parent_2 = IO.pipe
        rd_parent_2, wr_child_2 = IO.pipe
        pipes["rd_child_2"] = rd_child_2
        pipes["wr_child_2"] = wr_child_2
        pipes["rd_parent_2"] = rd_parent_2
        pipes["wr_parent_2"] = wr_parent_2
      end

      @operation_pipes[n["name"]] = pipes
    end
  end

  # Builds @arc_map: origin => {stream name => [destinations]}.
  def build_arc_map
    @arc_map = {}
    @arcs.each do |a|
      streams = (@arc_map[a["origin"]] ||= {})
      (streams[a["name"]] ||= []) << a["dest"]
    end
  end

  # Forks a child for node +n+. The parent closes the child-side pipe ends
  # and returns; the child closes its parent-side ends, runs the operation
  # and never returns.
  def spawn_operation(n, dir, meta, options)
    name = n["name"]
    type = n["type"]
    pipes = @operation_pipes[name]

    pid = fork
    if pid # In parent
      pipes["rd_child_1"].close
      pipes["wr_child_1"].close
      if type == "join"
        pipes["rd_child_2"].close
        pipes["wr_child_2"].close
      end
      return
    end

    # In child
    pipes["wr_parent_1"].close
    pipes["rd_parent_1"].close
    if type == "join"
      pipes["rd_parent_2"].close
      pipes["wr_parent_2"].close
    end

    begin
      in_pipes = child_in_pipes(name, type, options)
      out_pipes = child_out_pipes(name, type)
      Zillabyte::Runner::ComponentOperation.run(n, dir, in_pipes, out_pipes, self, meta, options)
    rescue => e
      display e.message
      display e.backtrace
    ensure
      pipes["rd_child_1"].close
      pipes["wr_child_1"].close
      pipes["rd_child_2"].close if pipes["rd_child_2"]
      pipes["wr_child_2"].close if pipes["wr_child_2"]

      # Runs on success and failure alike: the child must never fall back
      # into the parent's control flow.
      exit!(-1)
    end
  end

  # Pipes the child reads its input from. For a join, also records which
  # origin feeds each side in options[:join_options].
  def child_in_pipes(name, type, options)
    own = @operation_pipes[name]
    if type != "join"
      return {"rd_child_1" => own["rd_child_1"], "wr_child_1" => own["wr_child_1"]}
    end

    options[:join_options] = {}
    in_pipes = {}
    @arcs.each do |a|
      next unless a["dest"] == name

      if a["left"] == 1
        options[:join_options][:lhs] = a["origin"]
        in_pipes["rd_child_1"] = own["rd_child_1"]
        in_pipes["wr_child_1"] = own["wr_child_1"]
      elsif a["right"] == 1
        options[:join_options][:rhs] = a["origin"]
        in_pipes["rd_child_2"] = own["rd_child_2"]
        in_pipes["wr_child_2"] = own["wr_child_2"]
      end
    end
    in_pipes
  end

  # Pipes the child uses to reach its consumers, indexed by stream name and
  # then destination name. A destination that is a join is reached through
  # its "_1" or "_2" pipes depending on which side this node feeds.
  def child_out_pipes(name, type)
    out_pipes = {}
    return out_pipes if type == "output"

    # A node without outgoing arcs has no entry in the arc map.
    (@arc_map[name] || {}).each_pair do |stream, destinations|
      out_pipes[stream] ||= {}

      destinations.each do |dest|
        out_pipes[stream][dest] ||= {}

        side = "1"
        if @node_map[dest]["type"] == "join"
          arc = @arcs.find do |a|
            a["dest"] == dest && a["origin"] == name && (a["left"] == 1 || a["right"] == 1)
          end
          next if arc.nil?
          side = arc["left"] == 1 ? "1" : "2"
        end

        out_pipes[stream][dest]["wr_parent_#{side}"] = @operation_pipes[dest]["wr_parent_#{side}"]
        out_pipes[stream][dest]["rd_parent_#{side}"] = @operation_pipes[dest]["rd_parent_#{side}"]
      end
    end
    out_pipes
  end

  # Prompts for tuples until the user enters 'end', writing each accepted
  # line to the source operation's pipe; 'end' sends KILL_CYCLE_MESSAGE.
  def read_tuples_interactively(source)
    display ""
    display "To view results: Enter 'end' "
    display ""

    fields = @node_map[source]['fields'].map { |h| h.keys[0].upcase }
    to_source = @operation_pipes[source]["wr_parent_1"]

    loop do
      display "Enter an input tuple in the form : #{fields.join(' ')}"
      msg = ask # presumably provided by Zillabyte::Helpers - confirm

      if msg == 'end'
        to_source.puts KILL_CYCLE_MESSAGE
        break
      end

      args = msg.scan(/(?:\w|"[^"]*")+/)
      if args.length % fields.length != 0
        display "Error: Argument length must be a multiple of the schema length"
        next
      end

      to_source.puts msg
    end
  end

  # Closes every parent-side pipe end, including the join-only "_2" pair.
  def close_parent_pipes
    @operation_pipes.each do |_name, pipes|
      pipes["wr_parent_1"].close
      pipes["rd_parent_1"].close
      pipes["wr_parent_2"].close if pipes["wr_parent_2"]
      pipes["rd_parent_2"].close if pipes["rd_parent_2"]
    end
  end

end
@@ -1,1662 +0,0 @@
1
-
2
- # Emulate a multilang operation
3
- module Zillabyte; module Runner; class MultilangOperation
4
-
5
- HANDSHAKE_MESSAGE = "{\"pidDir\": \"/tmp\"}\n"
6
- PREPARE_MESSAGE = " {\"command\": \"prepare\"}\n"
7
- DONE_MESSAGE = "{\"command\": \"done\"}\n"
8
- NEXT_MESSAGE = "{\"command\": \"next\"}\n"
9
- BEGIN_CYCLE_MESSAGE = "{\"command\": \"begin_cycle\"}\n"
10
- END_CYCLE_MESSAGE = "{\"command\": \"end_cycle\"}\n"
11
- KILL_CYCLE_MESSAGE = "{\"command\": \"kill_cycle\"}\n"
12
- PONG_PREFIX = "{\"pong\": \""
13
- PONG_SUFFIX = "\"}\n"
14
- ENDMARKER = "\nend\n"
15
-
16
# Entry point for running one operation: records the operation's context in
# module-level instance variables, creates one emit queue per
# (stream, consumer) pair and dispatches on the node's "type".
#
# Any StandardError raised by the dispatched runner is reported through
# cdisplay instead of propagating.
#
# @param node [Hash] operation metadata; "name" and "type" are read here
# @param dir [String] stored in @__dir
# @param consumee_pipes [Hash] pipes towards the upstream operation
# @param consumer_pipes [Hash] stream name => {consumer name => pipes}
# @param tester [Object] stored in @__tester
# @param meta [Hash] stored in @__meta
# @param options [Hash] :output_type is read here; the hash is kept in
#   @__options and passed on unchanged to ComponentOperation.run
def self.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options = {})
  require("mkfifo")
  require("zillabyte/runner/component_operation")
  require("zillabyte/runner/operation")

  require("pty")
  require("open3")

  @__node = node
  @__name = node["name"]
  @__tester = tester

  @__type = node["type"]
  @__dir = dir

  @__consumee_pipes = consumee_pipes
  @__consumer_pipes = consumer_pipes
  @__meta = meta
  @__options = options
  @__output_type = options[:output_type]

  # Each consumer of a stream gets its own queue and message passing
  @__emit_queues = {}
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      @__emit_queues[stream] ||= {}
      @__emit_queues[stream][consumer] = {:write_queue => [], :ready => true}
    end
  end

  begin
    case @__type
    when "source"
      run_source
    when "group_by"
      run_group_by
    when "join"
      run_join
    when "each"
      run_each
    when "filter"
      run_filter
    when "component"
      # Pass the caller's options through; `options = {}` here used to
      # replace them with an empty hash.
      Zillabyte::Runner::ComponentOperation.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options)
    when "sink"
      run_sink
    else
      cdisplay("invalid operation type #{@__type}")
    end
  rescue => e
    cdisplay e.message
  end

end
69
-
70
-
71
# Runs a "source" operation in one of three modes:
#
# * interactive (@__options[:interactive]): JSON lines read from the
#   consumee pipe are turned into tuples and sent to all consumers until
#   KILL_CYCLE_MESSAGE arrives or the pipe reaches EOF;
# * relation (node has "matches" or "relation"): rows are fetched through
#   @__tester.query_agnostic, queued per consumer and handed out one at a
#   time as each consumer asks for "next";
# * custom: the user's process is spawned with Open3.popen3, connects back
#   over a local TCP socket, and its emits are relayed to consumers.
#
# The custom mode terminates the current process (exit / Process.exit!)
# rather than returning. Helpers such as command, handshake, prepare,
# begin_cycle, read_message and write_message are defined outside this
# block.
def self.run_source

  # Interactive source
  if @__options[:interactive]
    loop do

      msg = @__consumee_pipes["rd_child_1"].gets

      # gets returns nil at EOF; JSON.parse(nil) would raise a TypeError.
      return if msg.nil?

      if msg == KILL_CYCLE_MESSAGE
        send_to_consumers(KILL_CYCLE_MESSAGE)
        return
      end

      # Build tuple
      begin
        obj = JSON.parse(msg)
      rescue JSON::ParserError
        cdisplay "Error: invalid JSON"
        next
      end

      send_to_consumers(build_tuple_json(obj))
    end

  # Source from relation
  elsif @__node['matches'] or @__node["relation"]

    # Query API for rows
    matches = @__node['matches'] || (@__node["relation"]["query"])
    cdisplay("Fetching remote data...")
    res = @__tester.query_agnostic(matches)
    rows = res["rows"]
    if rows.nil? || rows.length == 0
      cdisplay("Could not find data that matches your 'matches' clause")
      exit(-1)
    end

    # Enqueue rows for sending to consumers
    column_aliases = res['column_aliases']
    rows.each do |tuple|
      tuple_json = build_tuple_json(tuple, nil, column_aliases)
      @__emit_queues.each_pair do |_stream, consumers|
        consumers.each_pair do |_consumer, emitter|
          emitter[:write_queue] << tuple_json
        end
      end
    end

    # Index streams and consumers by their pipes for lookup
    consumer_hash = build_consumer_hash()

    # Send first tuple
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        tuple_json = get_consumer_tuple(stream, consumer)
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end

    # Send tuples to consumers as appropriate
    loop do

      # Retrieve messages from consumers
      rs, = IO.select(consumer_hash.keys, [], [])

      # Emit tuples to consumers
      emitted = false
      rs.each do |r|

        # Read from consumer
        msg = read_message(r)
        next if msg.nil?

        stream = consumer_hash[r][:stream]
        consumer = consumer_hash[r][:consumer]

        case msg["command"]

        # Consumer is ready for next message
        when "next"
          @__emit_queues[stream][consumer][:ready] = true
          tuple_json = get_consumer_tuple(stream, consumer)

          # If all messages have been sent to consumer, end their cycle
          if tuple_json.nil?
            cdisplay "ending cycle for #{consumer}"
            send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
            send_command_tuple(stream, consumer, DONE_MESSAGE)
          else
            # Emit tuple to consumer
            emit_consumer_tuple(stream, consumer, tuple_json)
            emitted = true
          end

        when "kill_cycle"
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          return
        end
      end

      # Exit when done emitting
      return unless emitted
    end

  # Custom source
  else

    # Index streams and consumers by their pipes for lookup
    consumer_hash = build_consumer_hash()

    # Keep track of how many consumers to handle before exiting
    consumers_running = consumer_hash.keys.length

    # Kill the cycle on error
    cycle_killed = false

    # Setup multilang socket
    require 'socket'
    host = "0.0.0.0"
    server = TCPServer.new(0)
    port = server.addr[1]

    # Spawn multilang process
    cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

    begin

      Open3.popen3(cmd) do |_stdin, stdout, _stderr, wait_thread|
        begin
          server_thread = Thread.new do
            ml_socket = server.accept()

            # RUN SOURCE
            begin
              # Setup streams from consumers and multilang(stdout and socket communication)
              read_streams = consumer_hash.keys.concat [stdout, ml_socket]

              # Handshake
              handshake(ml_socket, ml_socket)
              prepare(ml_socket, ml_socket)

              # Begin cycle
              begin_cycle(ml_socket, ml_socket)
              emitted = false
              write_message(ml_socket, NEXT_MESSAGE)
              multilang_queue = []
              # NOTE(review): the node also carries "end_cycle_policy", but
              # the options value is the one that has always taken effect.
              end_cycle_policy = @__options[:end_cycle_policy]
              end_cycle_received = false

              # Receive and handle messages
              loop do
                # Read from a stream
                rs = select_read_streams(read_streams)
                rs.each do |r|
                  # Read stdout straight to user
                  if r == stdout && consumers_running > 0
                    msg = r.gets
                    # nil means the child closed its stdout
                    cdisplay("log: #{msg.sub(/\n/, "")}") unless msg.nil?
                    next
                  end

                  obj = read_message(r)
                  next if obj.nil?

                  if obj["command"]
                    case obj["command"]

                    # Multilang emitted a tuple
                    when "emit"
                      stream = obj['stream']

                      # Check for null emit
                      if end_cycle_policy != "explicit"
                        if obj['tuple'].nil?
                          # NOTE(review): a nil tuple marks the end of the
                          # cycle but is still emitted below - confirm.
                          end_cycle_received = true
                        else
                          end_cycle_received = obj['tuple'].each_value.any? { |v| v.nil? }
                          next if end_cycle_received
                        end
                      end

                      # Valid emit
                      emitted = true

                      # Send or enqueue tuple for each consumer
                      tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])

                      @__emit_queues[stream].each_pair do |consumer, emitter|
                        if emitter[:ready]
                          emit_consumer_tuple(stream, consumer, tuple_json)
                        else
                          emitter[:write_queue] << tuple_json
                        end
                      end

                    # Consumer is ready for a message
                    when "next"
                      stream = consumer_hash[r][:stream]
                      consumer = consumer_hash[r][:consumer]
                      @__emit_queues[stream][consumer][:ready] = true
                      tuple_json = get_consumer_tuple(stream, consumer)

                      # End cycle for consumer if it has processed all tuples
                      if tuple_json.nil? && end_cycle_received
                        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                        consumers_running -= 1
                        exit(0) if consumers_running == 0

                      elsif !tuple_json.nil?
                        # Emit tuple to consumer
                        emit_consumer_tuple(stream, consumer, tuple_json)
                        emitted = true
                      end

                    # Multilang is done emitting a group of tuples
                    when "done"
                      # End cycle if no tuples were emitted
                      if !emitted && end_cycle_policy == "null_emit"
                        end_cycle_received = true
                      else
                        emitted = false
                      end

                      # Send the next tuple to multilang
                      if !multilang_queue.empty?
                        write_message(ml_socket, multilang_queue.shift)

                      # Request next tuple from multilang
                      elsif !end_cycle_received
                        write_message(ml_socket, NEXT_MESSAGE)

                      # The cycle has ended: close it for every ready consumer
                      else
                        @__emit_queues.each_pair do |stream_name, consumers|
                          consumers.each_pair do |consumer_name, emitter|
                            if emitter[:ready]
                              send_command_tuple(stream_name, consumer_name, END_CYCLE_MESSAGE)
                              consumers_running -= 1
                              exit(0) if consumers_running == 0
                            end
                          end
                        end
                      end

                    # Multilang sent an error message
                    when "fail"
                      cdisplay("ERROR : #{obj['msg']}")
                      cycle_killed = true
                      exit(0)

                    # Multilang sent a log message
                    when "log"
                      cdisplay "LOG: #{obj['msg']}"

                    # Multilang sent signal to end the cycle
                    when "end_cycle"
                      if end_cycle_policy != "explicit"
                        cdisplay "received end_cycle command for non explicit policy"
                        next
                      end
                      end_cycle_received = true

                    when "kill_cycle"
                      cycle_killed = true
                      exit(0)
                    end

                  # Multilang sent a ping
                  elsif obj['ping']
                    write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                  end
                end

                # Exit after ending consumer cycles (checked on every pass;
                # placed after the loop it could never run)
                exit(0) if consumers_running == 0
              end
            rescue => e
              cycle_killed = true
              cdisplay e.message
              cdisplay e.backtrace
            ensure
              ml_socket.close()
            end
          end
          server_thread.join()
        rescue => e
          # Report instead of silently discarding the error.
          cdisplay e.message
          cdisplay e.backtrace
        ensure
          # cleanup
          if cycle_killed
            send_to_consumers(KILL_CYCLE_MESSAGE, false)
          end
          pid = wait_thread[:pid]
          stdout.close
          begin
            Process.kill('INT', pid)
          rescue Errno::ESRCH
            # The child is already gone; still terminate below.
          end
          Process.exit!(true)
        end
      end

    rescue PTY::ChildExited
    end
  end
end
404
-
405
-
406
# Runs an "each" operation: spawns the user's process with Open3.popen3,
# accepts its connection on a local TCP socket, forwards every tuple that
# arrives from the consumee to it, and relays the tuples it emits to the
# consumers (directly when a consumer is ready, otherwise through that
# consumer's write queue).
#
# The read loop ends by calling exit(0) once every consumer's cycle has been
# ended, or on a "fail"/"kill_cycle" command. Helpers such as command,
# handshake, prepare, read_message, write_message and select_read_streams
# are defined outside this block.
def self.run_each

  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash()

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Kill the cycle on error
  cycle_killed = false

  # Setup multilang socket
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0)
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    # Start the operation...
    Open3.popen3(cmd) do |stdin, stdout, stderr, _wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()
          begin
            # Setup streams from consumers, multilang, and the consumee
            read_streams = consumer_hash.keys.concat [@__consumee_pipes["rd_child_1"], ml_socket, stdout]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            multilang_queue = []
            multilang_count = 0 # tuples handed to multilang and not yet "done"
            end_cycle_received = false
            column_aliases = nil

            # Receive and handle messages
            loop do

              # Read from a stream
              rs = select_read_streams(read_streams)
              rs.each do |r|

                # Read STDOUT from program straight to user
                if r == stdout
                  msg = r.gets
                  cdisplay("LOG: #{msg.sub(/\n/, "")}") unless msg.nil?
                  next
                end

                # Receive an object
                obj = read_message(r)
                next if obj.nil?

                if obj["command"]
                  case obj["command"]

                  # Multilang emitted a tuple
                  when "emit"
                    stream = obj["stream"]

                    # Send or enqueue tuple for each consumer
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], column_aliases)

                    @__emit_queues[stream].each_pair do |consumer, emitter|
                      if emitter[:ready]
                        emit_consumer_tuple(stream, consumer, tuple_json)
                      else
                        emitter[:write_queue] << tuple_json
                      end
                    end

                  # Consumer is ready for a message
                  when "next"
                    stream = consumer_hash[r][:stream]
                    consumer = consumer_hash[r][:consumer]

                    @__emit_queues[stream][consumer][:ready] = true
                    tuple_json = get_consumer_tuple(stream, consumer)

                    # End cycle for consumer if it has processed all tuples
                    if tuple_json.nil? && end_cycle_received
                      send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                      consumers_running -= 1
                      # Leaves rs.each; the check after it then exits.
                      break if consumers_running == 0

                    elsif !tuple_json.nil?
                      # Emit tuple to consumer
                      emit_consumer_tuple(stream, consumer, tuple_json)
                    end

                  # Multilang is done emitting a group of tuples
                  when "done"
                    multilang_count -= 1

                    # Send the next tuple to multilang
                    if !multilang_queue.empty?
                      write_message(ml_socket, multilang_queue.shift)

                    # If there are no more messages to send, we are done
                    elsif end_cycle_received && multilang_count == 0

                      # End cycle for ready consumers
                      @__emit_queues.each_pair do |stream_name, consumers|
                        consumers.each_pair do |consumer_name, emitter|
                          if emitter[:ready]
                            send_command_tuple(stream_name, consumer_name, END_CYCLE_MESSAGE)
                            consumers_running -= 1
                            break if consumers_running == 0
                          end
                        end
                      end
                    end

                  # Multilang sent an error message
                  when "fail"
                    cdisplay("ERROR : #{obj['msg']}")
                    cycle_killed = true
                    exit(0)

                  # Multilang sent a log message
                  when "log"
                    cdisplay "LOG: #{obj['msg']}"

                  # Consumee operation sent signal to end_cycle
                  when "end_cycle"
                    end_cycle_received = true
                    if multilang_count == 0
                      @__emit_queues.each_pair do |stream_name, consumers|
                        consumers.each_pair do |consumer_name, emitter|
                          if emitter[:ready]
                            send_command_tuple(stream_name, consumer_name, END_CYCLE_MESSAGE)
                            consumers_running -= 1
                            exit(0) if consumers_running == 0
                          end
                        end
                      end
                    end

                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Received a tuple from consumee
                elsif obj['tuple']
                  column_aliases = obj['column_aliases']

                  # Send or enqueue to multilang
                  # NOTE(review): the queue is only appended to when it is
                  # already non-empty, so as written every tuple is sent
                  # straight through - confirm whether it was meant to be
                  # gated on multilang being busy.
                  multilang_count += 1
                  if multilang_queue.empty?
                    write_message(ml_socket, obj.to_json)
                  else
                    multilang_queue << obj.to_json
                  end
                  write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end

              # Exit after ending consumer cycles
              exit(0) if consumers_running == 0

            end
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()
      rescue => e
        cdisplay e.message
        cdisplay e.backtrace
      ensure
        # cleanup
        if cycle_killed
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          send_to_consumees(KILL_CYCLE_MESSAGE)
        end
        stdin.close
        stdout.close
        stderr.close
      end
    end
  rescue PTY::ChildExited
    cdisplay("The child process exited!")
  end

end
618
-
619
-
620
# Runs a "group_by" operation in three phases:
#
# 1. collect: tuples from the consumee are bucketed by the values of the
#    node's "group_by" fields (resolved through column aliases);
# 2. aggregate: on "end_cycle", each bucket is sent to the user's process as
#    begin_group / aggregate... / end_group messages, one message per
#    "done" or "emit" received back;
# 3. distribute: once one emit per group has come back, the emitted tuples
#    are handed to consumers one at a time as they ask for "next".
#
# The method terminates the current process (exit / Process.exit!) rather
# than returning. Helpers such as command, handshake, prepare, read_message,
# write_message and select_read_streams are defined outside this block.
def self.run_group_by

  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Kill the cycle on error
  cycle_killed = false

  # Setup groups
  group_by = @__node['group_by']
  group_tuples = {}
  emit_count = 0 # used to track how many emits are expected once groups are sent to multilang
  tuple_queue = []

  # Setup multilang socket
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0)
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    # Start the operation...
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()
          begin
            # Setup streams from consumers, multilang, and the consumee
            read_streams = consumer_hash.keys.concat [stdout, ml_socket, @__consumee_pipes["rd_child_1"]]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            end_cycle_received = false
            finished_emitting = false

            # Handle streams
            loop do
              # Read from a stream
              rs = select_read_streams(read_streams)
              rs.each do |r|

                # Read STDOUT from program straight to user
                if r == stdout
                  msg = r.gets
                  # nil means the child closed its stdout
                  cdisplay("log: #{msg.sub(/\n/, "")}") unless msg.nil?
                  next
                end

                # Receive an object
                obj = read_message(r)
                next if obj.nil?

                if obj["command"]
                  case obj["command"]

                  when "done"
                    if end_cycle_received
                      tuple_json = tuple_queue.shift
                      write_message(ml_socket, tuple_json) unless tuple_json.nil?
                    end
                    next

                  # Begin aggregation
                  when "end_cycle"
                    end_cycle_received = true
                    # From here on only multilang is listened to.
                    read_streams = [ml_socket]

                    group_tuples.each do |group_tuple, tuples|
                      tuple_queue << "{\"command\": \"begin_group\", \"tuple\": #{group_tuple.to_json}, \"meta\":{}}\n"
                      tuples.each do |t|
                        tuple_queue << "{\"command\": \"aggregate\", #{t}}\n"
                      end
                      tuple_queue << "{\"command\": \"end_group\"}\n"

                      # keep track of how many emits are expected
                      emit_count += 1
                    end

                    tuple_json = tuple_queue.shift
                    write_message(ml_socket, tuple_json) unless tuple_json.nil?

                  # Multilang has emitted a grouped tuple
                  when "emit"
                    emit_count -= 1

                    # Enqueue for consumers
                    # NOTE(review): the tuple is queued for the consumers of
                    # every stream, not only obj['stream'] - confirm intent.
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
                    @__emit_queues.each_pair do |_stream_name, consumers|
                      consumers.each_value do |emitter|
                        emitter[:write_queue] << tuple_json
                      end
                    end

                    # End cycle when done emitting
                    if end_cycle_received && emit_count == 0
                      finished_emitting = true
                      break
                    elsif end_cycle_received
                      tuple_json = tuple_queue.shift
                      write_message(ml_socket, tuple_json) unless tuple_json.nil?
                    end

                  # An error has occurred
                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Received a tuple from operation
                elsif obj["tuple"]
                  tuple = obj["tuple"].to_json
                  meta = obj["meta"].to_json
                  column_aliases = obj["column_aliases"] || {}
                  aliases = Hash[column_aliases.map { |h| [h["alias"], h["concrete_name"]] }]
                  gt = {}

                  # Get the column names to group on
                  group_by.each do |field|
                    field_name = aliases[field] || field
                    gt[field] = obj["tuple"][field_name]
                  end

                  msg_no_brackets = "\"tuple\": #{tuple}, \"meta\": #{meta}, \"column_aliases\": #{column_aliases.to_json}"

                  # Group tuple into existing group or create new group
                  (group_tuples[gt] ||= []) << msg_no_brackets

                  # Ask operation for next tuple
                  write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end

              # Send tuples to consumers
              if finished_emitting && consumers_running > 0

                # Send first tuple
                @__emit_queues.each_pair do |stream, consumers|
                  consumers.each_key do |consumer|
                    tuple_json = get_consumer_tuple(stream, consumer)
                    emit_consumer_tuple(stream, consumer, tuple_json)
                  end
                end

                # Send tuples to consumers as appropriate
                loop do

                  # Retrieve messages from consumers
                  ready, = IO.select(consumer_hash.keys, [], [])

                  # Emit tuples to consumers
                  emitted = false
                  ready.each do |r|

                    # Read from consumer
                    msg = read_message(r)
                    next if msg.nil?
                    consumer = consumer_hash[r][:consumer]
                    stream = consumer_hash[r][:stream]

                    # Consumer is ready for next message
                    if msg["command"] == "next"

                      @__emit_queues[stream][consumer][:ready] = true
                      tuple_json = get_consumer_tuple(stream, consumer)

                      # If all messages have been sent to a consumer, end its cycle
                      if tuple_json.nil?
                        write_stream = get_write_stream(stream, consumer)
                        write_message(write_stream, END_CYCLE_MESSAGE)
                        consumers_running -= 1
                        break if consumers_running == 0
                      else
                        # Emit tuple to consumer
                        emit_consumer_tuple(stream, consumer, tuple_json)
                        emitted = true
                      end
                    end

                  end

                  # Exit when done emitting
                  exit(0) unless emitted
                end
                break

              # Exit after ending all consumer cycles
              elsif consumers_running == 0
                break
              end
            end
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()

      rescue Errno::EIO
        cdisplay("Errno:EIO error")
      rescue => e
        # The ensure below ends the process, so report the error here or it
        # is lost.
        cdisplay e.message
        cdisplay e.backtrace
      ensure
        # cleanup
        if cycle_killed
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          send_to_consumees(KILL_CYCLE_MESSAGE)
        end

        pid = wait_thread[:pid]
        stdin.close
        stdout.close
        stderr.close
        begin
          Process.kill('INT', pid)
        rescue Errno::ESRCH
          # The child is already gone; still terminate below.
        end
        Process.exit!(true)
      end
    end
  rescue PTY::ChildExited
    # This method creates no FIFO, so there is nothing to delete here; the
    # former cleanup referenced an undefined `ml_pipe`.
    cdisplay("The child process exited!")
  end
end
869
-
870
-
871
# Join the tuples arriving on the left-hand ("rd_child_1") and right-hand
# ("rd_child_2") consumee pipes and emit the joined rows to the consumers.
#
# Reads @__node["lhs_fields"], @__node["rhs_fields"] and @__node["join_type"];
# returns immediately when any of them is nil. Both input streams are drained
# until each has sent "end_cycle" (a "kill_cycle" aborts the method), the
# tuples are joined according to join_type ("inner", "left", "right" or
# "outer"; any other value yields no rows) and the result is handed to the
# consumers one tuple per "next" request.
#
# Fixes relative to the previous implementation:
# * the empty-input guard tested rhs_fields instead of rhs_tuples, so an empty
#   side crashed while computing the joined field list;
# * the inner join reused a single hash for every right-hand match;
# * unmatched rows were built from the whole key group instead of the tuple;
# * the right join validated with the left/right field names swapped;
# * "outer" dropped the unmatched right-hand rows;
# * the side of a message is decided by the pipe it arrived on, not by
#   comparing consumee names (ambiguous when both sides share a name).
def self.run_join()
  lhs_fields = @__node["lhs_fields"]
  rhs_fields = @__node["rhs_fields"]
  join_type = @__node["join_type"]

  # Sanity check
  return if lhs_fields.nil? || rhs_fields.nil? || join_type.nil?

  # Index the consumer streams for lookup when they answer
  consumer_hash = build_consumer_hash()

  left_reader = @__consumee_pipes["rd_child_1"]
  right_reader = @__consumee_pipes["rd_child_2"]

  # NOTE(review): only one consumer is accounted for before returning, as in
  # the previous implementation - confirm whether this should be the number
  # of consumers instead.
  consumers_running = 1

  # Incoming tuples indexed by their join key
  lhs_tuples = {}
  rhs_tuples = {}

  left_end_cycle_received = false
  right_end_cycle_received = false

  # Drain both input streams
  until left_end_cycle_received && right_end_cycle_received
    select_read_streams([left_reader, right_reader]).each do |r|
      obj = read_message(r)
      from_left = (r == left_reader)

      if obj["command"]
        case obj["command"]
        when "end_cycle"
          if from_left
            left_end_cycle_received = true
          else
            right_end_cycle_received = true
          end
          # Both sides are done: stop handling this batch of streams
          break if left_end_cycle_received && right_end_cycle_received
        when "kill_cycle"
          return
        end
      elsif obj['tuple']
        tuple = obj['tuple']
        index = from_left ? lhs_tuples : rhs_tuples
        join_key = tuple[from_left ? lhs_fields : rhs_fields]
        (index[join_key] ||= []) << tuple
      end

      # Ask the side we just heard from for its next tuple
      write_message(@__consumee_pipes[from_left ? "wr_child_1" : "wr_child_2"], NEXT_MESSAGE)
    end
  end

  # Nothing was received on either side: end the cycle of every consumer
  if lhs_tuples.empty? && rhs_tuples.empty?
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
      end
    end
    return
  end

  joined_tuples = join_indexed_tuples(lhs_tuples, rhs_tuples, join_type)

  # Setup output queues
  joined_tuples.each do |tuple|
    tuple_json = build_tuple_json(tuple)
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        @__emit_queues[stream][consumer][:write_queue] << tuple_json
      end
    end
  end

  # Send first tuple
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      tuple_json = get_consumer_tuple(stream, consumer)
      if tuple_json.nil?
        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
        consumers_running -= 1
        return if consumers_running == 0
      else
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end
  end

  # Send tuples to consumers as they ask for them
  loop do
    # select_read_streams also reports streams whose messages were already
    # buffered by read_message, which a bare IO.select would not see.
    select_read_streams(consumer_hash.keys).each do |r|
      msg = read_message(r)
      consumer = consumer_hash[r][:consumer]
      stream = consumer_hash[r][:stream]

      case msg["command"]
      when "next"
        # Consumer is ready for next message
        @__emit_queues[stream][consumer][:ready] = true
        tuple_json = get_consumer_tuple(stream, consumer)

        if tuple_json.nil?
          # Everything has been sent to this consumer: end its cycle
          send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
          consumers_running -= 1
          return if consumers_running == 0
        else
          emit_consumer_tuple(stream, consumer, tuple_json)
        end
      when "kill_cycle"
        return
      end
    end
  end
end

# Build the joined rows for run_join.
#
# lhs_index / rhs_index map a join-key value to the array of tuples (hashes)
# received with that key on the left / right side. Rows with a nil key never
# match anything. The output columns are taken from the first tuple of each
# side; matched rows prefer the value of the driving side, unmatched rows keep
# their own values and get nil for every missing output column.
def self.join_indexed_tuples(lhs_index, rhs_index, join_type)
  first_lhs = lhs_index.empty? ? {} : lhs_index.values[0][0]
  first_rhs = rhs_index.empty? ? {} : rhs_index.values[0][0]
  joined_fields = (first_lhs.keys + first_rhs.keys).uniq

  # A fresh hash per matched pair, so rows never alias each other
  merge = lambda do |primary, secondary|
    joined_fields.each_with_object({}) do |field, row|
      row[field] = primary.has_key?(field) ? primary[field] : secondary[field]
    end
  end

  pad = lambda do |tuple|
    row = tuple.dup
    joined_fields.each { |field| row[field] = nil unless row.has_key?(field) }
    row
  end

  rows = []
  case join_type
  when "inner", "left", "outer"
    lhs_index.each_pair do |key, group|
      partners = key.nil? ? nil : rhs_index[key]
      group.each do |lhs_tuple|
        if partners
          partners.each { |rhs_tuple| rows << merge.call(lhs_tuple, rhs_tuple) }
        elsif join_type != "inner"
          rows << pad.call(lhs_tuple)
        end
      end
    end

    if join_type == "outer"
      # Right-hand rows that found no partner on the left
      rhs_index.each_pair do |key, group|
        next unless key.nil? || !lhs_index.has_key?(key)
        group.each { |rhs_tuple| rows << pad.call(rhs_tuple) }
      end
    end
  when "right"
    rhs_index.each_pair do |key, group|
      partners = key.nil? ? nil : lhs_index[key]
      group.each do |rhs_tuple|
        if partners
          partners.each { |lhs_tuple| rows << merge.call(rhs_tuple, lhs_tuple) }
        else
          rows << pad.call(rhs_tuple)
        end
      end
    end
  end
  rows
end
1207
-
1208
# A filter is executed exactly like an "each" operation: delegate to run_each
# and hand back whatever it returns.
def self.run_filter
  run_each
end
1211
-
1212
-
1213
- # Collect the tuples received by a sink, validate them and display/write them
1214
# Run a sink: collect every tuple sent by the consumee on "rd_child_1",
# validate it against the column types declared in @__node["columns"], then
# display the rows as a table and, when an output name was given in the
# options, write them to "<output>.csv".
#
# A tuple with fewer keys than declared columns, or whose values do not match
# the declared types, stops the sink with an error message. Reading stops on
# "end_cycle" or "kill_cycle".
def self.run_sink()
  columns = @__node["columns"]

  type_map = {
    "string" => String,
    "double" => Float,
    "integer" => Integer,
    "float" => Float,
    "array" => Array,
    "map" => Hash
  }

  # Column name => expected Ruby class
  col_map = {}
  columns.each do |hash|
    key = hash.keys[0]
    col_map[key] = type_map[hash[key]]
  end

  tuples = []

  # The other options in this file are read with symbol keys; the string key
  # is still honoured for callers that pass it that way.
  output = @__options[:output] || @__options["output"]

  loop do
    # Read messages
    obj = read_message(@__consumee_pipes["rd_child_1"])

    # Add row
    if obj['tuple']
      tuple = obj['tuple']
      display_json = Hash[tuple.map{|k, v| [Zillabyte::Runner::Operation.truncate_message(k), Zillabyte::Runner::Operation.truncate_message(v)]}].to_json

      if col_map.keys.length > tuple.keys.length
        cdisplay "Error: invalid keys for sink tuple : Expected #{col_map.keys} , got: #{tuple.keys}"
        cdisplay("\n \nPress Ctrl-C to exit", false)
        return
      end

      # Check tuple columns for valid entry
      columns_to_check = col_map.length
      tuple.keys.each do |col|
        value = tuple[col]
        # A message without "column_aliases" simply has no aliases
        matched_column = check_tuple_for_alias(col, obj['column_aliases'] || [], col_map.keys)
        next if matched_column.nil?

        # A mismatching value leaves columns_to_check above zero
        break unless value.nil? || (value.is_a? col_map[matched_column])

        columns_to_check -= 1
        # Set the proper column name
        if col != matched_column
          tuple[matched_column] = value
          tuple.delete(col)
        end
      end

      if columns_to_check != 0
        cdisplay "Error: invalid schema for sink tuple #{display_json}"
        return
      end

      tuples << obj
      cdisplay "received #{display_json}" if @__options[:interactive]
      write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

    # End cycle
    elsif obj['command']
      case obj['command']
      when "end_cycle", "kill_cycle"
        break
      end
    end
  end

  if tuples.empty?
    cdisplay "No tuples received"
    return
  end

  # Build table (both libraries are loaded lazily, only when a sink runs)
  require("terminal-table")
  table = Terminal::Table.new :title => @__name
  require("csv")
  csv_str = CSV.generate do |csv|
    header_written = false
    tuples.each do |obj|
      begin
        t = obj['tuple']
        m = obj['meta'] || {}
        next unless t

        unless header_written
          keys = t.keys + m.keys
          csv << keys
          table << keys
          table << :separator
          header_written = true
        end

        # Plain concatenation: flattening here would spread an array-typed
        # column value over several cells and misalign the row.
        vals = t.values + m.values
        csv << vals
        table << vals.map { |v| v.to_s[0..100] }
      rescue => e
        cdisplay e.message
        cdisplay e.backtrace
      end
    end
  end

  # Output table
  cdisplay("\n#{table.to_s}")
  cdisplay "#{tuples.length} rows"

  # Write file
  if output
    filename = "#{output}.csv"
    # Block form closes the file even when the write raises
    File.open(filename, "w") { |f| f.write(csv_str) }
    cdisplay("output written to #{filename}")
  end
end
1345
-
1346
-
1347
- private
1348
-
1349
- BUFSIZE = 8192
1350
-
1351
- # Each reading pipe has a read buffer and message queue
1352
- @__read_buffers = {}
1353
- @__read_buffered_messages = {}
1354
-
1355
-
1356
- # Return available reading streams
1357
# Pick the streams that can be read without blocking.
#
# Streams that still hold already-parsed messages in
# @__read_buffered_messages are returned first; only when none has buffered
# messages does the call fall back to IO.select on all of read_streams.
# Every stream gets an (empty) message queue as a side effect.
def self.select_read_streams(read_streams)
  with_backlog = read_streams.select do |stream|
    backlog = (@__read_buffered_messages[stream] ||= [])
    !backlog.empty?
  end
  return with_backlog unless with_backlog.empty?

  readable, _writable, _errored = IO.select(read_streams, [], [])
  readable
end
1370
-
1371
-
1372
- # Read a JSON message
1373
# Read one JSON message from read_stream.
#
# Messages are separated by ENDMARKER. Bytes are pulled from the stream in
# BUFSIZE chunks until at least one marker is buffered; every complete message
# in the buffer is then parsed and queued, any trailing partial message stays
# in @__read_buffers for the next call, and the oldest queued message is
# returned. A message that is not valid JSON is reported through cdisplay and
# skipped, so the result is nil when nothing valid was received.
#
# The buffer is consumed marker by marker instead of with String#split:
# split drops trailing empty fields, which made a buffer ending in an empty
# message look partial and fed nil to JSON.parse (TypeError).
def self.read_message(read_stream)
  @__read_buffers[read_stream] ||= ""
  queue = (@__read_buffered_messages[read_stream] ||= [])
  return queue.shift unless queue.empty?

  # Read until at least one complete message is available
  buffer = @__read_buffers[read_stream]
  buffer << read_stream.sysread(BUFSIZE) until buffer.include?(ENDMARKER)

  # Peel complete messages off the front of the buffer
  while (marker_at = buffer.index(ENDMARKER))
    raw = buffer.slice!(0, marker_at + ENDMARKER.length)[0, marker_at]
    begin
      queue << JSON.parse(raw)
    rescue JSON::ParserError
      cdisplay "READMESSAGE: invalid JSON #{raw}"
    end
  end

  queue.shift
end
1424
-
1425
-
1426
- # Write JSON message
1427
# Send one message on write_stream: the text is stripped of surrounding
# whitespace, terminated with ENDMARKER, written and flushed immediately.
def self.write_message(write_stream, msg)
  framed = "#{msg.strip}#{ENDMARKER}"
  write_stream.write(framed)
  write_stream.flush
end
1432
-
1433
-
1434
- # Handshake connection to multilang
1435
# Send HANDSHAKE_MESSAGE on write_stream and return the reply read from
# read_stream. Any StandardError is reported through cdisplay and re-raised.
def self.handshake(write_stream, read_stream)
  write_message(write_stream, HANDSHAKE_MESSAGE)
  read_message(read_stream)
rescue => error
  cdisplay("Error handshaking node")
  raise error
end
1444
-
1445
-
1446
- # Instruct multilang to run prepare step
1447
# Send PREPARE_MESSAGE on write_stream and return the reply read from
# read_stream. Any StandardError is reported through cdisplay and re-raised.
def self.prepare(write_stream, read_stream)
  write_message(write_stream, PREPARE_MESSAGE)
  read_message(read_stream)
rescue => error
  cdisplay("Error running prepare")
  raise error
end
1456
-
1457
-
1458
- # Instruct multilang to begin cycle
1459
# Send BEGIN_CYCLE_MESSAGE on write_stream and wait on read_stream until the
# other side answers with a "done" command.
#
# "log" commands received in the meantime are displayed. Any other message is
# treated as a failure: it is reported through cdisplay and a RuntimeError is
# raised with the message's "msg" text. When the message carries no "msg" (or
# no message could be read at all) a descriptive RuntimeError is raised
# instead of the TypeError/ArgumentError the bare `raise nil` / `Hash[nil]`
# used to produce.
#
# Returns nil once "done" has been received.
def self.begin_cycle(write_stream, read_stream)
  write_message(write_stream, BEGIN_CYCLE_MESSAGE)

  loop do
    obj = read_message(read_stream) || {}
    case obj["command"]
    when "log"
      cdisplay "LOG: #{obj['msg']}"
    when "done"
      break
    else
      cdisplay("Error beginning cycle")
      reason = obj["msg"]
      raise(reason.nil? ? "Error beginning cycle: unexpected message #{obj.inspect}" : reason.to_s)
    end
  end
end
1478
-
1479
- # Build the hash of consumer streams for lookup when receiving responses
1480
# Map each consumer's read pipe to the stream/consumer pair it belongs to.
#
# For every consumer listed in @__emit_queues the pipes registered in
# @__consumer_pipes are inspected: "rd_parent_1" is used when present,
# otherwise "rd_parent_2"; consumers with neither are left out. The result is
# { read_pipe => { :stream => ..., :consumer => ... } }.
def self.build_consumer_hash
  @__emit_queues.each_with_object({}) do |(stream, consumers), lookup|
    consumers.each_key do |consumer|
      pipes = @__consumer_pipes[stream][consumer]
      reader_key = ["rd_parent_1", "rd_parent_2"].find { |name| pipes.has_key?(name) }
      next if reader_key.nil?

      lookup[pipes[reader_key]] = {:stream => stream, :consumer => consumer}
    end
  end
end
1499
-
1500
- # Send object to every consumee (input side) of the operation
1501
# Write json_obj to each input-side pipe registered in @__consumee_pipes:
# "wr_child_1" (left-hand or only input) and then "wr_child_2" (right-hand
# input), skipping whichever key is absent.
def self.send_to_consumees(json_obj)
  inputs = @__consumee_pipes
  write_message(inputs["wr_child_1"], json_obj) if inputs.has_key?("wr_child_1")
  write_message(inputs["wr_child_2"], json_obj) if inputs.has_key?("wr_child_2")
end
1515
-
1516
- # Send object to every consumer of the operation, regardless of stream
1517
# Write json_obj to every consumer registered in @__consumer_pipes, whatever
# its stream. Each consumer is reached through its "wr_parent_1" pipe when it
# has one, otherwise through "wr_parent_2"; a consumer with neither receives
# nothing. When display is true an "emitted ..." line is shown per consumer.
def self.send_to_consumers(json_obj, display = false)
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_pair do |consumer, pipe|
      side = [1, 2].find { |number| pipe.has_key?("wr_parent_#{number}") }
      write_message(get_write_stream(stream, consumer, side), json_obj) unless side.nil?

      cdisplay "emitted #{json_obj.chomp} to #{consumer}" if display
    end
  end
end
1534
-
1535
-
1536
- # Get the write pipe of the stream consumer
1537
# Look up the pipe used to write to a stream's consumer.
# number selects the "wr_parent_<number>" entry (1 by default).
def self.get_write_stream(stream, consumer, number=1)
  @__consumer_pipes[stream][consumer]["wr_parent_#{number}"]
end
1541
-
1542
-
1543
- # Get tuple for sending to consumer of stream
1544
# Remove and return the oldest tuple queued for the given consumer of a
# stream (nil once its write queue is empty).
def self.get_consumer_tuple(stream, consumer)
  pending = @__emit_queues[stream][consumer][:write_queue]
  pending.shift
end
1547
-
1548
-
1549
-
1550
- # Send a command message to a consumer
1551
# Write a command message to one consumer of a stream, through its
# "wr_parent_1" pipe when it has one and otherwise through "wr_parent_2",
# then mark the consumer as not ready in @__emit_queues.
def self.send_command_tuple(stream, consumer, json_obj)
  endpoints = @__consumer_pipes[stream][consumer]
  side = [1, 2].find { |number| endpoints.has_key?("wr_parent_#{number}") }
  write_message(get_write_stream(stream, consumer, side), json_obj) unless side.nil?

  @__emit_queues[stream][consumer][:ready] = false
end
1565
-
1566
-
1567
- # Emit tuple_json to the consumer of a stream
1568
# Send tuple_json to one consumer of a stream and report it.
#
# A truncated rendering of the message's "tuple" is prepared for display; if
# tuple_json is not valid JSON an error line is shown and the rendering stays
# empty. The message itself is written through "wr_parent_1" when the
# consumer has that pipe, otherwise through "wr_parent_2". The consumer is
# then marked as not ready and an "emitted tuple ..." line is displayed.
def self.emit_consumer_tuple(stream, consumer, tuple_json)
  display_json = nil
  begin
    shortened = JSON.parse(tuple_json)["tuple"].map do |key, value|
      [Zillabyte::Runner::Operation.truncate_message(key), Zillabyte::Runner::Operation.truncate_message(value)]
    end
    display_json = Hash[shortened].to_json
  rescue JSON::ParserError
    cdisplay "Error: invalid JSON"
  end

  endpoints = @__consumer_pipes[stream][consumer]
  side = [1, 2].find { |number| endpoints.has_key?("wr_parent_#{number}") }
  write_message(get_write_stream(stream, consumer, side), tuple_json) unless side.nil?

  @__emit_queues[stream][consumer][:ready] = false
  cdisplay "emitted tuple #{display_json} to #{consumer} "
end
1590
-
1591
-
1592
- # Check if a tuple's column or its aliases matches an expected column and returns the valid column
1593
# Find the expected column that a tuple column stands for.
#
# column         - column name found in the tuple
# column_aliases - list of hashes with "alias" and "concrete_name" entries;
#                  nil is accepted and treated as "no aliases" (it used to
#                  raise NoMethodError)
# valid_columns  - the expected column names, checked in order
#
# Returns the first expected column that either equals column or is the
# "alias" of an entry whose "concrete_name" or "alias" equals column;
# nil when there is none.
def self.check_tuple_for_alias(column, column_aliases, valid_columns)
  aliases = column_aliases || []

  valid_columns.find do |expected|
    column == expected || aliases.any? { |entry|
      entry["alias"] == expected && (column == entry["concrete_name"] || column == entry["alias"])
    }
  end
end
1610
-
1611
- # Build a tuple and format into JSON
1612
# Build the JSON message for a tuple emitted by this operation.
#
# tuple          - hash of column name => value. The "id" entry is dropped and
#                  the "confidence", "since" and "source" entries are moved
#                  into the meta section.
# meta           - optional hash of extra meta entries; it is copied, so the
#                  caller's hash is no longer modified
# column_aliases - optional aliases, passed through unchanged ({} when nil)
#
# Returns a JSON string with the keys "op" (@__name), "tuple", "meta" and
# "column_aliases".
def self.build_tuple_json(tuple, meta = nil, column_aliases = nil)
  meta = meta ? meta.dup : {}
  column_aliases ||= {}
  values = {}

  tuple.each do |k, v|
    case k
    when "id"
      next
    when "confidence", "since", "source"
      meta[k] = v
    else
      values[k] = v
    end
  end

  {"op" => @__name, "tuple" => values, "meta" => meta, "column_aliases" => column_aliases}.to_json
end
1629
-
1630
-
1631
- # Construct a multilang command
1632
# Build the shell command line that runs the operation's script.
#
# arg           - argument string appended after the script path
# ignore_stderr - when true, "2> /dev/null" is appended to the command
#
# The script path is @__meta["script"] joined to @__dir and the interpreter
# depends on @__meta["language"] ("ruby", "python" or "js"); for python the
# interpreter from "<dir>/vEnv" is used when that directory exists.
# Returns the command string, or nil (after displaying a message) for any
# other language. A message is also displayed when @__meta is nil.
def self.command(arg, ignore_stderr=false)
  cdisplay("could not extract meta information. missing zillabyte.conf.yml?") if @__meta.nil?
  full_script = File.join(@__dir, @__meta["script"])
  stderr_opt = ignore_stderr ? "2> /dev/null" : nil
  language = @__meta["language"]

  if language == "ruby"
    # Execute in the bundler context
    return "cd \"#{@__dir}\"; unset BUNDLE_GEMFILE; ZILLABYTE_HARNESS=1 bundle exec ruby \"#{full_script}\" #{arg} #{stderr_opt}"
  end

  if language == "python"
    interpreter = File.directory?("#{@__dir}/vEnv") ? "#{@__dir}/vEnv/bin/python" : "python"
    return "cd \"#{@__dir}\"; PYTHONPATH=~/zb1/multilang/python/Zillabyte #{interpreter} \"#{full_script}\" #{arg} #{stderr_opt}"
  end

  if language == "js"
    return "cd \"#{@__dir}\"; NODE_PATH=~/zb1/multilang/js/src/lib #{Zillabyte::API::NODEJS_BIN} \"#{full_script}\" #{arg} #{stderr_opt}"
  end

  cdisplay("no language specified")
  nil
end
1654
-
1655
-
1656
- # Display a colored, formatted message
1657
# Display a message for this operation by forwarding it, together with the
# operation name (@__name) and the useName flag, to @__tester.cdisplay.
def self.cdisplay(msg, useName=true)
  tester = @__tester
  tester.cdisplay(@__name, msg, useName)
end
1660
-
1661
- end; end; end
1662
-