zillabyte-cli 0.9.20 → 0.9.21

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,337 +0,0 @@
1
- require "zillabyte/runner/multilang_operation"
2
- require "zillabyte/runner/component_operation"
3
-
4
- # HIDDEN:
5
- class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
6
- include Zillabyte::Helpers
7
-
8
# Newline-terminated JSON command written to the input node's pipe when the
# user enters 'end' at the interactive prompt.
KILL_CYCLE_MESSAGE = %({"command": "kill_cycle"}\n)
9
-
10
# Runs a component locally: forks one child process per node, wires the
# children together with pipes and, when no :input option is given, feeds
# tuples typed by the user to the single "input" node.
#
# meta    - component metadata Hash; meta["nodes"] and meta["arcs"] are read.
# dir     - directory handed through to ComponentOperation.run (default: Dir.pwd).
# session - object all output is written to via #display; nothing happens
#           when it (or meta) is nil.
# options - Hash; only options[:input] is read here. For join nodes the child
#           process stores :join_options in it before running the operation.
#
# The parent always waits for every child (Process.waitall) and closes its
# pipe ends before returning.
def run(meta, dir = Dir.pwd, session = nil, options = {})
  return if meta.nil? || session.nil?

  @session = session
  @colors = {}

  input = options[:input]

  # Show the user what we know about their app...
  display "inferring your app details..."
  describe_component(meta)

  # Index nodes by name
  @nodes = meta["nodes"]
  @node_map = {}
  @nodes.each { |n| @node_map[n["name"]] = n }

  @arcs = meta["arcs"]

  # Organize component pipes
  @operations = {}
  @operation_pipes = {}

  begin
    # Create every node's pipes before forking so that each child inherits
    # the pipes of the nodes it has to talk to.
    @nodes.each do |n|
      name = n["name"]

      rd_child_1, wr_parent_1 = IO.pipe
      rd_parent_1, wr_child_1 = IO.pipe

      @operation_pipes[name] = {
        "rd_child_1" => rd_child_1,
        "wr_child_1" => wr_child_1,
        "rd_parent_1" => rd_parent_1,
        "wr_parent_1" => wr_parent_1
      }

      # Joins get a second (right hand side) set of pipes
      next unless n["type"] == "join"

      rd_child_2, wr_parent_2 = IO.pipe
      rd_parent_2, wr_child_2 = IO.pipe
      @operation_pipes[name]["rd_child_2"] = rd_child_2
      @operation_pipes[name]["wr_child_2"] = wr_child_2
      @operation_pipes[name]["rd_parent_2"] = rd_parent_2
      @operation_pipes[name]["wr_parent_2"] = wr_parent_2
    end

    # Maps origin => {stream => [destinations]}
    @arc_map = {}
    @arcs.each do |a|
      @arc_map[a["origin"]] ||= {}
      @arc_map[a["origin"]][a["name"]] ||= []
      @arc_map[a["origin"]][a["name"]] << a["dest"]
    end

    # Spawn one child process per node
    @nodes.each do |n|
      name = n["name"]
      type = n["type"]
      pipes = @operation_pipes[name]

      pid = fork
      if pid # in parent
        # The parent keeps only the *_parent_* ends of this node's pipes.
        pipes["rd_child_1"].close
        pipes["wr_child_1"].close

        if type == "join"
          pipes["rd_child_2"].close
          pipes["wr_child_2"].close
        end
      else # in child
        # The child keeps only the *_child_* ends of its own pipes.
        pipes["wr_parent_1"].close
        pipes["rd_parent_1"].close

        if type == "join"
          pipes["rd_parent_2"].close
          pipes["wr_parent_2"].close
        end

        begin
          # Pipes used to talk to the upstream (consumee) operation(s)
          if type != "join"
            in_pipes = {"rd_child_1" => pipes["rd_child_1"], "wr_child_1" => pipes["wr_child_1"]}
          else
            # A join reads its left side on pipe set 1 and its right side on set 2
            options[:join_options] = {}
            in_pipes = {}
            @arcs.each do |a|
              next unless a["dest"] == name

              if a["left"] == 1
                options[:join_options][:lhs] = a["origin"]
                in_pipes["rd_child_1"] = pipes["rd_child_1"]
                in_pipes["wr_child_1"] = pipes["wr_child_1"]
              elsif a["right"] == 1
                options[:join_options][:rhs] = a["origin"]
                in_pipes["rd_child_2"] = pipes["rd_child_2"]
                in_pipes["wr_child_2"] = pipes["wr_child_2"]
              end
            end
          end

          # Consumer pipes indexed by stream name, then consumer name
          out_pipes = {}

          if type != "output"
            # A node without outgoing arcs has no entry in @arc_map
            (@arc_map[name] || {}).each_pair do |stream, destinations|
              out_pipes[stream] ||= {}

              destinations.each do |dest|
                out_pipes[stream][dest] ||= {}
                dest_pipes = @operation_pipes[dest]

                if @node_map[dest]["type"] == "join"
                  # Pick the pipe set matching the side of the join we feed
                  @arcs.each do |a|
                    next unless a["dest"] == dest && a["origin"] == name

                    if a["left"] == 1
                      out_pipes[stream][dest]["wr_parent_1"] = dest_pipes["wr_parent_1"]
                      out_pipes[stream][dest]["rd_parent_1"] = dest_pipes["rd_parent_1"]
                      break
                    elsif a["right"] == 1
                      out_pipes[stream][dest]["wr_parent_2"] = dest_pipes["wr_parent_2"]
                      out_pipes[stream][dest]["rd_parent_2"] = dest_pipes["rd_parent_2"]
                      break
                    end
                  end
                else
                  out_pipes[stream][dest]["wr_parent_1"] = dest_pipes["wr_parent_1"]
                  out_pipes[stream][dest]["rd_parent_1"] = dest_pipes["rd_parent_1"]
                end
              end
            end
          end

          # Run the child process
          Zillabyte::Runner::ComponentOperation.run(n, dir, in_pipes, out_pipes, self, meta, options)
        rescue => e
          display e.message
          display e.backtrace
        ensure
          pipes["rd_child_1"].close
          pipes["wr_child_1"].close

          # Only joins have a second pipe set
          pipes["rd_child_2"].close if pipes["rd_child_2"]
          pipes["wr_child_2"].close if pipes["wr_child_2"]

          # The child must never fall through into the parent's code below
          exit!(-1)
        end
      end
    end

    # If no input file, read from STDIN
    # TODO handle inputs
    if input.nil?
      source = nil
      @nodes.each do |n|
        next unless n["type"] == "input"

        if source.nil?
          source = n["name"]
        else
          display "Error: Cannot run component with multiple input sources without input files"
          return
        end
      end

      # Without an input node there is nothing to prompt for
      if source.nil?
        display "Error: Cannot run component without an input source"
        return
      end

      display ""
      display "To view results: Enter 'end' "
      display ""

      # The schema does not change between prompts, so compute it once
      fields = @node_map[source]['fields'].map { |h| h.keys[0].upcase }
      source_pipe = @operation_pipes[source]["wr_parent_1"]

      loop do
        display "Enter an input tuple in the form : #{fields.join(' ')}"
        msg = ask

        # 'end' (or a nil answer, e.g. on end of input) kills the cycle
        if msg.nil? || msg == 'end'
          source_pipe.puts KILL_CYCLE_MESSAGE
          break
        end

        # Check arguments
        args = msg.scan(/(?:\w|"[^"]*")+/)
        if args.length % fields.length != 0
          display "Error: Argument length must be a multiple of the schema length"
          next
        end

        # Send tuple to source
        source_pipe.puts msg
      end
    end
  rescue => e
    display e.message
    display e.backtrace
  ensure
    Process.waitall
    @operation_pipes.each do |_name, pipes|
      pipes["wr_parent_1"].close
      pipes["rd_parent_1"].close

      # Only joins have a second pipe set
      pipes["wr_parent_2"].close if pipes["wr_parent_2"]
      pipes["rd_parent_2"].close if pipes["rd_parent_2"]
    end
  end
end
276
-
277
# Reader for the session object handed to #run (nil until #run stored one).
def session
  return @session
end
280
-
281
-
282
# Displays a (possibly multi-line) message in the colour registered for +name+.
#
# name    - key into @colors; also used as the line prefix.
# message - String (split on newlines) or Array of lines. nil, "" and
#           messages that contain no lines at all are ignored.
# useName - when true the first line is prefixed with "<name> - " and the
#           following lines are indented by the width of that prefix.
def cdisplay(name, message, useName=true)
  return if message.nil? || message == ""

  lines = message.is_a?(Array) ? message : message.split("\n")
  # "\n".split("\n") and [] have no lines; the range slice used previously
  # returned nil for them and crashed on .each
  return if lines.empty?

  color = @colors[name] || :default
  prefix = useName ? "#{name} - " : ""
  indent = ' ' * prefix.size

  display "#{prefix}#{lines.first}".colorize(color)
  lines.drop(1).each do |line|
    display "#{indent}#{line}".colorize(color)
  end
end
301
-
302
# Forwards output to the session stored by #run.
#
# message - whatever should be shown.
# newline - passed through unchanged as the session's second argument.
def display(message, newline = true)
  target = @session
  target.display(message, newline)
end
305
-
306
# Prints a summary of the component: its name and language, followed by one
# section per node. Every node is assigned a colour (remembered in @colors,
# cycling through a fixed palette) that is used for its section.
#
# meta - Hash; reads meta['name'], meta['language'] and meta['nodes'].
def describe_component(meta)
  require("colorize")
  require("indentation")
  palette = [:green, :yellow, :magenta, :cyan, :white, :blue, :light_yellow, :light_blue, :red, :light_magenta, :light_cyan]
  rjust = 20

  display "#{'component name'.rjust(rjust)}: #{meta['name']}"
  display "#{'component language'.rjust(rjust)}: #{meta['language']}"

  meta['nodes'].each_with_index do |node, index|
    color = @colors[node['name']] ||= palette[index % palette.length]
    kind = node['type']

    display(("=" * rjust + " operation ##{index}").colorize(color))
    display "#{"name".rjust(rjust)}: #{node['name'].to_s.colorize(color)}"
    display "#{"type".rjust(rjust)}: #{kind.to_s.colorize(color)}"

    # Inputs and outputs carry extra schema details
    case kind
    when "input"
      display "#{"fields".rjust(rjust)}: #{node['fields'].to_s.colorize(color)}"
      if node['matches']
        display "#{"matches".rjust(rjust)}: #{JSON.pretty_generate(node['matches']).indent(rjust+2).lstrip.colorize(color)}"
      end
    when "output"
      display "#{"columns".rjust(rjust)}: #{node['columns'].to_s.colorize(color)}"
    end
  end
end
336
-
337
- end
@@ -1,1662 +0,0 @@
1
-
2
- # Emulate a multilang operation
3
- module Zillabyte; module Runner; class MultilangOperation
4
-
5
# Protocol messages. Except for the pong prefix, each one ends with a newline.
HANDSHAKE_MESSAGE = %({"pidDir": "/tmp"}\n)
# NOTE(review): the leading space is part of the original value - confirm it is intended.
PREPARE_MESSAGE = %( {"command": "prepare"}\n)
DONE_MESSAGE = %({"command": "done"}\n)
NEXT_MESSAGE = %({"command": "next"}\n)
BEGIN_CYCLE_MESSAGE = %({"command": "begin_cycle"}\n)
END_CYCLE_MESSAGE = %({"command": "end_cycle"}\n)
KILL_CYCLE_MESSAGE = %({"command": "kill_cycle"}\n)
# A pong reply is PONG_PREFIX + timestamp + PONG_SUFFIX
PONG_PREFIX = %({"pong": ")
PONG_SUFFIX = %("}\n)
ENDMARKER = "\nend\n"
15
-
16
# Entry point: remembers the node and its pipes in class-level state and
# dispatches to the runner matching the node's type.
#
# node           - node Hash; node["name"] and node["type"] are read here.
# dir            - working directory (stored, and passed on for "component" nodes).
# consumee_pipes - pipes towards the upstream operation(s).
# consumer_pipes - {stream => {consumer => pipes}} for downstream operations.
# tester         - stored as @__tester for the runners.
# meta           - metadata, stored as @__meta.
# options        - Hash of run options; options[:output_type] is read here.
#
# Errors raised by a runner (StandardError) are displayed, not propagated.
def self.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options = {})
  require("mkfifo")
  require("zillabyte/runner/component_operation")
  require("zillabyte/runner/operation")

  require("pty")
  require("open3")

  @__node = node
  @__name = node["name"]
  @__tester = tester

  @__type = node["type"]
  @__dir = dir

  @__consumee_pipes = consumee_pipes
  @__consumer_pipes = consumer_pipes
  @__meta = meta
  @__options = options
  @__output_type = options[:output_type]

  # Each consumer of a stream gets its own queue and ready flag
  @__emit_queues = {}
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      @__emit_queues[stream] ||= {}
      @__emit_queues[stream][consumer] = {:write_queue => [], :ready => true}
    end
  end

  begin
    case @__type
    when "source"
      run_source()
    when "group_by"
      run_group_by()
    when "join"
      run_join()
    when "each"
      run_each()
    when "filter"
      run_filter()
    when "component"
      # Forward the caller's options (this used to pass `options = {}`,
      # which replaced them with an empty Hash).
      Zillabyte::Runner::ComponentOperation.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options)
    when "sink"
      run_sink()
    else
      cdisplay("invalid operation type #{@__type}")
    end
  rescue => e
    cdisplay e.message
  end
end
69
-
70
-
71
# Emulates a source operation. Depending on the configuration the tuples come
# from the parent process (options[:interactive]), from a remote relation
# (node 'matches' / 'relation') or from the user's own multilang process.
def self.run_source()
  if @__options[:interactive]
    run_interactive_source
  elsif @__node['matches'] || @__node["relation"]
    run_relation_source
  else
    run_custom_source
  end
end

# Internal: interactive source. Reads JSON lines from the consumee pipe and
# forwards each one as a tuple until the kill message (or end of input) arrives.
def self.run_interactive_source
  loop do
    msg = @__consumee_pipes["rd_child_1"].gets

    # gets returns nil at end of input; nothing more can arrive, so stop
    # the cycle instead of failing inside JSON.parse.
    if msg.nil? || msg == KILL_CYCLE_MESSAGE
      send_to_consumers(KILL_CYCLE_MESSAGE)
      return
    end

    # Build tuple
    begin
      obj = JSON.parse(msg)
    rescue JSON::ParserError
      cdisplay "Error: invalid JSON"
      next
    end

    send_to_consumers(build_tuple_json(obj))
  end
end

# Internal: relation source. Fetches the rows through the tester, queues every
# row for every consumer and hands them out one at a time as consumers ask
# for the next tuple.
def self.run_relation_source
  # Query API for rows
  matches = @__node['matches'] || (@__node["relation"]["query"])
  cdisplay("Fetching remote data...")
  res = @__tester.query_agnostic(matches)
  rows = res["rows"]
  if rows.nil? || rows.length == 0
    cdisplay("Could not find data that matches your 'matches' clause")
    exit(-1)
  end

  # Enqueue rows for sending to consumers
  column_aliases = res['column_aliases']
  rows.each do |tuple|
    tuple_json = build_tuple_json(tuple, nil, column_aliases)
    @__emit_queues.each_value do |consumers|
      consumers.each_value { |emitter| emitter[:write_queue] << tuple_json }
    end
  end

  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash()

  # Send first tuple
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      emit_consumer_tuple(stream, consumer, get_consumer_tuple(stream, consumer))
    end
  end

  # Send tuples to consumers as they become ready
  loop do
    ready, = IO.select(consumer_hash.keys, [], [])

    emitted = false
    ready.each do |r|
      msg = read_message(r)

      stream = consumer_hash[r][:stream]
      consumer = consumer_hash[r][:consumer]

      case msg["command"]
      when "next"
        @__emit_queues[stream][consumer][:ready] = true
        tuple_json = get_consumer_tuple(stream, consumer)

        if tuple_json.nil?
          # Everything has been sent to this consumer: end its cycle
          cdisplay "ending cycle for #{consumer}"
          send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
          send_command_tuple(stream, consumer, DONE_MESSAGE)
        else
          emit_consumer_tuple(stream, consumer, tuple_json)
          emitted = true
        end
      when "kill_cycle"
        send_to_consumers(KILL_CYCLE_MESSAGE, false)
        return
      end
    end

    # Exit when done emitting
    return unless emitted
  end
end

# Internal: custom source. Spawns the user's multilang process, talks to it
# over a local TCP socket and relays its emits to the consumers. The process
# is left through exit / exit! once the cycle has ended.
def self.run_custom_source
  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash()

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Kill the cycle on error
  cycle_killed = false

  # Setup multilang socket
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0)
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()

          begin
            # Streams from consumers and multilang (stdout and socket)
            read_streams = consumer_hash.keys.concat [stdout, ml_socket]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            begin_cycle(ml_socket, ml_socket)
            emitted = false
            write_message(ml_socket, NEXT_MESSAGE)
            multilang_queue = []
            # NOTE(review): the policy is taken from the options only; the
            # node's own "end_cycle_policy" entry is not consulted - confirm.
            end_cycle_policy = @__options[:end_cycle_policy]
            end_cycle_received = false

            # Receive and handle messages
            loop do
              rs = select_read_streams(read_streams)
              rs.each do |r|
                # Read stdout straight to user
                if r == stdout && consumers_running > 0
                  line = r.gets
                  # gets returns nil once the child closed its stdout
                  cdisplay("log: #{line.sub(/\n/, "")}") unless line.nil?
                  next
                end

                obj = read_message(r)
                next if obj.nil?

                if obj["command"]
                  case obj["command"]

                  # Multilang emitted a tuple
                  when "emit"
                    stream = obj['stream']

                    # Unless the policy is explicit, an emit containing nil
                    # values marks the end of the cycle and is not forwarded.
                    if end_cycle_policy != "explicit"
                      if obj['tuple'].nil?
                        # NOTE(review): a nil tuple is still passed to
                        # build_tuple_json below - confirm that is intended.
                        end_cycle_received = true
                      else
                        end_cycle_received = obj['tuple'].values.any?(&:nil?)
                        next if end_cycle_received
                      end
                    end

                    # Valid emit
                    emitted = true

                    # Send or enqueue tuple for each consumer
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])

                    @__emit_queues[stream].each_pair do |consumer, emitter|
                      if emitter[:ready]
                        emit_consumer_tuple(stream, consumer, tuple_json)
                      else
                        emitter[:write_queue] << tuple_json
                      end
                    end

                  # Consumer is ready for a message
                  when "next"
                    stream = consumer_hash[r][:stream]
                    consumer = consumer_hash[r][:consumer]
                    @__emit_queues[stream][consumer][:ready] = true
                    tuple_json = get_consumer_tuple(stream, consumer)

                    if tuple_json.nil? && end_cycle_received
                      # Consumer has processed all tuples: end its cycle
                      send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                      consumers_running -= 1
                      exit(0) if consumers_running == 0
                    elsif !tuple_json.nil?
                      emit_consumer_tuple(stream, consumer, tuple_json)
                      emitted = true
                    end

                  # Multilang is done emitting a group of tuples
                  when "done"
                    # End cycle if no tuples were emitted
                    if !emitted && end_cycle_policy == "null_emit"
                      end_cycle_received = true
                    else
                      emitted = false
                    end

                    if !multilang_queue.empty?
                      # Send the next tuple to multilang
                      write_message(ml_socket, multilang_queue.shift)
                    elsif !end_cycle_received
                      # Request next tuple from multilang
                      write_message(ml_socket, NEXT_MESSAGE)
                    else
                      # Nothing left to send: end cycle for ready consumers
                      @__emit_queues.each_pair do |out_stream, consumers|
                        consumers.each_pair do |consumer, emitter|
                          next unless emitter[:ready]

                          send_command_tuple(out_stream, consumer, END_CYCLE_MESSAGE)
                          consumers_running -= 1
                          exit(0) if consumers_running == 0
                        end
                      end
                    end

                  # Multilang sent an error message
                  when "fail"
                    cdisplay("ERROR : #{obj['msg']}")
                    cycle_killed = true
                    exit(0)

                  # Multilang sent a log message
                  when "log"
                    cdisplay "LOG: #{obj['msg']}"

                  # Multilang sent signal to end the cycle
                  when "end_cycle"
                    if end_cycle_policy != "explicit"
                      cdisplay "received end_cycle command for non explicit policy"
                      next
                    end
                    end_cycle_received = true

                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end
            end

            # Exit after ending consumer cycles
            exit(0) if consumers_running == 0
          rescue => e
            cycle_killed = true
            cdisplay e.message
            cdisplay e.backtrace
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()
      rescue => e
        # Report instead of silently swallowing the error
        cdisplay e.message
        cdisplay e.backtrace
      ensure
        # cleanup
        send_to_consumers(KILL_CYCLE_MESSAGE, false) if cycle_killed
        pid = wait_thread[:pid]
        stdout.close
        begin
          Process.kill('INT', pid)
        rescue Errno::ESRCH
          # the multilang process is already gone; still exit below
        end
        Process.exit!(true)
      end
    end
  rescue PTY::ChildExited
  end
end
404
-
405
-
406
# Emulates an "each" operation: tuples received from the consumee are passed
# to the user's multilang process (spawned here, reached over a local TCP
# socket) and whatever it emits is relayed to the consumers.
def self.run_each()
  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash()

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Kill the cycle on error
  cycle_killed = false

  # Setup multilang socket
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0)
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    # Start the operation...
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()
          begin
            # Streams from consumers, the consumee, multilang and its stdout
            read_streams = consumer_hash.keys.concat [@__consumee_pipes["rd_child_1"], ml_socket, stdout]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            multilang_queue = []
            multilang_count = 0 # tuples handed to multilang that are not "done" yet
            end_cycle_received = false
            column_aliases = nil

            # Receive and handle messages
            loop do
              rs = select_read_streams(read_streams)
              rs.each do |r|
                # Read STDOUT from program straight to user
                if r == stdout
                  line = r.gets
                  cdisplay("LOG: #{line.sub(/\n/, "")}") unless line.nil?
                  next
                end

                # Receive an object; skip streams that produced nothing
                obj = read_message(r)
                next if obj.nil?

                if obj["command"]
                  case obj["command"]

                  # Multilang emitted a tuple
                  when "emit"
                    stream = obj["stream"]

                    # Send or enqueue tuple for each consumer
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], column_aliases)

                    @__emit_queues[stream].each_pair do |consumer, emitter|
                      if emitter[:ready]
                        emit_consumer_tuple(stream, consumer, tuple_json)
                      else
                        emitter[:write_queue] << tuple_json
                      end
                    end

                  # Consumer is ready for a message
                  when "next"
                    stream = consumer_hash[r][:stream]
                    consumer = consumer_hash[r][:consumer]

                    @__emit_queues[stream][consumer][:ready] = true
                    tuple_json = get_consumer_tuple(stream, consumer)

                    if tuple_json.nil? && end_cycle_received
                      # Consumer has processed all tuples: end its cycle
                      send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                      consumers_running -= 1
                      # Leaves rs.each; the check after it exits the process
                      break if consumers_running == 0
                    elsif !tuple_json.nil?
                      emit_consumer_tuple(stream, consumer, tuple_json)
                    end

                  # Multilang is done emitting a group of tuples
                  when "done"
                    multilang_count -= 1

                    if !multilang_queue.empty?
                      # Send the next tuple to multilang
                      write_message(ml_socket, multilang_queue.shift)
                    elsif end_cycle_received && multilang_count == 0
                      # Nothing left to process: end cycle for ready consumers
                      @__emit_queues.each_pair do |out_stream, consumers|
                        consumers.each_pair do |consumer, emitter|
                          next unless emitter[:ready]

                          send_command_tuple(out_stream, consumer, END_CYCLE_MESSAGE)
                          consumers_running -= 1
                          break if consumers_running == 0
                        end
                      end
                    end

                  # Multilang sent an error message
                  when "fail"
                    cdisplay("ERROR : #{obj['msg']}")
                    cycle_killed = true
                    exit(0)

                  # Multilang sent a log message
                  when "log"
                    cdisplay "LOG: #{obj['msg']}"

                  # Consumee operation sent signal to end_cycle
                  when "end_cycle"
                    end_cycle_received = true
                    if multilang_count == 0
                      @__emit_queues.each_pair do |out_stream, consumers|
                        consumers.each_pair do |consumer, emitter|
                          next unless emitter[:ready]

                          send_command_tuple(out_stream, consumer, END_CYCLE_MESSAGE)
                          consumers_running -= 1
                          exit(0) if consumers_running == 0
                        end
                      end
                    end

                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Received a tuple from consumee
                elsif obj['tuple']
                  column_aliases = obj['column_aliases']

                  # Send or enqueue to multilang
                  multilang_count += 1
                  if multilang_queue.empty?
                    write_message(ml_socket, obj.to_json)
                  else
                    multilang_queue << obj.to_json
                  end
                  write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end

              # Exit after ending consumer cycles
              exit(0) if consumers_running == 0
            end
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()
      rescue => e
        cdisplay e.message
        cdisplay e.backtrace
      ensure
        # cleanup
        if cycle_killed
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          send_to_consumees(KILL_CYCLE_MESSAGE)
        end
        stdin.close
        stdout.close
        stderr.close
      end
    end
  rescue PTY::ChildExited
    cdisplay("The child process exited!")
  end
end
618
-
619
-
620
# Emulates a "group_by" operation. Tuples from the consumee are collected
# into groups keyed by the node's 'group_by' fields. Once the consumee ends
# its cycle every group is sent to the user's multilang process as
# begin_group / aggregate... / end_group messages, and the tuples it emits are
# then handed to the consumers. The process is left through exit / exit!.
def self.run_group_by()
  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Kill the cycle on error
  cycle_killed = false

  # Setup groups
  group_by = @__node['group_by']
  group_tuples = {}
  emit_count = 0 # how many emits are expected once groups are sent to multilang
  tuple_queue = [] # messages waiting to be sent to multilang

  # Setup multilang socket
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0)
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    # Start the operation...
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()
          begin
            # Streams from consumers, multilang, and the consumee
            read_streams = consumer_hash.keys.concat [stdout, ml_socket, @__consumee_pipes["rd_child_1"]]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            end_cycle_received = false
            finished_emitting = false

            # Sends the next queued group message (if any) to multilang
            send_next_queued = lambda do
              queued = tuple_queue.shift
              write_message(ml_socket, queued) unless queued.nil?
            end

            # Handle streams
            loop do
              rs = select_read_streams(read_streams)
              rs.each do |r|
                # Read STDOUT from program straight to user
                if r == stdout
                  line = r.gets
                  # gets returns nil once the child closed its stdout
                  cdisplay("log: #{line.sub(/\n/, "")}") unless line.nil?
                  next
                end

                # Receive an object; skip streams that produced nothing
                obj = read_message(r)
                next if obj.nil?

                if obj["command"]
                  case obj["command"]

                  when "done"
                    send_next_queued.call if end_cycle_received
                    next

                  # Begin aggregation
                  when "end_cycle"
                    end_cycle_received = true
                    # From now on only multilang is listened to
                    read_streams = [ml_socket]

                    group_tuples.each do |group_tuple, tuples|
                      tuple_queue << "{\"command\": \"begin_group\", \"tuple\": #{group_tuple.to_json}, \"meta\":{}}\n"
                      tuples.each do |t|
                        tuple_queue << "{\"command\": \"aggregate\", #{t}}\n"
                      end
                      tuple_queue << "{\"command\": \"end_group\"}\n"

                      # keep track of how many emits are expected
                      emit_count += 1
                    end

                    send_next_queued.call

                  # Multilang has emitted a grouped tuple
                  when "emit"
                    emit_count -= 1

                    # Enqueue for consumers.
                    # NOTE(review): the tuple is queued for the consumers of
                    # every stream, not only obj['stream'] - confirm intent.
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
                    @__emit_queues.each_value do |consumers|
                      consumers.each_value { |emitter| emitter[:write_queue] << tuple_json }
                    end

                    # End cycle when done emitting
                    if end_cycle_received && emit_count == 0
                      finished_emitting = true
                      break
                    elsif end_cycle_received
                      send_next_queued.call
                    end

                  # An error has occurred
                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Received a tuple from operation
                elsif obj["tuple"]
                  tuple = obj["tuple"].to_json
                  meta = obj["meta"].to_json
                  column_aliases = obj["column_aliases"] || {}
                  aliases = Hash[column_aliases.map { |h| [h["alias"], h["concrete_name"]] }]

                  # Build the group key from the columns to group on
                  gt = {}
                  group_by.each do |field|
                    field_name = aliases[field] || field
                    gt[field] = obj["tuple"][field_name]
                  end

                  msg_no_brackets = "\"tuple\": #{tuple}, \"meta\": #{meta}, \"column_aliases\": #{column_aliases.to_json}"

                  # Group tuple into existing group or create new group
                  (group_tuples[gt] ||= []) << msg_no_brackets

                  # Ask operation for next tuple
                  write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end

              # Send tuples to consumers
              if finished_emitting && consumers_running > 0

                # Send first tuple
                @__emit_queues.each_pair do |out_stream, consumers|
                  consumers.each_key do |consumer|
                    emit_consumer_tuple(out_stream, consumer, get_consumer_tuple(out_stream, consumer))
                  end
                end

                # Send tuples to consumers as they become ready
                loop do
                  ready, = IO.select(consumer_hash.keys, [], [])

                  emitted = false
                  ready.each do |r|
                    msg = read_message(r)
                    consumer = consumer_hash[r][:consumer]
                    stream = consumer_hash[r][:stream]

                    # Only "next" (consumer is ready) is handled here
                    next unless msg["command"] == "next"

                    @__emit_queues[stream][consumer][:ready] = true
                    tuple_json = get_consumer_tuple(stream, consumer)

                    if tuple_json.nil?
                      # Everything has been sent to this consumer: end its cycle
                      write_message(get_write_stream(stream, consumer), END_CYCLE_MESSAGE)
                      consumers_running -= 1
                      break if consumers_running == 0
                    else
                      emit_consumer_tuple(stream, consumer, tuple_json)
                      emitted = true
                    end
                  end

                  # Exit when done emitting
                  exit(0) unless emitted
                end
                break

              # Exit after ending all consumer cycles
              elsif consumers_running == 0
                break
              end
            end
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()

      rescue Errno::EIO
        cdisplay("Errno:EIO error")
      rescue => e
        # Report the failure; the ensure below exits the process right after
        cdisplay e.message
        cdisplay e.backtrace
      ensure
        # cleanup
        if cycle_killed
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          send_to_consumees(KILL_CYCLE_MESSAGE)
        end

        pid = wait_thread[:pid]
        stdin.close
        stdout.close
        stderr.close
        begin
          Process.kill('INT', pid)
        rescue Errno::ESRCH
          # the multilang process is already gone; still exit below
        end
        Process.exit!(true)
      end
    end
  rescue PTY::ChildExited
    # NOTE(review): ml_pipe is not defined anywhere in this method - confirm
    # it is provided elsewhere, or drop this leftover cleanup.
    if File.exist?("#{ml_pipe}.in")
      File.delete("#{ml_pipe}.in")
    end
    cdisplay("The child process exited!")
  end
end
869
-
870
-
871
# Runs a join operation.
#
# Reads tuples from the left ("rd_child_1") and right ("rd_child_2") consumee
# pipes until both sides have sent "end_cycle", indexes them by their join
# key (@__node["lhs_fields"] / @__node["rhs_fields"]), builds the joined rows
# for @__node["join_type"] ("inner", "left", "right" or "outer"), queues them
# for every consumer in @__emit_queues and then hands them out one at a time
# as consumers ask for "next".
#
# Returns early (without emitting) when the node is missing its join
# configuration or when a "kill_cycle" command is received.
def self.run_join()
  lhs_fields = @__node["lhs_fields"]
  rhs_fields = @__node["rhs_fields"]
  join_type = @__node["join_type"]

  # Sanity check
  return if lhs_fields.nil? || rhs_fields.nil? || join_type.nil?

  # Index the consumer streams for lookup when they answer
  consumer_hash = build_consumer_hash()

  # Sides are told apart by the pipe a message arrives on, so the join also
  # works when both sides are fed by an operation with the same name.
  lhs_read = @__consumee_pipes["rd_child_1"]
  rhs_read = @__consumee_pipes["rd_child_2"]
  read_streams = [lhs_read, rhs_read]

  # NOTE(review): only one consumer is waited for before returning; with
  # several consumers the others may never receive END_CYCLE_MESSAGE.
  # Kept as in the original - confirm the intended count.
  consumers_running = 1

  # Incoming tuples, grouped by join key
  lhs_tuples = {}
  rhs_tuples = {}

  left_end_cycle_received = false
  right_end_cycle_received = false

  # Receive tuples until both consumees have ended their cycle
  until left_end_cycle_received && right_end_cycle_received
    select_read_streams(read_streams).each do |r|
      obj = read_message(r)
      from_left = (r == lhs_read)

      if obj["command"]
        case obj["command"]
        when "end_cycle"
          if from_left
            left_end_cycle_received = true
          else
            right_end_cycle_received = true
          end
          # Nothing left to ask for once both sides are done
          break if left_end_cycle_received && right_end_cycle_received
        when "kill_cycle"
          return
        end
      elsif obj['tuple']
        tuple = obj['tuple']
        if from_left
          (lhs_tuples[tuple[lhs_fields]] ||= []) << tuple
        else
          (rhs_tuples[tuple[rhs_fields]] ||= []) << tuple
        end
      end

      # Ask the side we just heard from for its next message
      write_message(@__consumee_pipes[from_left ? "wr_child_1" : "wr_child_2"], NEXT_MESSAGE)
    end
  end

  # Nothing was received at all: end every consumer's cycle
  if lhs_tuples.empty? && rhs_tuples.empty?
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
      end
    end
    return
  end

  joined_tuples = build_joined_tuples(lhs_tuples, rhs_tuples, join_type)

  # Setup output queues
  joined_tuples.each do |tuple|
    tuple_json = build_tuple_json(tuple)
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        @__emit_queues[stream][consumer][:write_queue] << tuple_json
      end
    end
  end

  # Send first tuple
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      tuple_json = get_consumer_tuple(stream, consumer)
      if tuple_json.nil?
        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
        consumers_running -= 1
        return if consumers_running == 0
      else
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end
  end

  # Send tuples to consumers as they ask for them
  loop do
    readable = IO.select(consumer_hash.keys, [], []).first

    readable.each do |r|
      msg = read_message(r)
      consumer = consumer_hash[r][:consumer]
      stream = consumer_hash[r][:stream]

      case msg["command"]
      when "next"
        @__emit_queues[stream][consumer][:ready] = true
        tuple_json = get_consumer_tuple(stream, consumer)

        if tuple_json.nil?
          # Everything has been sent to this consumer, end its cycle
          send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
          consumers_running -= 1
          return if consumers_running == 0
        else
          emit_consumer_tuple(stream, consumer, tuple_json)
        end
      when "kill_cycle"
        return
      end
    end
  end
end

# Internal helper for run_join: builds the joined rows.
#
# lhs_tuples / rhs_tuples map a join-key value to the array of tuples (hashes)
# received with that key. The output columns are the keys of the first left
# tuple followed by the keys of the first right tuple (duplicates removed).
# Tuples whose join key is nil never match anything. An unknown join_type
# yields no rows.
def self.build_joined_tuples(lhs_tuples, rhs_tuples, join_type)
  lhs_sample = (lhs_tuples.values.first || []).first || {}
  rhs_sample = (rhs_tuples.values.first || []).first || {}
  joined_fields = (lhs_sample.keys + rhs_sample.keys).uniq

  # One fresh row per match; the primary side wins on shared column names
  merge = lambda do |primary, secondary|
    joined_fields.each_with_object({}) do |field, row|
      row[field] = primary.has_key?(field) ? primary[field] : secondary[field]
    end
  end

  # Row for a tuple without a partner: its own values, nil for the rest
  pad = lambda do |tuple|
    row = tuple.dup
    joined_fields.each { |field| row[field] = nil unless row.has_key?(field) }
    row
  end

  # Walks the driving side in arrival order and pairs it with the other side
  drive = lambda do |driving, other, keep_unmatched|
    rows = []
    driving.each_pair do |key, group|
      matches = key.nil? ? [] : (other[key] || [])
      group.each do |tuple|
        if matches.empty?
          rows << pad.call(tuple) if keep_unmatched
        else
          matches.each { |match| rows << merge.call(tuple, match) }
        end
      end
    end
    rows
  end

  case join_type
  when "inner"
    drive.call(lhs_tuples, rhs_tuples, false)
  when "left"
    drive.call(lhs_tuples, rhs_tuples, true)
  when "right"
    drive.call(rhs_tuples, lhs_tuples, true)
  when "outer"
    rows = drive.call(lhs_tuples, rhs_tuples, true)
    # A full outer join also keeps right tuples that found no left partner
    rhs_tuples.each_pair do |key, group|
      next unless key.nil? || !lhs_tuples.has_key?(key)
      group.each { |tuple| rows << pad.call(tuple) }
    end
    rows
  else
    []
  end
end
1207
-
1208
# Runs a filter operation by delegating to run_each.
def self.run_filter()
  run_each
end
1211
-
1212
-
1213
# Runs a sink operation.
#
# Reads tuples from the consumee pipe ("rd_child_1") until an "end_cycle" or
# "kill_cycle" command arrives, validating each tuple against the columns
# declared in @__node["columns"] (column aliases are resolved to the declared
# column name). Afterwards the collected rows are displayed as a table and,
# when @__options["output"] is set, written to "<output>.csv".
#
# Returns early, after displaying an error, when a tuple has fewer keys than
# the sink has columns or when a value does not match its declared type.
def self.run_sink()
  columns = @__node["columns"]

  type_map = {
    "string" => String,
    "double" => Float,
    "integer" => Integer,
    "float" => Float,
    "array" => Array,
    "map" => Hash
  }

  # Declared column name => Ruby class its values must be an instance of
  col_map = {}
  columns.each do |hash|
    key = hash.keys[0]
    col_map[key] = type_map[hash[key]]
  end

  tuples = []

  output = @__options["output"]
  loop do
    # Read messages
    obj = read_message(@__consumee_pipes["rd_child_1"])

    # Add row
    if obj['tuple']
      tuple = obj['tuple']
      display_json = Hash[tuple.map { |k, v| [Zillabyte::Runner::Operation.truncate_message(k), Zillabyte::Runner::Operation.truncate_message(v)] }].to_json

      if col_map.keys.length > tuple.keys.length
        cdisplay "Error: invalid keys for sink tuple : Expected #{col_map.keys} , got: #{tuple.keys}"
        cdisplay("\n \nPress Ctrl-C to exit", false)
        return
      end

      # Check tuple columns for valid entries. A message may omit
      # "column_aliases", in which case only direct name matches apply.
      column_aliases = obj['column_aliases'] || []
      columns_to_check = col_map.length
      tuple.keys.each do |col|
        value = tuple[col]
        matched_column = check_tuple_for_alias(col, column_aliases, col_map.keys)
        next if matched_column.nil?

        # Stop at the first type mismatch; columns_to_check stays non-zero
        break unless value.nil? || value.is_a?(col_map[matched_column])

        columns_to_check -= 1
        # Store the value under the declared column name
        if col != matched_column
          tuple[matched_column] = value
          tuple.delete(col)
        end
      end

      if columns_to_check != 0
        cdisplay "Error: invalid schema for sink tuple #{display_json}"
        return
      end

      tuples << obj
      cdisplay "received #{display_json}" if @__options[:interactive]
      write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

    # End cycle
    elsif obj['command']
      case obj['command']
      when "end_cycle", "kill_cycle"
        break
      end
    end
  end

  if tuples.empty?
    cdisplay "No tuples received"
    return
  end

  # Build table
  require("terminal-table")
  table = Terminal::Table.new :title => @__name
  require("csv")
  csv_str = CSV.generate do |csv|
    header_written = false
    tuples.each do |obj|
      begin
        t = obj['tuple']
        m = obj['meta'] || {}

        if t
          unless header_written
            keys = t.keys + m.keys
            csv << keys
            table << keys
            table << :separator
            header_written = true
          end

          # Concatenate rather than flatten: an array-valued column must stay
          # a single cell so that values line up with the header
          vals = t.values + m.values
          csv << vals
          table << vals.map { |v| v.to_s[0..100] }
        end
      rescue JSON::ParserError
        cdisplay("invalid JSON")
        next
      rescue => e
        cdisplay e.message
        cdisplay e.backtrace
      end
    end
  end

  # Output table
  cdisplay("\n#{table.to_s}")
  cdisplay "#{tuples.length} rows"

  # Write file
  if output
    filename = "#{output}.csv"
    # Block form closes the file even when the write fails
    File.open(filename, "w") { |f| f.write(csv_str) }
    cdisplay("output written to #{filename}")
  end
end
1345
-
1346
-
1347
private

# Number of bytes requested from a stream per sysread call
BUFSIZE = 8 * 1024

# Per-stream read state, both keyed by the reading stream: the text received
# so far that has not been split into messages yet, and the parsed messages
# waiting to be handed out
@__read_buffers, @__read_buffered_messages = {}, {}
1354
-
1355
-
1356
# Returns the streams that have something to read.
#
# Streams that still hold already-parsed messages are returned right away;
# only when none has one does this wait in IO.select until at least one
# stream becomes readable. Also creates the message queue of every stream
# it is asked about.
def self.select_read_streams(read_streams)
  with_pending = read_streams.reject do |stream|
    (@__read_buffered_messages[stream] ||= []).empty?
  end
  return with_pending unless with_pending.empty?

  readable, = IO.select(read_streams, [], [])
  readable
end
1370
-
1371
-
1372
# Reads one JSON message from read_stream.
#
# Messages are separated by ENDMARKER. Messages that were already parsed on
# an earlier call are returned first; otherwise the stream is read (blocking,
# BUFSIZE bytes at a time) until at least one ENDMARKER has arrived. Every
# complete message in the buffer is parsed and queued, the trailing partial
# message is kept for the next call, and the first queued message is returned.
#
# Empty segments (two markers in a row) are ignored. Segments that are not
# valid JSON are reported through cdisplay and dropped.
#
# Returns the parsed message, or nil when the data read contained no valid
# message. Exceptions raised by sysread (e.g. EOFError) are not rescued.
def self.read_message(read_stream)
  @__read_buffers[read_stream] ||= "".dup
  queue = (@__read_buffered_messages[read_stream] ||= [])
  return queue.shift unless queue.empty?

  # Read until the buffer holds at least one complete message
  buffer = @__read_buffers[read_stream]
  buffer << read_stream.sysread(BUFSIZE) until buffer.include?(ENDMARKER)

  # A negative limit keeps trailing empty strings, so the last element is
  # always the (possibly empty) unfinished remainder after the final marker
  segments = buffer.split(ENDMARKER, -1)
  @__read_buffers[read_stream] = segments.pop

  segments.each do |segment|
    next if segment.empty?
    begin
      queue << JSON.parse(segment)
    rescue JSON::ParserError
      cdisplay "READMESSAGE: invalid JSON #{segment}"
    end
  end

  queue.shift
end
1424
-
1425
-
1426
# Writes msg to write_stream as one message: surrounding whitespace is
# stripped, ENDMARKER is appended, and the stream is flushed.
def self.write_message(write_stream, msg)
  framed = "#{msg.strip}#{ENDMARKER}"
  write_stream.write(framed)
  write_stream.flush
end
1432
-
1433
-
1434
# Handshake connection to multilang: sends HANDSHAKE_MESSAGE on write_stream
# and returns the reply read from read_stream. Any error is reported through
# cdisplay and then re-raised.
def self.handshake(write_stream, read_stream)
  write_message(write_stream, HANDSHAKE_MESSAGE)
  read_message(read_stream)
rescue => failure
  cdisplay("Error handshaking node")
  raise failure
end
1444
-
1445
-
1446
# Instruct multilang to run its prepare step: sends PREPARE_MESSAGE on
# write_stream and returns the reply read from read_stream. Any error is
# reported through cdisplay and then re-raised.
def self.prepare(write_stream, read_stream)
  write_message(write_stream, PREPARE_MESSAGE)
  read_message(read_stream)
rescue => failure
  cdisplay("Error running prepare")
  raise failure
end
1456
-
1457
-
1458
# Instruct multilang to begin a cycle.
#
# Sends BEGIN_CYCLE_MESSAGE on write_stream, then reads replies from
# read_stream: "log" replies are displayed, "done" ends the wait, and any
# other reply is treated as a failure.
#
# Returns nil once "done" is received.
# Raises RuntimeError carrying the reply's "msg" (or a generic message when
# the reply has none) for any reply other than "log" or "done".
def self.begin_cycle(write_stream, read_stream)
  write_message(write_stream, BEGIN_CYCLE_MESSAGE)

  loop do
    msg = read_message(read_stream)
    # read_message may return nil; treat that like an unexpected reply
    obj = msg.nil? ? {} : Hash[msg]

    case obj["command"]
    when "log"
      cdisplay "LOG: #{obj['msg']}"
    when "done"
      break
    else
      cdisplay("Error beginning cycle")
      # `raise nil` would itself fail with a TypeError and hide the problem
      raise(obj["msg"].nil? ? "Error beginning cycle" : obj["msg"].to_s)
    end
  end
end
1478
-
1479
# Builds the lookup used when consumers answer: maps each consumer's read
# pipe ("rd_parent_1", or "rd_parent_2" when the first is absent) to
# {:stream => ..., :consumer => ...}. Consumers with neither pipe are skipped.
def self.build_consumer_hash()
  lookup = {}
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      pipes = @__consumer_pipes[stream][consumer]
      pipe_name = ["rd_parent_1", "rd_parent_2"].find { |name| pipes.has_key?(name) }
      next if pipe_name.nil?

      lookup[pipes[pipe_name]] = {:stream => stream, :consumer => consumer}
    end
  end
  lookup
end
1499
-
1500
# Send object to every consumee of the operation: the left hand (or only)
# input via "wr_child_1" and, when present, the right hand input via
# "wr_child_2".
def self.send_to_consumees(json_obj)
  inputs = @__consumee_pipes
  write_message(inputs["wr_child_1"], json_obj) if inputs.has_key?("wr_child_1")
  write_message(inputs["wr_child_2"], json_obj) if inputs.has_key?("wr_child_2")
end
1515
-
1516
# Send object to every consumer of the operation, regardless of stream.
# Each consumer is written to through its "wr_parent_1" pipe, or through
# "wr_parent_2" when the first is absent. With display set, one "emitted"
# line is shown per consumer.
def self.send_to_consumers(json_obj, display = false)
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_pair do |consumer, pipe|
      number = [1, 2].find { |n| pipe.has_key?("wr_parent_#{n}") }
      write_message(get_write_stream(stream, consumer, number), json_obj) unless number.nil?
      cdisplay "emitted #{json_obj.chomp} to #{consumer}" if display
    end
  end
end
1534
-
1535
-
1536
# Get the write pipe of the stream consumer; number selects "wr_parent_1"
# (default) or "wr_parent_2".
def self.get_write_stream(stream, consumer, number=1)
  consumer_pipes = @__consumer_pipes[stream][consumer]
  consumer_pipes["wr_parent_#{number}"]
end
1541
-
1542
-
1543
# Get tuple for sending to consumer of stream: removes and returns the first
# entry of that consumer's write queue (nil when the queue is empty).
def self.get_consumer_tuple(stream, consumer)
  pending = @__emit_queues[stream][consumer][:write_queue]
  pending.shift
end
1547
-
1548
-
1549
-
1550
# Send a command message to a consumer through its "wr_parent_1" pipe (or
# "wr_parent_2" when the first is absent), then mark the consumer as not
# ready in @__emit_queues.
def self.send_command_tuple(stream, consumer, json_obj)
  pipe = @__consumer_pipes[stream][consumer]
  number = [1, 2].find { |n| pipe.has_key?("wr_parent_#{n}") }
  write_message(get_write_stream(stream, consumer, number), json_obj) unless number.nil?
  @__emit_queues[stream][consumer][:ready] = false
end
1565
-
1566
-
1567
# Emit tuple_json to the consumer of a stream through its "wr_parent_1" pipe
# (or "wr_parent_2" when the first is absent), mark the consumer as not ready
# and display the emitted tuple with its keys and values passed through
# truncate_message. When tuple_json is not valid JSON an error is displayed
# and the text is still written as-is.
def self.emit_consumer_tuple(stream, consumer, tuple_json)
  display_json = nil
  begin
    shortened = JSON.parse(tuple_json)["tuple"].map do |name, value|
      [Zillabyte::Runner::Operation.truncate_message(name), Zillabyte::Runner::Operation.truncate_message(value)]
    end
    display_json = Hash[shortened].to_json
  rescue JSON::ParserError
    cdisplay "Error: invalid JSON"
  end

  pipe = @__consumer_pipes[stream][consumer]
  number = [1, 2].find { |n| pipe.has_key?("wr_parent_#{n}") }
  write_message(get_write_stream(stream, consumer, number), tuple_json) unless number.nil?

  @__emit_queues[stream][consumer][:ready] = false
  cdisplay "emitted tuple #{display_json} to #{consumer} "
end
1590
-
1591
-
1592
# Check if a tuple's column or its aliases matches an expected column and
# returns the valid column.
#
# column         - the column name found in the tuple
# column_aliases - collection of {"concrete_name" => ..., "alias" => ...}
#                  hashes; may be nil when the message carried no aliases
# valid_columns  - the expected column names, checked in order
#
# Returns the first expected column that either equals column or is the
# alias of column's concrete name; nil when there is none.
def self.check_tuple_for_alias(column, column_aliases, valid_columns)
  aliases = column_aliases || []

  valid_columns.find do |expected|
    next true if column == expected

    aliases.any? do |entry|
      entry["alias"] == expected &&
        (column == entry["concrete_name"] || column == entry["alias"])
    end
  end
end
1610
-
1611
# Build a tuple and format into JSON.
#
# tuple          - hash of column name => value. The "id" column is dropped;
#                  "confidence", "since" and "source" are moved into the meta
#                  section; everything else becomes a tuple value.
# meta           - optional hash of meta values to start from (not modified)
# column_aliases - optional column aliases, passed through unchanged
#
# Returns a JSON string with the keys "op" (taken from @__name), "tuple",
# "meta" and "column_aliases".
def self.build_tuple_json(tuple, meta = nil, column_aliases = nil)
  # Work on a copy so the caller's hash is not changed as a side effect
  meta = meta.nil? ? {} : meta.dup
  column_aliases ||= {}
  values = {}

  tuple.each do |k, v|
    if k == "id"
      next
    elsif k == "confidence" || k == "since" || k == "source"
      meta[k] = v
    else
      values[k] = v
    end
  end

  {"op" => @__name, "tuple" => values, "meta" => meta, "column_aliases" => column_aliases}.to_json
end
1629
-
1630
-
1631
# Construct a multilang command.
#
# Builds the shell command line that runs the app's script (@__meta["script"],
# relative to @__dir) with the interpreter for @__meta["language"] ("ruby",
# "python" or "js"). For python, the interpreter inside "<dir>/vEnv" is used
# when that directory exists.
#
# arg           - argument string appended after the script path
# ignore_stderr - when true, "2> /dev/null" is appended
#
# Returns the command string, or nil (after displaying a message) when the
# meta information is missing or names no supported language.
def self.command(arg, ignore_stderr=false)
  if @__meta.nil?
    cdisplay("could not extract meta information. missing zillabyte.conf.yml?")
    # Without meta there is no script or language to build a command from
    return nil
  end

  full_script = File.join(@__dir, @__meta["script"])
  stderr_opt = "2> /dev/null" if ignore_stderr

  case @__meta["language"]
  when "ruby"
    # Execute in the bundler context
    "cd \"#{@__dir}\"; unset BUNDLE_GEMFILE; ZILLABYTE_HARNESS=1 bundle exec ruby \"#{full_script}\" #{arg} #{stderr_opt}"
  when "python"
    # The virtualenv interpreter path is quoted like every other path here,
    # so an app directory containing spaces keeps working
    python = File.directory?("#{@__dir}/vEnv") ? "\"#{@__dir}/vEnv/bin/python\"" : "python"
    "cd \"#{@__dir}\"; PYTHONPATH=~/zb1/multilang/python/Zillabyte #{python} \"#{full_script}\" #{arg} #{stderr_opt}"
  when "js"
    "cd \"#{@__dir}\"; NODE_PATH=~/zb1/multilang/js/src/lib #{Zillabyte::API::NODEJS_BIN} \"#{full_script}\" #{arg} #{stderr_opt}"
  else
    cdisplay("no language specified")
    nil
  end
end
1654
-
1655
-
1656
# Display a message on behalf of this operation by forwarding it, together
# with the operation name (@__name), to @__tester.cdisplay.
def self.cdisplay(msg, useName=true)
  forwarded = [@__name, msg, useName]
  @__tester.cdisplay(*forwarded)
end
1660
-
1661
- end; end; end
1662
-