zillabyte-cli 0.9.20 → 0.9.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +13 -5
- data/lib/zillabyte-cli/version.rb +1 -1
- data/lib/zillabyte/api/components.rb +2 -2
- data/lib/zillabyte/cli/base.rb +15 -0
- data/lib/zillabyte/cli/data.rb +31 -1
- data/lib/zillabyte/cli/download.rb +67 -0
- data/lib/zillabyte/cli/flows.rb +91 -37
- data/lib/zillabyte/helpers.rb +12 -1
- metadata +28 -43
- data/lib/#zillabyte-cli.rb# +0 -5
- data/lib/zillabyte/cli/#logs.rb# +0 -12
- data/lib/zillabyte/cli/#repl.rb# +0 -43
- data/lib/zillabyte/cli/templates/python/#simple_function.py# +0 -27
- data/lib/zillabyte/runner.rb +0 -6
- data/lib/zillabyte/runner/app_runner.rb +0 -320
- data/lib/zillabyte/runner/component_operation.rb +0 -636
- data/lib/zillabyte/runner/component_runner.rb +0 -337
- data/lib/zillabyte/runner/multilang_operation.rb +0 -1662
- data/lib/zillabyte/runner/operation.rb +0 -18
@@ -1,337 +0,0 @@
|
|
1
|
-
require "zillabyte/runner/multilang_operation"
|
2
|
-
require "zillabyte/runner/component_operation"
|
3
|
-
|
4
|
-
# HIDDEN:
|
5
|
-
class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
|
6
|
-
include Zillabyte::Helpers
|
7
|
-
|
8
|
-
KILL_CYCLE_MESSAGE = "{\"command\": \"kill_cycle\"}\n"
|
9
|
-
|
10
|
-
# Runs a component locally.
#
# One child process is forked per node in meta["nodes"]; each child runs
# Zillabyte::Runner::ComponentOperation.run and talks to its neighbours over
# IO.pipe pairs created here. When options[:input] is nil the parent then
# prompts the user for tuples (via `ask`) and writes them to the single
# "input" node until the user enters 'end'.
#
# meta    - Hash with at least "nodes" and "arcs" (Arrays of Hashes).
# dir     - directory handed to every operation (defaults to Dir.pwd).
# session - object used for display; nothing happens when it is nil.
# options - Hash; :input is read here, the whole Hash is forwarded to the
#           operations (children add :join_options for join nodes).
#
# Returns nil immediately when meta or session is nil. Errors raised while
# setting up or prompting are displayed, not propagated. The parent always
# waits for every child (Process.waitall) and closes its pipe ends.
def run(meta, dir = Dir.pwd, session = nil, options = {})
  return if meta.nil? || session.nil?

  @session = session
  @colors = {}

  # Get options
  input = options[:input]

  # Show the user what we know about their app...
  display "inferring your app details..."
  describe_component(meta)

  # Index nodes by name
  @nodes = meta["nodes"]
  @node_map = {}
  @nodes.each do |n|
    @node_map[n["name"]] = n
  end

  # Arcs connect an origin node to a destination node over a named stream
  @arcs = meta["arcs"]

  # Organize component pipes
  @operations = {}
  @operation_pipes = {}

  # Start component
  begin

    # Setup operation pipes
    @nodes.each do |n|
      name = n["name"]
      type = n["type"]

      # Create two new pipes in the parent.
      rd_child_1, wr_parent_1 = IO.pipe()
      rd_parent_1, wr_child_1 = IO.pipe()

      @operation_pipes[name] = {
        "rd_child_1" => rd_child_1,
        "wr_child_1" => wr_child_1,
        "rd_parent_1" => rd_parent_1,
        "wr_parent_1" => wr_parent_1
      }

      # Add a second (right hand side) set of pipes for joins
      if type == "join"
        rd_child_2, wr_parent_2 = IO.pipe()
        rd_parent_2, wr_child_2 = IO.pipe()
        @operation_pipes[name]["rd_child_2"] = rd_child_2
        @operation_pipes[name]["wr_child_2"] = wr_child_2
        @operation_pipes[name]["rd_parent_2"] = rd_parent_2
        @operation_pipes[name]["wr_parent_2"] = wr_parent_2
      end
    end

    # Maps origin => {stream => [destinations]}
    @arc_map = {}
    @arcs.each do |a|
      origin = a["origin"]
      name = a["name"]
      @arc_map[origin] ||= {}
      @arc_map[origin][name] ||= []
      @arc_map[origin][name] << a["dest"]
    end

    # Spawn one child process per node
    @nodes.each do |n|
      name = n["name"]
      type = n["type"]

      pipes = @operation_pipes[name]

      # Fork.
      pid = fork()
      if pid # In parent
        # Close the child's reading end so we can write to the child.
        pipes["rd_child_1"].close()
        # Close the child's writing end so we can read from the child.
        pipes["wr_child_1"].close()

        if type == "join"
          pipes["rd_child_2"].close()
          pipes["wr_child_2"].close()
        end
      else # in child
        # Close the parent's writing end so we can read from the parent.
        pipes["wr_parent_1"].close()
        # Close the parent's reading end so we can write to the parent.
        pipes["rd_parent_1"].close()

        if type == "join"
          pipes["rd_parent_2"].close()
          pipes["wr_parent_2"].close()
        end

        begin
          # Setup reading and writing pipes for communicating with the consumee
          if type != "join"
            in_pipes = {
              "rd_child_1" => @operation_pipes[name]["rd_child_1"],
              "wr_child_1" => @operation_pipes[name]["wr_child_1"]
            }

          # Add join specific options: which origin feeds which side
          else
            options[:join_options] = {}
            in_pipes = {}
            @arcs.each do |a|
              next unless a["dest"] == name

              # Left Side
              if a["left"] == 1
                options[:join_options][:lhs] = a["origin"]
                in_pipes["rd_child_1"] = @operation_pipes[name]["rd_child_1"]
                in_pipes["wr_child_1"] = @operation_pipes[name]["wr_child_1"]
              # Right Side
              elsif a["right"] == 1
                options[:join_options][:rhs] = a["origin"]
                in_pipes["rd_child_2"] = @operation_pipes[name]["rd_child_2"]
                in_pipes["wr_child_2"] = @operation_pipes[name]["wr_child_2"]
              end
            end
          end

          # Index consumer pipes by stream name, consumer name
          out_pipes = {}

          # Check if you are the consumee for a downstream join in order to
          # select the correct pipe. A node without outgoing arcs has no
          # entry in @arc_map, so fall back to an empty Hash instead of
          # calling each_pair on nil.
          if type != "output"
            (@arc_map[name] || {}).each_pair do |stream, destinations|
              out_pipes[stream] ||= {}

              destinations.each do |dest|
                out_pipes[stream][dest] ||= {}

                # Check for a join at the destination
                if @node_map[dest]["type"] == "join"
                  @arcs.each do |a|
                    next unless a["dest"] == dest && a["origin"] == name

                    # Left Side
                    if a["left"] == 1
                      out_pipes[stream][dest]["wr_parent_1"] = @operation_pipes[dest]["wr_parent_1"]
                      out_pipes[stream][dest]["rd_parent_1"] = @operation_pipes[dest]["rd_parent_1"]
                      break
                    elsif a["right"] == 1
                      out_pipes[stream][dest]["wr_parent_2"] = @operation_pipes[dest]["wr_parent_2"]
                      out_pipes[stream][dest]["rd_parent_2"] = @operation_pipes[dest]["rd_parent_2"]
                      break
                    end
                  end
                else
                  out_pipes[stream][dest]["wr_parent_1"] = @operation_pipes[dest]["wr_parent_1"]
                  out_pipes[stream][dest]["rd_parent_1"] = @operation_pipes[dest]["rd_parent_1"]
                end
              end
            end
          end

          # Run the child process
          Zillabyte::Runner::ComponentOperation.run(n, dir, in_pipes, out_pipes, self, meta, options)

        rescue => e
          display e.message
          display e.backtrace
        ensure
          # Close the reading end of the child
          pipes["rd_child_1"].close()
          # Close the writing end of the child
          pipes["wr_child_1"].close()

          # Close secondary join child
          pipes["rd_child_2"].close() if pipes["rd_child_2"]
          pipes["wr_child_2"].close() if pipes["wr_child_2"]

          # The child never returns into the parent's code path.
          exit!(-1)
        end # end child
      end
    end

    # If no input file, read from STDIN
    # TODO handle inputs
    if input.nil?

      # Exactly one "input" node may be fed interactively.
      source = ""
      @nodes.each do |n|
        next unless n["type"] == "input"

        if source == ""
          source = n["name"]
        else
          display "Error: Cannot run component with multiple input sources without input files"
          return
        end
      end

      # Without an input node there is nothing to prompt for; previously this
      # surfaced as a NoMethodError on nil inside the loop below.
      if source == ""
        display "Error: Cannot run component without an input source"
        return
      end

      display ""
      display "To view results: Enter 'end' "
      display ""

      # The schema does not change between prompts, so compute it once.
      fields = @node_map[source]['fields'].map { |h| h.keys[0].upcase }

      loop do
        display "Enter an input tuple in the form : #{fields.join(' ')}"
        msg = ask

        # Kill the cycle. A nil answer (e.g. input closed) is treated like
        # 'end' so we do not call String methods on nil.
        if msg.nil? || msg == 'end'
          @operation_pipes[source]["wr_parent_1"].puts KILL_CYCLE_MESSAGE
          break
        end

        # Check arguments
        args = msg.scan(/(?:\w|"[^"]*")+/)
        if args.length % fields.length != 0
          display "Error: Argument length must be a multiple of the schema length"
          next
        end

        # Send tuple to source
        @operation_pipes[source]["wr_parent_1"].puts msg
      end
    end

  rescue => e
    display e.message
    display e.backtrace

  ensure
    Process.waitall()
    @operation_pipes.each do |_name, pipes|
      # Close the writing end of the parent
      pipes["wr_parent_1"].close()
      # Close the reading end of the parent
      pipes["rd_parent_1"].close()

      # Close secondary join parent
      pipes["wr_parent_2"].close() if pipes["wr_parent_2"]
      pipes["rd_parent_2"].close() if pipes["rd_parent_2"]
    end
  end
end
|
276
|
-
|
277
|
-
# Reader for the session object stored by #run.
def session; @session; end
|
280
|
-
|
281
|
-
|
282
|
-
# Displays a (possibly multi-line) message in the colour registered for
# +name+ in @colors, falling back to :default.
#
# name    - key looked up in @colors; also used as the line prefix.
# message - String (split on "\n") or Array of lines. nil, "" and messages
#           that contain no lines at all are ignored.
# useName - when true the first line is prefixed with "<name> - " and the
#           following lines are indented by the same width.
def cdisplay(name, message, useName=true)
  return if message.nil? || message == ""

  lines = message.is_a?(Array) ? message : message.split("\n")

  # An empty Array (or a String made only of newlines) yields no lines.
  # The previous `lines[1..-1].each` raised NoMethodError here because
  # slicing an empty Array from index 1 returns nil.
  return if lines.empty?

  color = @colors[name] || :default
  prefix = useName ? "#{name} - " : ""

  display "#{prefix}#{lines.first}".colorize(color)
  lines.drop(1).each do |line|
    display "#{' '*prefix.size}#{line}".colorize(color)
  end
end
|
301
|
-
|
302
|
-
# Forwards a message to the current session's display method.
# newline is passed through unchanged as the second argument.
def display(message, newline = true) @session.display(message, newline) end
|
305
|
-
|
306
|
-
# Prints a summary of the component described by +meta+: its name and
# language, then for every node its index, name, type and - depending on
# the type - its fields/matches (input) or columns (output).
#
# Each node name is assigned a colour from a fixed palette (cycling by
# index) and remembered in @colors.
#
# meta - Hash with "name", "language" and "nodes" (Array of Hashes).
def describe_component(meta)
  require("colorize")
  require("indentation")
  # JSON.pretty_generate is used for the "matches" line below.
  require("json")

  palette = [:green, :yellow, :magenta, :cyan, :white, :blue, :light_yellow, :light_blue, :red, :light_magenta, :light_cyan]
  rjust = 20

  display "#{'component name'.rjust(rjust)}: #{meta['name']}"
  display "#{'component language'.rjust(rjust)}: #{meta['language']}"

  meta['nodes'].each_with_index do |node, index|
    color = @colors[node['name']] ||= palette[index % palette.length]

    display(("="*rjust + " operation ##{index}").colorize(color))
    display "#{"name".rjust(rjust)}: #{node['name'].to_s.colorize(color)}"
    # Every node type prints the same "type" line; only the extra details differ.
    display "#{"type".rjust(rjust)}: #{node['type'].to_s.colorize(color)}"

    case node['type']
    when "input"
      display "#{"fields".rjust(rjust)}: #{node['fields'].to_s.colorize(color)}"
      display "#{"matches".rjust(rjust)}: #{JSON.pretty_generate(node['matches']).indent(rjust+2).lstrip.colorize(color)}" if node['matches']
    when "output"
      display "#{"columns".rjust(rjust)}: #{node['columns'].to_s.colorize(color)}"
    end
  end
end
|
336
|
-
|
337
|
-
end
|
@@ -1,1662 +0,0 @@
|
|
1
|
-
|
2
|
-
# Emulate a multilang operation
|
3
|
-
module Zillabyte; module Runner; class MultilangOperation
|
4
|
-
|
5
|
-
HANDSHAKE_MESSAGE = "{\"pidDir\": \"/tmp\"}\n"
|
6
|
-
PREPARE_MESSAGE = " {\"command\": \"prepare\"}\n"
|
7
|
-
DONE_MESSAGE = "{\"command\": \"done\"}\n"
|
8
|
-
NEXT_MESSAGE = "{\"command\": \"next\"}\n"
|
9
|
-
BEGIN_CYCLE_MESSAGE = "{\"command\": \"begin_cycle\"}\n"
|
10
|
-
END_CYCLE_MESSAGE = "{\"command\": \"end_cycle\"}\n"
|
11
|
-
KILL_CYCLE_MESSAGE = "{\"command\": \"kill_cycle\"}\n"
|
12
|
-
PONG_PREFIX = "{\"pong\": \""
|
13
|
-
PONG_SUFFIX = "\"}\n"
|
14
|
-
ENDMARKER = "\nend\n"
|
15
|
-
|
16
|
-
# Entry point for emulating one multilang operation.
#
# Stores the arguments in class-level instance variables, creates one emit
# queue per (stream, consumer) pair found in consumer_pipes, then dispatches
# on node["type"] to the matching run_* method. "component" nodes are
# delegated to Zillabyte::Runner::ComponentOperation.run.
#
# node           - Hash describing the operation ("name", "type", ...).
# dir            - directory handed on to nested components.
# consumee_pipes - pipes towards the upstream operation.
# consumer_pipes - Hash of stream => { consumer => pipes }.
# tester         - the runner object (stored as @__tester).
# meta           - metadata, stored as @__meta.
# options        - Hash; :output_type is read here.
#
# StandardErrors raised by the dispatched method are reported through
# cdisplay and not re-raised.
def self.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options = {})
  require("mkfifo")
  require("zillabyte/runner/component_operation")
  require("zillabyte/runner/operation")

  require("pty")
  require("open3")

  @__node = node
  @__name = node["name"]
  @__tester = tester

  @__type = node["type"]
  @__dir = dir

  @__consumee_pipes = consumee_pipes
  @__consumer_pipes = consumer_pipes
  @__meta = meta
  @__options = options
  @__output_type = options[:output_type]

  # Each consumer of a stream gets its own queue and message passing
  @__emit_queues = {}
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      @__emit_queues[stream] ||= {}
      @__emit_queues[stream][consumer] = {:write_queue => [], :ready => true}
    end
  end

  begin
    case @__type
    when "source"
      self.run_source()
    when "group_by"
      self.run_group_by()
    when "join"
      self.run_join()
    when "each"
      self.run_each()
    when "filter"
      self.run_filter()
    when "component"
      # Forward the caller's options. The call used to read `options = {}`,
      # which reassigned the local and always passed an empty Hash.
      Zillabyte::Runner::ComponentOperation.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options)
    when "sink"
      self.run_sink()
    else
      cdisplay("invalid operation type #{@__type}")
    end
  rescue => e
    cdisplay e.message
  end

end
|
69
|
-
|
70
|
-
|
71
|
-
# Runs a source operation in one of three modes:
#
# * interactive (@__options[:interactive]): reads JSON lines from the
#   consumee pipe "rd_child_1" and forwards each as a tuple to all consumers
#   until KILL_CYCLE_MESSAGE arrives or the pipe reaches end-of-file.
# * relation (@__node['matches'] or @__node["relation"]): fetches rows via
#   @__tester.query_agnostic, queues them for every consumer and hands them
#   out one at a time as consumers send "next".
# * custom: spawns the user's multilang process (Open3.popen3), talks to it
#   over a local TCP socket and relays emitted tuples to consumers.
#
# The custom mode ends the current process (exit / Process.exit!) when it
# finishes; the other modes return.
def self.run_source

  # Interactive source
  if @__options[:interactive]
    loop do

      msg = @__consumee_pipes["rd_child_1"].gets

      # gets returns nil at end-of-file; treat a closed pipe like a kill so
      # we neither feed nil to JSON.parse nor leave consumers waiting.
      if msg.nil? || msg == KILL_CYCLE_MESSAGE
        send_to_consumers(KILL_CYCLE_MESSAGE)
        return
      end

      # Build tuple
      begin
        obj = JSON.parse(msg)
      rescue JSON::ParserError
        cdisplay "Error: invalid JSON"
        next
      end

      send_to_consumers(build_tuple_json(obj))
    end

  # Source from relation
  elsif @__node['matches'] || @__node["relation"]

    # Query API for rows
    matches = @__node['matches'] || (@__node["relation"]["query"])
    cdisplay("Fetching remote data...")
    res = @__tester.query_agnostic(matches)
    rows = res["rows"]
    if rows.nil? || rows.length == 0
      cdisplay("Could not find data that matches your 'matches' clause")
      exit(-1)
    end

    # Enqueue rows for sending to consumers
    column_aliases = res['column_aliases']
    rows.each do |tuple|
      tuple_json = build_tuple_json(tuple, nil, column_aliases)
      @__emit_queues.each_pair do |_stream, consumers|
        consumers.each_pair do |_consumer, emitter|
          emitter[:write_queue] << tuple_json
        end
      end
    end

    # Index streams and consumers by their pipes for lookup
    consumer_hash = build_consumer_hash()

    # Send first tuple
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        tuple_json = get_consumer_tuple(stream, consumer)
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end

    # Send tuples to consumers as appropriate
    loop do

      # Retrieve messages from consumers
      rs, = IO.select(consumer_hash.keys, [], [])

      # Emit tuples to consumers
      emitted = false
      rs.each do |r|

        # Read from consumer
        msg = read_message(r)

        # Nothing usable was read (the custom-source loop below guards the
        # same way); skip instead of indexing into nil.
        next if msg.nil?

        stream = consumer_hash[r][:stream]
        consumer = consumer_hash[r][:consumer]

        case msg["command"]

        # Consumer is ready for next message
        when "next"
          @__emit_queues[stream][consumer][:ready] = true
          tuple_json = get_consumer_tuple(stream, consumer)

          # If all messages have been sent to consumer, end their cycle
          if tuple_json.nil?
            # NOTE(review): the result was never used in the original either;
            # presumably a pure lookup - confirm and drop the call.
            _write_stream = get_write_stream(stream, consumer)
            cdisplay "ending cycle for #{consumer}"
            send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
            send_command_tuple(stream, consumer, DONE_MESSAGE)
          else
            # Emit tuple to consumer
            emit_consumer_tuple(stream, consumer, tuple_json)
            emitted = true
          end

        when "kill_cycle"
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          return
        end
      end

      # Exit when done emitting
      return unless emitted
    end

  # Custom source
  else

    # Index streams and consumers by their pipes for lookup
    consumer_hash = build_consumer_hash()

    # Keep track of how many consumers to handle before exiting
    consumers_running = consumer_hash.keys.length

    # Kill the cycle on error
    cycle_killed = false

    # Setup multilang socket
    require 'socket'
    host = "0.0.0.0"
    server = TCPServer.new(0)
    port = server.addr[1]

    # Spawn multilang process
    cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

    begin

      Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
        begin
          server_thread = Thread.new do
            ml_socket = server.accept()

            # RUN SOURCE
            begin
              # Setup streams from consumers and multilang (stdout and socket communication)
              read_streams = consumer_hash.keys.concat [stdout, ml_socket]

              # Handshake
              handshake(ml_socket, ml_socket)
              prepare(ml_socket, ml_socket)

              # Begin cycle
              begin_cycle(ml_socket, ml_socket)
              emitted = false
              write_message(ml_socket, NEXT_MESSAGE)
              multilang_queue = []
              # NOTE(review): the policy is taken from the options; the node's
              # own "end_cycle_policy" entry was read by the original but
              # overwritten before use - confirm which one is intended.
              end_cycle_policy = @__options[:end_cycle_policy]
              end_cycle_received = false

              # Receive and handle messages
              loop do
                # Read from a stream
                rs = select_read_streams(read_streams)
                rs.each do |r|
                  # Read stdout straight to user
                  if r == stdout && consumers_running > 0
                    msg = r.gets
                    # gets returns nil once the process closed its stdout
                    cdisplay("log: #{msg.sub(/\n/, "")}") unless msg.nil?
                    next
                  end

                  obj = read_message(r)
                  next if obj.nil?

                  if obj["command"]
                    case obj["command"]

                    # Multilang emitted a tuple
                    when "emit"
                      stream = obj['stream']

                      # Check for null emit: unless the policy is "explicit",
                      # a nil tuple or a tuple containing a nil value marks
                      # the end of the cycle.
                      if end_cycle_policy != "explicit"
                        if obj['tuple'].nil?
                          end_cycle_received = true
                        else
                          end_cycle_received = obj['tuple'].each_value.any? { |v| v.nil? }
                          next if end_cycle_received
                        end
                      end

                      # Valid emit
                      emitted = true

                      # Send or enqueue tuple for each consumer
                      tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])

                      @__emit_queues[stream].each_pair do |consumer, emitter|
                        if emitter[:ready]
                          emit_consumer_tuple(stream, consumer, tuple_json)
                        else
                          @__emit_queues[stream][consumer][:write_queue] << tuple_json
                        end
                      end

                    # Consumer is ready for a message
                    when "next"
                      stream = consumer_hash[r][:stream]
                      consumer = consumer_hash[r][:consumer]
                      @__emit_queues[stream][consumer][:ready] = true
                      tuple_json = get_consumer_tuple(stream, consumer)

                      # End cycle for consumer if it has processed all tuples
                      if tuple_json.nil? && end_cycle_received
                        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                        consumers_running -= 1
                        exit(0) if consumers_running == 0

                      elsif !tuple_json.nil?
                        # Emit tuple to consumer
                        emit_consumer_tuple(stream, consumer, tuple_json)
                        emitted = true
                      end

                    # Multilang is done emitting a group of tuples
                    when "done"
                      # End cycle if no tuples were emitted
                      if !emitted && end_cycle_policy == "null_emit"
                        end_cycle_received = true
                      else
                        emitted = false
                      end

                      # Send the next tuple to multilang
                      if !multilang_queue.empty?
                        write_message(ml_socket, multilang_queue.shift)

                      # Request next tuple from multilang
                      elsif !end_cycle_received
                        write_message(ml_socket, NEXT_MESSAGE)

                      # If there are no more messages to send, we are done:
                      # end the cycle for ready consumers
                      else
                        @__emit_queues.each_pair do |stream, consumers|
                          consumers.each_pair do |consumer, emitter|
                            if emitter[:ready]
                              send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                              consumers_running -= 1
                              exit(0) if consumers_running == 0
                            end
                          end
                        end
                      end

                    # Multilang sent an error message
                    when "fail"
                      cdisplay("ERROR : #{obj['msg']}")
                      cycle_killed = true
                      exit(0)

                    # Multilang sent a log message
                    when "log"
                      cdisplay "LOG: #{obj['msg']}"

                    # Multilang sent signal to end the cycle
                    when "end_cycle"
                      if end_cycle_policy != "explicit"
                        cdisplay "received end_cycle command for non explicit policy"
                        next
                      end
                      end_cycle_received = true

                    when "kill_cycle"
                      cycle_killed = true
                      exit(0)
                    end

                  # Multilang sent a ping
                  elsif obj['ping']
                    write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                  end
                end

                # Exit after ending consumer cycles. This check used to sit
                # after the (never-ending) loop, where it could not run; it
                # now lives inside the loop as in run_each.
                exit(0) if consumers_running == 0
              end
            rescue => e
              cycle_killed = true
              cdisplay e.message
              cdisplay e.backtrace
            ensure
              ml_socket.close()
            end
          end
          server_thread.join()
        rescue => e
          # Report instead of silently swallowing (matches run_each).
          cdisplay e.message
          cdisplay e.backtrace
        ensure
          # cleanup
          if cycle_killed
            send_to_consumers(KILL_CYCLE_MESSAGE, false)
          end
          pid = wait_thread[:pid]
          stdout.close
          Process.kill('INT', pid)
          Process.exit!(true)
        end
      end

    rescue PTY::ChildExited
    end
  end
end
|
404
|
-
|
405
|
-
|
406
|
-
# Runs an "each" operation: spawns the user's multilang process
# (Open3.popen3), connects to it over a local TCP socket and then relays
# messages between three parties until every consumer's cycle has ended:
#
# * tuples read from the consumee pipe "rd_child_1" are written to multilang
#   and acknowledged to the consumee with NEXT_MESSAGE;
# * tuples emitted by multilang are sent to (or queued for) each consumer of
#   the emitting stream;
# * consumers request tuples with "next" and receive END_CYCLE_MESSAGE once
#   the consumee has sent "end_cycle" and nothing is left for them.
#
# Lines printed by the process on stdout are shown to the user as "LOG: ...".
# On "fail" or "kill_cycle" the kill is propagated to consumers and consumees.
def self.run_each

  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash()

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Kill the cycle on error
  cycle_killed = false

  # Setup multilang socket
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0)
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    # Start the operation...
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()
          begin
            # Setup streams from consumers, multilang, and the consumee
            read_streams = consumer_hash.keys.concat [@__consumee_pipes["rd_child_1"], ml_socket, stdout]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            # NOTE(review): tuples are only queued when the queue is already
            # non-empty, and nothing else fills it, so as written every tuple
            # is written to multilang immediately - confirm the intent.
            multilang_queue = []
            # Number of tuples handed to multilang that are not yet "done"
            multilang_count = 0
            end_cycle_received = false
            column_aliases = nil

            # Receive and handle messages
            loop do

              # Read from a stream
              rs = select_read_streams(read_streams)
              rs.each do |r|

                # Read STDOUT from program straight to user
                if r == stdout
                  msg = r.gets
                  cdisplay("LOG: #{msg.sub(/\n/, "")}") unless msg.nil?
                  next
                end

                # Receive an object
                obj = read_message(r)

                # Nothing usable was read (run_source guards the same way);
                # skip instead of indexing into nil.
                next if obj.nil?

                if obj["command"]
                  case obj["command"]

                  # Multilang emitted a tuple
                  when "emit"
                    stream = obj["stream"]

                    # Send or enqueue tuple for each consumer
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], column_aliases)

                    @__emit_queues[stream].each_pair do |consumer, emitter|
                      if emitter[:ready]
                        emit_consumer_tuple(stream, consumer, tuple_json)
                      else
                        @__emit_queues[stream][consumer][:write_queue] << tuple_json
                      end
                    end

                  # Consumer is ready for a message
                  when "next"
                    stream = consumer_hash[r][:stream]
                    consumer = consumer_hash[r][:consumer]

                    @__emit_queues[stream][consumer][:ready] = true
                    tuple_json = get_consumer_tuple(stream, consumer)

                    # End cycle for consumer if it has processed all tuples
                    if tuple_json.nil? && end_cycle_received
                      send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                      consumers_running -= 1
                      # Leaves rs.each; the check after it then exits.
                      break if consumers_running == 0

                    elsif !tuple_json.nil?
                      # Emit tuple to consumer
                      emit_consumer_tuple(stream, consumer, tuple_json)
                    end

                  # Multilang is done emitting a group of tuples
                  when "done"
                    multilang_count -= 1

                    # Send the next tuple to multilang
                    if !multilang_queue.empty?
                      write_message(ml_socket, multilang_queue.shift)

                    # If there are no more messages to send, we are done:
                    # end the cycle for ready consumers
                    elsif end_cycle_received && multilang_count == 0
                      @__emit_queues.each_pair do |stream, consumers|
                        consumers.each_pair do |consumer, emitter|
                          if emitter[:ready]
                            send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                            consumers_running -= 1
                            # Only leaves the inner each_pair.
                            break if consumers_running == 0
                          end
                        end
                      end
                    end

                  # Multilang sent an error message
                  when "fail"
                    cdisplay("ERROR : #{obj['msg']}")
                    cycle_killed = true
                    exit(0)

                  # Multilang sent a log message
                  when "log"
                    cdisplay "LOG: #{obj['msg']}"

                  # Consumee operation sent signal to end_cycle
                  when "end_cycle"
                    end_cycle_received = true
                    if multilang_count == 0
                      @__emit_queues.each_pair do |stream, consumers|
                        consumers.each_pair do |consumer, emitter|
                          if emitter[:ready]
                            send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                            consumers_running -= 1
                            exit(0) if consumers_running == 0
                          end
                        end
                      end
                    end

                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Received a tuple from consumee
                elsif obj['tuple']
                  column_aliases = obj['column_aliases']

                  # Send or enqueue to multilang
                  multilang_count += 1
                  if multilang_queue.empty?
                    write_message(ml_socket, obj.to_json)
                  else
                    multilang_queue << obj.to_json
                  end
                  write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end

              # Exit after ending consumer cycles
              exit(0) if consumers_running == 0

            end
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()
      rescue => e
        cdisplay e.message
        cdisplay e.backtrace
      ensure
        # cleanup
        if cycle_killed
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          send_to_consumees(KILL_CYCLE_MESSAGE)
        end
        stdin.close
        stdout.close
        stderr.close
      end
    end
  rescue PTY::ChildExited
    cdisplay("The child process exited!")
  end

end
|
618
|
-
|
619
|
-
|
620
|
-
# Runs a group-by operation.
#
# Tuples read from the consumee pipe ("rd_child_1") are bucketed by the
# columns listed in @__node['group_by']. When the consumee sends "end_cycle",
# each bucket is replayed to the multilang process over a TCP socket as a
# begin_group / aggregate... / end_group sequence, one message at a time
# (the next message is sent when multilang answers "done" or "emit").
# Every tuple multilang emits is queued for all consumers, and once the last
# expected emit arrives the queues are drained to the consumers on demand.
#
# The cleanup step ends the current process with Process.exit!, so this
# method does not return to its caller once the multilang process was spawned.
def self.run_group_by()

  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Set when a kill_cycle command arrives; the cleanup step then forwards
  # the kill to consumers and consumees
  cycle_killed = false

  # Setup groups
  group_by = @__node['group_by']
  group_tuples = {} # group key (column => value) => buffered tuple fragments
  emit_count = 0    # how many emits are expected once groups are sent to multilang
  tuple_queue = []  # messages waiting to be sent to multilang

  # Setup multilang pipe (port 0 lets the OS pick a free port)
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0)
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    # Start the operation...
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()
          begin
            # Setup streams from consumers, multilang, and the consumee
            read_streams = consumer_hash.keys.concat [stdout, ml_socket, @__consumee_pipes["rd_child_1"]]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            end_cycle_received = false
            finished_emitting = false

            # Handle streams
            loop do
              # Read from a stream
              rs = select_read_streams(read_streams)
              rs.each do |r|

                # Read STDOUT from program straight to user
                if r == stdout
                  msg = r.gets
                  if msg.nil?
                    # EOF: gets returns nil, so stop watching stdout instead
                    # of calling String methods on nil
                    read_streams.delete(stdout)
                    next
                  end
                  msg = msg.sub(/\n/, "")
                  cdisplay("log: #{msg}")
                  next
                end

                # Receive an object
                obj = read_message(r)

                if obj["command"]
                  case obj["command"]

                  # Multilang finished a message; feed it the next queued one
                  when "done"
                    if end_cycle_received
                      tuple_json = tuple_queue.shift
                      write_message(ml_socket, tuple_json) unless tuple_json.nil?
                    end
                    next

                  # Begin aggregation
                  when "end_cycle"
                    end_cycle_received = true
                    # From here on only multilang is read
                    read_streams = [ml_socket]

                    group_tuples.each do |group_tuple, tuples|
                      tuple_queue << "{\"command\": \"begin_group\", \"tuple\": #{group_tuple.to_json}, \"meta\":{}}\n"
                      tuples.each do |t|
                        tuple_queue << "{\"command\": \"aggregate\", #{t}}\n"
                      end
                      tuple_queue << "{\"command\": \"end_group\"}\n"

                      # keep track of how many emits are expected
                      emit_count += 1
                    end

                    tuple_json = tuple_queue.shift
                    write_message(ml_socket, tuple_json) unless tuple_json.nil?

                  # Multilang has emitted a grouped tuple
                  when "emit"
                    emit_count -= 1
                    # Enqueue for every consumer of every stream
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
                    @__emit_queues.each_pair do |stream, consumers|
                      consumers.each_key do |consumer|
                        @__emit_queues[stream][consumer][:write_queue] << tuple_json
                      end
                    end

                    # End cycle when done emitting
                    if end_cycle_received && emit_count == 0
                      finished_emitting = true
                      break
                    elsif end_cycle_received
                      tuple_json = tuple_queue.shift
                      write_message(ml_socket, tuple_json) unless tuple_json.nil?
                    end

                  # An error has occurred
                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Received a tuple from operation
                elsif obj["tuple"]
                  tuple = obj["tuple"].to_json
                  meta = obj["meta"].to_json
                  column_aliases = obj["column_aliases"] || {}
                  aliases = Hash[column_aliases.map{|h| [h["alias"],h["concrete_name"]]}]
                  gt = {}

                  # Get the column names to group on
                  group_by.each do |field|
                    field_name = aliases[field] || field
                    gt[field] = obj["tuple"][field_name]
                  end

                  # Stored without the enclosing braces so the "aggregate"
                  # command can be prepended when the group is replayed
                  msg_no_brackets = "\"tuple\": #{tuple}, \"meta\": #{meta}, \"column_aliases\": #{column_aliases.to_json}"

                  # Group tuple into existing group or create new group
                  (group_tuples[gt] ||= []) << msg_no_brackets

                  # Ask operation for next tuple
                  write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end

              # Send tuples to consumers
              if finished_emitting && consumers_running > 0

                # Send first tuple
                @__emit_queues.each_pair do |stream, consumers|
                  consumers.each_key do |consumer|
                    tuple_json = get_consumer_tuple(stream, consumer)
                    emit_consumer_tuple(stream, consumer, tuple_json)
                  end
                end

                # Send tuples to consumers as appropriate
                loop do

                  # Retrieve messages from consumers
                  ready, = IO.select(consumer_hash.keys, [], [])

                  # Emit tuples to consumers
                  emitted = false
                  ready.each do |r|

                    # Read from consumer
                    msg = read_message(r)
                    consumer = consumer_hash[r][:consumer]
                    stream = consumer_hash[r][:stream]

                    # Consumer is ready for next message
                    if msg["command"] && msg["command"] == "next"

                      @__emit_queues[stream][consumer][:ready] = true
                      tuple_json = get_consumer_tuple(stream, consumer)

                      # If all messages have been sent to a consumer, end its cycle
                      if tuple_json.nil?
                        write_stream = get_write_stream(stream, consumer)
                        write_message(write_stream, END_CYCLE_MESSAGE)
                        consumers_running -= 1
                        break if consumers_running == 0
                      else
                        # Emit tuple to consumer
                        emit_consumer_tuple(stream, consumer, tuple_json)
                        emitted = true
                      end
                    end

                  end
                  # Exit when done emitting
                  exit(0) if !emitted
                end
                break

              # Exit after ending all consumer cycles
              elsif consumers_running == 0
                break
              end
            end
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()

      rescue Errno::EIO
        cdisplay("Errno:EIO error")
      ensure
        # cleanup
        if cycle_killed
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          send_to_consumees(KILL_CYCLE_MESSAGE)
        end

        pid = wait_thread[:pid]
        stdin.close
        stdout.close
        stderr.close
        Process.kill('INT', pid)
        # Ends this process immediately, skipping at_exit handlers
        Process.exit!(true)
      end
    end
  rescue PTY::ChildExited
    # No pipe-file cleanup here: this method talks to multilang over a
    # socket and never creates a named pipe.
    cdisplay("The child process exited!")
  end
end
|
869
|
-
|
870
|
-
|
871
|
-
# Runs a join operation.
#
# Reads every tuple from the left ("rd_child_1") and right ("rd_child_2")
# consumee pipes until both have sent "end_cycle", indexes them by their join
# column (@__node["lhs_fields"] / @__node["rhs_fields"]), builds the joined
# rows for @__node["join_type"] ("inner", "left", "right" or "outer"), and
# then hands the rows to the consumers one at a time, each time a consumer
# asks for the "next" tuple. A consumer whose queue is empty is sent
# END_CYCLE_MESSAGE.
#
# Returns early (nil) when the node is missing its join configuration or a
# "kill_cycle" command is received.
def self.run_join()

  lhs_fields = @__node["lhs_fields"]
  rhs_fields = @__node["rhs_fields"]
  join_type = @__node["join_type"]

  # Sanity check
  return if lhs_fields.nil? || rhs_fields.nil? || join_type.nil?

  # Index the consumer read pipes for lookup when they answer
  consumer_hash = build_consumer_hash()

  # The side a message belongs to is decided by the pipe it arrived on
  left_pipe = @__consumee_pipes["rd_child_1"]
  right_pipe = @__consumee_pipes["rd_child_2"]
  read_streams = [left_pipe, right_pipe]

  # Every consumer has to be sent END_CYCLE_MESSAGE before we are done
  consumers_running = consumer_hash.length

  # Index the incoming tuples by their join key
  lhs_tuples = {}
  rhs_tuples = {}

  left_end_cycle_received = false
  right_end_cycle_received = false

  # Receive tuples until both sides have ended their cycle
  until left_end_cycle_received && right_end_cycle_received
    select_read_streams(read_streams).each do |r|

      obj = read_message(r)

      if obj["command"]
        case obj["command"]

        # A consumee is done emitting
        when "end_cycle"
          if r == left_pipe
            left_end_cycle_received = true
          elsif r == right_pipe
            right_end_cycle_received = true
          end

          # We are done receiving from streams
          break if left_end_cycle_received && right_end_cycle_received

        when "kill_cycle"
          # NOTE(review): unlike run_group_by, the kill is not forwarded to
          # consumers/consumees here - confirm whether that is intended.
          return
        end

      # Received a tuple from consumee
      elsif obj['tuple']
        tuple = obj['tuple']
        if r == left_pipe
          (lhs_tuples[tuple[lhs_fields]] ||= []) << tuple
        elsif r == right_pipe
          (rhs_tuples[tuple[rhs_fields]] ||= []) << tuple
        end
      end

      # Ask the side we just heard from for its next tuple
      if r == left_pipe
        write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)
      elsif r == right_pipe
        write_message(@__consumee_pipes["wr_child_2"], NEXT_MESSAGE)
      end
    end
  end

  # Setup output queues: every consumer receives every joined tuple
  build_joined_tuples(lhs_tuples, rhs_tuples, join_type).each do |tuple|
    tuple_json = build_tuple_json(tuple)
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        @__emit_queues[stream][consumer][:write_queue] << tuple_json
      end
    end
  end

  # Send first tuple (or end the cycle right away when there is nothing to send)
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      tuple_json = get_consumer_tuple(stream, consumer)
      if tuple_json.nil?
        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
        consumers_running -= 1
      else
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end
  end
  return if consumers_running <= 0

  # Send tuples to consumers as they ask for them
  loop do

    # Retrieve messages from consumers
    ready, = IO.select(consumer_hash.keys, [], [])

    ready.each do |r|

      # Read from consumer
      msg = read_message(r)
      consumer = consumer_hash[r][:consumer]
      stream = consumer_hash[r][:stream]

      next unless msg["command"]

      case msg["command"]
      # Consumer is ready for next message
      when "next"
        @__emit_queues[stream][consumer][:ready] = true
        tuple_json = get_consumer_tuple(stream, consumer)

        # If all messages have been sent to a consumer, end its cycle
        if tuple_json.nil?
          send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
          consumers_running -= 1
          return if consumers_running <= 0
        else
          emit_consumer_tuple(stream, consumer, tuple_json)
        end
      when "kill_cycle"
        return
      end
    end
  end
end

# Builds the joined rows from two indexes of tuples (join key => array of
# tuple hashes). The output columns are the union of the columns of the first
# tuple seen on each side. A nil join key never matches anything.
#
# join_type: "inner" keeps matched pairs only; "left"/"right" also keep the
# unmatched tuples of that side, padded with nil; "outer" keeps unmatched
# tuples of both sides. Any other value yields no rows.
def self.build_joined_tuples(lhs_tuples, rhs_tuples, join_type)
  sample_lhs = lhs_tuples.empty? ? {} : lhs_tuples.values[0][0]
  sample_rhs = rhs_tuples.empty? ? {} : rhs_tuples.values[0][0]
  joined_fields = (sample_lhs.keys + sample_rhs.keys).uniq

  # A fresh row per pair; the primary side wins when both have the column
  merge = lambda do |primary, secondary|
    row = {}
    joined_fields.each do |field|
      row[field] = primary.has_key?(field) ? primary[field] : secondary[field]
    end
    row
  end

  # An unmatched tuple keeps its own columns; missing ones become nil
  pad = lambda do |tuple|
    row = tuple.dup
    joined_fields.each { |field| row[field] = nil unless row.has_key?(field) }
    row
  end

  join_side = lambda do |primary_groups, secondary_groups, keep_unmatched|
    rows = []
    primary_groups.each_pair do |key, primary_group|
      matches = key.nil? ? [] : (secondary_groups[key] || [])
      primary_group.each do |primary|
        if matches.empty?
          rows << pad.call(primary) if keep_unmatched
        else
          matches.each { |secondary| rows << merge.call(primary, secondary) }
        end
      end
    end
    rows
  end

  case join_type
  when "inner"
    join_side.call(lhs_tuples, rhs_tuples, false)
  when "left"
    join_side.call(lhs_tuples, rhs_tuples, true)
  when "right"
    join_side.call(rhs_tuples, lhs_tuples, true)
  when "outer"
    rows = join_side.call(lhs_tuples, rhs_tuples, true)
    rhs_tuples.each_pair do |key, group|
      next if !key.nil? && lhs_tuples.has_key?(key)
      group.each { |tuple| rows << pad.call(tuple) }
    end
    rows
  else
    []
  end
end
|
1207
|
-
|
1208
|
-
# A filter is executed exactly like an "each" operation.
def self.run_filter()
  run_each
end
|
1211
|
-
|
1212
|
-
|
1213
|
-
# Runs a sink operation.
#
# Reads tuples from the consumee pipe ("rd_child_1") until an "end_cycle" or
# "kill_cycle" command arrives, checking each tuple against the columns
# declared in @__node["columns"] (a list of single-entry hashes, column name
# => type name). Tuple columns that arrive under an alias are renamed to the
# declared column name. The collected tuples are then printed as a table and,
# when an output option is set, written to "<output>.csv".
#
# Returns early after displaying an error when a tuple has fewer columns than
# declared or a value does not have the declared type.
def self.run_sink()
  columns = @__node["columns"]

  type_map = {
    "string" => String,
    "double" => Float,
    "integer" => Integer,
    "float" => Float,
    "array" => Array,
    "map" => Hash
  }

  # Declared column name => expected Ruby class
  col_map = {}
  columns.each do |hash|
    key = hash.keys[0]
    col_map[key] = type_map[hash[key]]
  end

  tuples = []

  # The other options in this file (:interactive, :join_options) use symbol
  # keys; the string key is still honoured for backward compatibility.
  output = @__options[:output] || @__options["output"]

  loop do
    # Read messages
    obj = read_message(@__consumee_pipes["rd_child_1"])

    # Add row
    if obj['tuple']

      tuple = obj['tuple']
      display_json = Hash[tuple.map{|k, v| [Zillabyte::Runner::Operation.truncate_message(k), Zillabyte::Runner::Operation.truncate_message(v)]}].to_json

      if col_map.keys.length > tuple.keys.length
        cdisplay "Error: invalid keys for sink tuple : Expected #{col_map.keys} , got: #{tuple.keys}"
        cdisplay("\n \nPress Ctrl-C to exit", false)
        return
      end

      # A message without column_aliases is treated as having none
      column_aliases = obj['column_aliases'] || []

      # Check tuple columns for valid entry; every declared column must be
      # matched by a value of the right type (nil is always accepted)
      columns_to_check = col_map.length
      tuple.keys.each do |col|
        value = tuple[col]
        matched_column = check_tuple_for_alias(col, column_aliases, col_map.keys)
        next if matched_column.nil?

        # Stop at the first type mismatch; the count check below reports it
        break unless value.nil? || value.is_a?(col_map[matched_column])

        columns_to_check -= 1
        # Set the proper column name
        if col != matched_column
          tuple[matched_column] = value
          tuple.delete(col)
        end
      end

      if columns_to_check != 0
        cdisplay "Error: invalid schema for sink tuple #{display_json}"
        return
      end
      tuples << obj
      cdisplay "received #{display_json}" if @__options[:interactive]
      write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

    # End cycle
    elsif obj['command']
      break if obj['command'] == "end_cycle" || obj['command'] == "kill_cycle"
    end
  end

  if tuples.empty?
    cdisplay "No tuples received"
    return
  end

  # Build table
  require("terminal-table")
  table = Terminal::Table.new :title => @__name
  require("csv")
  csv_str = CSV.generate do |csv|
    header_written = false
    tuples.each do |obj|
      begin
        t = obj['tuple']
        m = obj['meta'] || {}
        next unless t

        # The header comes from the first tuple's columns plus its meta keys
        unless header_written
          keys = [t.keys, m.keys].flatten
          csv << keys
          table << keys
          table << :separator
          header_written = true
        end

        vals = [t.values, m.values].flatten
        csv << vals
        # Table cells are cut to 101 characters; the CSV keeps full values
        table << vals.flat_map{|v| "#{v.to_s}"[0..100]}
      rescue => e
        cdisplay e.message
        cdisplay e.backtrace
      end
    end
  end

  # Output table
  cdisplay("\n#{table.to_s}")
  cdisplay "#{tuples.length} rows"

  # Write file
  if output
    filename = "#{output}.csv"
    File.open(filename, "w") { |f| f.write(csv_str) }
    cdisplay("output written to #{filename}")
  end
end
|
1345
|
-
|
1346
|
-
|
1347
|
-
private
|
1348
|
-
|
1349
|
-
# Number of bytes requested from a stream per sysread call
BUFSIZE = 8192

# Per-stream reader state, keyed by the read stream object:
# the raw text received so far that has not been split into messages yet...
@__read_buffers = {}
# ...and the parsed messages that have not been handed out yet
@__read_buffered_messages = {}
|
1354
|
-
|
1355
|
-
|
1356
|
-
# Returns the streams that can be read without blocking.
#
# Streams that still have parsed messages buffered are returned right away;
# only when none has any does this block in IO.select until at least one of
# read_streams becomes readable.
def self.select_read_streams(read_streams)
  with_backlog = read_streams.reject do |stream|
    (@__read_buffered_messages[stream] ||= []).empty?
  end
  return with_backlog unless with_backlog.empty?

  readable, = IO.select(read_streams, [], [])
  readable
end
|
1370
|
-
|
1371
|
-
|
1372
|
-
# Reads one JSON message from read_stream and returns it parsed.
#
# Messages are separated by ENDMARKER. Data is pulled from the stream with
# sysread in BUFSIZE pieces until at least one complete message is buffered;
# every complete message in the buffer is parsed and queued, and the text
# after the last ENDMARKER is kept for the next call. Messages that are not
# valid JSON are reported with cdisplay and skipped, and reading continues
# until a valid message is available, so the return value is never nil.
#
# Errors raised by sysread (e.g. EOFError at end of stream) propagate.
def self.read_message(read_stream)
  @__read_buffers[read_stream] ||= "".dup
  queue = (@__read_buffered_messages[read_stream] ||= [])

  while queue.empty?
    buffer = @__read_buffers[read_stream]
    until buffer.include?(ENDMARKER)
      buffer << read_stream.sysread(BUFSIZE)
    end

    # A negative limit keeps trailing empty strings, so the last element is
    # always the (possibly empty) incomplete remainder after the final marker
    parts = buffer.split(ENDMARKER, -1)
    @__read_buffers[read_stream] = parts.pop

    parts.each do |raw|
      # Two markers in a row leave nothing to parse between them
      next if raw.strip.empty?
      begin
        queue << JSON.parse(raw)
      rescue JSON::ParserError
        cdisplay "READMESSAGE: invalid JSON #{raw}"
      end
    end
  end

  queue.shift
end
|
1424
|
-
|
1425
|
-
|
1426
|
-
# Writes msg to write_stream, stripped of surrounding whitespace and
# terminated with ENDMARKER, then flushes the stream.
def self.write_message(write_stream, msg)
  write_stream.write("#{msg.strip}#{ENDMARKER}")
  write_stream.flush
end
|
1432
|
-
|
1433
|
-
|
1434
|
-
# Sends HANDSHAKE_MESSAGE on write_stream and returns the reply read from
# read_stream. Any error is reported with cdisplay and re-raised.
def self.handshake(write_stream, read_stream)
  write_message(write_stream, HANDSHAKE_MESSAGE)
  read_message(read_stream)
rescue => error
  cdisplay("Error handshaking node")
  raise error
end
|
1444
|
-
|
1445
|
-
|
1446
|
-
# Sends PREPARE_MESSAGE on write_stream and returns the reply read from
# read_stream. Any error is reported with cdisplay and re-raised.
def self.prepare(write_stream, read_stream)
  write_message(write_stream, PREPARE_MESSAGE)
  read_message(read_stream)
rescue => error
  cdisplay("Error running prepare")
  raise error
end
|
1456
|
-
|
1457
|
-
|
1458
|
-
# Sends BEGIN_CYCLE_MESSAGE on write_stream and reads replies from
# read_stream until a "done" command arrives. "log" commands received in the
# meantime are displayed.
#
# Returns nil.
# Raises RuntimeError on any other reply, using the reply's "msg" as the
# error message when it has one.
def self.begin_cycle(write_stream, read_stream)
  write_message(write_stream, BEGIN_CYCLE_MESSAGE)

  loop do
    obj = Hash[read_message(read_stream)]
    case obj["command"]
    when "log"
      cdisplay "LOG: #{obj['msg']}"
    when "done"
      break
    else
      cdisplay("Error beginning cycle")
      # `raise nil` would itself fail with a TypeError and hide the reply,
      # so fall back to describing the unexpected message
      reason = obj["msg"]
      raise(reason.nil? ? "unexpected reply while beginning cycle: #{obj.inspect}" : reason.to_s)
    end
  end
end
|
1478
|
-
|
1479
|
-
# Maps each consumer's read pipe to the stream and consumer it belongs to,
# so that a readable pipe can be traced back to its consumer.
#
# For every consumer in @__emit_queues the "rd_parent_1" pipe is used when
# present, otherwise "rd_parent_2"; consumers with neither are left out.
# Returns a hash: pipe => {:stream => ..., :consumer => ...}.
def self.build_consumer_hash()
  lookup = {}
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      pipes = @__consumer_pipes[stream][consumer]
      pipe_name = ["rd_parent_1", "rd_parent_2"].find { |name| pipes.has_key?(name) }
      next if pipe_name.nil?

      lookup[pipes[pipe_name]] = {:stream => stream, :consumer => consumer}
    end
  end
  lookup
end
|
1499
|
-
|
1500
|
-
# Sends json_obj to every consumee of the operation: the left hand (or only)
# input on "wr_child_1" and, when present, the right hand input on
# "wr_child_2".
def self.send_to_consumees(json_obj)
  inputs = @__consumee_pipes
  write_message(inputs["wr_child_1"], json_obj) if inputs.has_key?("wr_child_1")
  write_message(inputs["wr_child_2"], json_obj) if inputs.has_key?("wr_child_2")
end
|
1515
|
-
|
1516
|
-
# Sends json_obj to every consumer of the operation, regardless of stream.
# Each consumer is written to on its "wr_parent_1" pipe when it has one,
# otherwise on "wr_parent_2". With display set, every send is also reported
# through cdisplay.
def self.send_to_consumers(json_obj, display = false)
  @__consumer_pipes.each_pair do |_stream, consumers|
    consumers.each_pair do |consumer, pipe|
      pipe_name = ["wr_parent_1", "wr_parent_2"].find { |name| pipe.has_key?(name) }
      write_message(pipe[pipe_name], json_obj) unless pipe_name.nil?
      cdisplay "emitted #{json_obj.chomp} to #{consumer}" if display
    end
  end
end
|
1534
|
-
|
1535
|
-
|
1536
|
-
# Returns the pipe used to write to a consumer of a stream: entry
# "wr_parent_<number>" of that consumer's pipes (number defaults to 1).
def self.get_write_stream(stream, consumer, number=1)
  @__consumer_pipes[stream][consumer]["wr_parent_#{number}"]
end
|
1541
|
-
|
1542
|
-
|
1543
|
-
# Removes and returns the next tuple queued for a consumer of a stream,
# or nil when that consumer's write queue is empty.
def self.get_consumer_tuple(stream, consumer)
  pending = @__emit_queues[stream][consumer][:write_queue]
  pending.shift
end
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
# Sends a command message to one consumer of a stream, on its "wr_parent_1"
# pipe when it has one, otherwise on "wr_parent_2", and marks the consumer
# as not ready.
def self.send_command_tuple(stream, consumer, json_obj)
  pipes = @__consumer_pipes[stream][consumer]
  pipe_name = ["wr_parent_1", "wr_parent_2"].find { |name| pipes.has_key?(name) }
  write_message(pipes[pipe_name], json_obj) unless pipe_name.nil?

  @__emit_queues[stream][consumer][:ready] = false
end
|
1565
|
-
|
1566
|
-
|
1567
|
-
# Emits tuple_json to one consumer of a stream, on its "wr_parent_1" pipe
# when it has one, otherwise on "wr_parent_2", marks the consumer as not
# ready, and reports the emitted tuple (with keys and values passed through
# truncate_message) via cdisplay.
#
# When tuple_json is not valid JSON an error is displayed, but the message is
# still written and the report shows an empty tuple.
def self.emit_consumer_tuple(stream, consumer, tuple_json)
  display_json = nil
  begin
    shortened = JSON.parse(tuple_json)["tuple"].map do |key, value|
      [Zillabyte::Runner::Operation.truncate_message(key), Zillabyte::Runner::Operation.truncate_message(value)]
    end
    display_json = Hash[shortened].to_json
  rescue JSON::ParserError
    cdisplay "Error: invalid JSON"
  end

  pipes = @__consumer_pipes[stream][consumer]
  pipe_name = ["wr_parent_1", "wr_parent_2"].find { |name| pipes.has_key?(name) }
  write_message(pipes[pipe_name], tuple_json) unless pipe_name.nil?

  @__emit_queues[stream][consumer][:ready] = false
  cdisplay "emitted tuple #{display_json} to #{consumer} "
end
|
1590
|
-
|
1591
|
-
|
1592
|
-
# Finds the expected column that a tuple column corresponds to.
#
# column         - the column name as it appears in the tuple
# column_aliases - list of hashes with "alias" and "concrete_name" entries;
#                  nil is treated as "no aliases"
# valid_columns  - the expected column names, checked in order
#
# Returns the first expected column that either equals column or is the
# alias of an entry whose concrete name (or alias) equals column; nil when
# there is no such column.
def self.check_tuple_for_alias(column, column_aliases, valid_columns)
  aliases = column_aliases || []

  valid_columns.each do |expected|
    return expected if column == expected

    aliased = aliases.any? do |entry|
      (column == entry["concrete_name"] || column == entry["alias"]) && entry["alias"] == expected
    end
    return expected if aliased
  end

  nil
end
|
1610
|
-
|
1611
|
-
# Builds the JSON message for a tuple emitted by this operation.
#
# tuple          - hash of column => value. "id" is dropped; "confidence",
#                  "since" and "source" are moved into the meta section.
# meta           - optional hash of meta values; it is copied, not modified
# column_aliases - optional alias information, passed through unchanged
#                  (an empty hash when not given)
#
# Returns a JSON string with the keys "op" (this operation's name), "tuple",
# "meta" and "column_aliases".
def self.build_tuple_json(tuple, meta = nil, column_aliases = nil)
  # Work on a copy so the caller's hash does not pick up the tuple's
  # confidence/since/source values
  meta = meta.nil? ? {} : meta.dup
  column_aliases ||= {}
  values = {}

  tuple.each do |k, v|
    case k
    when "id"
      next
    when "confidence", "since", "source"
      meta[k] = v
    else
      values[k] = v
    end
  end

  {"op" => @__name, "tuple" => values, "meta" => meta, "column_aliases" => column_aliases}.to_json
end
|
1629
|
-
|
1630
|
-
|
1631
|
-
# Builds the shell command that runs the app's script with the multilang
# arguments in arg.
#
# The command changes into @__dir and starts the script named by
# @__meta["script"] with the interpreter for @__meta["language"] ("ruby",
# "python" or "js"). For python, the interpreter inside "<dir>/vEnv" is used
# when that directory exists. With ignore_stderr the command's stderr is
# redirected to /dev/null.
#
# Returns the command string, or nil (after displaying a message) when the
# meta information is missing or the language is not one of the above.
def self.command(arg, ignore_stderr=false)
  if @__meta.nil?
    cdisplay("could not extract meta information. missing zillabyte.conf.yml?")
    # Nothing below can work without the meta information
    return nil
  end

  full_script = File.join(@__dir, @__meta["script"])
  stderr_opt = "2> /dev/null" if ignore_stderr

  case @__meta["language"]
  when "ruby"
    # Execute in the bundler context
    cmd = "cd \"#{@__dir}\"; unset BUNDLE_GEMFILE; ZILLABYTE_HARNESS=1 bundle exec ruby \"#{full_script}\" #{arg} #{stderr_opt}"
  when "python"
    # Quoted like every other path here, so directories with spaces work
    python_bin = File.directory?("#{@__dir}/vEnv") ? "\"#{@__dir}/vEnv/bin/python\"" : "python"
    cmd = "cd \"#{@__dir}\"; PYTHONPATH=~/zb1/multilang/python/Zillabyte #{python_bin} \"#{full_script}\" #{arg} #{stderr_opt}"
  when "js"
    cmd = "cd \"#{@__dir}\"; NODE_PATH=~/zb1/multilang/js/src/lib #{Zillabyte::API::NODEJS_BIN} \"#{full_script}\" #{arg} #{stderr_opt}"
  else
    cdisplay("no language specified")
  end
  return cmd
end
|
1654
|
-
|
1655
|
-
|
1656
|
-
# Displays msg on behalf of this operation by handing it, together with the
# operation's name, to the tester object's cdisplay. useName is passed
# through unchanged.
def self.cdisplay(msg, useName=true)
  tester = @__tester
  tester.cdisplay(@__name, msg, useName)
end
|
1660
|
-
|
1661
|
-
end; end; end
|
1662
|
-
|