zillabyte-cli 0.9.20 → 0.9.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +13 -5
- data/lib/zillabyte-cli/version.rb +1 -1
- data/lib/zillabyte/api/components.rb +2 -2
- data/lib/zillabyte/cli/base.rb +15 -0
- data/lib/zillabyte/cli/data.rb +31 -1
- data/lib/zillabyte/cli/download.rb +67 -0
- data/lib/zillabyte/cli/flows.rb +91 -37
- data/lib/zillabyte/helpers.rb +12 -1
- metadata +28 -43
- data/lib/#zillabyte-cli.rb# +0 -5
- data/lib/zillabyte/cli/#logs.rb# +0 -12
- data/lib/zillabyte/cli/#repl.rb# +0 -43
- data/lib/zillabyte/cli/templates/python/#simple_function.py# +0 -27
- data/lib/zillabyte/runner.rb +0 -6
- data/lib/zillabyte/runner/app_runner.rb +0 -320
- data/lib/zillabyte/runner/component_operation.rb +0 -636
- data/lib/zillabyte/runner/component_runner.rb +0 -337
- data/lib/zillabyte/runner/multilang_operation.rb +0 -1662
- data/lib/zillabyte/runner/operation.rb +0 -18
@@ -1,337 +0,0 @@
|
|
1
|
-
require "zillabyte/runner/multilang_operation"
|
2
|
-
require "zillabyte/runner/component_operation"
|
3
|
-
|
4
|
-
# HIDDEN:
|
5
|
-
class Zillabyte::Runner::ComponentRunner < Zillabyte::Command::Base
|
6
|
-
include Zillabyte::Helpers
|
7
|
-
|
8
|
-
KILL_CYCLE_MESSAGE = "{\"command\": \"kill_cycle\"}\n"
|
9
|
-
|
10
|
-
# Runs a component locally.
#
# One child process is forked per node in the component metadata and the
# children are wired together with pipes. When no :input option is given, the
# parent then prompts the user for tuples and writes them to the single
# "input" node until the user enters 'end'.
#
# @param meta [Hash] component metadata; "nodes" and "arcs" are read here
# @param dir [String] component directory, forwarded to every operation
# @param session [Object] output target; must respond to display(message, newline)
# @param options [Hash] run options; only :input is read here, the hash is
#   otherwise forwarded to every operation
# @return [nil] returns immediately when meta or session is nil
def run(meta, dir = Dir.pwd, session = nil, options = {})
  return if meta.nil? || session.nil?

  @session = session
  @colors = {}

  # Get options
  input = options[:input]

  # Show the user what we know about their app...
  display "inferring your app details..."
  describe_component(meta)

  # Setup streams: index the nodes by name
  @nodes = meta["nodes"]
  @node_map = {}
  @nodes.each do |n|
    @node_map[n["name"]] = n
  end

  # Arcs link an origin node to a destination node over a named stream
  @arcs = meta["arcs"]

  # Organize component pipes
  @operations = {}
  @operation_pipes = {}

  # Start component
  begin

    # Setup operation pipes. Every node gets two pipes:
    #   wr_parent_1 -> rd_child_1 carries data towards the node
    #   wr_child_1 -> rd_parent_1 carries the node's replies back
    @nodes.each do |n|
      name = n["name"]

      rd_child_1, wr_parent_1 = IO.pipe()
      rd_parent_1, wr_child_1 = IO.pipe()

      @operation_pipes[name] = {
        "rd_child_1" => rd_child_1,
        "wr_child_1" => wr_child_1,
        "rd_parent_1" => rd_parent_1,
        "wr_parent_1" => wr_parent_1
      }

      # Add a second (right hand side) set of pipes for joins
      if n["type"] == "join"
        rd_child_2, wr_parent_2 = IO.pipe()
        rd_parent_2, wr_child_2 = IO.pipe()
        @operation_pipes[name]["rd_child_2"] = rd_child_2
        @operation_pipes[name]["wr_child_2"] = wr_child_2
        @operation_pipes[name]["rd_parent_2"] = rd_parent_2
        @operation_pipes[name]["wr_parent_2"] = wr_parent_2
      end
    end

    # Maps origin => {stream => [destinations]}
    @arc_map = {}
    @arcs.each do |a|
      origin = a["origin"]
      stream_name = a["name"]
      @arc_map[origin] ||= {}
      @arc_map[origin][stream_name] ||= []
      @arc_map[origin][stream_name] << a["dest"]
    end

    # Spawn one child process per node
    @nodes.each do |n|
      name = n["name"]
      type = n["type"]
      pipes = @operation_pipes[name]

      # Fork.
      pid = fork()
      if pid # In parent
        # The parent only keeps the ends it uses to talk to the child.
        pipes["rd_child_1"].close()
        pipes["wr_child_1"].close()

        if type == "join"
          pipes["rd_child_2"].close()
          pipes["wr_child_2"].close()
        end
      else # in child
        # The child only keeps the ends it uses to talk to its upstream.
        pipes["wr_parent_1"].close()
        pipes["rd_parent_1"].close()

        if type == "join"
          pipes["rd_parent_2"].close()
          pipes["wr_parent_2"].close()
        end

        begin
          # Setup reading and writing pipes for communicating with consumee component
          if type != "join"
            in_pipes = {
              "rd_child_1" => @operation_pipes[name]["rd_child_1"],
              "wr_child_1" => @operation_pipes[name]["wr_child_1"]
            }

          # Add join specific options
          else
            options[:join_options] = {}
            in_pipes = {}
            @arcs.each do |a|
              next unless a["dest"] == name

              # Left Side
              if a["left"] == 1
                options[:join_options][:lhs] = a["origin"]
                in_pipes["rd_child_1"] = @operation_pipes[name]["rd_child_1"]
                in_pipes["wr_child_1"] = @operation_pipes[name]["wr_child_1"]
              # Right Side
              elsif a["right"] == 1
                options[:join_options][:rhs] = a["origin"]
                in_pipes["rd_child_2"] = @operation_pipes[name]["rd_child_2"]
                in_pipes["wr_child_2"] = @operation_pipes[name]["wr_child_2"]
              end
            end
          end

          # Index consumer pipes by stream name, consumer_name
          out_pipes = {}

          # Check if you are the consumee for a downstream join in order to select the correct pipe
          if type != "output"
            # A node without outgoing arcs has no entry in @arc_map; treat
            # that as "no consumers" instead of failing on nil.
            (@arc_map[name] || {}).each_pair do |stream, destinations|
              out_pipes[stream] ||= {}

              destinations.each do |dest|
                out_pipes[stream][dest] ||= {}

                # Check for a join at the destination
                if @node_map[dest]["type"] == "join"
                  @arcs.each do |a|
                    next unless a["dest"] == dest && a["origin"] == name

                    # Left Side
                    if a["left"] == 1
                      out_pipes[stream][dest]["wr_parent_1"] = @operation_pipes[dest]["wr_parent_1"]
                      out_pipes[stream][dest]["rd_parent_1"] = @operation_pipes[dest]["rd_parent_1"]
                      break
                    # Right Side
                    elsif a["right"] == 1
                      out_pipes[stream][dest]["wr_parent_2"] = @operation_pipes[dest]["wr_parent_2"]
                      out_pipes[stream][dest]["rd_parent_2"] = @operation_pipes[dest]["rd_parent_2"]
                      break
                    end
                  end
                else
                  out_pipes[stream][dest]["wr_parent_1"] = @operation_pipes[dest]["wr_parent_1"]
                  out_pipes[stream][dest]["rd_parent_1"] = @operation_pipes[dest]["rd_parent_1"]
                end
              end
            end
          end

          # Run the child process
          Zillabyte::Runner::ComponentOperation.run(n, dir, in_pipes, out_pipes, self, meta, options)

        rescue => e
          display e.message
          display e.backtrace
        ensure
          # Close the reading end of the child
          pipes["rd_child_1"].close()
          # Close the writing end of the child
          pipes["wr_child_1"].close()

          # Close secondary join child
          pipes["rd_child_2"].close() if pipes["rd_child_2"]
          pipes["wr_child_2"].close() if pipes["wr_child_2"]

          # The child must never fall through into the parent's code below.
          exit!(-1)
        end #end child
      end
    end

    # If no input file, read from STDIN
    # TODO handle inputs
    if input.nil?

      input_nodes = @nodes.select { |n| n["type"] == "input" }
      if input_nodes.length > 1
        display "Error: Cannot run component with multiple input sources without input files"
        return
      end
      if input_nodes.empty?
        # Without this check the prompt loop fails on a nil node lookup.
        display "Error: Cannot run component without an input source"
        return
      end
      source = input_nodes.first["name"]

      display ""
      display "To view results: Enter 'end' "
      display ""

      # The schema does not change between prompts, so compute it once.
      fields = @node_map[source]['fields'].map { |h| h.keys[0].upcase }

      loop do
        display "Enter an input tuple in the form : #{fields.join(' ')}"
        msg = ask

        # Kill the cycle
        if msg == 'end'
          @operation_pipes[source]["wr_parent_1"].puts KILL_CYCLE_MESSAGE
          break
        end

        # Check arguments
        args = msg.scan(/(?:\w|"[^"]*")+/)
        if args.length % fields.length != 0
          display "Error: Argument length must be a multiple of the schema length"
          next
        end

        # Send tuple to source
        @operation_pipes[source]["wr_parent_1"].puts msg
      end
    end

  rescue => e
    display e.message
    display e.backtrace

  ensure
    # Wait for every child before releasing the parent's pipe ends.
    Process.waitall()
    @operation_pipes.each do |name, pipes|
      # Close the writing end of the parent
      pipes["wr_parent_1"].close()
      # Close the reading end of the parent
      pipes["rd_parent_1"].close()

      # Close secondary join parent
      pipes["wr_parent_2"].close() if pipes["wr_parent_2"]
      pipes["rd_parent_2"].close() if pipes["rd_parent_2"]
    end
  end
end
|
276
|
-
|
277
|
-
# Returns the session object stored by #run (nil until #run has stored one).
def session; @session; end
|
280
|
-
|
281
|
-
|
282
|
-
# Prints +message+ through #display, colored with the color recorded for
# +name+ in @colors (:default when none is recorded).
#
# The first line is optionally prefixed with "<name> - "; following lines are
# indented by the width of that prefix so they line up underneath.
#
# @param name [String] operation name, used for the color lookup and prefix
# @param message [String, Array<String>, nil] text to print; a String is split
#   on newlines, an Array is treated as one entry per line
# @param useName [Boolean] whether to prefix the first line with the name
# @return [Array<String>, nil] nil when nothing was printed
def cdisplay(name, message, useName=true)
  color = @colors[name] || :default
  return if message.nil? || message == ""

  lines = message.is_a?(Array) ? message : message.split("\n")
  # An empty array, or a string made only of newlines, has no lines at all;
  # slicing such a list with [1..-1] gives nil, so stop before printing.
  return if lines.empty?

  prefix = useName ? "#{name} - " : ""
  display "#{prefix}#{lines.first}".colorize(color)

  padding = ' ' * prefix.size
  lines.drop(1).each do |line|
    display "#{padding}#{line}".colorize(color)
  end
end
|
301
|
-
|
302
|
-
# Forwards a message to the session stored by #run.
#
# @param message [Object] what to print
# @param newline [Boolean] passed through unchanged to the session
# @return [Object] whatever the session's display returns
def display(message, newline = true)
  target = @session
  target.display(message, newline)
end
|
305
|
-
|
306
|
-
# Prints a summary of the component: its name and language, then one section
# per node with the node's name and type. "input" nodes also list their fields
# (and matches, when present); "output" nodes also list their columns.
#
# Each node is given a color, remembered in @colors under the node's name so
# later output for that node can reuse it.
#
# @param meta [Hash] component metadata; reads "name", "language" and "nodes"
# @return [Array] the node list that was iterated
def describe_component(meta)
  require("colorize")
  require("indentation")

  palette = [:green, :yellow, :magenta, :cyan, :white, :blue, :light_yellow, :light_blue, :red, :light_magenta, :light_cyan]
  width = 20
  label = lambda { |text| text.rjust(width) }

  display "#{label.call('component name')}: #{meta['name']}"
  display "#{label.call('component language')}: #{meta['language']}"

  meta['nodes'].each_with_index do |node, index|
    # Keep a color already assigned to this node; otherwise cycle the palette.
    color = (@colors[node['name']] ||= palette[index % palette.length])

    banner = "=" * width + " operation ##{index}"
    display(banner.colorize(color))
    display "#{label.call('name')}: #{node['name'].to_s.colorize(color)}"

    # Every node shows its type; input and output nodes add their schema.
    kind = node['type']
    display "#{label.call('type')}: #{kind.to_s.colorize(color)}"

    case kind
    when "input"
      display "#{label.call('fields')}: #{node['fields'].to_s.colorize(color)}"
      if node['matches']
        pretty = JSON.pretty_generate(node['matches']).indent(width + 2).lstrip
        display "#{label.call('matches')}: #{pretty.colorize(color)}"
      end
    when "output"
      display "#{label.call('columns')}: #{node['columns'].to_s.colorize(color)}"
    end
  end
end
|
336
|
-
|
337
|
-
end
|
@@ -1,1662 +0,0 @@
|
|
1
|
-
|
2
|
-
# Emulate a multilang operation
|
3
|
-
module Zillabyte; module Runner; class MultilangOperation
|
4
|
-
|
5
|
-
HANDSHAKE_MESSAGE = "{\"pidDir\": \"/tmp\"}\n"
|
6
|
-
PREPARE_MESSAGE = " {\"command\": \"prepare\"}\n"
|
7
|
-
DONE_MESSAGE = "{\"command\": \"done\"}\n"
|
8
|
-
NEXT_MESSAGE = "{\"command\": \"next\"}\n"
|
9
|
-
BEGIN_CYCLE_MESSAGE = "{\"command\": \"begin_cycle\"}\n"
|
10
|
-
END_CYCLE_MESSAGE = "{\"command\": \"end_cycle\"}\n"
|
11
|
-
KILL_CYCLE_MESSAGE = "{\"command\": \"kill_cycle\"}\n"
|
12
|
-
PONG_PREFIX = "{\"pong\": \""
|
13
|
-
PONG_SUFFIX = "\"}\n"
|
14
|
-
ENDMARKER = "\nend\n"
|
15
|
-
|
16
|
-
# Emulates one multilang operation: stores the operation's context in
# module-level instance variables, prepares an emit queue for every
# (stream, consumer) pair, then dispatches on the node's "type".
#
# @param node [Hash] node metadata; "name" and "type" are read here
# @param dir [String] directory of the app being run
# @param consumee_pipes [Hash] pipes towards the upstream operation
# @param consumer_pipes [Hash] stream name => {consumer name => pipes}
# @param tester [Object] runner that owns this operation
# @param meta [Hash] metadata of the whole flow
# @param options [Hash] run options; :output_type is read here
# @return [Object] result of the dispatched handler; errors raised by the
#   handler are reported through cdisplay instead of propagating
def self.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options = {})
  require("mkfifo")
  require("zillabyte/runner/component_operation")
  require("zillabyte/runner/operation")

  require("pty")
  require("open3")

  @__node = node
  @__name = node["name"]
  @__tester = tester

  @__type = node["type"]
  @__dir = dir

  @__consumee_pipes = consumee_pipes
  @__consumer_pipes = consumer_pipes
  @__meta = meta
  @__options = options
  @__output_type = options[:output_type]

  # Each consumer of a stream gets its own queue and message passing
  @__emit_queues = {}
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      @__emit_queues[stream] ||= {}
      @__emit_queues[stream][consumer] = {:write_queue => [], :ready => true}
    end
  end

  begin
    case @__type
    when "source"
      self.run_source()
    when "group_by"
      self.run_group_by()
    when "join"
      self.run_join()
    when "each"
      self.run_each()
    when "filter"
      self.run_filter()
    when "component"
      # Forward the caller's options. Writing `options = {}` in argument
      # position would reassign the local and hand over an empty hash.
      Zillabyte::Runner::ComponentOperation.run(node, dir, consumee_pipes, consumer_pipes, tester, meta, options)
    when "sink"
      self.run_sink()
    else
      cdisplay("invalid operation type #{@__type}")
    end
  rescue => e
    cdisplay e.message
  end

end
|
69
|
-
|
70
|
-
|
71
|
-
# Runs a "source" operation in one of three modes:
#
# * interactive (@__options[:interactive]): every line read from the parent
#   pipe is parsed as JSON and forwarded to all consumers until the kill-cycle
#   message arrives or the pipe is closed;
# * relation (node has 'matches' or "relation"): rows are fetched through the
#   tester, queued for every consumer, and handed out one at a time as the
#   consumers ask for the next tuple;
# * custom: the user's multilang process is spawned, connects back over a
#   local TCP socket, and its emits are relayed to the consumers.
#
# The custom mode always ends the current process (see the cleanup section).
def self.run_source()

  # Interactive source
  if @__options[:interactive]
    loop do

      msg = @__consumee_pipes["rd_child_1"].gets

      # gets returns nil once the writing side of the pipe is closed; there is
      # nothing left to parse in that case.
      return if msg.nil?

      if msg == KILL_CYCLE_MESSAGE
        send_to_consumers(KILL_CYCLE_MESSAGE)
        return
      else
        # Build tuple
        begin
          obj = JSON.parse(msg)
        rescue JSON::ParserError
          cdisplay "Error: invalid JSON"
          next
        end

        tuple_json = build_tuple_json(obj)
        send_to_consumers(tuple_json)

      end
    end

  # Source from relation
  elsif @__node['matches'] || @__node["relation"]

    # Query API for rows
    matches = @__node['matches'] || (@__node["relation"]["query"])
    cdisplay("Fetching remote data...")
    res = @__tester.query_agnostic(matches)
    rows = res["rows"]
    if rows.nil? || rows.length == 0
      cdisplay("Could not find data that matches your 'matches' clause")
      exit(-1)
    end

    # Enqueue rows for sending to consumers
    column_aliases = res['column_aliases']
    rows.each do |tuple|
      tuple_json = build_tuple_json(tuple, nil, column_aliases)
      @__emit_queues.each_pair do |stream, consumers|
        consumers.each_pair do |consumer, emitter|
          emitter[:write_queue] << tuple_json
        end
      end
    end

    # Index streams and consumers by their pipes for lookup
    consumer_hash = build_consumer_hash()

    # Send first tuple
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        tuple_json = get_consumer_tuple(stream, consumer)
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end

    # Send tuples to consumers as appropriate
    loop do

      # Retrieve messages from consumers
      rs, ws, es = IO.select(consumer_hash.keys, [], [])

      # Emit tuples to consumers
      emitted = false
      rs.each do |r|

        # Read from consumer
        msg = read_message(r)

        # Same guard as the custom source below: skip reads that yield nothing.
        next if msg.nil?

        stream = consumer_hash[r][:stream]
        consumer = consumer_hash[r][:consumer]

        # Consumer is ready for next message
        if msg["command"]

          case msg["command"]
          when "next"

            @__emit_queues[stream][consumer][:ready] = true
            tuple_json = get_consumer_tuple(stream, consumer)

            # If all messages have been sent to consumer, end their cycle
            if tuple_json.nil?
              cdisplay "ending cycle for #{consumer}"
              send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
              send_command_tuple(stream, consumer, DONE_MESSAGE)
            else
              # Emit tuple to consumer
              emit_consumer_tuple(stream, consumer, tuple_json)
              emitted = true
            end
          when "kill_cycle"
            send_to_consumers(KILL_CYCLE_MESSAGE, false)
            return
          end
        end
      end

      # Exit when done emitting
      if !emitted
        return
      end
    end

  # Custom source
  else

    # Index streams and consumers by their pipes for lookup
    consumer_hash = build_consumer_hash()

    # Keep track of how many consumers to handle before exiting
    consumers_running = consumer_hash.keys.length

    # Kill the cycle on error
    cycle_killed = false

    # Setup multilang socket
    require 'socket'
    host = "0.0.0.0"
    server = TCPServer.new(0) # port 0: let the OS pick a free port
    port = server.addr[1]

    # Spawn multilang process
    cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

    begin

      Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
        begin
          server_thread = Thread.new do
            ml_socket = server.accept()

            # RUN SOURCE
            begin
              # Setup streams from consumers and multilang(stdout and socket communication)
              read_streams = consumer_hash.keys.concat [stdout, ml_socket]

              # Handshake
              handshake(ml_socket, ml_socket)
              prepare(ml_socket, ml_socket)

              # Begin cycle
              begin_cycle(ml_socket, ml_socket)
              emitted = false
              write_message(ml_socket, NEXT_MESSAGE)
              multilang_queue = []
              # NOTE(review): the policy comes from the run options; the node's
              # own "end_cycle_policy" entry is not consulted. Confirm that is
              # the intended source.
              end_cycle_policy = @__options[:end_cycle_policy]
              end_cycle_received = false

              # Receive and handle messages
              loop do
                # Read from a stream
                rs = select_read_streams(read_streams)
                rs.each do |r|
                  # Read stdout straight to user
                  if r == stdout && consumers_running > 0
                    # At EOF gets returns nil and the sub call raises; the
                    # rescue below then marks the cycle as killed.
                    msg = r.gets
                    msg = msg.sub(/\n/, "")
                    cdisplay("log: #{msg}")
                    next
                  end

                  obj = read_message(r)

                  if obj.nil?
                    next
                  end

                  if obj["command"]
                    case obj["command"]

                    # Multilang emitted a tuple
                    when "emit"
                      stream = obj['stream']
                      # Check for null emit
                      if end_cycle_policy != "explicit"

                        if obj['tuple'].nil?
                          # NOTE(review): a nil tuple marks the end of the
                          # cycle but still falls through to the emit below;
                          # confirm that is intended.
                          end_cycle_received = true
                        else
                          # A tuple containing any nil value also ends the
                          # cycle and is not forwarded.
                          end_cycle_received = obj['tuple'].each_value.any?(&:nil?)
                          next if end_cycle_received
                        end
                      end

                      # Valid emit
                      emitted = true

                      # Send or enqueue tuple for each consumer
                      tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])

                      @__emit_queues[stream].each_pair do |consumer, emitter|
                        if emitter[:ready]
                          emit_consumer_tuple(stream, consumer, tuple_json)

                        else
                          @__emit_queues[stream][consumer][:write_queue] << tuple_json
                        end
                      end

                    # Consumer is ready for a message
                    when "next"
                      stream = consumer_hash[r][:stream]
                      consumer = consumer_hash[r][:consumer]
                      @__emit_queues[stream][consumer][:ready] = true
                      tuple_json = get_consumer_tuple(stream, consumer)

                      # End cycle for consumer if it has processed all tuples
                      if tuple_json.nil? && end_cycle_received

                        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                        consumers_running -= 1
                        if consumers_running == 0
                          exit(0)
                        end

                      # TODO break if last consumer
                      elsif !tuple_json.nil?
                        # Emit tuple to consumer
                        emit_consumer_tuple(stream, consumer, tuple_json)
                        emitted = true
                      end

                    # Multilang is done emitting a group of tuples
                    when "done"
                      # End cycle if no tuples were emitted
                      if !emitted && end_cycle_policy == "null_emit"
                        end_cycle_received = true
                      else
                        emitted = false
                      end

                      # Send the next tuple to multilang
                      if !multilang_queue.empty?
                        write_message(ml_socket, multilang_queue.shift)

                      # Request next tuple from multilang
                      elsif !end_cycle_received
                        write_message(ml_socket, NEXT_MESSAGE)

                      # If there are no more messages to send, we are done
                      else
                        # End cycle for ready consumers
                        @__emit_queues.each_pair do |stream, consumers|
                          consumers.each_pair do |consumer, emitter|
                            if emitter[:ready]
                              send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                              consumers_running -= 1
                              if consumers_running == 0
                                exit(0)
                              end
                            end
                          end

                        end
                      end

                    # Multilang sent an error message
                    when "fail"
                      cdisplay("ERROR : #{obj['msg']}")
                      cycle_killed = true
                      exit(0)

                    # Multilang sent a log message
                    when "log"
                      cdisplay "LOG: #{obj['msg']}"

                    # Multilang sent signal to end the cycle
                    when "end_cycle"
                      if end_cycle_policy != "explicit"
                        cdisplay "received end_cycle command for non explicit policy"
                        next
                      end
                      end_cycle_received = true
                    when "kill_cycle"
                      cycle_killed = true
                      exit(0)
                    end

                  # Multilang sent a ping
                  elsif obj['ping']
                    write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                  end
                end
              end

              # Exit after ending consumer cycles
              if consumers_running == 0
                exit(0)
              end
            rescue => e
              cycle_killed = true
              cdisplay e.message
              cdisplay e.backtrace
            ensure
              ml_socket.close()
            end
          end
          server_thread.join()
        rescue => e
          # Report the failure the same way run_each does instead of
          # swallowing it silently.
          cdisplay e.message
          cdisplay e.backtrace
        ensure
          # cleanup
          if cycle_killed
            send_to_consumers(KILL_CYCLE_MESSAGE, false)
          end
          pid = wait_thread[:pid]
          stdout.close
          Process.kill('INT', pid)
          # Ends this process immediately; nothing after this line runs.
          Process.exit!(true)
        end
      end

    rescue PTY::ChildExited
    end
  end
end
|
404
|
-
|
405
|
-
|
406
|
-
# Runs an "each" operation: spawns the user's multilang process, which
# connects back over a local TCP socket, and then relays messages between
# three parties until every consumer's cycle has been ended:
#
# * tuples read from the consumee pipe are passed to multilang;
# * tuples emitted by multilang are sent to each consumer of the stream, or
#   queued for consumers that are not ready;
# * "next" requests from consumers release queued tuples.
#
# multilang_count tracks tuples handed to multilang that it has not yet
# answered with "done"; the cycle is only ended once that count is zero.
def self.run_each()

  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash()

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Kill the cycle on error
  cycle_killed = false

  # Setup multilang socket
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0) # port 0: let the OS pick a free port
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    # Start the operation...
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()
          begin
            # Setup streams from consumers, multilang, and the consumee
            read_streams = consumer_hash.keys.concat [@__consumee_pipes["rd_child_1"], ml_socket, stdout]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            multilang_queue = []
            multilang_count = 0
            end_cycle_received = false
            column_aliases = nil

            # Receive and handle messages
            loop do

              # Read from a stream
              rs = select_read_streams(read_streams)
              rs.each do |r|

                # Read STDOUT from program straight to user
                if r == stdout
                  msg = r.gets
                  if !msg.nil?
                    msg = msg.sub(/\n/, "")
                    cdisplay("LOG: #{msg}")
                  end
                  next
                end

                # Receive an object
                obj = read_message(r)

                # Same guard as run_source: skip reads that yield nothing
                # instead of failing on a nil lookup.
                next if obj.nil?

                if obj["command"]
                  case obj["command"]

                  # Multilang emitted a tuple
                  when "emit"

                    stream = obj["stream"]

                    # Send or enqueue tuple for each consumer
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], column_aliases)

                    @__emit_queues[stream].each_pair do |consumer, emitter|
                      if emitter[:ready]
                        emit_consumer_tuple(stream, consumer, tuple_json)
                      else
                        @__emit_queues[stream][consumer][:write_queue] << tuple_json
                      end
                    end

                  # Consumer is ready for a message
                  when "next"
                    stream = consumer_hash[r][:stream]
                    consumer = consumer_hash[r][:consumer]

                    @__emit_queues[stream][consumer][:ready] = true
                    tuple_json = get_consumer_tuple(stream, consumer)

                    # End cycle for consumer if it has processed all tuples
                    if tuple_json.nil? && end_cycle_received
                      send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                      consumers_running -= 1
                      if consumers_running == 0
                        # Leaves the rs.each iteration; the check after it
                        # then exits.
                        break
                      end

                    # TODO break if last consumer
                    elsif !tuple_json.nil?
                      # Emit tuple to consumer
                      emit_consumer_tuple(stream, consumer, tuple_json)
                    end

                  # Multilang is done emitting a group of tuples
                  when "done"
                    multilang_count -= 1

                    # Send the next tuple to multilang
                    if !multilang_queue.empty?
                      write_message(ml_socket, multilang_queue.shift)

                    # If there are no more messages to send, we are done
                    elsif end_cycle_received && multilang_count == 0

                      # End cycle for ready consumers
                      @__emit_queues.each_pair do |stream, consumers|
                        consumers.each_pair do |consumer, emitter|
                          if emitter[:ready]
                            send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                            consumers_running -= 1
                            if consumers_running == 0
                              # Only leaves the inner each_pair.
                              break
                            end
                          end
                        end
                      end
                    end

                  # Multilang sent an error message
                  when "fail"
                    cdisplay("ERROR : #{obj['msg']}")
                    cycle_killed = true
                    exit(0)

                  # Multilang sent a log message
                  when "log"
                    cdisplay "LOG: #{obj['msg']}"

                  # Consumee operation sent signal to end_cycle
                  when "end_cycle"
                    end_cycle_received = true
                    if multilang_count == 0
                      @__emit_queues.each_pair do |stream, consumers|
                        consumers.each_pair do |consumer, emitter|
                          if emitter[:ready]
                            send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
                            consumers_running -= 1
                            if consumers_running == 0
                              exit(0)
                            end
                          end
                        end
                      end
                    end

                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Received a tuple from consumee
                elsif obj['tuple']
                  column_aliases = obj['column_aliases']
                  # Send or enqueue to multilang
                  multilang_count += 1
                  if multilang_queue.empty?
                    write_message(ml_socket, obj.to_json)
                  else
                    multilang_queue << obj.to_json
                  end
                  # Ask the consumee for its next tuple right away.
                  write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end

              # Exit after ending consumer cycles
              if consumers_running == 0
                exit(0)
              end

            end
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()
      rescue => e
        cdisplay e.message
        cdisplay e.backtrace
      ensure
        # cleanup
        if cycle_killed
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          send_to_consumees(KILL_CYCLE_MESSAGE)
        end
        stdin.close
        stdout.close
        stderr.close
      end
    end
  rescue PTY::ChildExited
    cdisplay("The child process exited!")
  end

end
|
618
|
-
|
619
|
-
|
620
|
-
# Run a group-by operation.
#
# Tuples read from the consumee are bucketed by the values of the node's
# 'group_by' fields. When the consumee sends "end_cycle", every bucket is
# replayed to the multilang process as begin_group / aggregate / end_group
# messages, and the tuples the multilang process emits are queued for, and
# then handed out to, the consumers.
#
# Once the multilang process has been spawned this method does not return
# normally: the cleanup in the inner `ensure` interrupts the child process
# and calls Process.exit!.
def self.run_group_by()

  # Index streams and consumers by their pipes for lookup
  consumer_hash = build_consumer_hash

  # Keep track of how many consumers to handle before exiting
  consumers_running = consumer_hash.keys.length

  # Set when a "kill_cycle" arrives so the cleanup below propagates it
  cycle_killed = false

  # Setup groups
  group_by = @__node['group_by']
  group_tuples = {}   # group key hash => serialized member tuples
  emit_count = 0      # how many emits are still expected from multilang
  tuple_queue = []    # messages waiting to be written to multilang

  # Setup multilang socket: ask for port 0 and read back the bound port
  require 'socket'
  host = "0.0.0.0"
  server = TCPServer.new(0)
  port = server.addr[1]

  # Spawn multilang process
  cmd = command("--execute_live --name #{@__name} --host \"#{host}\" --port #{port}")

  begin
    # Start the operation...
    Open3.popen3(cmd) do |stdin, stdout, stderr, wait_thread|
      begin
        server_thread = Thread.new do
          ml_socket = server.accept()
          begin
            # Setup streams from consumers, multilang, and the consumee
            read_streams = consumer_hash.keys.concat [stdout, ml_socket, @__consumee_pipes["rd_child_1"]]

            # Handshake
            handshake(ml_socket, ml_socket)
            prepare(ml_socket, ml_socket)

            # Begin cycle
            end_cycle_received = false
            finished_emitting = false

            # Handle streams
            loop do
              # Read from a stream
              rs = select_read_streams(read_streams)
              rs.each do |r|

                # Read STDOUT from program straight to user
                if r == stdout
                  msg = r.gets
                  if msg.nil?
                    # EOF: gets returns nil, and a stream at EOF stays
                    # readable forever, so stop polling it.
                    read_streams.delete(stdout)
                  else
                    cdisplay("log: #{msg.sub(/\n/, "")}")
                  end
                  next
                end

                # Receive an object; a nil result is treated as an empty
                # message so none of the branches below fire.
                obj = read_message(r) || {}

                if obj["command"]
                  case obj["command"]

                  # Multilang finished a message: feed it the next one
                  when "done"
                    if end_cycle_received
                      tuple_json = tuple_queue.shift
                      write_message(ml_socket, tuple_json) unless tuple_json.nil?
                    end
                    next

                  # Begin aggregation
                  when "end_cycle"
                    end_cycle_received = true
                    # From here on only multilang is listened to
                    read_streams = [ml_socket]

                    group_tuples.each do |group_tuple, tuples|
                      tuple_queue << "{\"command\": \"begin_group\", \"tuple\": #{group_tuple.to_json}, \"meta\":{}}\n"
                      tuples.each do |t|
                        tuple_queue << "{\"command\": \"aggregate\", #{t}}\n"
                      end
                      tuple_queue << "{\"command\": \"end_group\"}\n"

                      # keep track of how many emits are expected
                      emit_count += 1
                    end

                    tuple_json = tuple_queue.shift
                    write_message(ml_socket, tuple_json) unless tuple_json.nil?

                  # Multilang has emitted a grouped tuple
                  when "emit"
                    emit_count -= 1
                    # Enqueue for every consumer of every stream
                    tuple_json = build_tuple_json(obj['tuple'], obj['meta'], obj['column_aliases'])
                    @__emit_queues.each_pair do |emit_stream, consumers|
                      consumers.each_key do |consumer|
                        @__emit_queues[emit_stream][consumer][:write_queue] << tuple_json
                      end
                    end

                    # End cycle when done emitting
                    if end_cycle_received && emit_count == 0
                      finished_emitting = true
                      break
                    elsif end_cycle_received
                      tuple_json = tuple_queue.shift
                      write_message(ml_socket, tuple_json) unless tuple_json.nil?
                    end

                  # An error has occurred
                  when "kill_cycle"
                    cycle_killed = true
                    exit(0)
                  end

                # Received a tuple from operation
                elsif obj["tuple"]
                  tuple = obj["tuple"].to_json
                  meta = obj["meta"].to_json
                  column_aliases = obj["column_aliases"] || {}
                  aliases = Hash[column_aliases.map{|h| [h["alias"],h["concrete_name"]]}]
                  gt = {}

                  # Get the column names to group on
                  group_by.each do |field|
                    field_name = aliases[field] || field
                    gt[field] = obj["tuple"][field_name]
                  end

                  # Stored without the surrounding braces so that the
                  # "aggregate" command can be prepended when replaying
                  msg_no_brackets = "\"tuple\": #{tuple}, \"meta\": #{meta}, \"column_aliases\": #{column_aliases.to_json}"

                  # Group tuple into existing group or create new group
                  (group_tuples[gt] ||= []) << msg_no_brackets

                  # Ask operation for next tuple
                  write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

                # Multilang sent a ping
                elsif obj['ping']
                  write_message(ml_socket, PONG_PREFIX + "#{Time.now.utc.to_f}" + PONG_SUFFIX)
                end
              end

              # Send tuples to consumers
              if finished_emitting && consumers_running > 0

                # Send first tuple
                @__emit_queues.each_pair do |stream, consumers|
                  consumers.each_key do |consumer|
                    tuple_json = get_consumer_tuple(stream, consumer)
                    emit_consumer_tuple(stream, consumer, tuple_json)
                  end
                end

                # Send tuples to consumers as they ask for them
                loop do

                  # Retrieve messages from consumers
                  ready, = IO.select(consumer_hash.keys, [], [])

                  # Emit tuples to consumers
                  emitted = false
                  ready.each do |r|

                    # Read from consumer
                    msg = read_message(r) || {}
                    consumer = consumer_hash[r][:consumer]
                    stream = consumer_hash[r][:stream]

                    # Consumer is ready for next message
                    if msg["command"] && msg["command"] == "next"

                      @__emit_queues[stream][consumer][:ready] = true
                      tuple_json = get_consumer_tuple(stream, consumer)

                      # If all messages have been sent to a consumer, end its cycle
                      if tuple_json.nil?
                        write_stream = get_write_stream(stream, consumer)
                        write_message(write_stream, END_CYCLE_MESSAGE)
                        consumers_running -= 1
                        break if consumers_running == 0
                      else
                        # Emit tuple to consumer
                        emit_consumer_tuple(stream, consumer, tuple_json)
                        emitted = true
                      end
                    end

                  end
                  # Exit when done emitting
                  exit(0) if !emitted
                end
                break

              # Exit after ending all consumer cycles
              elsif consumers_running == 0
                break
              end
            end
          ensure
            ml_socket.close()
          end
        end
        server_thread.join()

      rescue Errno::EIO
        cdisplay("Errno:EIO error")
      ensure
        # cleanup
        if cycle_killed
          send_to_consumers(KILL_CYCLE_MESSAGE, false)
          send_to_consumees(KILL_CYCLE_MESSAGE)
        end

        pid = wait_thread[:pid]
        stdin.close
        stdout.close
        stderr.close
        Process.kill('INT', pid)
        Process.exit!(true)
      end
    end
  rescue PTY::ChildExited
    # NOTE(review): ml_pipe is not assigned anywhere in this method; this
    # branch only works if ml_pipe is a method defined elsewhere - confirm.
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    if File.exist?("#{ml_pipe}.in")
      File.delete("#{ml_pipe}.in")
    end
    cdisplay("The child process exited!")
  end
end
|
869
|
-
|
870
|
-
|
871
|
-
# Run a join operation.
#
# Reads every tuple from the left ("rd_child_1") and right ("rd_child_2")
# consumees, indexes them by the value of the node's "lhs_fields" /
# "rhs_fields" field, builds the joined tuples for the node's "join_type"
# ("inner", "left", "right" or "outer"; any other value yields no tuples)
# and hands them to the consumers one tuple per "next" request.
#
# Returns early (nil) when the node lacks join settings, when a
# "kill_cycle" command is received, or when there is nothing to join.
def self.run_join()

  lhs_fields = @__node["lhs_fields"]
  rhs_fields = @__node["rhs_fields"]
  join_type = @__node["join_type"]

  # Sanity check
  return if lhs_fields.nil? || rhs_fields.nil? || join_type.nil?

  # Index the consumer streams for lookup when they answer
  consumer_hash = build_consumer_hash()

  # Read all tuples from the left hand and right hand streams. The side a
  # message belongs to is decided by the pipe it arrived on, which stays
  # unambiguous even if both consumees carry the same name.
  left_stream = @__consumee_pipes["rd_child_1"]
  right_stream = @__consumee_pipes["rd_child_2"]
  read_streams = [left_stream, right_stream]

  # Keep track of how many consumers to handle before exiting
  # NOTE(review): run_group_by counts consumer_hash.keys here; with more
  # than one consumer this method returns after the first one finishes.
  consumers_running = 1

  # Index the incoming tuples by their join key
  lhs_tuples = {}
  rhs_tuples = {}

  # Begin cycle
  left_end_cycle_received = false
  right_end_cycle_received = false

  # Receive and handle messages until both sides ended their cycle
  until left_end_cycle_received && right_end_cycle_received

    select_read_streams(read_streams).each do |r|

      # A nil message is treated as empty; the next tuple is still requested
      obj = read_message(r) || {}
      from_left = (r == left_stream)

      if obj["command"]
        case obj["command"]

        # A consumee is done emitting
        when "end_cycle"
          if from_left
            left_end_cycle_received = true
          else
            right_end_cycle_received = true
          end

          # We are done receiving from streams
          break if left_end_cycle_received && right_end_cycle_received

        # NOTE(review): unlike run_group_by, the kill is not forwarded to
        # consumers or consumees before returning.
        when "kill_cycle"
          return
        end

      # Received a tuple from consumee
      elsif obj['tuple']
        tuple = obj['tuple']
        if from_left
          (lhs_tuples[tuple[lhs_fields]] ||= []) << tuple
        else
          (rhs_tuples[tuple[rhs_fields]] ||= []) << tuple
        end
      end

      # Ask operation for next tuple
      write_message(@__consumee_pipes[from_left ? "wr_child_1" : "wr_child_2"], NEXT_MESSAGE)
    end
  end

  # If neither side produced a tuple, end the cycle
  if lhs_tuples.empty? && rhs_tuples.empty?
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
      end
    end
    return
  end

  # Output columns: keys of the first left tuple followed by the keys of the
  # first right tuple. A side without tuples contributes no columns.
  first_left = lhs_tuples.empty? ? {} : lhs_tuples.values[0][0]
  first_right = rhs_tuples.empty? ? {} : rhs_tuples.values[0][0]
  joined_fields = (first_left.keys + first_right.keys).uniq

  # Build the joined tuples
  joined_tuples =
    case join_type
    when "inner"
      join_tuple_groups(lhs_tuples, rhs_tuples, joined_fields, false)
    when "left"
      join_tuple_groups(lhs_tuples, rhs_tuples, joined_fields, true)
    when "right"
      # Mirror image of "left": right values win on shared columns
      join_tuple_groups(rhs_tuples, lhs_tuples, joined_fields, true)
    when "outer"
      # Left join plus the right tuples that found no partner
      lonely_right = rhs_tuples.reject { |key, _| !key.nil? && lhs_tuples.has_key?(key) }
      join_tuple_groups(lhs_tuples, rhs_tuples, joined_fields, true) +
        join_tuple_groups(lonely_right, {}, joined_fields, true)
    else
      []
    end

  # Setup output queues
  joined_tuples.each do |tuple|
    tuple_json = build_tuple_json(tuple)
    @__emit_queues.each_pair do |stream, consumers|
      consumers.each_key do |consumer|
        @__emit_queues[stream][consumer][:write_queue] << tuple_json
      end
    end
  end

  # Send first tuple
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|

      tuple_json = get_consumer_tuple(stream, consumer)
      if tuple_json.nil?
        send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
        consumers_running -= 1
        return if consumers_running == 0
      else
        emit_consumer_tuple(stream, consumer, tuple_json)
      end
    end
  end

  # Send tuples to consumers as they ask for them
  loop do

    # Retrieve messages from consumers
    ready, = IO.select(consumer_hash.keys, [], [])

    ready.each do |r|

      # Read from consumer
      msg = read_message(r) || {}
      consumer = consumer_hash[r][:consumer]
      stream = consumer_hash[r][:stream]

      case msg["command"]

      # Consumer is ready for next message
      when "next"
        @__emit_queues[stream][consumer][:ready] = true
        tuple_json = get_consumer_tuple(stream, consumer)

        # If all messages have been sent to a consumer, end its cycle
        if tuple_json.nil?
          send_command_tuple(stream, consumer, END_CYCLE_MESSAGE)
          consumers_running -= 1
          return if consumers_running == 0
        else
          # Emit tuple to consumer
          emit_consumer_tuple(stream, consumer, tuple_json)
        end

      when "kill_cycle"
        return
      end
    end
  end
end

# Join two sets of tuples that are already grouped by join key.
#
# primary_groups / secondary_groups map a join key to the array of tuples
# carrying that key. Every primary tuple is combined with every secondary
# tuple of the same key into a fresh hash holding `fields`, taking the
# primary tuple's value for columns both sides have. A nil key never
# matches. With keep_unmatched, a primary tuple without a partner is emitted
# as a copy of itself with the missing `fields` set to nil.
def self.join_tuple_groups(primary_groups, secondary_groups, fields, keep_unmatched)
  joined = []
  primary_groups.each_pair do |key, primary_tuples|
    partners = key.nil? ? [] : (secondary_groups[key] || [])

    primary_tuples.each do |primary|
      if partners.empty?
        next unless keep_unmatched
        padded = primary.dup
        fields.each { |field| padded[field] = nil unless padded.has_key?(field) }
        joined << padded
      else
        partners.each do |secondary|
          # A new hash per pair, so earlier results are never overwritten
          merged = {}
          fields.each do |field|
            merged[field] = primary.has_key?(field) ? primary[field] : secondary[field]
          end
          joined << merged
        end
      end
    end
  end
  joined
end
|
1207
|
-
|
1208
|
-
# A filter is executed exactly like an "each" operation: delegate to
# run_each and hand back whatever it returns.
def self.run_filter()
  run_each
end
|
1211
|
-
|
1212
|
-
|
1213
|
-
# Collect the tuples arriving at a sink, check them against the sink's columns, and display them as a table
|
1214
|
-
# Run a sink operation.
#
# Reads tuples from the consumee until "end_cycle" or "kill_cycle" arrives,
# checking each tuple against the node's "columns" (a list of single-entry
# { name => type } hashes). Accepted tuples are collected, rendered as a
# terminal table and, when an output name is configured, written to
# "<output>.csv".
#
# Returns early after displaying an error when a tuple has fewer keys than
# the sink has columns or when a value does not match its column type, and
# after displaying "No tuples received" when nothing was collected.
def self.run_sink()
  columns = @__node["columns"]

  type_map = {
    "string" => String,
    "double" => Float,
    "integer" => Integer,
    "float" => Float,
    "array" => Array,
    "map" => Hash
  }

  # Expected Ruby class for every declared column name
  col_map = {}
  columns.each do |hash|
    key = hash.keys[0]
    col_map[key] = type_map[hash[key]]
  end

  tuples = []

  # The other options in this class are read with symbol keys; the string
  # key is kept as a fallback for callers that relied on it.
  output = @__options[:output] || @__options["output"]

  loop do
    # Read messages; a nil message is treated as empty and skipped
    obj = read_message(@__consumee_pipes["rd_child_1"]) || {}

    # Add row
    if obj['tuple']

      tuple = obj['tuple']
      display_json = Hash[obj['tuple'].map{|k, v| [Zillabyte::Runner::Operation.truncate_message(k), Zillabyte::Runner::Operation.truncate_message(v)]}].to_json

      if col_map.keys.length > tuple.keys.length
        cdisplay "Error: invalid keys for sink tuple : Expected #{col_map.keys} , got: #{tuple.keys}"
        cdisplay("\n \nPress Ctrl-C to exit", false)
        return
      end

      # Check tuple columns for valid entry: every sink column must be
      # matched by a tuple column holding nil or a value of the right class
      columns_to_check = col_map.length
      tuple.keys.each do |col|
        value = tuple[col]
        matched_column = check_tuple_for_alias(col, obj['column_aliases'], col_map.keys)
        next if matched_column.nil?

        # Stop at the first mismatch; the counter check below reports it
        break unless value.nil? || (value.is_a? col_map[matched_column])

        columns_to_check -= 1
        # Set the proper column name
        if col != matched_column
          tuple[matched_column] = value
          tuple.delete(col)
        end
      end

      if columns_to_check != 0
        cdisplay "Error: invalid schema for sink tuple #{display_json}"
        return
      end

      tuples << obj
      if @__options[:interactive]
        cdisplay "received #{display_json}"
      end
      write_message(@__consumee_pipes["wr_child_1"], NEXT_MESSAGE)

    # End cycle
    elsif obj['command']
      case obj['command']
      when "end_cycle", "kill_cycle"
        break
      end
    end
  end

  if tuples.empty?
    cdisplay "No tuples received"
    return
  end

  # Build table
  require("terminal-table")
  table = Terminal::Table.new :title => @__name
  require("csv")
  csv_str = CSV.generate do |csv|
    header_written = false
    tuples.each do |obj|
      begin
        t = obj['tuple']
        m = obj['meta'] || {}

        if t
          # The header is taken from the first tuple only
          if header_written == false
            keys = t.keys + m.keys
            csv << keys
            table << keys
            table << :separator
            header_written = true
          end

          # Concatenate instead of flattening: an array-valued column must
          # stay a single cell or the row no longer lines up with the header
          vals = t.values + m.values
          csv << vals
          table << vals.map { |v| v.to_s[0..100] }
        end
      rescue => e
        cdisplay e.message
        cdisplay e.backtrace
      end
    end
  end

  # Output table
  cdisplay("\n#{table.to_s}")
  cdisplay "#{tuples.length} rows"

  # Write file
  if output
    filename = "#{output}.csv"
    # Block form closes the file even if the write raises
    File.open(filename, "w") { |f| f.write(csv_str) }
    cdisplay("output written to #{filename}")
  end
end
|
1345
|
-
|
1346
|
-
|
1347
|
-
private
|
1348
|
-
|
1349
|
-
# Number of bytes requested from a stream per sysread call in read_message
BUFSIZE = 8192
|
1350
|
-
|
1351
|
-
# Each reading pipe has a read buffer and message queue
|
1352
|
-
@__read_buffers = {}
|
1353
|
-
@__read_buffered_messages = {}
|
1354
|
-
|
1355
|
-
|
1356
|
-
# Return the read streams that have a message available
|
1357
|
-
# Pick the streams that can be read without waiting.
#
# Streams that still hold already-parsed messages in the per-stream message
# queue are returned first; only when no stream has one does the call block
# in IO.select until at least one stream becomes readable.
def self.select_read_streams(read_streams)
  with_pending = read_streams.select do |candidate|
    # Also makes sure every stream owns a message queue
    !(@__read_buffered_messages[candidate] ||= []).empty?
  end
  return with_pending unless with_pending.empty?

  readable, _writable, _errored = IO.select(read_streams, [], [])
  readable
end
|
1370
|
-
|
1371
|
-
|
1372
|
-
# Read a JSON message
|
1373
|
-
# Read one JSON message from read_stream.
#
# Messages are separated by ENDMARKER. Everything read from the stream is
# appended to a per-stream buffer; each complete segment is parsed and put
# on a per-stream queue, and an incomplete trailing segment stays in the
# buffer for the next call. Segments that are not valid JSON are reported
# through cdisplay and dropped.
#
# Returns the oldest queued parsed message, reading from the stream
# (blocking in sysread) until one is available. Whatever sysread raises,
# e.g. EOFError on a closed stream, propagates to the caller.
def self.read_message(read_stream)
  @__read_buffers[read_stream] ||= ""
  pending = (@__read_buffered_messages[read_stream] ||= [])

  loop do
    # Serve already-parsed messages before touching the stream. Checking
    # this on every pass keeps reading when a read produced only invalid
    # segments, instead of handing nil to the caller.
    return pending.shift unless pending.empty?

    until @__read_buffers[read_stream].include?(ENDMARKER)
      @__read_buffers[read_stream] << read_stream.sysread(BUFSIZE)
    end

    # A limit of -1 keeps trailing empty strings, so the last element is
    # always the (possibly empty) unfinished remainder.
    segments = @__read_buffers[read_stream].split(ENDMARKER, -1)
    @__read_buffers[read_stream] = segments.pop

    segments.each do |segment|
      begin
        pending << JSON.parse(segment)
      rescue JSON::ParserError
        cdisplay "READMESSAGE: invalid JSON #{segment}"
      end
    end
  end
end
|
1424
|
-
|
1425
|
-
|
1426
|
-
# Write JSON message
|
1427
|
-
# Write one message to write_stream: surrounding whitespace is stripped from
# msg, ENDMARKER is appended, and the stream is flushed right away.
def self.write_message(write_stream, msg)
  framed = "#{msg.strip}#{ENDMARKER}"
  write_stream.write(framed)
  write_stream.flush
end
|
1432
|
-
|
1433
|
-
|
1434
|
-
# Handshake connection to multilang
|
1435
|
-
# Send HANDSHAKE_MESSAGE on write_stream and wait for one message on
# read_stream, which is returned. Any error is announced through cdisplay
# and then re-raised.
def self.handshake(write_stream, read_stream)
  write_message(write_stream, HANDSHAKE_MESSAGE)
  read_message(read_stream)
rescue => failure
  cdisplay("Error handshaking node")
  raise failure
end
|
1444
|
-
|
1445
|
-
|
1446
|
-
# Instruct multilang to run prepare step
|
1447
|
-
# Send PREPARE_MESSAGE on write_stream and wait for one message on
# read_stream, which is returned. Any error is announced through cdisplay
# and then re-raised.
def self.prepare(write_stream, read_stream)
  write_message(write_stream, PREPARE_MESSAGE)
  read_message(read_stream)
rescue => failure
  cdisplay("Error running prepare")
  raise failure
end
|
1456
|
-
|
1457
|
-
|
1458
|
-
# Instruct multilang to begin cycle
|
1459
|
-
# Send BEGIN_CYCLE_MESSAGE on write_stream and read messages from
# read_stream until a "done" command arrives. "log" commands are shown to
# the user in the meantime.
#
# Returns nil once "done" was received.
# Raises for any other message, using the message's "msg" field as the error
# text, or a generic text when the message carries none (raising nil would
# fail with an unrelated TypeError).
def self.begin_cycle(write_stream, read_stream)
  write_message(write_stream, BEGIN_CYCLE_MESSAGE)

  loop do
    obj = Hash[read_message(read_stream)]
    case obj["command"]
    when "log"
      cdisplay "LOG: #{obj['msg']}"
    when "done"
      break
    else
      cdisplay("Error beginning cycle")
      reason = obj["msg"]
      raise(reason.nil? ? "Error beginning cycle" : reason)
    end
  end
end
|
1478
|
-
|
1479
|
-
# Build the hash of consumer streams for lookup when receiving responses
|
1480
|
-
# Map each consumer's read pipe to the stream and consumer it belongs to, so
# a readable pipe can be traced back to its owner. For every consumer in the
# emit queues the "rd_parent_1" pipe is used when present, otherwise
# "rd_parent_2"; consumers with neither get no entry.
def self.build_consumer_hash()
  owners = {}
  @__emit_queues.each_pair do |stream, consumers|
    consumers.each_key do |consumer|
      pipes = @__consumer_pipes[stream][consumer]
      pipe_name = ["rd_parent_1", "rd_parent_2"].find { |name| pipes.has_key?(name) }
      next if pipe_name.nil?

      owners[pipes[pipe_name]] = {:stream => stream, :consumer => consumer}
    end
  end
  owners
end
|
1499
|
-
|
1500
|
-
# Send object to every consumee of the operation (left and right inputs)
|
1501
|
-
# Write json_obj to each consumee write pipe that exists: "wr_child_1" (the
# left hand or only input) and "wr_child_2" (the right hand input).
def self.send_to_consumees(json_obj)
  inputs = @__consumee_pipes
  write_message(inputs["wr_child_1"], json_obj) if inputs.has_key?("wr_child_1")
  write_message(inputs["wr_child_2"], json_obj) if inputs.has_key?("wr_child_2")
end
|
1515
|
-
|
1516
|
-
# Send object to every consumer of the operation, regardless of stream
|
1517
|
-
# Write json_obj to every consumer of every stream. A consumer is reached
# through its "wr_parent_1" pipe when it has one, otherwise through
# "wr_parent_2"; consumers with neither are not written to. With display
# set, a line is shown for each consumer visited.
def self.send_to_consumers(json_obj, display = false)
  @__consumer_pipes.each_pair do |stream, consumers|
    consumers.each_pair do |consumer, pipe|
      side = [1, 2].find { |number| pipe.has_key?("wr_parent_#{number}") }
      write_message(get_write_stream(stream, consumer, side), json_obj) unless side.nil?

      cdisplay "emitted #{json_obj.chomp} to #{consumer}" if display
    end
  end
end
|
1534
|
-
|
1535
|
-
|
1536
|
-
# Get the write pipe of the stream consumer
|
1537
|
-
# Look up the write pipe "wr_parent_<number>" of a stream's consumer
# (number defaults to 1). Returns nil when the consumer has no such pipe.
def self.get_write_stream(stream, consumer, number=1)
  @__consumer_pipes[stream][consumer]["wr_parent_#{number}"]
end
|
1541
|
-
|
1542
|
-
|
1543
|
-
# Get tuple for sending to consumer of stream
|
1544
|
-
# Remove and return the oldest tuple queued for a stream's consumer, or nil
# when its write queue is empty.
def self.get_consumer_tuple(stream, consumer)
  waiting = @__emit_queues[stream][consumer][:write_queue]
  waiting.shift
end
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
# Send a command message to a consumer
|
1551
|
-
# Write a command message to one consumer of a stream, through its
# "wr_parent_1" pipe when it has one and otherwise through "wr_parent_2",
# then mark the consumer as not ready in the emit queues.
def self.send_command_tuple(stream, consumer, json_obj)
  available = @__consumer_pipes[stream][consumer]
  side = [1, 2].find { |number| available.has_key?("wr_parent_#{number}") }
  write_message(get_write_stream(stream, consumer, side), json_obj) unless side.nil?

  @__emit_queues[stream][consumer][:ready] = false
end
|
1565
|
-
|
1566
|
-
|
1567
|
-
# Emit tuple_json to the consumer of a stream
|
1568
|
-
# Write tuple_json to one consumer of a stream (through "wr_parent_1" when
# present, otherwise "wr_parent_2"), mark the consumer as not ready, and
# show the user a truncated rendering of the tuple that was sent.
#
# If tuple_json cannot be parsed, an error line is shown, the raw text is
# still written, and the rendering in the final line is left blank.
def self.emit_consumer_tuple(stream, consumer, tuple_json)
  rendering =
    begin
      shortened = JSON.parse(tuple_json)["tuple"].map do |name, value|
        [Zillabyte::Runner::Operation.truncate_message(name), Zillabyte::Runner::Operation.truncate_message(value)]
      end
      Hash[shortened].to_json
    rescue JSON::ParserError
      cdisplay "Error: invalid JSON"
      nil
    end

  available = @__consumer_pipes[stream][consumer]
  side = [1, 2].find { |number| available.has_key?("wr_parent_#{number}") }
  write_message(get_write_stream(stream, consumer, side), tuple_json) unless side.nil?

  @__emit_queues[stream][consumer][:ready] = false
  cdisplay "emitted tuple #{rendering} to #{consumer} "
end
|
1590
|
-
|
1591
|
-
|
1592
|
-
# Check if a tuple's column or its aliases matches an expected column and returns the valid column
|
1593
|
-
# Find the expected column that a tuple column stands for.
#
# column         - the column name found in the tuple
# column_aliases - list of hashes with "alias" and "concrete_name" keys;
#                  nil is treated as "no aliases"
# valid_columns  - the expected column names, tried in order
#
# For each expected column in turn, the column matches when it has that very
# name, or when an alias entry whose "alias" is the expected name lists the
# column as its "concrete_name" (or as the alias itself).
#
# Returns the first matching expected column, or nil when none matches.
def self.check_tuple_for_alias(column, column_aliases, valid_columns)
  aliases = column_aliases || []

  valid_columns.each do |expected|
    return expected if column == expected

    aliased = aliases.any? do |entry|
      entry["alias"] == expected && (column == entry["concrete_name"] || column == entry["alias"])
    end
    return expected if aliased
  end

  nil
end
|
1610
|
-
|
1611
|
-
# Build a tuple and format into JSON
|
1612
|
-
# Serialize a tuple as the JSON message passed between operations.
#
# tuple          - hash of column name => value
# meta           - optional hash of meta values; it is copied, never
#                  modified
# column_aliases - optional alias information, passed through unchanged
#                  ({} when omitted)
#
# The "id" column is dropped, the "confidence", "since" and "source" columns
# are moved into the meta section, and every other column is kept as a
# tuple value.
#
# Returns a JSON string with the keys "op" (this operation's name), "tuple",
# "meta" and "column_aliases".
def self.build_tuple_json(tuple, meta = nil, column_aliases = nil)
  # Work on a copy so the caller's hash is not changed as a side effect
  meta = meta.nil? ? {} : meta.dup
  column_aliases ||= {}
  values = {}

  tuple.each do |k, v|
    case k
    when "id"
      next
    when "confidence", "since", "source"
      meta[k] = v
    else
      values[k] = v
    end
  end

  {"op" => @__name, "tuple" => values, "meta" => meta, "column_aliases" => column_aliases}.to_json
end
|
1629
|
-
|
1630
|
-
|
1631
|
-
# Construct a multilang command
|
1632
|
-
# Build the shell command line that runs this operation's script.
#
# arg           - arguments appended after the script path
# ignore_stderr - when true, the command's stderr is sent to /dev/null
#
# The interpreter depends on the "language" entry of the operation's meta
# information: "ruby" runs under `bundle exec`, "python" uses the
# interpreter of a "vEnv" directory inside the app directory when one
# exists, and "js" uses Zillabyte::API::NODEJS_BIN.
#
# Returns the command string, or nil (after displaying a message) when the
# language is not one of the above.
def self.command(arg, ignore_stderr=false)
  # NOTE(review): execution continues after this message, so a missing meta
  # still fails on the next line - confirm whether callers rely on that.
  cdisplay("could not extract meta information. missing zillabyte.conf.yml?") if @__meta.nil?
  full_script = File.join(@__dir, @__meta["script"])
  stderr_opt = "2> /dev/null" if ignore_stderr

  case @__meta["language"]
  when "ruby"
    # Execute in the bundler context
    cmd = "cd \"#{@__dir}\"; unset BUNDLE_GEMFILE; ZILLABYTE_HARNESS=1 bundle exec ruby \"#{full_script}\" #{arg} #{stderr_opt}"
  when "python"
    if(File.directory?("#{@__dir}/vEnv"))
      # The interpreter path is quoted like the other paths so that an app
      # directory containing spaces still yields a valid command
      cmd = "cd \"#{@__dir}\"; PYTHONPATH=~/zb1/multilang/python/Zillabyte \"#{@__dir}/vEnv/bin/python\" \"#{full_script}\" #{arg} #{stderr_opt}"
    else
      cmd = "cd \"#{@__dir}\"; PYTHONPATH=~/zb1/multilang/python/Zillabyte python \"#{full_script}\" #{arg} #{stderr_opt}"
    end
  when "js"
    cmd = "cd \"#{@__dir}\"; NODE_PATH=~/zb1/multilang/js/src/lib #{Zillabyte::API::NODEJS_BIN} \"#{full_script}\" #{arg} #{stderr_opt}"
  else
    cdisplay("no language specified")
  end
  return cmd
end
|
1654
|
-
|
1655
|
-
|
1656
|
-
# Display a colored, formatted message
|
1657
|
-
# Show msg to the user by handing it to the tester object's cdisplay,
# together with this operation's name and the useName flag.
def self.cdisplay(msg, useName=true)
  operation_name = @__name
  @__tester.cdisplay(operation_name, msg, useName)
end
|
1660
|
-
|
1661
|
-
end; end; end
|
1662
|
-
|