zillabyte 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/ruby/README.md +2 -0
- data/ruby/lib/zillabyte.rb +20 -0
- data/ruby/lib/zillabyte/common/progress.rb +17 -0
- data/ruby/lib/zillabyte/harness.rb +16 -0
- data/ruby/lib/zillabyte/harness/aggregate.rb +29 -0
- data/ruby/lib/zillabyte/harness/counter.rb +11 -0
- data/ruby/lib/zillabyte/harness/each.rb +29 -0
- data/ruby/lib/zillabyte/harness/groupby.rb +9 -0
- data/ruby/lib/zillabyte/harness/helper.rb +326 -0
- data/ruby/lib/zillabyte/harness/live_delegator.rb +369 -0
- data/ruby/lib/zillabyte/harness/simple_function.rb +131 -0
- data/ruby/lib/zillabyte/harness/simple_spout.rb +90 -0
- data/ruby/lib/zillabyte/harness/sink.rb +23 -0
- data/ruby/lib/zillabyte/harness/spout.rb +48 -0
- data/ruby/lib/zillabyte/harness/topology.rb +132 -0
- data/ruby/lib/zillabyte/harness/tuple.rb +32 -0
- data/ruby/lib/zillabyte/version.rb +3 -0
- metadata +103 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OTlmOGNkMGVlNzY4MTdjMThlODE3NDk3ZTUwMjEzNjIyMzBlOWZkMQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MDYwN2YwYmMyN2M2Yjg2Y2E3YzIxMWNlYjE0N2Y5ZDQ4OWNkNjczZg==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
MDFjMDk3NGU2NmUyYTUyNGViMTAwYWI1MjBiMzdkMzk1MThjYmIxMjM5NzE2
|
10
|
+
NzMwMGRiMjUyYzM2YzJhNjJhY2ZlZGE2N2ZmMDM5N2VlZTdhMDNmMmZlMDQx
|
11
|
+
OTI0MGM1MmUxNzVjOTM2OWM5ZWU2Y2NjYWJlNjQxNjA4NGVmZTM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZDcyOTM1YjkzZjBjYmNjZDY1MzJjOTVjZTJjMTYyNDEyZDQzMGEzYmJiZGFi
|
14
|
+
ZTAyYjM1NjAyOTdjNjU1NTg1MzJjY2JmNDA3YTZjYTZmNTk3ZGJhNzEyN2U3
|
15
|
+
NjM5MTYyMTJiOGI1YThlN2MzNWZkNWJhYTVkMTU5YjMwMzkwZWY=
|
data/ruby/lib/zillabyte.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "zillabyte/harness"
|
2
|
+
require "zillabyte/common/progress"
|
3
|
+
|
4
|
+
# Top-level entry points of the Zillabyte Ruby library. Each method simply
# forwards to the matching builder under Zillabyte::Harness.
module Zillabyte
  class << self
    # Builds a topology through Zillabyte::Harness::Topology.build, keeps it
    # in the module-level @topology and returns it.
    #
    # name - value handed to the topology builder (defaults to nil).
    def new(name = nil)
      @topology = Zillabyte::Harness::Topology.build(name)
    end

    # Forwards all arguments and the block to
    # Zillabyte::Harness::SimpleFunction.build and returns its result.
    def simple_function(*args, &block)
      Zillabyte::Harness::SimpleFunction.build(*args, &block)
    end

    # Forwards all arguments and the block to
    # Zillabyte::Harness::SimpleSpout.build and returns its result.
    def simple_spout(*args, &block)
      Zillabyte::Harness::SimpleSpout.build(*args, &block)
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Settings holder for an "aggregate" operation. The un-prefixed methods are
# setters meant to be called from a configuration block; the stored values
# are exposed through the underscore-prefixed accessors.
class Zillabyte::Harness::Aggregate
  attr_accessor :_name, :_type, :_emits, :_start, :_aggregate, :_complete

  # Assigns a generated default name ("aggregate_" followed by the value
  # returned by Zillabyte::Harness::Counter.get) and the fixed type tag.
  def initialize
    self._name = "aggregate_" + Zillabyte::Harness::Counter.get
    self._type = 'aggregate'
  end

  # Replaces the generated name with +v+.
  def name(v)
    self._name = v
  end

  # Stores the "emits" declaration +v+.
  def emits(v)
    self._emits = v
  end

  # Stores the block given as the "start" callback.
  def start(&block)
    self._start = block
  end

  # Stores the block given as the "aggregate" callback.
  def aggregate(&block)
    self._aggregate = block
  end

  # Stores the block given as the "complete" callback.
  def complete(&block)
    self._complete = block
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Settings holder for an "each" operation. The un-prefixed methods are
# setters meant to be called from a configuration block; the stored values
# are exposed through the underscore-prefixed accessors.
class Zillabyte::Harness::Each
  attr_accessor :_name, :_type, :_emits, :_consumes, :_prepare, :_execute

  # Assigns a generated default name ("each_" followed by the value returned
  # by Zillabyte::Harness::Counter.get) and the fixed type tag.
  def initialize
    self._name = "each_" + Zillabyte::Harness::Counter.get
    self._type = 'each'
  end

  # Replaces the generated name with +v+.
  def name(v)
    self._name = v
  end

  # Stores the "emits" declaration +v+.
  def emits(v)
    self._emits = v
  end

  # Stores the "consumes" declaration +v+.
  def consumes(v)
    self._consumes = v
  end

  # Stores the block given as the "prepare" callback.
  def prepare(&block)
    self._prepare = block
  end

  # Stores the block given as the "execute" callback.
  def execute(&block)
    self._execute = block
  end
end
|
@@ -0,0 +1,326 @@
|
|
1
|
+
# Command-line parsing and validation helpers for the Zillabyte harness.
#
# The check_* methods validate user-supplied "name", "emits", "consumes" and
# "sink" declarations. A failed check is reported through print_error, which
# prints the message and exits the process with status 1, so the statements
# following a failed check are never reached.
class Zillabyte::Harness::Helper

  # Parses the harness flags found in Helper.argv and returns them as a Hash.
  #
  # Recognised flags: --execute_live (:command => :execute), --info
  # (:command => :info), --name NAME (:name), --pipe PIPE (:pipe) and
  # --file FNAME (:file).
  #
  # NOTE(review): the default name (last component of the working directory)
  # is stored under the String key "name" while --name writes the Symbol key
  # :name, so the flag never replaces the default entry. Left unchanged here
  # because the readers of this hash are not visible -- confirm which key
  # they use before unifying.
  def self.opt_parser()
    options = {
      "name" => Dir.pwd.split("/")[-1]
    }
    OptionParser.new do |opts|
      opts.on("--execute_live") do |v|
        options[:command] = :execute
      end
      opts.on("--info") do |v|
        options[:command] = :info
      end
      opts.on("--name NAME") do |v|
        options[:name] = v
      end
      opts.on("--pipe PIPE") do |v|
        options[:pipe] = v
      end
      opts.on("--file FNAME") do |v|
        options[:file] = v
      end
    end.parse(self.argv)
    options
  end

  # Writes +hash+ to +file+ as a single JSON line (newline terminated).
  def self.write_hash_to_file(hash, file)
    file.write(hash.to_json+"\n")
  end

  # Prints +msg+ to standard output and terminates the process with exit
  # status 1.
  def self.print_error(msg)
    puts msg
    exit(1)
  end

  # Validates an operation name and records it in +names+.
  #
  # operation - String naming the kind of operation being declared.
  # name      - the name to validate; must be a non-empty String.
  # names     - Hash of name => operation seen so far; updated in place.
  #
  # A name may be reused only when either the earlier or the current
  # operation is "new" or "sink"; any other reuse is reported as an error.
  # Returns +operation+ (the value just stored in +names+).
  def self.check_name(operation, name, names)
    ee = "Error in \"#{operation}\" at \"name\": \n\t "

    if !name.instance_of?(String) || name == ""
      msg = "#{ee}\"Name\" must be a non-empty STRING at #{name}."
      Zillabyte::Harness::Helper.print_error(msg)
    end
    if names[name] && names[name] != "new" && names[name] != "sink" && operation != "new" && operation != "sink"
      msg = "#{ee}The \"name\" \"#{name}\" was previously defined in a #{names[name]}!"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    names[name] = operation
  end

  # Validates an "emits" declaration and records its streams in +streams+.
  #
  # operation - String naming the declaring operation. "simple_function" and
  #             "simple_spout" emit relations (field lists are Arrays of
  #             single-entry Hashes); every other operation emits streams
  #             (field lists are Arrays of Strings).
  # emits     - Array of [name, fields] pairs.
  # streams   - Hash of stream name => fields seen so far; updated in place.
  #
  # Returns true when more than one stream/relation is emitted, else false.
  def self.check_emits(operation, emits, streams)
    ee = "Error in \"#{operation}\" at \"emits\": \n\t "
    simple = (operation == "simple_function" || operation == "simple_spout")
    if simple
      pp = @@_print_check_simple_function_emits
      nn = "relation"
    else
      pp = @@_print_check_emits
      nn = "stream"
    end

    if !emits.instance_of?(Array)
      msg = "#{ee}\"Emits\" must be an ARRAY at #{emits}. #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    n_emits = emits.length
    if n_emits == 0
      msg = "#{ee}Must emit at least one #{nn}, \"emits\" cannot be an empty array. #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end

    # Names already declared within this single "emits", to detect duplicates.
    current_op_streams = {}
    emits.each do |e|
      if !e.instance_of?(Array) || e.length != 2
        msg = "#{ee}Invalid format for \"emits\" in #{e}. #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
      if !e[0].instance_of?(String) || e[0] == ""
        msg = "#{ee}\"Emits\" #{nn} name must be a non-empty STRING in #{e}. #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
      if current_op_streams[e[0]]
        msg = "#{ee}The #{nn} \"#{e[0]}\" is listed multiple times in the same \"emits\". #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
      current_op_streams[e[0]] = 1
      if streams[e[0]] && streams[e[0]] != e[1]
        msg = "#{ee}The #{nn} name \"#{e[0]}\" was previously defined with a different set of fields! #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
      streams[e[0]] = e[1]

      # Validate the type of the field list BEFORE asking for its length:
      # a nil or numeric field list would otherwise raise NoMethodError on
      # #length instead of producing the validation message below.
      if simple
        Zillabyte::Harness::Helper.check_simple_function_emits(e)
      else
        if !e[1].instance_of?(Array)
          msg = "#{ee}Field names must be an ARRAY of STRINGS in stream #{e[0]}. #{pp}"
          Zillabyte::Harness::Helper.print_error(msg)
        end
        e[1].each do |f|
          if !f.instance_of?(String) || f == ""
            msg = "#{ee}Field names must be non-empty STRINGS in stream #{e[0]}. #{pp}"
            Zillabyte::Harness::Helper.print_error(msg)
          end
        end
      end

      # The per-field checks above are vacuous for an empty Array, so an
      # empty field list is still reported with this message.
      if e[1].length == 0
        msg = "#{ee}Must be at least one output field to #{nn} \"#{e[0]}\". #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
    end

    n_emits > 1
  end

  # Validates the "consumes" declaration of +h+ against the known +streams+.
  #
  # h       - operation object responding to _type and _consumes (and, for
  #           sinks, to what check_sink_consumes reads).
  # streams - Hash of stream name => fields.
  #
  # The error prefix and syntax help are only set for the "each" and "sink"
  # types; for any other type they stay nil and messages carry no prefix.
  def self.check_consumes(h, streams)
    if h._type == "each"
      ee = "Error in \"each\" at \"consumes\": \n\t "
      pp = @@_print_check_each_consumes
    elsif h._type == "sink"
      ee = "Error in \"sink\" at \"consumes\": \n\t "
      pp = @@_print_check_sink
    end

    consumes = h._consumes
    if !consumes
      msg = "#{ee}\"Consumes\" must be specified since a preceding \"each\" or \"spout\" emitted multiple streams. #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    if !consumes.instance_of?(String) || consumes == ""
      msg = "#{ee}\"Consumes\" must be a non-empty STRING at #{consumes}. #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    if !streams[consumes]
      msg = "#{ee}The stream \"#{consumes}\" specified in \"consumes\" does not exist! #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end

    if h._type == "sink"
      Zillabyte::Harness::Helper.check_sink_consumes(h, streams)
    end
  end

  # Validates a sink declaration.
  #
  # sink  - object responding to _name and _columns.
  # nodes - collection of previously declared operations; every entry whose
  #         _type is "sink" and whose _name equals this sink's name must have
  #         identical _columns.
  def self.check_sink(sink, nodes)
    ee = "Error in \"sink\": \n\t "
    pp = @@_print_check_sink

    name = sink._name
    columns = sink._columns
    if !name
      msg = "#{ee}Relation name must be specified! #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    # A fresh Hash is passed, so only the format of the name is checked here,
    # not its uniqueness.
    Zillabyte::Harness::Helper.check_name("sink", sink._name, {})

    if columns.length == 0
      msg = "#{ee}Must be at least one output field to relation \"#{name}\". #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    Zillabyte::Harness::Helper.check_sink_columns(sink)

    nodes.each do |s|
      if s._type != "sink"
        next
      end
      if s._name == name && s._columns != columns
        msg = "#{ee}The relation \"#{name}\" has already been specified and contains a different set of fields/types. #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
    end
  end

  # Runs check_sink_column_format on every column of +sink+. Each column is
  # read as a Hash whose first key is the column name and whose value for
  # that key is the column type.
  def self.check_sink_columns(sink)
    name = sink._name
    columns = sink._columns
    columns.each do |col|
      cname = col.keys()[0]
      ctype = col[cname]
      Zillabyte::Harness::Helper.check_sink_column_format("sink",cname,ctype,name)
    end
  end

  # Validates one column definition.
  #
  # operation     - "sink" or "simple_function"; selects the error prefix and
  #                 syntax help (both stay nil for any other value).
  # cname         - column name; must be a non-empty String.
  # ctype         - column type; must be one of the Symbols :string,
  #                 :integer, :float, :double or :boolean.
  # relation_name - relation name used in the error messages.
  def self.check_sink_column_format(operation, cname, ctype, relation_name)
    if operation == "sink"
      ee = "Error in \"sink\" at \"column\": \n\t "
      pp = @@_print_check_sink
    elsif operation == "simple_function"
      ee = "Error in \"simple_function\" at \"emits\": \n\t "
      pp = @@_print_check_simple_function_emits
    end

    if !cname.instance_of?(String) || cname == ""
      msg = "#{ee}Field names must be non-empty STRINGS in relation \"#{relation_name}\". #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    if !ctype.instance_of?(Symbol)
      msg = "#{ee}Field data types must be SYMBOLS in relation \"#{relation_name}\". #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    if ![:string, :integer, :float, :double, :boolean].include?(ctype)
      msg = "#{ee}Invalid field data type at \"#{ctype}\" in relation \"#{relation_name}\". #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
  end

  # Checks that the columns of +sink+ line up with the fields of the stream
  # it consumes: same number of entries, and every column name present among
  # the stream's fields.
  #
  # streams - Hash of stream name => fields; the entry for sink._consumes is
  #           read without a presence check.
  def self.check_sink_consumes(sink, streams)
    ee = "Error in \"sink\" at \"consumes\": \n\t "
    pp = @@_print_check_sink

    name = sink._name
    columns = sink._columns
    consumes = sink._consumes

    stream_fields = streams[consumes]
    if stream_fields.length != columns.length
      msg = "#{ee}Number of columns in \"sink\" differs from number of fields in the consumed stream at relation \"#{name}\". #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    columns.each do |col|
      col_name = col.keys()[0]
      if !stream_fields.include?(col_name)
        msg = "#{ee}The column \"#{col_name}\", is not emitted by the stream \"#{consumes}\". #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
    end
  end

  # Validates one [relation_name, columns] pair of a simple-function style
  # "emits": +columns+ must be an Array of single-entry Hashes, each mapping
  # a field name to a type accepted by check_sink_column_format.
  def self.check_simple_function_emits(emits)
    ee = "Error in \"simple_function\" at \"emits\": \n\t "
    pp = @@_print_check_simple_function_emits

    name = emits[0]
    columns = emits[1]
    if !columns.instance_of?(Array)
      msg = "#{ee}Field names must be an ARRAY of HASHES in relation \"#{name}\". #{pp}"
      Zillabyte::Harness::Helper.print_error(msg)
    end
    columns.each do |col|
      if !col.instance_of?(Hash)
        msg = "#{ee}Fields names must be listed in HASH format in relation \"#{name}\". #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
      colkeys = col.keys()
      if colkeys.length != 1
        msg = "#{ee}Each field must be a separate HASH with {field_name : data_type} in relation \"#{name}\". #{pp}"
        Zillabyte::Harness::Helper.print_error(msg)
      end
      colkey = colkeys[0]
      colval = col[colkey]
      Zillabyte::Harness::Helper.check_sink_column_format("simple_function",colkey,colval,name)
    end
  end

  # Test helper: the argument list opt_parser reads. Falls back to ARGV when
  # no override has been set through argv=.
  def self.argv()
    @_argv || ARGV
  end

  # Test helper: overrides the argument list returned by argv.
  def self.argv=(v)
    @_argv = v
  end

  # Syntax help texts appended to the error messages above. The continuation
  # lines are kept flush-left on purpose: any leading whitespace would become
  # part of the printed text.

  @@_print_check_emits = "\n
\"Emits\" Syntax:
- \"Emits\" must be a non-empty ARRAY.
- Each element of \"emits\" must be an ARRAY of length = 2.
* The first element should be the unique stream name defined as a non-empty STRING.
* The second element should be an ARRAY of field names for that stream.
e.g.
\t emits = [ [ \"stream_1\", [ \"field_11\", \"field_12\", ... ] ],
\t [ \"stream_2\", [ \"field_21\", \"field_22\", ... ] ] ] .
- Stream and field names must all be non-empty STRINGS."

  @@_print_check_simple_function_emits = "\n
\"Emits\" Syntax:
- \"Emits\" must be a non-empty ARRAY.
- Each element of \"emits\" must be an ARRAY of length = 2.
* The first element should be the unique relation name defined as a non-empty STRING.
* The second element should be an ARRAY of HASHES with field names and data types for that relation. e.g.
\t emits = [ [ \"relation_1\", [ {\"field_11\" => \"type_11\"}, {\"field_12\" => \"type_12\"}, ... ] ],
\t [ \"relation_2\", [ {\"field_21\" => \"type_21\"}, {\"field_22\" => \"type_22\"}, ... ] ] ] .
- Relation and field names must all be non-empty STRINGS.
- Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, and :boolean."

  @@_print_check_sink = "\n
\"Sink\" Syntax:
- Sinks must be specified using the following syntax:
Single stream:
\t flow.sink do |h|
\t \t h.name \"name_of_relation\"
\t \t h.columns \"field_1\" :type_1
\t \t h.columns \"field_2\" :type_2 ...
\t end
Multiple streams:
\t flow.sink do |h|
\t \t h.name \"relation_name\"
\t \t h.consumes \"stream_consumed\"
\t \t h.columns \"field_1\" :type_1
\t \t h.columns \"field_2\" :type_2 ...
\t end
- \"Sink\" relation \"name\" must be specified as a non-empty STRING!
- Field names must be non-empty STRINGS.
- Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, and :boolean.
- If there are multiple streams, \"consumes\" must be specified for each sink as a non-empty STRING!
* \"Consumes\" is the name of a stream emitted by an \"each\" or a \"spout\" which the \"sink\" should save as a table.
* The columns specified in \"sink\" must match the fields emitted by the stream."

  @@_print_check_each_consumes = "\n
\"Each\" Syntax for multiple streams:
\t flow.each do |h|
\t \t h.name \"name\", => optional
\t \t h.emits emits,
\t \t h.consumes \"consumed_stream\"
\t \t h.prepare ...
\t \t h.execute ...
\t end
- If there are multiple streams, \"consumes\" must be specified as a non-empty STRING!
* \"Consumes\" is the name of a stream emitted by a preceding \"each\" or \"spout\" which the current \"each\" operates on."

end
|