zillabyte 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OTlmOGNkMGVlNzY4MTdjMThlODE3NDk3ZTUwMjEzNjIyMzBlOWZkMQ==
5
+ data.tar.gz: !binary |-
6
+ MDYwN2YwYmMyN2M2Yjg2Y2E3YzIxMWNlYjE0N2Y5ZDQ4OWNkNjczZg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MDFjMDk3NGU2NmUyYTUyNGViMTAwYWI1MjBiMzdkMzk1MThjYmIxMjM5NzE2
10
+ NzMwMGRiMjUyYzM2YzJhNjJhY2ZlZGE2N2ZmMDM5N2VlZTdhMDNmMmZlMDQx
11
+ OTI0MGM1MmUxNzVjOTM2OWM5ZWU2Y2NjYWJlNjQxNjA4NGVmZTM=
12
+ data.tar.gz: !binary |-
13
+ ZDcyOTM1YjkzZjBjYmNjZDY1MzJjOTVjZTJjMTYyNDEyZDQzMGEzYmJiZGFi
14
+ ZTAyYjM1NjAyOTdjNjU1NTg1MzJjY2JmNDA3YTZjYTZmNTk3ZGJhNzEyN2U3
15
+ NjM5MTYyMTJiOGI1YThlN2MzNWZkNWJhYTVkMTU5YjMwMzkwZWY=
data/ruby/README.md ADDED
@@ -0,0 +1,2 @@
1
+ api.client.ruby
2
+ ===============
@@ -0,0 +1,20 @@
1
+ require "zillabyte/harness"
2
+ require "zillabyte/common/progress"
3
+
4
# Top-level entry points of the Zillabyte Ruby client. Each method simply
# delegates to the matching builder under Zillabyte::Harness.
module Zillabyte

  class << self

    # Builds a topology through Zillabyte::Harness::Topology.build, keeps a
    # reference to it in the module-level @topology and returns it.
    #
    # name - optional value handed to the builder unchanged (defaults to nil).
    def new(name = nil)
      @topology = Zillabyte::Harness::Topology.build(name)
    end

    # Forwards all arguments and the block to
    # Zillabyte::Harness::SimpleFunction.build and returns its result.
    def simple_function(*args, &block)
      Zillabyte::Harness::SimpleFunction.build(*args, &block)
    end

    # Forwards all arguments and the block to
    # Zillabyte::Harness::SimpleSpout.build and returns its result.
    def simple_spout(*args, &block)
      Zillabyte::Harness::SimpleSpout.build(*args, &block)
    end

  end

end
@@ -0,0 +1,17 @@
1
module Zillabyte::Common

  # Minimal progress reporter that writes everything with `puts`.
  class Progress

    # Reports a progress update; identical to #display.
    def update(*messages)
      display(*messages)
    end

    # Prints every given message with `puts`.
    def display(*messages)
      puts(*messages)
    end

    # Prints the first argument prefixed with "error: "; any further
    # arguments are ignored.
    def error(*details)
      first_detail = details.first
      puts "error: #{first_detail}"
    end

  end

end
@@ -0,0 +1,16 @@
1
module Zillabyte

  # Namespace for the harness helper classes.
  module Harness

    # Requires every "*.rb" file found in the "harness" directory that sits
    # next to this file, in sorted filename order.
    #
    # The directory is expanded to an absolute path first. __FILE__ can be a
    # bare relative path (e.g. "lib/zillabyte/harness.rb" when the file is run
    # directly), and `require` resolves such a path through $LOAD_PATH instead
    # of the file system, which would raise LoadError.
    #
    # Returns the sorted list of required paths.
    def self.load
      helper_dir = File.expand_path(File.join(File.dirname(__FILE__), "harness"))
      Dir[File.join(helper_dir, "*.rb")].sort.each { |file| require file }
    end
  end
end


# Load all helpers...
Zillabyte::Harness.load()
16
+
@@ -0,0 +1,29 @@
1
# Holder for the settings of an "aggregate" operation. The plain-named methods
# are DSL-style setters; the underscore-prefixed accessors expose what was set.
class Zillabyte::Harness::Aggregate
  attr_accessor :_name, :_type, :_emits, :_start, :_aggregate, :_complete

  # Assigns a default name of the form "aggregate_<n>", where <n> is the next
  # value handed out by Zillabyte::Harness::Counter, and marks the type.
  def initialize()
    sequence = Zillabyte::Harness::Counter.get()
    @_name = "aggregate_" + sequence
    @_type = 'aggregate'
  end

  # Overrides the default name.
  def name(new_name)
    @_name = new_name
  end

  # Records the emits declaration exactly as given.
  def emits(declaration)
    @_emits = declaration
  end

  # Stores the block given for the "start" step.
  def start(&handler)
    @_start = handler
  end

  # Stores the block given for the "aggregate" step.
  def aggregate(&handler)
    @_aggregate = handler
  end

  # Stores the block given for the "complete" step.
  def complete(&handler)
    @_complete = handler
  end

end
@@ -0,0 +1,11 @@
1
# Process-wide sequence used to build default operation names.
class Zillabyte::Harness::Counter
  @@count = 0

  # Advances the sequence by one and returns the new value as a String.
  def self.get
    @@count = @@count.succ
    @@count.to_s
  end

  # Restarts the sequence so that the next call to .get returns "1".
  def self.reset
    @@count = 0
  end
end
@@ -0,0 +1,29 @@
1
# Holder for the settings of an "each" operation. The plain-named methods are
# DSL-style setters; the underscore-prefixed accessors expose what was set.
class Zillabyte::Harness::Each
  attr_accessor :_name, :_type, :_emits, :_consumes, :_prepare, :_execute

  # Assigns a default name of the form "each_<n>", where <n> is the next value
  # handed out by Zillabyte::Harness::Counter, and marks the type.
  def initialize()
    sequence = Zillabyte::Harness::Counter.get()
    @_name = "each_" + sequence
    @_type = 'each'
  end

  # Overrides the default name.
  def name(new_name)
    @_name = new_name
  end

  # Records the emits declaration exactly as given.
  def emits(declaration)
    @_emits = declaration
  end

  # Records the value given for "consumes" exactly as given.
  def consumes(stream)
    @_consumes = stream
  end

  # Stores the block given for the "prepare" step.
  def prepare(&handler)
    @_prepare = handler
  end

  # Stores the block given for the "execute" step.
  def execute(&handler)
    @_execute = handler
  end

end
@@ -0,0 +1,9 @@
1
# Holder for a "group_by" operation: just its type tag and the fields it was
# created with.
class Zillabyte::Harness::GroupBy
  attr_accessor :_type, :_fields

  # fields - stored unchanged and exposed through #_fields.
  def initialize(fields)
    @_fields = fields
    @_type = 'group_by'
  end

end
@@ -0,0 +1,326 @@
1
# Command-line and validation helpers for the harness DSL.
#
# Every check_* method reports a violation through .print_error, which prints
# the message with `puts` and terminates the process with exit status 1.
#
# NOTE(review): this class uses OptionParser and Object#to_json but does not
# require 'optparse' / 'json' itself; presumably they are loaded elsewhere.
class Zillabyte::Harness::Helper

  # Column data types accepted for sink columns and simple_function relations.
  VALID_COLUMN_TYPES = [:string, :integer, :float, :double, :boolean].freeze

  # The usage texts below are appended to error messages. Their lines are kept
  # flush-left because any leading whitespace would become part of the string.

  # Usage text for "emits" declarations that describe streams.
  @@_print_check_emits = "\n
\"Emits\" Syntax:
- \"Emits\" must be a non-empty ARRAY.
- Each element of \"emits\" must be an ARRAY of length = 2.
* The first element should be the unique stream name defined as a non-empty STRING.
* The second element should be an ARRAY of field names for that stream.
e.g.
\t emits = [ [ \"stream_1\", [ \"field_11\", \"field_12\", ... ] ],
\t [ \"stream_2\", [ \"field_21\", \"field_22\", ... ] ] ] .
- Stream and field names must all be non-empty STRINGS."

  # Usage text for "emits" declarations that describe relations.
  @@_print_check_simple_function_emits = "\n
\"Emits\" Syntax:
- \"Emits\" must be a non-empty ARRAY.
- Each element of \"emits\" must be an ARRAY of length = 2.
* The first element should be the unique relation name defined as a non-empty STRING.
* The second element should be an ARRAY of HASHES with field names and data types for that relation. e.g.
\t emits = [ [ \"relation_1\", [ {\"field_11\" => \"type_11\"}, {\"field_12\" => \"type_12\"}, ... ] ],
\t [ \"relation_2\", [ {\"field_21\" => \"type_21\"}, {\"field_22\" => \"type_22\"}, ... ] ] ] .
- Relation and field names must all be non-empty STRINGS.
- Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, and :boolean."

  # Usage text for "sink" declarations.
  @@_print_check_sink = "\n
\"Sink\" Syntax:
- Sinks must be specified using the following syntax:
Single stream:
\t flow.sink do |h|
\t \t h.name \"name_of_relation\"
\t \t h.columns \"field_1\" :type_1
\t \t h.columns \"field_2\" :type_2 ...
\t end
Multiple streams:
\t flow.sink do |h|
\t \t h.name \"relation_name\"
\t \t h.consumes \"stream_consumed\"
\t \t h.columns \"field_1\" :type_1
\t \t h.columns \"field_2\" :type_2 ...
\t end
- \"Sink\" relation \"name\" must be specified as a non-empty STRING!
- Field names must be non-empty STRINGS.
- Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, and :boolean.
- If there are multiple streams, \"consumes\" must be specified for each sink as a non-empty STRING!
* \"Consumes\" is the name of a stream emitted by an \"each\" or a \"spout\" which the \"sink\" should save as a table.
* The columns specified in \"sink\" must match the fields emitted by the stream."

  # Usage text for "consumes" on an "each".
  @@_print_check_each_consumes = "\n
\"Each\" Syntax for multiple streams:
\t flow.each do |h|
\t \t h.name \"name\", => optional
\t \t h.emits emits,
\t \t h.consumes \"consumed_stream\"
\t \t h.prepare ...
\t \t h.execute ...
\t end
- If there are multiple streams, \"consumes\" must be specified as a non-empty STRING!
* \"Consumes\" is the name of a stream emitted by a preceding \"each\" or \"spout\" which the current \"each\" operates on."

  # Parses the arguments returned by .argv and returns the resulting options
  # Hash. Recognized switches: --execute_live, --info, --name NAME,
  # --pipe PIPE and --file FNAME.
  #
  # NOTE(review): the default name (last component of the working directory)
  # is stored under the String key "name", while --name writes the Symbol key
  # :name, so the flag never replaces the default. Left unchanged because the
  # readers of this Hash are not visible here - confirm which key they use.
  def self.opt_parser()
    options = {
      "name" => Dir.pwd.split("/")[-1]
    }
    OptionParser.new do |opts|
      opts.on("--execute_live") { |_| options[:command] = :execute }
      opts.on("--info")         { |_| options[:command] = :info }
      opts.on("--name NAME")    { |v| options[:name] = v }
      opts.on("--pipe PIPE")    { |v| options[:pipe] = v }
      opts.on("--file FNAME")   { |v| options[:file] = v }
    end.parse(self.argv)
    options
  end

  # Writes +hash+ to +file+ as a single line of JSON followed by a newline.
  def self.write_hash_to_file(hash, file)
    file.write(hash.to_json + "\n")
  end

  # Prints +msg+ and terminates the process with exit status 1.
  def self.print_error(msg)
    puts msg
    exit(1)
  end

  # Validates the name of an operation and records it.
  #
  # operation - String label of the operation being defined.
  # name      - the name to validate; must be a non-empty String.
  # names     - Hash of name => operation; updated with this name.
  #
  # A name may be reused only when either its earlier or its current use is a
  # "new" or a "sink". Returns the operation stored for the name.
  def self.check_name(operation, name, names)
    prefix = "Error in \"#{operation}\" at \"name\": \n\t "

    if !name.instance_of?(String) || name == ""
      print_error("#{prefix}\"Name\" must be a non-empty STRING at #{name}.")
    end

    shareable = ["new", "sink"]
    earlier = names[name]
    if earlier && !shareable.include?(earlier) && !shareable.include?(operation)
      print_error("#{prefix}The \"name\" \"#{name}\" was previously defined in a #{earlier}!")
    end
    names[name] = operation
  end

  # Validates an "emits" declaration and records each target in +streams+.
  #
  # operation - String label; "simple_function" and "simple_spout" emit
  #             relations (hashes of field => type), anything else emits
  #             streams (arrays of field-name strings).
  # emits     - Array of [name, fields] pairs.
  # streams   - Hash of name => fields; updated with every pair.
  #
  # Returns true when more than one target is emitted, false otherwise.
  def self.check_emits(operation, emits, streams)
    prefix = "Error in \"#{operation}\" at \"emits\": \n\t "
    simple = (operation == "simple_function" || operation == "simple_spout")
    if simple
      usage = @@_print_check_simple_function_emits
      kind = "relation"
    else
      usage = @@_print_check_emits
      kind = "stream"
    end

    unless emits.instance_of?(Array)
      print_error("#{prefix}\"Emits\" must be an ARRAY at #{emits}. #{usage}")
    end
    n_emits = emits.length
    if n_emits == 0
      print_error("#{prefix}Must emit at least one #{kind}, \"emits\" cannot be an empty array. #{usage}")
    end

    seen = {}
    emits.each do |e|
      if !e.instance_of?(Array) || e.length != 2
        print_error("#{prefix}Invalid format for \"emits\" in #{e}. #{usage}")
      end
      target, fields = e
      if !target.instance_of?(String) || target == ""
        print_error("#{prefix}\"Emits\" #{kind} name must be a non-empty STRING in #{e}. #{usage}")
      end
      if seen[target]
        print_error("#{prefix}The #{kind} \"#{target}\" is listed multiple times in the same \"emits\". #{usage}")
      end
      seen[target] = 1
      if streams[target] && streams[target] != fields
        print_error("#{prefix}The #{kind} name \"#{target}\" was previously defined with a different set of fields! #{usage}")
      end
      streams[target] = fields

      # The type must be checked before asking for the length: a nil or
      # otherwise non-Array field list would raise NoMethodError (or give a
      # misleading "at least one output field" message for an empty String).
      unless fields.instance_of?(Array)
        expected = simple ? "HASHES in relation \"#{target}\"" : "STRINGS in stream #{target}"
        print_error("#{prefix}Field names must be an ARRAY of #{expected}. #{usage}")
      end
      if fields.empty?
        print_error("#{prefix}Must be at least one output field to #{kind} \"#{target}\". #{usage}")
      end

      if simple
        check_simple_function_emits(e, operation)
      else
        fields.each do |field|
          if !field.instance_of?(String) || field == ""
            print_error("#{prefix}Field names must be non-empty STRINGS in stream #{target}. #{usage}")
          end
        end
      end
    end

    n_emits > 1
  end

  # Validates the "consumes" value of an "each" or "sink".
  #
  # h       - object responding to _type and _consumes (and, for a sink,
  #           _name and _columns).
  # streams - Hash of known stream name => fields.
  def self.check_consumes(h, streams)
    case h._type
    when "each"
      prefix = "Error in \"each\" at \"consumes\": \n\t "
      usage = @@_print_check_each_consumes
    when "sink"
      prefix = "Error in \"sink\" at \"consumes\": \n\t "
      usage = @@_print_check_sink
    end

    consumes = h._consumes
    unless consumes
      print_error("#{prefix}\"Consumes\" must be specified since a preceding \"each\" or \"spout\" emitted multiple streams. #{usage}")
    end
    if !consumes.instance_of?(String) || consumes == ""
      print_error("#{prefix}\"Consumes\" must be a non-empty STRING at #{consumes}. #{usage}")
    end
    unless streams[consumes]
      print_error("#{prefix}The stream \"#{consumes}\" specified in \"consumes\" does not exist! #{usage}")
    end

    check_sink_consumes(h, streams) if h._type == "sink"
  end

  # Validates a sink's name and columns, and checks that any sink in +nodes+
  # with the same name declares the same columns.
  #
  # sink  - object responding to _name and _columns.
  # nodes - collection of objects responding to _type (and, for sinks, _name
  #         and _columns).
  def self.check_sink(sink, nodes)
    prefix = "Error in \"sink\": \n\t "
    usage = @@_print_check_sink

    name = sink._name
    columns = sink._columns
    unless name
      print_error("#{prefix}Relation name must be specified! #{usage}")
    end
    check_name("sink", sink._name, {})

    # A sink whose columns were never set (nil) is reported the same way as
    # one with an empty column list instead of raising NoMethodError.
    if columns.nil? || columns.length == 0
      print_error("#{prefix}Must be at least one output field to relation \"#{name}\". #{usage}")
    end
    check_sink_columns(sink)

    nodes.each do |node|
      next unless node._type == "sink"
      if node._name == name && node._columns != columns
        print_error("#{prefix}The relation \"#{name}\" has already been specified and contains a different set of fields/types. #{usage}")
      end
    end
  end

  # Validates the name and type of every column of +sink+. Each column is
  # read as a single-entry Hash of name => type (only the first key is used).
  def self.check_sink_columns(sink)
    relation = sink._name
    sink._columns.each do |col|
      cname = col.keys.first
      check_sink_column_format("sink", cname, col[cname], relation)
    end
  end

  # Validates one column: the name must be a non-empty String and the type
  # one of VALID_COLUMN_TYPES.
  #
  # operation     - "sink" selects the sink wording; any other label (e.g.
  #                 "simple_function", "simple_spout") is reported as an
  #                 error in that operation's "emits".
  # cname         - column name.
  # ctype         - column type.
  # relation_name - relation the column belongs to (used in messages).
  def self.check_sink_column_format(operation, cname, ctype, relation_name)
    if operation == "sink"
      prefix = "Error in \"sink\" at \"column\": \n\t "
      usage = @@_print_check_sink
    else
      prefix = "Error in \"#{operation}\" at \"emits\": \n\t "
      usage = @@_print_check_simple_function_emits
    end

    if !cname.instance_of?(String) || cname == ""
      print_error("#{prefix}Field names must be non-empty STRINGS in relation \"#{relation_name}\". #{usage}")
    end
    unless ctype.instance_of?(Symbol)
      print_error("#{prefix}Field data types must be SYMBOLS in relation \"#{relation_name}\". #{usage}")
    end
    unless VALID_COLUMN_TYPES.include?(ctype)
      print_error("#{prefix}Invalid field data type at \"#{ctype}\" in relation \"#{relation_name}\". #{usage}")
    end
  end

  # Checks that the columns of +sink+ match the fields of the stream it
  # consumes: same count, and every column name present in the stream.
  def self.check_sink_consumes(sink, streams)
    prefix = "Error in \"sink\" at \"consumes\": \n\t "
    usage = @@_print_check_sink

    name = sink._name
    columns = sink._columns
    consumes = sink._consumes

    stream_fields = streams[consumes]
    if stream_fields.length != columns.length
      print_error("#{prefix}Number of columns in \"sink\" differs from number of fields in the consumed stream at relation \"#{name}\". #{usage}")
    end
    columns.each do |col|
      col_name = col.keys.first
      unless stream_fields.include?(col_name)
        print_error("#{prefix}The column \"#{col_name}\", is not emitted by the stream \"#{consumes}\". #{usage}")
      end
    end
  end

  # Validates one [relation_name, columns] pair of a relation-style "emits":
  # columns must be an Array of single-entry Hashes of field name => type.
  #
  # emits     - the [relation_name, columns] pair.
  # operation - label used in error messages; defaults to "simple_function"
  #             so existing one-argument callers behave as before.
  def self.check_simple_function_emits(emits, operation = "simple_function")
    prefix = "Error in \"#{operation}\" at \"emits\": \n\t "
    usage = @@_print_check_simple_function_emits

    name, columns = emits[0], emits[1]
    unless columns.instance_of?(Array)
      print_error("#{prefix}Field names must be an ARRAY of HASHES in relation \"#{name}\". #{usage}")
    end
    columns.each do |col|
      unless col.instance_of?(Hash)
        print_error("#{prefix}Fields names must be listed in HASH format in relation \"#{name}\". #{usage}")
      end
      colkeys = col.keys
      if colkeys.length != 1
        print_error("#{prefix}Each field must be a separate HASH with {field_name : data_type} in relation \"#{name}\". #{usage}")
      end
      colkey = colkeys.first
      check_sink_column_format(operation, colkey, col[colkey], name)
    end
  end

  # Test helper: the argument list used by .opt_parser - the value set through
  # .argv= when there is one, ARGV otherwise.
  def self.argv()
    @_argv || ARGV
  end

  # Test helper: overrides the argument list returned by .argv.
  def self.argv=(v)
    @_argv = v
  end

end