zillabyte 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OTlmOGNkMGVlNzY4MTdjMThlODE3NDk3ZTUwMjEzNjIyMzBlOWZkMQ==
5
+ data.tar.gz: !binary |-
6
+ MDYwN2YwYmMyN2M2Yjg2Y2E3YzIxMWNlYjE0N2Y5ZDQ4OWNkNjczZg==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MDFjMDk3NGU2NmUyYTUyNGViMTAwYWI1MjBiMzdkMzk1MThjYmIxMjM5NzE2
10
+ NzMwMGRiMjUyYzM2YzJhNjJhY2ZlZGE2N2ZmMDM5N2VlZTdhMDNmMmZlMDQx
11
+ OTI0MGM1MmUxNzVjOTM2OWM5ZWU2Y2NjYWJlNjQxNjA4NGVmZTM=
12
+ data.tar.gz: !binary |-
13
+ ZDcyOTM1YjkzZjBjYmNjZDY1MzJjOTVjZTJjMTYyNDEyZDQzMGEzYmJiZGFi
14
+ ZTAyYjM1NjAyOTdjNjU1NTg1MzJjY2JmNDA3YTZjYTZmNTk3ZGJhNzEyN2U3
15
+ NjM5MTYyMTJiOGI1YThlN2MzNWZkNWJhYTVkMTU5YjMwMzkwZWY=
data/ruby/README.md ADDED
@@ -0,0 +1,2 @@
1
+ api.client.ruby
2
+ ===============
@@ -0,0 +1,20 @@
1
+ require "zillabyte/harness"
2
+ require "zillabyte/common/progress"
3
+
4
# Top-level entry points of the Zillabyte Ruby client.
module Zillabyte
  class << self
    # Builds a topology via Zillabyte::Harness::Topology.build, remembers it in
    # the module-level @topology variable and returns it.
    def new(name = nil)
      @topology = Zillabyte::Harness::Topology.build(name)
    end

    # Forwards all arguments and the block to
    # Zillabyte::Harness::SimpleFunction.build and returns its result.
    def simple_function(*args, &block)
      Zillabyte::Harness::SimpleFunction.build(*args, &block)
    end

    # Forwards all arguments and the block to
    # Zillabyte::Harness::SimpleSpout.build and returns its result.
    def simple_spout(*args, &block)
      Zillabyte::Harness::SimpleSpout.build(*args, &block)
    end
  end
end
@@ -0,0 +1,17 @@
1
module Zillabyte::Common
  # Minimal progress reporter that prints its messages to standard output.
  class Progress
    # Reports a progress update; simply delegates to #display.
    def update(*args)
      display(*args)
    end

    # Prints every argument on its own line.
    def display(*args)
      $stdout.puts(*args)
    end

    # Prints the first argument prefixed with "error: "; any further
    # arguments are ignored.
    def error(*args)
      $stdout.puts "error: #{args.first}"
    end
  end
end
@@ -0,0 +1,16 @@
1
module Zillabyte
  module Harness
    # Requires every "*.rb" file found in the "harness" directory that sits
    # next to this file, in sorted path order, and returns the list of paths.
    def self.load
      pattern = File.join(File.dirname(__FILE__), "harness", "*.rb")
      Dir.glob(pattern).sort.each { |path| require path }
    end
  end
end

# Load all helpers...
Zillabyte::Harness.load
16
+
@@ -0,0 +1,29 @@
1
# Holds the settings collected for an "aggregate" operation. The DSL-style
# methods below store their argument (or block) in the matching underscore
# attribute.
class Zillabyte::Harness::Aggregate
  attr_accessor :_name, :_type, :_emits, :_start, :_aggregate, :_complete

  # Assigns a default name of the form "aggregate_<n>", where <n> comes from
  # Zillabyte::Harness::Counter.get, and marks the operation type.
  def initialize
    @_name = "aggregate_#{Zillabyte::Harness::Counter.get}"
    @_type = 'aggregate'
  end

  # Overrides the generated name.
  def name(v)
    self._name = v
  end

  # Stores the emits declaration.
  def emits(v)
    self._emits = v
  end

  # Stores the block given for the "start" step.
  def start(&block)
    self._start = block
  end

  # Stores the block given for the "aggregate" step.
  def aggregate(&block)
    self._aggregate = block
  end

  # Stores the block given for the "complete" step.
  def complete(&block)
    self._complete = block
  end
end
@@ -0,0 +1,11 @@
1
# Process-wide counter used to generate unique default operation names.
class Zillabyte::Harness::Counter
  @@count = 0

  # Increments the counter and returns the new value as a String.
  def self.get
    (@@count += 1).to_s
  end

  # Sets the counter back to zero.
  def self.reset
    @@count = 0
  end
end
@@ -0,0 +1,29 @@
1
# Holds the settings collected for an "each" operation. The DSL-style methods
# below store their argument (or block) in the matching underscore attribute.
class Zillabyte::Harness::Each
  attr_accessor :_name, :_type, :_emits, :_consumes, :_prepare, :_execute

  # Assigns a default name of the form "each_<n>", where <n> comes from
  # Zillabyte::Harness::Counter.get, and marks the operation type.
  def initialize
    @_name = "each_#{Zillabyte::Harness::Counter.get}"
    @_type = 'each'
  end

  # Overrides the generated name.
  def name(v)
    self._name = v
  end

  # Stores the emits declaration.
  def emits(v)
    self._emits = v
  end

  # Stores the name of the consumed stream.
  def consumes(v)
    self._consumes = v
  end

  # Stores the block given for the "prepare" step.
  def prepare(&block)
    self._prepare = block
  end

  # Stores the block given for the "execute" step.
  def execute(&block)
    self._execute = block
  end
end
@@ -0,0 +1,9 @@
1
# Describes a "group_by" operation: its type marker and the fields given to it.
class Zillabyte::Harness::GroupBy
  attr_accessor :_type, :_fields

  # Records the fields passed in and marks the operation type.
  def initialize(fields)
    @_type, @_fields = 'group_by', fields
  end
end
@@ -0,0 +1,326 @@
1
# This class uses OptionParser and Hash#to_json, so load them explicitly
# instead of relying on some other file having required them first.
require "json"
require "optparse"

# Make sure the namespace exists so that this file can also be loaded on its
# own (for example from a unit test) and not only through the harness loader.
module Zillabyte
  module Harness
  end
end

# Command-line parsing and validation helpers for the harness.
#
# Every check_* method prints a message and terminates the process through
# print_error (exit status 1) as soon as it finds a problem.
class Zillabyte::Harness::Helper

  # Data types accepted for sink columns and simple_function fields.
  VALID_COLUMN_TYPES = [:string, :integer, :float, :double, :boolean].freeze

  # Parses the command line (see .argv) and returns the options Hash.
  #
  # Recognized switches: --execute_live, --info, --name NAME, --pipe PIPE and
  # --file FNAME. Switch values are stored under Symbol keys.
  #
  # NOTE(review): the default name is stored under the String key "name" while
  # --name writes the Symbol key :name. Confirm which key callers read before
  # unifying them.
  def self.opt_parser()
    options = {
      "name" => Dir.pwd.split("/")[-1]
    }
    OptionParser.new do |opts|
      opts.on("--execute_live") { options[:command] = :execute }
      opts.on("--info")         { options[:command] = :info }
      opts.on("--name NAME")    { |v| options[:name] = v }
      opts.on("--pipe PIPE")    { |v| options[:pipe] = v }
      opts.on("--file FNAME")   { |v| options[:file] = v }
    end.parse(argv)
    options
  end

  # Writes +hash+ to +file+ as a single line of JSON followed by a newline.
  def self.write_hash_to_file(hash, file)
    file.write("#{hash.to_json}\n")
  end

  # Prints +msg+ to standard output and terminates the process with status 1.
  def self.print_error(msg)
    puts msg
    exit(1)
  end

  # Validates the name of an operation and records it in +names+.
  #
  # operation - String naming the kind of operation being declared.
  # name      - the value to validate; must be a non-empty String.
  # names     - Hash mapping already-used names to the operation that used
  #             them; updated in place with name => operation.
  #
  # A name may be reused only when either the earlier or the current operation
  # is "new" or "sink".
  def self.check_name(operation, name, names)
    ee = "Error in \"#{operation}\" at \"name\": \n\t "

    if !name.instance_of?(String) || name == ""
      print_error("#{ee}\"Name\" must be a non-empty STRING at #{name}.")
    end

    previous = names[name]
    reusable = ["new", "sink"]
    if previous && !reusable.include?(previous) && !reusable.include?(operation)
      print_error("#{ee}The \"name\" \"#{name}\" was previously defined in a #{previous}!")
    end

    names[name] = operation
  end

  # Validates an "emits" declaration and records each stream in +streams+.
  #
  # operation - String naming the operation; "simple_function" and
  #             "simple_spout" expect relations with typed fields, anything
  #             else expects streams with plain field names.
  # emits     - Array of [name, fields] pairs.
  # streams   - Hash of already-known stream name => fields; updated in place.
  #
  # Returns true when more than one stream/relation is emitted, else false.
  def self.check_emits(operation, emits, streams)
    ee = "Error in \"#{operation}\" at \"emits\": \n\t "
    simple = (operation == "simple_function" || operation == "simple_spout")
    if simple
      help = @@_print_check_simple_function_emits
      nn = "relation"
    else
      help = @@_print_check_emits
      nn = "stream"
    end

    unless emits.instance_of?(Array)
      print_error("#{ee}\"Emits\" must be an ARRAY at #{emits}. #{help}")
    end
    n_emits = emits.length
    if n_emits == 0
      print_error("#{ee}Must emit at least one #{nn}, \"emits\" cannot be an empty array. #{help}")
    end

    seen_here = {}
    emits.each do |e|
      if !e.instance_of?(Array) || e.length != 2
        print_error("#{ee}Invalid format for \"emits\" in #{e}. #{help}")
      end
      stream_name, fields = e
      if !stream_name.instance_of?(String) || stream_name == ""
        print_error("#{ee}\"Emits\" #{nn} name must be a non-empty STRING in #{e}. #{help}")
      end
      if seen_here[stream_name]
        print_error("#{ee}The #{nn} \"#{stream_name}\" is listed multiple times in the same \"emits\". #{help}")
      end
      seen_here[stream_name] = 1
      if streams[stream_name] && streams[stream_name] != fields
        print_error("#{ee}The #{nn} name \"#{stream_name}\" was previously defined with a different set of fields! #{help}")
      end
      streams[stream_name] = fields

      # Validate the type of the field list BEFORE asking for its length, so
      # that nil or any other non-Array value produces the intended error
      # message instead of a NoMethodError.
      if simple
        check_simple_function_emits(e)
      else
        unless fields.instance_of?(Array)
          print_error("#{ee}Field names must be an ARRAY of STRINGS in stream #{stream_name}. #{help}")
        end
        fields.each do |f|
          if !f.instance_of?(String) || f == ""
            print_error("#{ee}Field names must be non-empty STRINGS in stream #{stream_name}. #{help}")
          end
        end
      end

      if fields.length == 0
        print_error("#{ee}Must be at least one output field to #{nn} \"#{stream_name}\". #{help}")
      end
    end

    n_emits > 1
  end

  # Validates the "consumes" setting of an "each" or "sink" operation.
  #
  # h       - object responding to _type and _consumes (and, for sinks, to
  #           _name and _columns).
  # streams - Hash of known stream name => fields.
  #
  # For sinks the columns are additionally compared against the consumed
  # stream through check_sink_consumes.
  def self.check_consumes(h, streams)
    case h._type
    when "each"
      ee = "Error in \"each\" at \"consumes\": \n\t "
      help = @@_print_check_each_consumes
    when "sink"
      ee = "Error in \"sink\" at \"consumes\": \n\t "
      help = @@_print_check_sink
    end

    consumes = h._consumes
    unless consumes
      print_error("#{ee}\"Consumes\" must be specified since a preceding \"each\" or \"spout\" emitted multiple streams. #{help}")
    end
    if !consumes.instance_of?(String) || consumes == ""
      print_error("#{ee}\"Consumes\" must be a non-empty STRING at #{consumes}. #{help}")
    end
    unless streams[consumes]
      print_error("#{ee}The stream \"#{consumes}\" specified in \"consumes\" does not exist! #{help}")
    end

    if h._type == "sink"
      check_sink_consumes(h, streams)
    end
  end

  # Validates a sink declaration.
  #
  # sink  - object responding to _name and _columns.
  # nodes - collection of previously declared operations; entries whose _type
  #         is "sink" and whose _name equals this sink's name must declare
  #         the same columns.
  def self.check_sink(sink, nodes)
    ee = "Error in \"sink\": \n\t "
    help = @@_print_check_sink

    name = sink._name
    columns = sink._columns
    unless name
      print_error("#{ee}Relation name must be specified! #{help}")
    end
    # A fresh Hash is passed, so only the format of the name is checked here.
    check_name("sink", sink._name, {})

    if columns.length == 0
      print_error("#{ee}Must be at least one output field to relation \"#{name}\". #{help}")
    end
    check_sink_columns(sink)

    nodes.each do |s|
      next if s._type != "sink"
      if s._name == name && s._columns != columns
        print_error("#{ee}The relation \"#{name}\" has already been specified and contains a different set of fields/types. #{help}")
      end
    end
  end

  # Validates every column of +sink+. Each column is expected to be a Hash
  # whose first key is the column name and whose value is the column type.
  def self.check_sink_columns(sink)
    name = sink._name
    columns = sink._columns
    columns.each do |col|
      cname = col.keys()[0]
      ctype = col[cname]
      check_sink_column_format("sink", cname, ctype, name)
    end
  end

  # Validates one column name / type pair.
  #
  # operation     - "sink" or "simple_function"; selects the error prefix and
  #                 the help text appended to the message.
  # cname         - column name; must be a non-empty String.
  # ctype         - column type; must be one of VALID_COLUMN_TYPES.
  # relation_name - name of the relation, used in the error messages only.
  def self.check_sink_column_format(operation, cname, ctype, relation_name)
    if operation == "sink"
      ee = "Error in \"sink\" at \"column\": \n\t "
      help = @@_print_check_sink
    elsif operation == "simple_function"
      ee = "Error in \"simple_function\" at \"emits\": \n\t "
      help = @@_print_check_simple_function_emits
    end

    if !cname.instance_of?(String) || cname == ""
      print_error("#{ee}Field names must be non-empty STRINGS in relation \"#{relation_name}\". #{help}")
    end
    unless ctype.instance_of?(Symbol)
      print_error("#{ee}Field data types must be SYMBOLS in relation \"#{relation_name}\". #{help}")
    end
    unless VALID_COLUMN_TYPES.include?(ctype)
      print_error("#{ee}Invalid field data type at \"#{ctype}\" in relation \"#{relation_name}\". #{help}")
    end
  end

  # Checks that the columns of +sink+ match the fields of the stream it
  # consumes: same number of entries, and every column name present in the
  # stream's field list.
  def self.check_sink_consumes(sink, streams)
    ee = "Error in \"sink\" at \"consumes\": \n\t "
    help = @@_print_check_sink

    name = sink._name
    columns = sink._columns
    consumes = sink._consumes

    stream_fields = streams[consumes]
    if stream_fields.length != columns.length
      print_error("#{ee}Number of columns in \"sink\" differs from number of fields in the consumed stream at relation \"#{name}\". #{help}")
    end
    columns.each do |col|
      col_name = col.keys()[0]
      unless stream_fields.include?(col_name)
        print_error("#{ee}The column \"#{col_name}\", is not emitted by the stream \"#{consumes}\". #{help}")
      end
    end
  end

  # Validates one [relation_name, columns] pair of a simple_function "emits"
  # declaration: columns must be an Array of single-entry Hashes mapping a
  # field name to a data type.
  def self.check_simple_function_emits(emits)
    ee = "Error in \"simple_function\" at \"emits\": \n\t "
    help = @@_print_check_simple_function_emits

    name = emits[0]
    columns = emits[1]
    unless columns.instance_of?(Array)
      print_error("#{ee}Field names must be an ARRAY of HASHES in relation \"#{name}\". #{help}")
    end
    columns.each do |col|
      unless col.instance_of?(Hash)
        print_error("#{ee}Fields names must be listed in HASH format in relation \"#{name}\". #{help}")
      end
      colkeys = col.keys()
      if colkeys.length != 1
        print_error("#{ee}Each field must be a separate HASH with {field_name : data_type} in relation \"#{name}\". #{help}")
      end
      colkey = colkeys[0]
      check_sink_column_format("simple_function", colkey, col[colkey], name)
    end
  end



  # Test helper: the argument list parsed by opt_parser. Falls back to the
  # process ARGV unless an override was installed with argv=.
  def self.argv()
    @_argv || ARGV
  end

  # Test helper: overrides the argument list returned by argv.
  def self.argv=(v)
    @_argv = v
  end

  # Help texts appended to the validation error messages. The continuation
  # lines are part of the string literals, so their leading whitespace is
  # significant and is kept exactly as is.
  @@_print_check_emits = "\n
  \"Emits\" Syntax:
  - \"Emits\" must be a non-empty ARRAY.
  - Each element of \"emits\" must be an ARRAY of length = 2.
    * The first element should be the unique stream name defined as a non-empty STRING.
    * The second element should be an ARRAY of field names for that stream.
      e.g.
  \t emits = [ [ \"stream_1\", [ \"field_11\", \"field_12\", ... ] ],
  \t           [ \"stream_2\", [ \"field_21\", \"field_22\", ... ] ] ] .
  - Stream and field names must all be non-empty STRINGS."

  @@_print_check_simple_function_emits = "\n
  \"Emits\" Syntax:
  - \"Emits\" must be a non-empty ARRAY.
  - Each element of \"emits\" must be an ARRAY of length = 2.
    * The first element should be the unique relation name defined as a non-empty STRING.
    * The second element should be an ARRAY of HASHES with field names and data types for that relation. e.g.
  \t emits = [ [ \"relation_1\", [ {\"field_11\" => \"type_11\"}, {\"field_12\" => \"type_12\"}, ... ] ],
  \t           [ \"relation_2\", [ {\"field_21\" => \"type_21\"}, {\"field_22\" => \"type_22\"}, ... ] ] ] .
  - Relation and field names must all be non-empty STRINGS.
  - Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, and :boolean."

  @@_print_check_sink = "\n
  \"Sink\" Syntax:
  - Sinks must be specified using the following syntax:
    Single stream:
  \t flow.sink do |h|
  \t \t h.name \"name_of_relation\"
  \t \t h.columns \"field_1\" :type_1
  \t \t h.columns \"field_2\" :type_2 ...
  \t end
    Multiple streams:
  \t flow.sink do |h|
  \t \t h.name \"relation_name\"
  \t \t h.consumes \"stream_consumed\"
  \t \t h.columns \"field_1\" :type_1
  \t \t h.columns \"field_2\" :type_2 ...
  \t end
  - \"Sink\" relation \"name\" must be specified as a non-empty STRING!
  - Field names must be non-empty STRINGS.
  - Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, and :boolean.
  - If there are multiple streams, \"consumes\" must be specified for each sink as a non-empty STRING!
    * \"Consumes\" is the name of a stream emitted by an \"each\" or a \"spout\" which the \"sink\" should save as a table.
    * The columns specified in \"sink\" must match the fields emitted by the stream."

  @@_print_check_each_consumes = "\n
  \"Each\" Syntax for multiple streams:
  \t flow.each do |h|
  \t \t h.name \"name\", => optional
  \t \t h.emits emits,
  \t \t h.consumes \"consumed_stream\"
  \t \t h.prepare ...
  \t \t h.execute ...
  \t end
  - If there are multiple streams, \"consumes\" must be specified as a non-empty STRING!
    * \"Consumes\" is the name of a stream emitted by a preceding \"each\" or \"spout\" which the current \"each\" operates on."

end