zillabyte 0.9.48 → 0.9.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ruby/lib/zillabyte/harness/app.rb +4 -1
- data/ruby/lib/zillabyte/harness/base.rb +9 -1
- data/ruby/lib/zillabyte/harness/helper.rb +43 -15
- data/ruby/lib/zillabyte/harness/live_delegator.rb +28 -1
- data/ruby/lib/zillabyte/harness/operation_handler.rb +2 -0
- data/ruby/lib/zillabyte/harness/source.rb +3 -3
- data/ruby/lib/zillabyte/harness/source_from_csv.rb +111 -0
- data/ruby/lib/zillabyte/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 11c2e486f17f6b1e1198b6700e8519b4d1f9fecf
|
4
|
+
data.tar.gz: 7d57f06481ef828626ce8a960ff93e97dec78ee2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d9e61f6045437500441db54366e0e167da9fe108e8f5ccd086f3cd9f23445fa5d27b879a6d63828dd3dd4fcaa73b278ec27205ea9fa947dd6df65b54f271e2f
|
7
|
+
data.tar.gz: 9b0d9d9dc62671f262b35082a4a7bc85d71ca5833984c890d70bdee2dcfb68045f25ebf82268966101de7df445c11ad6b562c29dfae1daff6326fe9fae4b7611
|
@@ -30,11 +30,14 @@ class Zillabyte::Harness::App < Zillabyte::Harness::Base
|
|
30
30
|
_source_common(Zillabyte::Harness::Stream, *args, &block)
|
31
31
|
end
|
32
32
|
|
33
|
+
def source_from_csv(*args, &block)
|
34
|
+
_source_from_csv_common(Zillabyte::Harness::Stream, *args)
|
35
|
+
end
|
33
36
|
|
34
37
|
def source_from_kinesis(*args, &block)
|
35
38
|
_source_common(Zillabyte::Harness::Stream, "kinesis_demo_source")
|
36
39
|
end
|
37
|
-
|
40
|
+
end
|
38
41
|
|
39
42
|
|
40
43
|
|
@@ -35,5 +35,13 @@ class Zillabyte::Harness::Base
|
|
35
35
|
return Zillabyte::Harness::StreamBuilder.new(stream)
|
36
36
|
end
|
37
37
|
|
38
|
-
|
38
|
+
def _source_from_csv_common(stream_class, *args, &block)
|
39
|
+
op = Zillabyte::Harness::OperationHandler.new(self, stream_class)
|
40
|
+
stream = op.build_multilang_operation("source_from_csv", *args, &block)
|
41
|
+
.add_operation_properties_to_info(:name, :type)
|
42
|
+
.handle_operation
|
43
|
+
.get_output_streams
|
44
|
+
return Zillabyte::Harness::StreamBuilder.new(stream)
|
45
|
+
end
|
46
|
+
|
39
47
|
end
|
@@ -90,10 +90,10 @@ class Zillabyte::Harness::Helper
|
|
90
90
|
ee = "Error in \"source\": \n\t "
|
91
91
|
pp = @@_print_check_source
|
92
92
|
|
93
|
-
rm = !source.
|
93
|
+
rm = !source._dataset.nil?
|
94
94
|
mm = !source._matches.nil?
|
95
95
|
if(rm or mm)
|
96
|
-
msg = "#{ee}A custom \"source\" may not contain a \"matches\" clause. To source from a
|
96
|
+
msg = "#{ee}A custom \"source\" may not contain a \"matches\" clause. To source from a dataset, see the following. #{pp}"
|
97
97
|
Zillabyte::Harness::Helper.print_error(msg)
|
98
98
|
end
|
99
99
|
|
@@ -113,16 +113,31 @@ class Zillabyte::Harness::Helper
|
|
113
113
|
pp = @@_print_check_source
|
114
114
|
|
115
115
|
if(args.length != 1)
|
116
|
-
msg = "#{ee}Invalid number of arguments to \"source\". When sourcing from a
|
116
|
+
msg = "#{ee}Invalid number of arguments to \"source\". When sourcing from a dataset, please supply a single dataset name string. #{pp}"
|
117
117
|
Zillabyte::Harness::Helper.print_error(msg)
|
118
118
|
end
|
119
119
|
|
120
120
|
if(!args[0].instance_of?(String) and !args[0].instance_of?(Array))
|
121
|
-
msg = "#{ee}Invalid argument to \"source\". When sourcing from a
|
121
|
+
msg = "#{ee}Invalid argument to \"source\". When sourcing from a dataset, please supply a single dataset name string. #{pp}"
|
122
122
|
Zillabyte::Harness::Helper.print_error(msg)
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
126
|
+
def self.check_source_from_csv_args(args)
|
127
|
+
ee = "Error in \"source_from_csv\": \n\t "
|
128
|
+
pp = @@_print_check_source_from_csv
|
129
|
+
|
130
|
+
if(args.length > 2)
|
131
|
+
msg = "#{ee}Invalid number of arguments to \"source_from_csv\". When sourcing from a CSV, please supply a single CSV filename and optionally a header keyword argument as an array of header names. #{pp}"
|
132
|
+
Zillabyte::Harness::Helper.print_error(msg)
|
133
|
+
end
|
134
|
+
if(!args[0].instance_of?(String) and !args[0].instance_of?(Array))
|
135
|
+
msg = "#{ee}Invalid argument to \"source\". When sourcing from a CSV, please supply a single CSV filename and optionally a header keyword argument as an array of header names. #{pp}"
|
136
|
+
Zillabyte::Harness::Helper.print_error(msg)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
|
126
141
|
def self.check_emits(operation, emits, streams)
|
127
142
|
if operation == "component"
|
128
143
|
oo = "outputs"
|
@@ -302,13 +317,13 @@ class Zillabyte::Harness::Helper
|
|
302
317
|
columns = sink._columns
|
303
318
|
|
304
319
|
if(!name)
|
305
|
-
msg = "#{ee}
|
320
|
+
msg = "#{ee}dataset name must be specified! #{pp}"
|
306
321
|
Zillabyte::Harness::Helper.print_error(msg)
|
307
322
|
end
|
308
323
|
Zillabyte::Harness::Helper.check_name("sink", sink._name, {})
|
309
324
|
|
310
325
|
if(columns.length == 0)
|
311
|
-
msg = "#{ee}Must be at least one output field to
|
326
|
+
msg = "#{ee}Must be at least one output field to dataset \"#{name}\". #{pp}"
|
312
327
|
Zillabyte::Harness::Helper.print_error(msg)
|
313
328
|
end
|
314
329
|
Zillabyte::Harness::Helper.check_sink_columns(sink)
|
@@ -318,7 +333,7 @@ class Zillabyte::Harness::Helper
|
|
318
333
|
next
|
319
334
|
end
|
320
335
|
if(s._name == name and s._columns != columns)
|
321
|
-
msg = "#{ee}The
|
336
|
+
msg = "#{ee}The dataset \"#{name}\" has already been specified and contains a different set of fields/types. #{pp}"
|
322
337
|
Zillabyte::Harness::Helper.print_error(msg)
|
323
338
|
end
|
324
339
|
end
|
@@ -334,19 +349,19 @@ class Zillabyte::Harness::Helper
|
|
334
349
|
end
|
335
350
|
end
|
336
351
|
|
337
|
-
def self.check_field_format(operation, pp, cname, ctype,
|
352
|
+
def self.check_field_format(operation, pp, cname, ctype, dataset_name)
|
338
353
|
ee = "Error in \"#{operation}\": \n\t "
|
339
354
|
|
340
355
|
if(!(cname.is_a?(String) or cname.is_a?(Symbol)) or (cname =~ /^\w+$/).nil?)
|
341
|
-
msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{
|
356
|
+
msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{dataset_name}\". #{pp}"
|
342
357
|
Zillabyte::Harness::Helper.print_error(msg)
|
343
358
|
end
|
344
359
|
if(!ctype.instance_of?(Symbol))
|
345
|
-
msg = "#{ee}Field data types must be SYMBOLS in \"#{
|
360
|
+
msg = "#{ee}Field data types must be SYMBOLS in \"#{dataset_name}\". #{pp}"
|
346
361
|
Zillabyte::Harness::Helper.print_error(msg)
|
347
362
|
end
|
348
363
|
if(!ALLOWED_TYPES.member?(ctype))
|
349
|
-
msg = "#{ee}Invalid field data type at \"#{ctype}\" in \"#{
|
364
|
+
msg = "#{ee}Invalid field data type at \"#{ctype}\" in \"#{dataset_name}\". #{pp}"
|
350
365
|
Zillabyte::Harness::Helper.print_error(msg)
|
351
366
|
end
|
352
367
|
end
|
@@ -466,11 +481,11 @@ class Zillabyte::Harness::Helper
|
|
466
481
|
@@_print_check_sink = <<-OUTPUT
|
467
482
|
\n\n"Sink" Syntax:
|
468
483
|
stream.sink do
|
469
|
-
name "
|
484
|
+
name "name_of_dataset"
|
470
485
|
column "field_1", :type_1
|
471
486
|
column "field_2", :type_2 ...
|
472
487
|
end
|
473
|
-
- "Sink"
|
488
|
+
- "Sink" dataset "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
|
474
489
|
- Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
|
475
490
|
- Field names cannot be "v[number]", "id", "confidence", "since" or "source" which are reserved Zillabyte names.
|
476
491
|
- Field types must be SYMBOLS. The following types are allowed #{ALLOWED_TYPES.join(", ")}.
|
@@ -478,8 +493,8 @@ OUTPUT
|
|
478
493
|
|
479
494
|
@@_print_check_source = <<-OUTPUT
|
480
495
|
\n\n"Source" Syntax:
|
481
|
-
Sourcing from a
|
482
|
-
app.source("
|
496
|
+
Sourcing from a dataset:
|
497
|
+
app.source("dataset" name)
|
483
498
|
|
484
499
|
Custom source:
|
485
500
|
app.source do
|
@@ -495,6 +510,19 @@ OUTPUT
|
|
495
510
|
- The "begin_cycle" and "next_tuple" blocks can be in do...end format or {} format.
|
496
511
|
* the "begin_cycle" block is where any setup is done to initialize the content and quantity of tuples emitted by the "next_tuple" block.
|
497
512
|
* the "next_tuple" block is where the tuples are actually emitted.
|
513
|
+
OUTPUT
|
514
|
+
|
515
|
+
@@_print_check_source_from_csv = <<-OUTPUT
|
516
|
+
\n\n"SourceFromCSV" Syntax:
|
517
|
+
Sourcing from a CSV with headers:
|
518
|
+
app.source_from_csv("csv_path")
|
519
|
+
|
520
|
+
Source from a CSV without headers
|
521
|
+
app.source_from_csv("csv_path", headers = ["name", "email"])
|
522
|
+
|
523
|
+
- The "csv_path" is the path to the CSV within your app directory.
|
524
|
+
- The "headers" field is an array of header names for the source to use.
|
525
|
+
|
498
526
|
OUTPUT
|
499
527
|
|
500
528
|
@@_print_check_filter = <<-OUTPUT
|
@@ -399,13 +399,40 @@ module Zillabyte
|
|
399
399
|
end
|
400
400
|
|
401
401
|
def next_tuple
|
402
|
-
self.
|
402
|
+
self.instance_exec(&@harness._next_tuple)
|
403
403
|
end
|
404
404
|
|
405
405
|
end
|
406
406
|
|
407
|
+
|
408
|
+
|
409
|
+
class SourceFromCSVController < Storm::Source
|
410
|
+
|
411
|
+
def initialize(harness, progress)
|
412
|
+
@harness = harness
|
413
|
+
@progress = progress
|
414
|
+
|
415
|
+
Storm::Protocol.emits = harness._emits
|
416
|
+
Storm::Protocol.end_cycle_policy = harness._end_cycle_policy
|
417
|
+
end
|
418
|
+
|
419
|
+
def begin_cycle(*args)
|
420
|
+
@harness.begin_cycle
|
421
|
+
end
|
422
|
+
|
423
|
+
def prepare(*args)
|
424
|
+
end
|
425
|
+
|
426
|
+
def next_tuple
|
427
|
+
@harness.next_tuple(self)
|
428
|
+
end
|
429
|
+
|
430
|
+
end
|
431
|
+
|
432
|
+
|
407
433
|
class EachController < Storm::Each
|
408
434
|
|
435
|
+
|
409
436
|
def initialize(harness, progress)
|
410
437
|
@harness = harness
|
411
438
|
@progress = progress
|
@@ -18,6 +18,8 @@ class Zillabyte::Harness::OperationHandler
|
|
18
18
|
case type
|
19
19
|
when "source"
|
20
20
|
@_operation = Zillabyte::Harness::Source.new(@_app, *args)
|
21
|
+
when "source_from_csv"
|
22
|
+
@_operation = Zillabyte::Harness::SourceFromCSV.new(@_app, *args)
|
21
23
|
when "each"
|
22
24
|
@_operation = Zillabyte::Harness::Each.new(@_app, *args)
|
23
25
|
when "filter"
|
@@ -71,7 +71,7 @@ class Zillabyte::Harness::Source
|
|
71
71
|
attr_accessor :_app, :_node, :_relation, :_options
|
72
72
|
|
73
73
|
class Node < Zillabyte::Harness::CommonNode
|
74
|
-
attr_accessor :_matches, :_relation, :_end_cycle_policy, :_begin_cycle, :_next_tuple
|
74
|
+
attr_accessor :_filename, :_headers, :_matches, :_relation, :_end_cycle_policy, :_begin_cycle, :_next_tuple
|
75
75
|
|
76
76
|
def initialize(v, options = {})
|
77
77
|
@_name = options[:name] || "source_"+Zillabyte::Harness::Counter.get()
|
@@ -87,8 +87,8 @@ class Zillabyte::Harness::Source
|
|
87
87
|
when Array
|
88
88
|
@_matches = v
|
89
89
|
end
|
90
|
-
end
|
91
|
-
|
90
|
+
end
|
91
|
+
|
92
92
|
def parallelism(v)
|
93
93
|
throw "parallelism cannot be specified for sources"
|
94
94
|
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# OPERATION SourceFromCSV
|
2
|
+
# TAGLINE
|
3
|
+
# A source from CSV allows your app to easily source from a CSV on your local machine
|
4
|
+
# DESCRIPTION
|
5
|
+
#
|
6
|
+
# ## Sourcing From A CSV With Headers
|
7
|
+
#
|
8
|
+
# By default, the SourceFromCSV will parse the first line of the CSV for headers; these headers are the keys of the tuple hash object
|
9
|
+
# emitted by the source.
|
10
|
+
#
|
11
|
+
# ## Source From A CSV Without Headers
|
12
|
+
#
|
13
|
+
# You can also specify the headers as a keyword argument to the SourceFromCSV operation. See the examples below for the syntax.
|
14
|
+
#
|
15
|
+
# RUBY_SYNTAX
|
16
|
+
#
|
17
|
+
#
|
18
|
+
# # Sourcing from a CSV with Headers
|
19
|
+
# # Assume that companies.csv is a CSV with headers ["name", "email"]
|
20
|
+
# app.source_from_csv("companies.csv") # emits tuples in form {:name => "company", :email => "example@company.com"}
|
21
|
+
#
|
22
|
+
# # Sourcing from a CSV without Headers
|
23
|
+
#
|
24
|
+
# app.source_from_csv("companies.csv", headers=["name", "email"])
|
25
|
+
#
|
26
|
+
#
|
27
|
+
# SYNTAX_NOTES
|
28
|
+
#
|
29
|
+
# The first argument to the source_from_csv is the relative path of the CSV within the app directory. Absolute paths are
|
30
|
+
# not available when pushing to the Zillabyte Servers.
|
31
|
+
#
|
32
|
+
#
|
33
|
+
#
|
34
|
+
# RUBY_EXAMPLE
|
35
|
+
#
|
36
|
+
# # # Sourcing from a CSV with Headers
|
37
|
+
# # Assume that companies.csv is a CSV with headers ["name", "email"]
|
38
|
+
# stream = app.source_from_csv("companies.csv") # emits tuples in form {:name => "company", :email => "example@company.com"}
|
39
|
+
#
|
40
|
+
# stream = stream.each do |tuple|
|
41
|
+
# name = tuple["name"]
|
42
|
+
# email = tuple["email"]
|
43
|
+
#
|
44
|
+
# # use the name and email here ...
|
45
|
+
#
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
class Zillabyte::Harness::SourceFromCSV
|
49
|
+
|
50
|
+
attr_accessor :_app, :_node, :_options
|
51
|
+
|
52
|
+
|
53
|
+
class Node < Zillabyte::Harness::CommonNode
|
54
|
+
attr_accessor :_filename, :_file, :_headers, :_end_cycle_policy, :_harness
|
55
|
+
|
56
|
+
def initialize(v, options = {})
|
57
|
+
@_name = options[:name] || "source_"+Zillabyte::Harness::Counter.get()
|
58
|
+
@_type = 'source'
|
59
|
+
@_end_cycle_policy = :null_emit
|
60
|
+
require 'csv'
|
61
|
+
end
|
62
|
+
|
63
|
+
def begin_cycle
|
64
|
+
@_file = File.open(@_filename)
|
65
|
+
if @_headers.nil?
|
66
|
+
@_headers = CSV.parse_line(@_file.readline)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def next_tuple(harness)
|
71
|
+
begin
|
72
|
+
line = @_file.readline
|
73
|
+
csvline = CSV.parse_line(line)
|
74
|
+
tuple = {}
|
75
|
+
@_headers.each do |header|
|
76
|
+
tuple[header] = csvline.shift
|
77
|
+
end
|
78
|
+
harness.emit tuple
|
79
|
+
rescue EOFError => e
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def initialize(app, *args)
|
85
|
+
@_app = app
|
86
|
+
@_args, @_options = Zillabyte::Harness::Helper.get_vector_and_hashes(args)
|
87
|
+
end
|
88
|
+
|
89
|
+
def build_node(&block)
|
90
|
+
@_node = Node.new(@_args, @_options)
|
91
|
+
@_node._filename = @_args[0]
|
92
|
+
|
93
|
+
if @_args.length > 1
|
94
|
+
case @_args[1]
|
95
|
+
when Array
|
96
|
+
@_node._headers = @_args[1]
|
97
|
+
else
|
98
|
+
throw "Header argument must be an array of filenames"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
@_node._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
|
103
|
+
end
|
104
|
+
|
105
|
+
|
106
|
+
def run_operation
|
107
|
+
c = Zillabyte::Harness::SourceFromCSVController.new(@_node, Zillabyte::Common::Progress.new)
|
108
|
+
c.run(@_app._options)
|
109
|
+
end
|
110
|
+
|
111
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zillabyte
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.49
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- zillabyte
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.9.
|
33
|
+
version: 0.9.49
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.9.
|
40
|
+
version: 0.9.49
|
41
41
|
description: The Official Zillabyte Gem
|
42
42
|
email:
|
43
43
|
- gem@zillabyte.com
|
@@ -68,6 +68,7 @@ files:
|
|
68
68
|
- ruby/lib/zillabyte/harness/operation_handler.rb
|
69
69
|
- ruby/lib/zillabyte/harness/sink.rb
|
70
70
|
- ruby/lib/zillabyte/harness/source.rb
|
71
|
+
- ruby/lib/zillabyte/harness/source_from_csv.rb
|
71
72
|
- ruby/lib/zillabyte/harness/stream.rb
|
72
73
|
- ruby/lib/zillabyte/harness/stream_builder.rb
|
73
74
|
- ruby/lib/zillabyte/harness/tuple.rb
|