zillabyte 0.9.48 → 0.9.49
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ruby/lib/zillabyte/harness/app.rb +4 -1
- data/ruby/lib/zillabyte/harness/base.rb +9 -1
- data/ruby/lib/zillabyte/harness/helper.rb +43 -15
- data/ruby/lib/zillabyte/harness/live_delegator.rb +28 -1
- data/ruby/lib/zillabyte/harness/operation_handler.rb +2 -0
- data/ruby/lib/zillabyte/harness/source.rb +3 -3
- data/ruby/lib/zillabyte/harness/source_from_csv.rb +111 -0
- data/ruby/lib/zillabyte/version.rb +1 -1
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 11c2e486f17f6b1e1198b6700e8519b4d1f9fecf
|
4
|
+
data.tar.gz: 7d57f06481ef828626ce8a960ff93e97dec78ee2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d9e61f6045437500441db54366e0e167da9fe108e8f5ccd086f3cd9f23445fa5d27b879a6d63828dd3dd4fcaa73b278ec27205ea9fa947dd6df65b54f271e2f
|
7
|
+
data.tar.gz: 9b0d9d9dc62671f262b35082a4a7bc85d71ca5833984c890d70bdee2dcfb68045f25ebf82268966101de7df445c11ad6b562c29dfae1daff6326fe9fae4b7611
|
@@ -30,11 +30,14 @@ class Zillabyte::Harness::App < Zillabyte::Harness::Base
|
|
30
30
|
_source_common(Zillabyte::Harness::Stream, *args, &block)
|
31
31
|
end
|
32
32
|
|
33
|
+
def source_from_csv(*args, &block)
|
34
|
+
_source_from_csv_common(Zillabyte::Harness::Stream, *args)
|
35
|
+
end
|
33
36
|
|
34
37
|
def source_from_kinesis(*args, &block)
|
35
38
|
_source_common(Zillabyte::Harness::Stream, "kinesis_demo_source")
|
36
39
|
end
|
37
|
-
|
40
|
+
end
|
38
41
|
|
39
42
|
|
40
43
|
|
@@ -35,5 +35,13 @@ class Zillabyte::Harness::Base
|
|
35
35
|
return Zillabyte::Harness::StreamBuilder.new(stream)
|
36
36
|
end
|
37
37
|
|
38
|
-
|
38
|
+
def _source_from_csv_common(stream_class, *args, &block)
|
39
|
+
op = Zillabyte::Harness::OperationHandler.new(self, stream_class)
|
40
|
+
stream = op.build_multilang_operation("source_from_csv", *args, &block)
|
41
|
+
.add_operation_properties_to_info(:name, :type)
|
42
|
+
.handle_operation
|
43
|
+
.get_output_streams
|
44
|
+
return Zillabyte::Harness::StreamBuilder.new(stream)
|
45
|
+
end
|
46
|
+
|
39
47
|
end
|
@@ -90,10 +90,10 @@ class Zillabyte::Harness::Helper
|
|
90
90
|
ee = "Error in \"source\": \n\t "
|
91
91
|
pp = @@_print_check_source
|
92
92
|
|
93
|
-
rm = !source.
|
93
|
+
rm = !source._dataset.nil?
|
94
94
|
mm = !source._matches.nil?
|
95
95
|
if(rm or mm)
|
96
|
-
msg = "#{ee}A custom \"source\" may not contain a \"matches\" clause. To source from a
|
96
|
+
msg = "#{ee}A custom \"source\" may not contain a \"matches\" clause. To source from a dataset, see the following. #{pp}"
|
97
97
|
Zillabyte::Harness::Helper.print_error(msg)
|
98
98
|
end
|
99
99
|
|
@@ -113,16 +113,31 @@ class Zillabyte::Harness::Helper
|
|
113
113
|
pp = @@_print_check_source
|
114
114
|
|
115
115
|
if(args.length != 1)
|
116
|
-
msg = "#{ee}Invalid number of arguments to \"source\". When sourcing from a
|
116
|
+
msg = "#{ee}Invalid number of arguments to \"source\". When sourcing from a dataset, please supply a single dataset name string. #{pp}"
|
117
117
|
Zillabyte::Harness::Helper.print_error(msg)
|
118
118
|
end
|
119
119
|
|
120
120
|
if(!args[0].instance_of?(String) and !args[0].instance_of?(Array))
|
121
|
-
msg = "#{ee}Invalid argument to \"source\". When sourcing from a
|
121
|
+
msg = "#{ee}Invalid argument to \"source\". When sourcing from a dataset, please supply a single dataset name string. #{pp}"
|
122
122
|
Zillabyte::Harness::Helper.print_error(msg)
|
123
123
|
end
|
124
124
|
end
|
125
125
|
|
126
|
+
def self.check_source_from_csv_args(args)
|
127
|
+
ee = "Error in \"source_from_csv\": \n\t "
|
128
|
+
pp = @@_print_check_source_from_csv
|
129
|
+
|
130
|
+
if(args.length > 2)
|
131
|
+
msg = "#{ee}Invalid number of arguments to \"source_from_csv\". When sourcing from a CSV, please supply a single CSV filename and optionally a header keyword argument as an array of header names. #{pp}"
|
132
|
+
Zillabyte::Harness::Helper.print_error(msg)
|
133
|
+
end
|
134
|
+
if(!args[0].instance_of?(String) and !args[0].instance_of?(Array))
|
135
|
+
msg = "#{ee}Invalid argument to \"source\". When sourcing from a CSV, please supply a single CSV filename and optionally a header keyword argument as an array of header names. #{pp}"
|
136
|
+
Zillabyte::Harness::Helper.print_error(msg)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
|
126
141
|
def self.check_emits(operation, emits, streams)
|
127
142
|
if operation == "component"
|
128
143
|
oo = "outputs"
|
@@ -302,13 +317,13 @@ class Zillabyte::Harness::Helper
|
|
302
317
|
columns = sink._columns
|
303
318
|
|
304
319
|
if(!name)
|
305
|
-
msg = "#{ee}
|
320
|
+
msg = "#{ee}dataset name must be specified! #{pp}"
|
306
321
|
Zillabyte::Harness::Helper.print_error(msg)
|
307
322
|
end
|
308
323
|
Zillabyte::Harness::Helper.check_name("sink", sink._name, {})
|
309
324
|
|
310
325
|
if(columns.length == 0)
|
311
|
-
msg = "#{ee}Must be at least one output field to
|
326
|
+
msg = "#{ee}Must be at least one output field to dataset \"#{name}\". #{pp}"
|
312
327
|
Zillabyte::Harness::Helper.print_error(msg)
|
313
328
|
end
|
314
329
|
Zillabyte::Harness::Helper.check_sink_columns(sink)
|
@@ -318,7 +333,7 @@ class Zillabyte::Harness::Helper
|
|
318
333
|
next
|
319
334
|
end
|
320
335
|
if(s._name == name and s._columns != columns)
|
321
|
-
msg = "#{ee}The
|
336
|
+
msg = "#{ee}The dataset \"#{name}\" has already been specified and contains a different set of fields/types. #{pp}"
|
322
337
|
Zillabyte::Harness::Helper.print_error(msg)
|
323
338
|
end
|
324
339
|
end
|
@@ -334,19 +349,19 @@ class Zillabyte::Harness::Helper
|
|
334
349
|
end
|
335
350
|
end
|
336
351
|
|
337
|
-
def self.check_field_format(operation, pp, cname, ctype,
|
352
|
+
def self.check_field_format(operation, pp, cname, ctype, dataset_name)
|
338
353
|
ee = "Error in \"#{operation}\": \n\t "
|
339
354
|
|
340
355
|
if(!(cname.is_a?(String) or cname.is_a?(Symbol)) or (cname =~ /^\w+$/).nil?)
|
341
|
-
msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{
|
356
|
+
msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{dataset_name}\". #{pp}"
|
342
357
|
Zillabyte::Harness::Helper.print_error(msg)
|
343
358
|
end
|
344
359
|
if(!ctype.instance_of?(Symbol))
|
345
|
-
msg = "#{ee}Field data types must be SYMBOLS in \"#{
|
360
|
+
msg = "#{ee}Field data types must be SYMBOLS in \"#{dataset_name}\". #{pp}"
|
346
361
|
Zillabyte::Harness::Helper.print_error(msg)
|
347
362
|
end
|
348
363
|
if(!ALLOWED_TYPES.member?(ctype))
|
349
|
-
msg = "#{ee}Invalid field data type at \"#{ctype}\" in \"#{
|
364
|
+
msg = "#{ee}Invalid field data type at \"#{ctype}\" in \"#{dataset_name}\". #{pp}"
|
350
365
|
Zillabyte::Harness::Helper.print_error(msg)
|
351
366
|
end
|
352
367
|
end
|
@@ -466,11 +481,11 @@ class Zillabyte::Harness::Helper
|
|
466
481
|
@@_print_check_sink = <<-OUTPUT
|
467
482
|
\n\n"Sink" Syntax:
|
468
483
|
stream.sink do
|
469
|
-
name "
|
484
|
+
name "name_of_dataset"
|
470
485
|
column "field_1", :type_1
|
471
486
|
column "field_2", :type_2 ...
|
472
487
|
end
|
473
|
-
- "Sink"
|
488
|
+
- "Sink" dataset "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
|
474
489
|
- Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
|
475
490
|
- Field names cannot be "v[number]", "id", "confidence", "since" or "source" which are reserved Zillabyte names.
|
476
491
|
- Field types must be SYMBOLS. The following types are allowed #{ALLOWED_TYPES.join(", ")}.
|
@@ -478,8 +493,8 @@ OUTPUT
|
|
478
493
|
|
479
494
|
@@_print_check_source = <<-OUTPUT
|
480
495
|
\n\n"Source" Syntax:
|
481
|
-
Sourcing from a
|
482
|
-
app.source("
|
496
|
+
Sourcing from a dataset:
|
497
|
+
app.source("dataset" name)
|
483
498
|
|
484
499
|
Custom source:
|
485
500
|
app.source do
|
@@ -495,6 +510,19 @@ OUTPUT
|
|
495
510
|
- The "begin_cycle" and "next_tuple" blocks can be in do...end format or {} format.
|
496
511
|
* the "begin_cycle" block is where any setup is done to initialize the content and quantity of tuples emitted by the "next_tuple" block.
|
497
512
|
* the "next_tuple" block is where the tuples are actually emitted.
|
513
|
+
OUTPUT
|
514
|
+
|
515
|
+
@@_print_check_source_from_csv = <<-OUTPUT
|
516
|
+
\n\n"SourceFromCSV" Syntax:
|
517
|
+
Sourcing from a CSV with headers:
|
518
|
+
app.source_from_csv("csv_path")
|
519
|
+
|
520
|
+
Source from a CSV without headers
|
521
|
+
app.source_from_csv("csv_path", headers = ["name", "email"])
|
522
|
+
|
523
|
+
- The "csv_path" is the path to the CSV within your app directory.
|
524
|
+
- The "headers" field is an array of header names for the source to use.
|
525
|
+
|
498
526
|
OUTPUT
|
499
527
|
|
500
528
|
@@_print_check_filter = <<-OUTPUT
|
@@ -399,13 +399,40 @@ module Zillabyte
|
|
399
399
|
end
|
400
400
|
|
401
401
|
def next_tuple
|
402
|
-
self.
|
402
|
+
self.instance_exec(&@harness._next_tuple)
|
403
403
|
end
|
404
404
|
|
405
405
|
end
|
406
406
|
|
407
|
+
|
408
|
+
|
409
|
+
class SourceFromCSVController < Storm::Source
|
410
|
+
|
411
|
+
def initialize(harness, progress)
|
412
|
+
@harness = harness
|
413
|
+
@progress = progress
|
414
|
+
|
415
|
+
Storm::Protocol.emits = harness._emits
|
416
|
+
Storm::Protocol.end_cycle_policy = harness._end_cycle_policy
|
417
|
+
end
|
418
|
+
|
419
|
+
def begin_cycle(*args)
|
420
|
+
@harness.begin_cycle
|
421
|
+
end
|
422
|
+
|
423
|
+
def prepare(*args)
|
424
|
+
end
|
425
|
+
|
426
|
+
def next_tuple
|
427
|
+
@harness.next_tuple(self)
|
428
|
+
end
|
429
|
+
|
430
|
+
end
|
431
|
+
|
432
|
+
|
407
433
|
class EachController < Storm::Each
|
408
434
|
|
435
|
+
|
409
436
|
def initialize(harness, progress)
|
410
437
|
@harness = harness
|
411
438
|
@progress = progress
|
@@ -18,6 +18,8 @@ class Zillabyte::Harness::OperationHandler
|
|
18
18
|
case type
|
19
19
|
when "source"
|
20
20
|
@_operation = Zillabyte::Harness::Source.new(@_app, *args)
|
21
|
+
when "source_from_csv"
|
22
|
+
@_operation = Zillabyte::Harness::SourceFromCSV.new(@_app, *args)
|
21
23
|
when "each"
|
22
24
|
@_operation = Zillabyte::Harness::Each.new(@_app, *args)
|
23
25
|
when "filter"
|
@@ -71,7 +71,7 @@ class Zillabyte::Harness::Source
|
|
71
71
|
attr_accessor :_app, :_node, :_relation, :_options
|
72
72
|
|
73
73
|
class Node < Zillabyte::Harness::CommonNode
|
74
|
-
attr_accessor :_matches, :_relation, :_end_cycle_policy, :_begin_cycle, :_next_tuple
|
74
|
+
attr_accessor :_filename, :_headers, :_matches, :_relation, :_end_cycle_policy, :_begin_cycle, :_next_tuple
|
75
75
|
|
76
76
|
def initialize(v, options = {})
|
77
77
|
@_name = options[:name] || "source_"+Zillabyte::Harness::Counter.get()
|
@@ -87,8 +87,8 @@ class Zillabyte::Harness::Source
|
|
87
87
|
when Array
|
88
88
|
@_matches = v
|
89
89
|
end
|
90
|
-
end
|
91
|
-
|
90
|
+
end
|
91
|
+
|
92
92
|
def parallelism(v)
|
93
93
|
throw "parallelism cannot be specified for sources"
|
94
94
|
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# OPERATION SourceFromCSV
|
2
|
+
# TAGLINE
|
3
|
+
# A source from CSV allows your app to easily source from a CSV on your local machine
|
4
|
+
# DESCRIPTION
|
5
|
+
#
|
6
|
+
# ## Sourcing From A CSV With Headers
|
7
|
+
#
|
8
|
+
# By default, SourceFromCSV parses the first line of the CSV for headers; these headers are the keys of the tuple hash object
|
9
|
+
# emitted by the source.
|
10
|
+
#
|
11
|
+
# ## Source From A CSV Without Headers
|
12
|
+
#
|
13
|
+
# You can also specify the headers as a second argument (an array of header names) to the SourceFromCSV operation. See the examples below for the syntax.
|
14
|
+
#
|
15
|
+
# RUBY_SYNTAX
|
16
|
+
#
|
17
|
+
#
|
18
|
+
# # Sourcing from a CSV with Headers
|
19
|
+
# # Assume that companies.csv is a CSV with headers ["name", "email"]
|
20
|
+
# app.source_from_csv("companies.csv") # emits tuples in form {:name => "company", :email => "example@company.com"}
|
21
|
+
#
|
22
|
+
# # Sourcing from a CSV without Headers
|
23
|
+
#
|
24
|
+
# app.source_from_csv("companies.csv", headers=["name", "email"])
|
25
|
+
#
|
26
|
+
#
|
27
|
+
# SYNTAX_NOTES
|
28
|
+
#
|
29
|
+
# The first argument to the source_from_csv is the relative path of the CSV within the app directory. Absolute paths are
|
30
|
+
# not available when pushing to the Zillabyte Servers.
|
31
|
+
#
|
32
|
+
#
|
33
|
+
#
|
34
|
+
# RUBY_EXAMPLE
|
35
|
+
#
|
36
|
+
#   # Sourcing from a CSV with Headers
|
37
|
+
# # Assume that companies.csv is a CSV with headers ["name", "email"]
|
38
|
+
# stream = app.source_from_csv("companies.csv") # emits tuples in form {:name => "company", :email => "example@company.com"}
|
39
|
+
#
|
40
|
+
# stream = stream.each do |tuple|
|
41
|
+
#     name = tuple["name"]
|
42
|
+
# email = tuple["email"]
|
43
|
+
#
|
44
|
+
# # use the name and email here ...
|
45
|
+
#
|
46
|
+
# end
|
47
|
+
#
|
48
|
+
class Zillabyte::Harness::SourceFromCSV
|
49
|
+
|
50
|
+
attr_accessor :_app, :_node, :_options
|
51
|
+
|
52
|
+
|
53
|
+
class Node < Zillabyte::Harness::CommonNode
|
54
|
+
attr_accessor :_filename, :_file, :_headers, :_end_cycle_policy, :_harness
|
55
|
+
|
56
|
+
def initialize(v, options = {})
|
57
|
+
@_name = options[:name] || "source_"+Zillabyte::Harness::Counter.get()
|
58
|
+
@_type = 'source'
|
59
|
+
@_end_cycle_policy = :null_emit
|
60
|
+
require 'csv'
|
61
|
+
end
|
62
|
+
|
63
|
+
def begin_cycle
|
64
|
+
@_file = File.open(@_filename)
|
65
|
+
if @_headers.nil?
|
66
|
+
@_headers = CSV.parse_line(@_file.readline)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def next_tuple(harness)
|
71
|
+
begin
|
72
|
+
line = @_file.readline
|
73
|
+
csvline = CSV.parse_line(line)
|
74
|
+
tuple = {}
|
75
|
+
@_headers.each do |header|
|
76
|
+
tuple[header] = csvline.shift
|
77
|
+
end
|
78
|
+
harness.emit tuple
|
79
|
+
rescue EOFError => e
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def initialize(app, *args)
|
85
|
+
@_app = app
|
86
|
+
@_args, @_options = Zillabyte::Harness::Helper.get_vector_and_hashes(args)
|
87
|
+
end
|
88
|
+
|
89
|
+
def build_node(&block)
|
90
|
+
@_node = Node.new(@_args, @_options)
|
91
|
+
@_node._filename = @_args[0]
|
92
|
+
|
93
|
+
if @_args.length > 1
|
94
|
+
case @_args[1]
|
95
|
+
when Array
|
96
|
+
@_node._headers = @_args[1]
|
97
|
+
else
|
98
|
+
throw "Header argument must be an array of filenames"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
@_node._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
|
103
|
+
end
|
104
|
+
|
105
|
+
|
106
|
+
def run_operation
|
107
|
+
c = Zillabyte::Harness::SourceFromCSVController.new(@_node, Zillabyte::Common::Progress.new)
|
108
|
+
c.run(@_app._options)
|
109
|
+
end
|
110
|
+
|
111
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zillabyte
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.49
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- zillabyte
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-12-
|
11
|
+
date: 2014-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.9.
|
33
|
+
version: 0.9.49
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.9.
|
40
|
+
version: 0.9.49
|
41
41
|
description: The Official Zillabyte Gem
|
42
42
|
email:
|
43
43
|
- gem@zillabyte.com
|
@@ -68,6 +68,7 @@ files:
|
|
68
68
|
- ruby/lib/zillabyte/harness/operation_handler.rb
|
69
69
|
- ruby/lib/zillabyte/harness/sink.rb
|
70
70
|
- ruby/lib/zillabyte/harness/source.rb
|
71
|
+
- ruby/lib/zillabyte/harness/source_from_csv.rb
|
71
72
|
- ruby/lib/zillabyte/harness/stream.rb
|
72
73
|
- ruby/lib/zillabyte/harness/stream_builder.rb
|
73
74
|
- ruby/lib/zillabyte/harness/tuple.rb
|