zillabyte 0.9.48 → 0.9.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d757597a1cc573c8e11bd13735efceb7bc03e73
4
- data.tar.gz: d5c2a9a059791646d05d16c40fe906942522ba13
3
+ metadata.gz: 11c2e486f17f6b1e1198b6700e8519b4d1f9fecf
4
+ data.tar.gz: 7d57f06481ef828626ce8a960ff93e97dec78ee2
5
5
  SHA512:
6
- metadata.gz: 6b109dc4ec9e2b27c610306c41bf4f35b9922431a1a47579c182326275263e7d6da63ab901719bde554e3d0f69ad23790109552d8e095b36f0803c5d501f3ea0
7
- data.tar.gz: d5501fffe0acbb455dbecde048b382fd3642db7391c06eb23a80a9423e1555e6c905e72b5814ddc4c1442e04b002bc154010ca8a80fadeb74b72c7434da26cb0
6
+ metadata.gz: 7d9e61f6045437500441db54366e0e167da9fe108e8f5ccd086f3cd9f23445fa5d27b879a6d63828dd3dd4fcaa73b278ec27205ea9fa947dd6df65b54f271e2f
7
+ data.tar.gz: 9b0d9d9dc62671f262b35082a4a7bc85d71ca5833984c890d70bdee2dcfb68045f25ebf82268966101de7df445c11ad6b562c29dfae1daff6326fe9fae4b7611
@@ -30,11 +30,14 @@ class Zillabyte::Harness::App < Zillabyte::Harness::Base
30
30
  _source_common(Zillabyte::Harness::Stream, *args, &block)
31
31
  end
32
32
 
33
# Adds a CSV-backed source to the app.
#
# args  - forwarded unchanged to _source_from_csv_common (the CSV path and,
#         optionally, an array of header names).
# block - forwarded as well, matching how `source` hands its block to
#         _source_common; previously it was accepted but silently dropped.
#
# Returns whatever _source_from_csv_common returns.
def source_from_csv(*args, &block)
  _source_from_csv_common(Zillabyte::Harness::Stream, *args, &block)
end
33
36
 
34
37
  def source_from_kinesis(*args, &block)
35
38
  _source_common(Zillabyte::Harness::Stream, "kinesis_demo_source")
36
39
  end
37
-
40
+ end
38
41
 
39
42
 
40
43
 
@@ -35,5 +35,13 @@ class Zillabyte::Harness::Base
35
35
  return Zillabyte::Harness::StreamBuilder.new(stream)
36
36
  end
37
37
 
38
-
38
# Builds a "source_from_csv" operation through an OperationHandler and wraps
# its output streams in a StreamBuilder.
#
# stream_class - passed to OperationHandler.new together with self.
# args, block  - forwarded to build_multilang_operation.
#
# Returns a Zillabyte::Harness::StreamBuilder.
def _source_from_csv_common(stream_class, *args, &block)
  handler = Zillabyte::Harness::OperationHandler.new(self, stream_class)

  # Each step is called on the return value of the previous one.
  built = handler.build_multilang_operation("source_from_csv", *args, &block)
  described = built.add_operation_properties_to_info(:name, :type)
  handled = described.handle_operation
  output_streams = handled.get_output_streams

  Zillabyte::Harness::StreamBuilder.new(output_streams)
end
46
+
39
47
  end
@@ -90,10 +90,10 @@ class Zillabyte::Harness::Helper
90
90
  ee = "Error in \"source\": \n\t "
91
91
  pp = @@_print_check_source
92
92
 
93
- rm = !source._relation.nil?
93
+ rm = !source._dataset.nil?
94
94
  mm = !source._matches.nil?
95
95
  if(rm or mm)
96
- msg = "#{ee}A custom \"source\" may not contain a \"matches\" clause. To source from a relation, see the following. #{pp}"
96
+ msg = "#{ee}A custom \"source\" may not contain a \"matches\" clause. To source from a dataset, see the following. #{pp}"
97
97
  Zillabyte::Harness::Helper.print_error(msg)
98
98
  end
99
99
 
@@ -113,16 +113,31 @@ class Zillabyte::Harness::Helper
113
113
  pp = @@_print_check_source
114
114
 
115
115
  if(args.length != 1)
116
- msg = "#{ee}Invalid number of arguments to \"source\". When sourcing from a relation, please supply a single SQL query-string or SXP query-array. #{pp}"
116
+ msg = "#{ee}Invalid number of arguments to \"source\". When sourcing from a dataset, please supply a single dataset name string. #{pp}"
117
117
  Zillabyte::Harness::Helper.print_error(msg)
118
118
  end
119
119
 
120
120
  if(!args[0].instance_of?(String) and !args[0].instance_of?(Array))
121
- msg = "#{ee}Invalid argument to \"source\". When sourcing from a relation, please supply a single SQL query-string or SXP query-array. #{pp}"
121
+ msg = "#{ee}Invalid argument to \"source\". When sourcing from a dataset, please supply a single dataset name string. #{pp}"
122
122
  Zillabyte::Harness::Helper.print_error(msg)
123
123
  end
124
124
  end
125
125
 
126
# Validates the positional arguments given to "source_from_csv".
#
# args - the argument list; args[0] must be the CSV filename (a String) and at
#        most one further argument (the header names) may follow.
#
# Every problem found is reported via Zillabyte::Harness::Helper.print_error,
# with the "source_from_csv" syntax help appended.
def self.check_source_from_csv_args(args)
  ee = "Error in \"source_from_csv\": \n\t "
  pp = @@_print_check_source_from_csv

  if args.length > 2
    msg = "#{ee}Invalid number of arguments to \"source_from_csv\". When sourcing from a CSV, please supply a single CSV filename and optionally a header keyword argument as an array of header names. #{pp}"
    Zillabyte::Harness::Helper.print_error(msg)
  end

  # The filename is later handed to File.open, so only a String is valid here.
  # An Array used to be accepted, and the message named "source" rather than
  # "source_from_csv". A missing filename (nil) is rejected by this check too.
  unless args[0].instance_of?(String)
    msg = "#{ee}Invalid argument to \"source_from_csv\". When sourcing from a CSV, please supply a single CSV filename and optionally a header keyword argument as an array of header names. #{pp}"
    Zillabyte::Harness::Helper.print_error(msg)
  end
end
139
+
140
+
126
141
  def self.check_emits(operation, emits, streams)
127
142
  if operation == "component"
128
143
  oo = "outputs"
@@ -302,13 +317,13 @@ class Zillabyte::Harness::Helper
302
317
  columns = sink._columns
303
318
 
304
319
  if(!name)
305
- msg = "#{ee}Relation name must be specified! #{pp}"
320
+ msg = "#{ee}dataset name must be specified! #{pp}"
306
321
  Zillabyte::Harness::Helper.print_error(msg)
307
322
  end
308
323
  Zillabyte::Harness::Helper.check_name("sink", sink._name, {})
309
324
 
310
325
  if(columns.length == 0)
311
- msg = "#{ee}Must be at least one output field to relation \"#{name}\". #{pp}"
326
+ msg = "#{ee}Must be at least one output field to dataset \"#{name}\". #{pp}"
312
327
  Zillabyte::Harness::Helper.print_error(msg)
313
328
  end
314
329
  Zillabyte::Harness::Helper.check_sink_columns(sink)
@@ -318,7 +333,7 @@ class Zillabyte::Harness::Helper
318
333
  next
319
334
  end
320
335
  if(s._name == name and s._columns != columns)
321
- msg = "#{ee}The relation \"#{name}\" has already been specified and contains a different set of fields/types. #{pp}"
336
+ msg = "#{ee}The dataset \"#{name}\" has already been specified and contains a different set of fields/types. #{pp}"
322
337
  Zillabyte::Harness::Helper.print_error(msg)
323
338
  end
324
339
  end
@@ -334,19 +349,19 @@ class Zillabyte::Harness::Helper
334
349
  end
335
350
  end
336
351
 
337
- def self.check_field_format(operation, pp, cname, ctype, relation_name)
352
+ def self.check_field_format(operation, pp, cname, ctype, dataset_name)
338
353
  ee = "Error in \"#{operation}\": \n\t "
339
354
 
340
355
  if(!(cname.is_a?(String) or cname.is_a?(Symbol)) or (cname =~ /^\w+$/).nil?)
341
- msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{relation_name}\". #{pp}"
356
+ msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{dataset_name}\". #{pp}"
342
357
  Zillabyte::Harness::Helper.print_error(msg)
343
358
  end
344
359
  if(!ctype.instance_of?(Symbol))
345
- msg = "#{ee}Field data types must be SYMBOLS in \"#{relation_name}\". #{pp}"
360
+ msg = "#{ee}Field data types must be SYMBOLS in \"#{dataset_name}\". #{pp}"
346
361
  Zillabyte::Harness::Helper.print_error(msg)
347
362
  end
348
363
  if(!ALLOWED_TYPES.member?(ctype))
349
- msg = "#{ee}Invalid field data type at \"#{ctype}\" in \"#{relation_name}\". #{pp}"
364
+ msg = "#{ee}Invalid field data type at \"#{ctype}\" in \"#{dataset_name}\". #{pp}"
350
365
  Zillabyte::Harness::Helper.print_error(msg)
351
366
  end
352
367
  end
@@ -466,11 +481,11 @@ class Zillabyte::Harness::Helper
466
481
  @@_print_check_sink = <<-OUTPUT
467
482
  \n\n"Sink" Syntax:
468
483
  stream.sink do
469
- name "name_of_relation"
484
+ name "name_of_dataset"
470
485
  column "field_1", :type_1
471
486
  column "field_2", :type_2 ...
472
487
  end
473
- - "Sink" relation "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
488
+ - "Sink" dataset "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
474
489
  - Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
475
490
  - Field names cannot be "v[number]", "id", "confidence", "since" or "source" which are reserved Zillabyte names.
476
491
  - Field types must be SYMBOLS. The following types are allowed #{ALLOWED_TYPES.join(", ")}.
@@ -478,8 +493,8 @@ OUTPUT
478
493
 
479
494
  @@_print_check_source = <<-OUTPUT
480
495
  \n\n"Source" Syntax:
481
- Sourcing from a relation:
482
- app.source("SQL query" or [SXP queries])
496
+ Sourcing from a dataset:
497
+ app.source("dataset" name)
483
498
 
484
499
  Custom source:
485
500
  app.source do
@@ -495,6 +510,19 @@ OUTPUT
495
510
  - The "begin_cycle" and "next_tuple" blocks can be in do...end format or {} format.
496
511
  * the "begin_cycle" block is where any setup is done to initialize the content and quantity of tuples emitted by the "next_tuple" block.
497
512
  * the "next_tuple" block is where the tuples are actually emitted.
513
+ OUTPUT
514
+
515
+ @@_print_check_source_from_csv = <<-OUTPUT
516
+ \n\n"SourceFromCSV" Syntax:
517
+ Sourcing from a CSV with headers:
518
+ app.source_from_csv("csv_path")
519
+
520
+ Source from a CSV without headers
521
+ app.source_from_csv("csv_path", headers = ["name", "email"])
522
+
523
+ - The "csv_path" is the path to the CSV within your app directory.
524
+ - The "headers" field is an array of header names for the source to use.
525
+
498
526
  OUTPUT
499
527
 
500
528
  @@_print_check_filter = <<-OUTPUT
@@ -399,13 +399,40 @@ module Zillabyte
399
399
  end
400
400
 
401
401
  def next_tuple
402
- self.instance_eval &@harness._next_tuple
402
+ self.instance_exec(&@harness._next_tuple)
403
403
  end
404
404
 
405
405
  end
406
406
 
407
+
408
+
409
# Storm::Source subclass that delegates the cycle callbacks to the harness
# object it is constructed with.
class SourceFromCSVController < Storm::Source

  # harness  - object that responds to _emits, _end_cycle_policy, begin_cycle
  #            and next_tuple (all are called by this class).
  # progress - stored in @progress; not used by the methods defined here.
  def initialize(harness, progress)
    # Publish the harness settings on the protocol before keeping references.
    Storm::Protocol.emits = harness._emits
    Storm::Protocol.end_cycle_policy = harness._end_cycle_policy

    @harness, @progress = harness, progress
  end

  # Any arguments are accepted and ignored; the work is done by the harness.
  def begin_cycle(*)
    @harness.begin_cycle
  end

  # Nothing to prepare; arguments are accepted and ignored.
  def prepare(*)
  end

  # Hands this controller to the harness so it can emit through it.
  def next_tuple
    @harness.next_tuple(self)
  end

end
431
+
432
+
407
433
  class EachController < Storm::Each
408
434
 
435
+
409
436
  def initialize(harness, progress)
410
437
  @harness = harness
411
438
  @progress = progress
@@ -18,6 +18,8 @@ class Zillabyte::Harness::OperationHandler
18
18
  case type
19
19
  when "source"
20
20
  @_operation = Zillabyte::Harness::Source.new(@_app, *args)
21
+ when "source_from_csv"
22
+ @_operation = Zillabyte::Harness::SourceFromCSV.new(@_app, *args)
21
23
  when "each"
22
24
  @_operation = Zillabyte::Harness::Each.new(@_app, *args)
23
25
  when "filter"
@@ -71,7 +71,7 @@ class Zillabyte::Harness::Source
71
71
  attr_accessor :_app, :_node, :_relation, :_options
72
72
 
73
73
  class Node < Zillabyte::Harness::CommonNode
74
- attr_accessor :_matches, :_relation, :_end_cycle_policy, :_begin_cycle, :_next_tuple
74
+ attr_accessor :_filename, :_headers, :_matches, :_relation, :_end_cycle_policy, :_begin_cycle, :_next_tuple
75
75
 
76
76
  def initialize(v, options = {})
77
77
  @_name = options[:name] || "source_"+Zillabyte::Harness::Counter.get()
@@ -87,8 +87,8 @@ class Zillabyte::Harness::Source
87
87
  when Array
88
88
  @_matches = v
89
89
  end
90
- end
91
-
90
+ end
91
+
92
92
  def parallelism(v)
93
93
  throw "parallelism cannot be specified for sources"
94
94
  end
@@ -0,0 +1,111 @@
1
+ # OPERATION SourceFromCSV
2
+ # TAGLINE
3
+ # A source from CSV allows your app to easily source from a CSV on your local machine
4
+ # DESCRIPTION
5
+ #
6
+ # ## Sourcing From A CSV With Headers
7
+ #
8
+ # By default, the SourceFromCSV will parse the first line of the CSV for headers; these headers are the keys of the tuple hash object
9
+ # emitted by the source.
10
+ #
11
+ # ## Source From A CSV Without Headers
12
+ #
13
+ # You can also specify the headers as an optional second argument (an array of header names) to the SourceFromCSV operation. See the examples below for the syntax.
14
+ #
15
+ # RUBY_SYNTAX
16
+ #
17
+ #
18
+ # # Sourcing from a CSV with Headers
19
+ # # Assume that companies.csv is a CSV with headers ["name", "email"]
20
+ # app.source_from_csv("companies.csv") # emits tuples in form {:name => "company", :email => "example@company.com"}
21
+ #
22
+ # # Sourcing from a CSV without Headers
23
+ #
24
+ # app.source_from_csv("companies.csv", headers=["name", "email"])
25
+ #
26
+ #
27
+ # SYNTAX_NOTES
28
+ #
29
+ # The first argument to the source_from_csv is the relative path of the CSV within the app directory. Absolute paths are
30
+ # not available when pushing to the Zillabyte Servers.
31
+ #
32
+ #
33
+ #
34
+ # RUBY_EXAMPLE
35
+ #
36
+ # # # Sourcing from a CSV with Headers
37
+ # # Assume that companies.csv is a CSV with headers ["name", "email"]
38
+ # stream = app.source_from_csv("companies.csv") # emits tuples in form {:name => "company", :email => "example@company.com"}
39
+ #
40
+ # stream = stream.each do |tuple|
41
+ # name = tuple["name"]
42
+ # email = tuple["email"]
43
+ #
44
+ # # use the name and email here ...
45
+ #
46
+ # end
47
+ #
48
# Operation that reads a CSV file and emits one tuple (a Hash keyed by header
# name) per row.
class Zillabyte::Harness::SourceFromCSV

  attr_accessor :_app, :_node, :_options

  # Node holding the CSV state: the filename, the open file handle and the
  # header names used as tuple keys.
  class Node < Zillabyte::Harness::CommonNode
    attr_accessor :_filename, :_file, :_headers, :_end_cycle_policy, :_harness

    # v       - accepted for signature compatibility; not used here.
    # options - Hash; :name overrides the generated "source_<counter>" name.
    def initialize(v, options = {})
      @_name = options[:name] || "source_"+Zillabyte::Harness::Counter.get()
      @_type = 'source'
      @_end_cycle_policy = :null_emit
      # Loaded here so the csv library is only required when a CSV source is
      # actually built.
      require 'csv'
    end

    # (Re)opens the CSV at the start of a cycle.
    #
    # When no headers were supplied, the first line of the file is parsed as
    # the header row. On later cycles that same row is skipped again so it is
    # not emitted as data.
    #
    # Raises EOFError if headers must be inferred and the file is empty.
    def begin_cycle
      # Release the handle left over from the previous cycle before reopening.
      @_file.close if @_file && !@_file.closed?
      @_file = File.open(@_filename)

      if @_headers.nil?
        @_headers = CSV.parse_line(@_file.readline)
        @_headers_from_file = true
      elsif @_headers_from_file
        @_file.gets
      end
    end

    # Reads the next data row and emits it through harness.emit as a Hash of
    # header => value. Missing trailing values become nil; values beyond the
    # last header are dropped.
    #
    # Blank lines (for which CSV.parse_line returns nil) are skipped instead
    # of raising. At end of file nothing is emitted and nil is returned.
    # NOTE(review): presumably the :null_emit end-cycle policy treats "nothing
    # emitted" as the end of the cycle, which is why blank lines are skipped
    # rather than returned on - confirm against Storm::Protocol.
    # NOTE(review): rows are parsed one physical line at a time, so quoted
    # fields containing newlines are not supported.
    def next_tuple(harness)
      row = nil
      while row.nil?
        line = @_file.gets
        return nil if line.nil?
        row = CSV.parse_line(line)
      end

      harness.emit Hash[@_headers.zip(row)]
    end
  end

  # app  - the owning app; its _options are passed to the controller's run.
  # args - split by Helper.get_vector_and_hashes into positional arguments
  #        (@_args) and options (@_options).
  def initialize(app, *args)
    @_app = app
    @_args, @_options = Zillabyte::Harness::Helper.get_vector_and_hashes(args)
  end

  # Creates the Node for this operation.
  #
  # The first positional argument is the CSV filename; an optional second one
  # must be an Array of header names.
  #
  # Raises ArgumentError if a second argument is given and is not an Array.
  def build_node(&block)
    @_node = Node.new(@_args, @_options)
    @_node._filename = @_args[0]

    if @_args.length > 1
      headers = @_args[1]
      # `throw` is for catch/throw control flow; an uncaught throw surfaced as
      # an ArgumentError subclass anyway, so raise one directly.
      unless headers.is_a?(Array)
        raise ArgumentError, "Header argument must be an array of header names"
      end
      @_node._headers = headers
    end

    @_node._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
  end


  # Runs the node under a SourceFromCSVController with the app's options.
  def run_operation
    c = Zillabyte::Harness::SourceFromCSVController.new(@_node, Zillabyte::Common::Progress.new)
    c.run(@_app._options)
  end

end
@@ -1,3 +1,3 @@
1
1
  module Zillabyte
2
- VERSION = "0.9.48" unless defined?(VERSION)
2
+ VERSION = "0.9.49" unless defined?(VERSION)
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zillabyte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.48
4
+ version: 0.9.49
5
5
  platform: ruby
6
6
  authors:
7
7
  - zillabyte
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-22 00:00:00.000000000 Z
11
+ date: 2014-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.9.48
33
+ version: 0.9.49
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.9.48
40
+ version: 0.9.49
41
41
  description: The Official Zillabyte Gem
42
42
  email:
43
43
  - gem@zillabyte.com
@@ -68,6 +68,7 @@ files:
68
68
  - ruby/lib/zillabyte/harness/operation_handler.rb
69
69
  - ruby/lib/zillabyte/harness/sink.rb
70
70
  - ruby/lib/zillabyte/harness/source.rb
71
+ - ruby/lib/zillabyte/harness/source_from_csv.rb
71
72
  - ruby/lib/zillabyte/harness/stream.rb
72
73
  - ruby/lib/zillabyte/harness/stream_builder.rb
73
74
  - ruby/lib/zillabyte/harness/tuple.rb