zillabyte 0.9.48 → 0.9.49

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d757597a1cc573c8e11bd13735efceb7bc03e73
4
- data.tar.gz: d5c2a9a059791646d05d16c40fe906942522ba13
3
+ metadata.gz: 11c2e486f17f6b1e1198b6700e8519b4d1f9fecf
4
+ data.tar.gz: 7d57f06481ef828626ce8a960ff93e97dec78ee2
5
5
  SHA512:
6
- metadata.gz: 6b109dc4ec9e2b27c610306c41bf4f35b9922431a1a47579c182326275263e7d6da63ab901719bde554e3d0f69ad23790109552d8e095b36f0803c5d501f3ea0
7
- data.tar.gz: d5501fffe0acbb455dbecde048b382fd3642db7391c06eb23a80a9423e1555e6c905e72b5814ddc4c1442e04b002bc154010ca8a80fadeb74b72c7434da26cb0
6
+ metadata.gz: 7d9e61f6045437500441db54366e0e167da9fe108e8f5ccd086f3cd9f23445fa5d27b879a6d63828dd3dd4fcaa73b278ec27205ea9fa947dd6df65b54f271e2f
7
+ data.tar.gz: 9b0d9d9dc62671f262b35082a4a7bc85d71ca5833984c890d70bdee2dcfb68045f25ebf82268966101de7df445c11ad6b562c29dfae1daff6326fe9fae4b7611
@@ -30,11 +30,14 @@ class Zillabyte::Harness::App < Zillabyte::Harness::Base
30
30
  _source_common(Zillabyte::Harness::Stream, *args, &block)
31
31
  end
32
32
 
33
+ def source_from_csv(*args, &block)
34
+ _source_from_csv_common(Zillabyte::Harness::Stream, *args)
35
+ end
33
36
 
34
37
  def source_from_kinesis(*args, &block)
35
38
  _source_common(Zillabyte::Harness::Stream, "kinesis_demo_source")
36
39
  end
37
-
40
+ end
38
41
 
39
42
 
40
43
 
@@ -35,5 +35,13 @@ class Zillabyte::Harness::Base
35
35
  return Zillabyte::Harness::StreamBuilder.new(stream)
36
36
  end
37
37
 
38
-
38
+ def _source_from_csv_common(stream_class, *args, &block)
39
+ op = Zillabyte::Harness::OperationHandler.new(self, stream_class)
40
+ stream = op.build_multilang_operation("source_from_csv", *args, &block)
41
+ .add_operation_properties_to_info(:name, :type)
42
+ .handle_operation
43
+ .get_output_streams
44
+ return Zillabyte::Harness::StreamBuilder.new(stream)
45
+ end
46
+
39
47
  end
@@ -90,10 +90,10 @@ class Zillabyte::Harness::Helper
90
90
  ee = "Error in \"source\": \n\t "
91
91
  pp = @@_print_check_source
92
92
 
93
- rm = !source._relation.nil?
93
+ rm = !source._dataset.nil?
94
94
  mm = !source._matches.nil?
95
95
  if(rm or mm)
96
- msg = "#{ee}A custom \"source\" may not contain a \"matches\" clause. To source from a relation, see the following. #{pp}"
96
+ msg = "#{ee}A custom \"source\" may not contain a \"matches\" clause. To source from a dataset, see the following. #{pp}"
97
97
  Zillabyte::Harness::Helper.print_error(msg)
98
98
  end
99
99
 
@@ -113,16 +113,31 @@ class Zillabyte::Harness::Helper
113
113
  pp = @@_print_check_source
114
114
 
115
115
  if(args.length != 1)
116
- msg = "#{ee}Invalid number of arguments to \"source\". When sourcing from a relation, please supply a single SQL query-string or SXP query-array. #{pp}"
116
+ msg = "#{ee}Invalid number of arguments to \"source\". When sourcing from a dataset, please supply a single dataset name string. #{pp}"
117
117
  Zillabyte::Harness::Helper.print_error(msg)
118
118
  end
119
119
 
120
120
  if(!args[0].instance_of?(String) and !args[0].instance_of?(Array))
121
- msg = "#{ee}Invalid argument to \"source\". When sourcing from a relation, please supply a single SQL query-string or SXP query-array. #{pp}"
121
+ msg = "#{ee}Invalid argument to \"source\". When sourcing from a dataset, please supply a single dataset name string. #{pp}"
122
122
  Zillabyte::Harness::Helper.print_error(msg)
123
123
  end
124
124
  end
125
125
 
126
+ def self.check_source_from_csv_args(args)
127
+ ee = "Error in \"source_from_csv\": \n\t "
128
+ pp = @@_print_check_source_from_csv
129
+
130
+ if(args.length > 2)
131
+ msg = "#{ee}Invalid number of arguments to \"source_from_csv\". When sourcing from a CSV, please supply a single CSV filename and optionally a header keyword argument as an array of header names. #{pp}"
132
+ Zillabyte::Harness::Helper.print_error(msg)
133
+ end
134
+ if(!args[0].instance_of?(String) and !args[0].instance_of?(Array))
135
+ msg = "#{ee}Invalid argument to \"source\". When sourcing from a CSV, please supply a single CSV filename and optionally a header keyword argument as an array of header names. #{pp}"
136
+ Zillabyte::Harness::Helper.print_error(msg)
137
+ end
138
+ end
139
+
140
+
126
141
  def self.check_emits(operation, emits, streams)
127
142
  if operation == "component"
128
143
  oo = "outputs"
@@ -302,13 +317,13 @@ class Zillabyte::Harness::Helper
302
317
  columns = sink._columns
303
318
 
304
319
  if(!name)
305
- msg = "#{ee}Relation name must be specified! #{pp}"
320
+ msg = "#{ee}dataset name must be specified! #{pp}"
306
321
  Zillabyte::Harness::Helper.print_error(msg)
307
322
  end
308
323
  Zillabyte::Harness::Helper.check_name("sink", sink._name, {})
309
324
 
310
325
  if(columns.length == 0)
311
- msg = "#{ee}Must be at least one output field to relation \"#{name}\". #{pp}"
326
+ msg = "#{ee}Must be at least one output field to dataset \"#{name}\". #{pp}"
312
327
  Zillabyte::Harness::Helper.print_error(msg)
313
328
  end
314
329
  Zillabyte::Harness::Helper.check_sink_columns(sink)
@@ -318,7 +333,7 @@ class Zillabyte::Harness::Helper
318
333
  next
319
334
  end
320
335
  if(s._name == name and s._columns != columns)
321
- msg = "#{ee}The relation \"#{name}\" has already been specified and contains a different set of fields/types. #{pp}"
336
+ msg = "#{ee}The dataset \"#{name}\" has already been specified and contains a different set of fields/types. #{pp}"
322
337
  Zillabyte::Harness::Helper.print_error(msg)
323
338
  end
324
339
  end
@@ -334,19 +349,19 @@ class Zillabyte::Harness::Helper
334
349
  end
335
350
  end
336
351
 
337
- def self.check_field_format(operation, pp, cname, ctype, relation_name)
352
+ def self.check_field_format(operation, pp, cname, ctype, dataset_name)
338
353
  ee = "Error in \"#{operation}\": \n\t "
339
354
 
340
355
  if(!(cname.is_a?(String) or cname.is_a?(Symbol)) or (cname =~ /^\w+$/).nil?)
341
- msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{relation_name}\". #{pp}"
356
+ msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{dataset_name}\". #{pp}"
342
357
  Zillabyte::Harness::Helper.print_error(msg)
343
358
  end
344
359
  if(!ctype.instance_of?(Symbol))
345
- msg = "#{ee}Field data types must be SYMBOLS in \"#{relation_name}\". #{pp}"
360
+ msg = "#{ee}Field data types must be SYMBOLS in \"#{dataset_name}\". #{pp}"
346
361
  Zillabyte::Harness::Helper.print_error(msg)
347
362
  end
348
363
  if(!ALLOWED_TYPES.member?(ctype))
349
- msg = "#{ee}Invalid field data type at \"#{ctype}\" in \"#{relation_name}\". #{pp}"
364
+ msg = "#{ee}Invalid field data type at \"#{ctype}\" in \"#{dataset_name}\". #{pp}"
350
365
  Zillabyte::Harness::Helper.print_error(msg)
351
366
  end
352
367
  end
@@ -466,11 +481,11 @@ class Zillabyte::Harness::Helper
466
481
  @@_print_check_sink = <<-OUTPUT
467
482
  \n\n"Sink" Syntax:
468
483
  stream.sink do
469
- name "name_of_relation"
484
+ name "name_of_dataset"
470
485
  column "field_1", :type_1
471
486
  column "field_2", :type_2 ...
472
487
  end
473
- - "Sink" relation "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
488
+ - "Sink" dataset "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
474
489
  - Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
475
490
  - Field names cannot be "v[number]", "id", "confidence", "since" or "source" which are reserved Zillabyte names.
476
491
  - Field types must be SYMBOLS. The following types are allowed #{ALLOWED_TYPES.join(", ")}.
@@ -478,8 +493,8 @@ OUTPUT
478
493
 
479
494
  @@_print_check_source = <<-OUTPUT
480
495
  \n\n"Source" Syntax:
481
- Sourcing from a relation:
482
- app.source("SQL query" or [SXP queries])
496
+ Sourcing from a dataset:
497
+ app.source("dataset" name)
483
498
 
484
499
  Custom source:
485
500
  app.source do
@@ -495,6 +510,19 @@ OUTPUT
495
510
  - The "begin_cycle" and "next_tuple" blocks can be in do...end format or {} format.
496
511
  * the "begin_cycle" block is where any setup is done to initialize the content and quantity of tuples emitted by the "next_tuple" block.
497
512
  * the "next_tuple" block is where the tuples are actually emitted.
513
+ OUTPUT
514
+
515
+ @@_print_check_source_from_csv = <<-OUTPUT
516
+ \n\n"SourceFromCSV" Syntax:
517
+ Sourcing from a CSV with headers:
518
+ app.source_from_csv("csv_path")
519
+
520
+ Source from a CSV without headers
521
+ app.source_from_csv("csv_path", headers = ["name", "email"])
522
+
523
+ - The "csv_path" is the path to the CSV within your app directory.
524
+ - The "headers" field is an array of header names for the source to use.
525
+
498
526
  OUTPUT
499
527
 
500
528
  @@_print_check_filter = <<-OUTPUT
@@ -399,13 +399,40 @@ module Zillabyte
399
399
  end
400
400
 
401
401
  def next_tuple
402
- self.instance_eval &@harness._next_tuple
402
+ self.instance_exec(&@harness._next_tuple)
403
403
  end
404
404
 
405
405
  end
406
406
 
407
+
408
+
409
+ class SourceFromCSVController < Storm::Source
410
+
411
+ def initialize(harness, progress)
412
+ @harness = harness
413
+ @progress = progress
414
+
415
+ Storm::Protocol.emits = harness._emits
416
+ Storm::Protocol.end_cycle_policy = harness._end_cycle_policy
417
+ end
418
+
419
+ def begin_cycle(*args)
420
+ @harness.begin_cycle
421
+ end
422
+
423
+ def prepare(*args)
424
+ end
425
+
426
+ def next_tuple
427
+ @harness.next_tuple(self)
428
+ end
429
+
430
+ end
431
+
432
+
407
433
  class EachController < Storm::Each
408
434
 
435
+
409
436
  def initialize(harness, progress)
410
437
  @harness = harness
411
438
  @progress = progress
@@ -18,6 +18,8 @@ class Zillabyte::Harness::OperationHandler
18
18
  case type
19
19
  when "source"
20
20
  @_operation = Zillabyte::Harness::Source.new(@_app, *args)
21
+ when "source_from_csv"
22
+ @_operation = Zillabyte::Harness::SourceFromCSV.new(@_app, *args)
21
23
  when "each"
22
24
  @_operation = Zillabyte::Harness::Each.new(@_app, *args)
23
25
  when "filter"
@@ -71,7 +71,7 @@ class Zillabyte::Harness::Source
71
71
  attr_accessor :_app, :_node, :_relation, :_options
72
72
 
73
73
  class Node < Zillabyte::Harness::CommonNode
74
- attr_accessor :_matches, :_relation, :_end_cycle_policy, :_begin_cycle, :_next_tuple
74
+ attr_accessor :_filename, :_headers, :_matches, :_relation, :_end_cycle_policy, :_begin_cycle, :_next_tuple
75
75
 
76
76
  def initialize(v, options = {})
77
77
  @_name = options[:name] || "source_"+Zillabyte::Harness::Counter.get()
@@ -87,8 +87,8 @@ class Zillabyte::Harness::Source
87
87
  when Array
88
88
  @_matches = v
89
89
  end
90
- end
91
-
90
+ end
91
+
92
92
  def parallelism(v)
93
93
  throw "parallelism cannot be specified for sources"
94
94
  end
@@ -0,0 +1,111 @@
1
+ # OPERATION SourceFromCSV
2
+ # TAGLINE
3
+ # A source from CSV allows your app to easily source data from a CSV file on your local machine
4
+ # DESCRIPTION
5
+ #
6
+ # ## Sourcing From A CSV With Headers
7
+ #
8
+ # By default, the SourceFromCSV will parse the first line of the CSV for headers. These headers are the keys of the tuple hash object
9
+ # emitted by the source.
10
+ #
11
+ # ## Source From A CSV Without Headers
12
+ #
13
+ # You can also specify the headers by passing an array of header names as the second argument to the SourceFromCSV operation. See the examples below for the syntax.
14
+ #
15
+ # RUBY_SYNTAX
16
+ #
17
+ #
18
+ # # Sourcing from a CSV with Headers
19
+ # # Assume that companies.csv is a CSV with headers ["name", "email"]
20
+ # app.source_from_csv("companies.csv") # emits tuples in form {:name => "company", :email => "example@company.com"}
21
+ #
22
+ # # Sourcing from a CSV without Headers
23
+ #
24
+ # app.source_from_csv("companies.csv", headers=["name", "email"])
25
+ #
26
+ #
27
+ # SYNTAX_NOTES
28
+ #
29
+ # The first argument to the source_from_csv is the relative path of the CSV within the app directory. Absolute paths are
30
+ # not available when pushing to the Zillabyte Servers.
31
+ #
32
+ #
33
+ #
34
+ # RUBY_EXAMPLE
35
+ #
36
+ # # Sourcing from a CSV with Headers
37
+ # # Assume that companies.csv is a CSV with headers ["name", "email"]
38
+ # stream = app.source_from_csv("companies.csv") # emits tuples in form {:name => "company", :email => "example@company.com"}
39
+ #
40
+ # stream = stream.each do |tuple|
41
+ # name = tuple["name"]
42
+ # email = tuple["email"]
43
+ #
44
+ # # use the name and email here ...
45
+ #
46
+ # end
47
+ #
48
+ class Zillabyte::Harness::SourceFromCSV
49
+
50
+ attr_accessor :_app, :_node, :_options
51
+
52
+
53
+ class Node < Zillabyte::Harness::CommonNode
54
+ attr_accessor :_filename, :_file, :_headers, :_end_cycle_policy, :_harness
55
+
56
+ def initialize(v, options = {})
57
+ @_name = options[:name] || "source_"+Zillabyte::Harness::Counter.get()
58
+ @_type = 'source'
59
+ @_end_cycle_policy = :null_emit
60
+ require 'csv'
61
+ end
62
+
63
+ def begin_cycle
64
+ @_file = File.open(@_filename)
65
+ if @_headers.nil?
66
+ @_headers = CSV.parse_line(@_file.readline)
67
+ end
68
+ end
69
+
70
+ def next_tuple(harness)
71
+ begin
72
+ line = @_file.readline
73
+ csvline = CSV.parse_line(line)
74
+ tuple = {}
75
+ @_headers.each do |header|
76
+ tuple[header] = csvline.shift
77
+ end
78
+ harness.emit tuple
79
+ rescue EOFError => e
80
+ end
81
+ end
82
+ end
83
+
84
+ def initialize(app, *args)
85
+ @_app = app
86
+ @_args, @_options = Zillabyte::Harness::Helper.get_vector_and_hashes(args)
87
+ end
88
+
89
+ def build_node(&block)
90
+ @_node = Node.new(@_args, @_options)
91
+ @_node._filename = @_args[0]
92
+
93
+ if @_args.length > 1
94
+ case @_args[1]
95
+ when Array
96
+ @_node._headers = @_args[1]
97
+ else
98
+ throw "Header argument must be an array of filenames"
99
+ end
100
+ end
101
+
102
+ @_node._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
103
+ end
104
+
105
+
106
+ def run_operation
107
+ c = Zillabyte::Harness::SourceFromCSVController.new(@_node, Zillabyte::Common::Progress.new)
108
+ c.run(@_app._options)
109
+ end
110
+
111
+ end
@@ -1,3 +1,3 @@
1
1
  module Zillabyte
2
- VERSION = "0.9.48" unless defined?(VERSION)
2
+ VERSION = "0.9.49" unless defined?(VERSION)
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zillabyte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.48
4
+ version: 0.9.49
5
5
  platform: ruby
6
6
  authors:
7
7
  - zillabyte
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-12-22 00:00:00.000000000 Z
11
+ date: 2014-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.9.48
33
+ version: 0.9.49
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.9.48
40
+ version: 0.9.49
41
41
  description: The Official Zillabyte Gem
42
42
  email:
43
43
  - gem@zillabyte.com
@@ -68,6 +68,7 @@ files:
68
68
  - ruby/lib/zillabyte/harness/operation_handler.rb
69
69
  - ruby/lib/zillabyte/harness/sink.rb
70
70
  - ruby/lib/zillabyte/harness/source.rb
71
+ - ruby/lib/zillabyte/harness/source_from_csv.rb
71
72
  - ruby/lib/zillabyte/harness/stream.rb
72
73
  - ruby/lib/zillabyte/harness/stream_builder.rb
73
74
  - ruby/lib/zillabyte/harness/tuple.rb