zillabyte 0.9.36 → 0.9.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d7231040a821d67441f1e33bdb7ea719ea37111d
4
- data.tar.gz: ca2c97df4729f8f6c251e8712e27beb2cb35e51a
3
+ metadata.gz: 8ee2c261c664c1674df0530fcf41f5b909998822
4
+ data.tar.gz: 06196fc228374cb2eb014c91b4b563dbe1f5f5e8
5
5
  SHA512:
6
- metadata.gz: 1c4e2fcc1793f57db5bd85015a47401566d2c0a7b96cfd7220f3dc2135fbe02cb613d3c34a57f6ce9ee9fa280c18a09d7d35609d5ea0029afda1b17c2d8e52ad
7
- data.tar.gz: ca468ce6b88861174acbe45440bec83b8f9156d44b66992d9be2a186d0b09762c675733b15aa2b31760b40020b4570ce3675d91114d11609821884515d1ca9ef
6
+ metadata.gz: df3498fec8e7c6aec0e146d7614ff0b41d8c9d8386c2c08237995352b142e13c642680bd6d4250b4aab6a112de57181a6890373f52dcb515277dfbc29c3e3bff
7
+ data.tar.gz: 0daa4f3157c1349ce402ce53848e06d0b70267fe5782ffdee2bad4883fe8d149ad829900b1bd6ae41b214923e23c5f2b768d94d9d04c8f609d60700f013d89ad
@@ -3,6 +3,9 @@ module Zillabyte
3
3
  module Harness
4
4
 
5
5
  def self.load
6
+ require File.join(File.dirname(__FILE__), "harness", "app.rb")
7
+ require File.join(File.dirname(__FILE__), "harness", "base.rb")
8
+ require File.join(File.dirname(__FILE__), "harness", "common_node.rb")
6
9
  Dir[File.join(File.dirname(__FILE__), "harness", "*.rb")].sort.each do |file|
7
10
  require file
8
11
  end
@@ -1,34 +1,110 @@
1
+ # OPERATION Clump
2
+ # TAGLINE
3
+ # Clumps tuples together in the stream for faster micro-batch processing
4
+ # DESCRIPTION
5
+ # The `clump` operation will collect a small 'clump' of tuples before passing it on to your code. This allows you to
6
+ # easily handle situations that require small bursts of computation. For example, suppose you are accessing a third
7
+ # party database and wish to query it in bursts of 100 commands at a time.
8
+ #
9
+ # RUBY_SYNTAX
10
+ # # Simplified syntax:
11
+ #
12
+ # stream.clump(:size => 100) do |tuples|
13
+ # # tuples is an array
14
+ # |=block=|
15
+ # end
16
+ #
17
+ # # Custom clump:
18
+ # stream.clump do
19
+ # name "name" # the name of the operation
20
+ # size 100 # the target size of the clump
21
+ # emits "stream_1", "stream_2" # optional for single output stream
22
+ # output_format :replace # :replace or :merge, optional, defaults to :replace
23
+ # prepare |=block=| # optional if no initialization needed
24
+ # execute |=block=|
25
+ # end
26
+ #
27
+ # RUBY_NOTES
28
+ # The `size` parameter is optional. It is the target size of the clump. The system is guaranteed not to exceed `size`, but there are no guarantees about smaller clumps. If no `size` is given, it defaults to 100.
29
+ #
30
+ # The simplified syntax can be used if a prepare block and other customizations are not needed.
31
+ #
32
+ # * **Note:** this syntax only works for a single output stream.
33
+ #
34
+ # The allowed output formats are :replace and :merge.
35
+ #
36
+ # * **:replace** : discards the input tuple values and only emits the specified values. This is the default.
37
+ # * **:merge** : re-emits the input tuple values along with the specified values.
38
+ #
39
+ # The "prepare" and "execute" blocks can be in do...end format or {} format.
40
+ #
41
+ # * **"prepare"** block : where any setup is done to prepare for tuple processing in the "execute" block.
42
+ # * **"execute"** block : where the tuples are actually processed. It must take in a single argument (the array of "tuples" in the clump).
43
+ #
44
+ # RUBY_EXAMPLE
45
+ #
46
+ # # Sink data into an external sql database
47
+ # stream.clump(:size => 50) do |tuples|
48
+ # sql = "INSERT INTO table VALUES #{tuples.map{|t| t['value']}.join(',')}"
49
+ # @my_sql_service.execute(sql)
50
+ # end
51
+ #
52
+
1
53
  class Zillabyte::Harness::Clump
2
- attr_accessor :_name, :_type, :_emits, :_output_format, :_parallelism, :_prepare, :_execute
54
+ attr_accessor :_app, :_node, :_options
3
55
 
4
- def initialize()
5
- @_name = "clump_"+Zillabyte::Harness::Counter.get()
6
- @_type = "clump"
7
- @_output_format = :replace
8
- end
9
-
10
- def name(v)
11
- @_name = v
12
- end
56
+ class Node < Zillabyte::Harness::CommonNode
57
+ attr_accessor :_output_format, :_execute, :_size
58
+
59
+ def initialize(v, options = {})
60
+ @_name = v[0] || "clump_"+Zillabyte::Harness::Counter.get()
61
+ @_type = "clump"
62
+ @_size = options[:size] || 100
63
+ @_output_format = :replace
64
+ self._emits = options[:emits]
65
+ end
13
66
 
14
- def emits(*v)
15
- @_emits = *v
16
- end
67
+ def output_format(v)
68
+ @_output_format = v
69
+ end
17
70
 
18
- def output_format(v)
19
- @_output_format = v
71
+ def execute(&block)
72
+ @_execute = block
73
+ end
74
+
75
+ def size(size)
76
+ @_size = size
77
+ end
20
78
  end
21
79
 
22
- def parallelism(v)
23
- @_parallelism = v
80
+ def initialize(app, *args)
81
+ @_app = app
82
+ @_args, @_options = Zillabyte::Harness::Helper.get_vector_and_hashes(args)
24
83
  end
25
84
 
26
- def prepare(&block)
27
- @_prepare = block
28
- end
85
+ def build_node(&block)
86
+ @_node = Node.new(@_args, @_options)
87
+ # Does the block take 0 arguments? If so it's not just an execute block.
88
+ if(block.arity == 0)
89
+ @_node.instance_eval(&block)
90
+ Zillabyte::Harness::Helper.check_name("clump", @_node._name, @_app._names)
91
+ if @_node._emits
92
+ Zillabyte::Harness::Helper.check_emits("clump", @_node._emits, @_app._streams)
93
+ else
94
+ @_node._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
95
+ end
96
+ # Takes more than 0? Then it takes |tuple| and is an execute block. Give it a generated stream name.
97
+ else
98
+ @_node._emits ||= ["stream_"+Zillabyte::Harness::Counter.get()]
99
+ @_node._execute = block
100
+ end
29
101
 
30
- def execute(&block)
31
- @_execute = block
102
+ Zillabyte::Harness::Helper.check_each(@_node)
32
103
  end
33
104
 
105
+ def run_operation
106
+ c = Zillabyte::Harness::EachController.new(@_node, Zillabyte::Common::Progress.new)
107
+ c.run(@_app._options)
108
+ end
34
109
  end
110
+
@@ -2,7 +2,6 @@ require 'optparse'
2
2
 
3
3
  class Zillabyte::Harness::Helper
4
4
 
5
- META_NAMES = ["id", "confidence", "since", "source"]
6
5
  ALLOWED_OUTPUT_FORMATS = [:replace, :merge]
7
6
  ALLOWED_END_CYCLE_POLICIES = [:null_emit, :explicit, :infinite]
8
7
  ALLOWED_TYPES = [:string, :integer, :float, :double, :boolean, :array, :map]
@@ -342,10 +341,6 @@ class Zillabyte::Harness::Helper
342
341
  msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{relation_name}\". #{pp}"
343
342
  Zillabyte::Harness::Helper.print_error(msg)
344
343
  end
345
- if(cname =~ /^v[0-9]+$/i or META_NAMES.member?(cname.downcase))
346
- msg = "#{ee}\"v[number]\", \"id\", \"confidence\", \"since\" and \"source\" are special names in Zillabyte. Please name your field something else. #{pp}"
347
- Zillabyte::Harness::Helper.print_error(msg)
348
- end
349
344
  if(!ctype.instance_of?(Symbol))
350
345
  msg = "#{ee}Field data types must be SYMBOLS in \"#{relation_name}\". #{pp}"
351
346
  Zillabyte::Harness::Helper.print_error(msg)
@@ -109,7 +109,6 @@ module Zillabyte
109
109
 
110
110
  stream = nil
111
111
  tuple = {}
112
- meta = {}
113
112
 
114
113
  if (args.size == 1)
115
114
  tuple = args[0]
@@ -119,16 +118,13 @@ module Zillabyte
119
118
  tuple = args[1]
120
119
  elsif(args[0].instance_of?(Hash))
121
120
  tuple = args[0]
122
- meta = args[1]
123
121
  end
124
122
  else
125
123
  stream = args[0]
126
124
  tuple = args[1]
127
- meta = args[2]
128
125
  end
129
126
 
130
127
  if tuple.is_a?(Tuple)
131
- meta = tuple.meta || {} if meta == {}
132
128
  tuple = tuple.values
133
129
  end
134
130
 
@@ -140,7 +136,7 @@ module Zillabyte
140
136
  end
141
137
  end
142
138
 
143
- m = {:command => :emit, :tuple => tuple, :stream => stream, :meta => meta}
139
+ m = {:command => :emit, :tuple => tuple, :stream => stream}
144
140
  send_msg_to_parent m
145
141
  end
146
142
 
@@ -273,6 +269,11 @@ module Zillabyte
273
269
  if(m)
274
270
  if m['command'] && m['command'] == 'prepare'
275
271
  prepare()
272
+ elsif m['tuples']
273
+ t_ary = m['tuples'].map do |t|
274
+ Tuple.new(m)
275
+ end
276
+ execute t_ary
276
277
  else
277
278
  t = Tuple.from_hash(m)
278
279
  if(t)
@@ -34,6 +34,8 @@ class Zillabyte::Harness::OperationHandler
34
34
  @_operation = Zillabyte::Harness::ComponentInput.new(@_app, *args)
35
35
  when "component_output"
36
36
  @_operation = Zillabyte::Harness::ComponentOutput.new(@_app, *args)
37
+ when "clump"
38
+ @_operation = Zillabyte::Harness::Clump.new(@_app, *args)
37
39
  else
38
40
  throw "Unknown type"
39
41
  end
@@ -181,16 +181,27 @@ class Zillabyte::Harness::Stream
181
181
  end
182
182
 
183
183
  # Groups together N arbitrary tuples (useful for mini-batch processing)
184
- def clump(*args)
184
+ def clump(*args, &block)
185
185
  op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
186
- op.build_jvm_operation("clump", *args)
186
+ op.build_multilang_operation("clump", *args, &block)
187
187
  .add_operation_properties_to_info(:name, :type)
188
- .add_input_args_to_info_as(:clump)
189
188
  .create_arc_info_from_stream(self)
190
189
  .handle_operation
191
190
  .get_output_streams
192
191
  end
193
192
 
193
+ def group_by(*args, &block)
194
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
195
+ op.build_multilang_operation("group_by", *args, &block)
196
+ .add_operation_properties_to_info(:name, :type, :group_by)
197
+ .add_optional_operation_properties_to_info(:parallelism)
198
+ .create_arc_info_from_stream(self)
199
+ .handle_operation
200
+ .get_output_streams
201
+ end
202
+
203
+
204
+
194
205
  # Renames fields..
195
206
  def rename(rename_map={})
196
207
  op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
@@ -36,10 +36,6 @@ class Tuple
36
36
  @hash.keys
37
37
  end
38
38
 
39
- def meta
40
- @hash["meta"]
41
- end
42
-
43
39
  def to_s
44
40
  values.to_s
45
41
  end
@@ -93,14 +89,6 @@ class Tuple
93
89
  fail "Missing tuple field in emitted JSON object?: #{e.message}\nJSON: #{hash}\nStacktrace:#{e.backtrace.join('\n\t')}"
94
90
  return nil
95
91
  end
96
- begin
97
- hash.fetch("meta")
98
- rescue SignalException => e
99
- raise
100
- rescue Exception => e
101
- fail "Missing meta field in emitted JSON object?: #{e.message}\nJSON: #{hash}\nStacktrace:#{e.backtrace.join('\n\t')}"
102
- return nil
103
- end
104
92
  Tuple.new(hash)
105
93
  end
106
94
  end
@@ -1,3 +1,3 @@
1
1
  module Zillabyte
2
- VERSION = "0.9.36" unless defined?(VERSION)
2
+ VERSION = "0.9.37" unless defined?(VERSION)
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zillabyte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.36
4
+ version: 0.9.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - zillabyte
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-22 00:00:00.000000000 Z
11
+ date: 2014-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.9.36
33
+ version: 0.9.37
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.9.36
40
+ version: 0.9.37
41
41
  description: The Official Zillabyte Gem
42
42
  email:
43
43
  - gem@zillabyte.com