zillabyte 0.9.36 → 0.9.37

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d7231040a821d67441f1e33bdb7ea719ea37111d
4
- data.tar.gz: ca2c97df4729f8f6c251e8712e27beb2cb35e51a
3
+ metadata.gz: 8ee2c261c664c1674df0530fcf41f5b909998822
4
+ data.tar.gz: 06196fc228374cb2eb014c91b4b563dbe1f5f5e8
5
5
  SHA512:
6
- metadata.gz: 1c4e2fcc1793f57db5bd85015a47401566d2c0a7b96cfd7220f3dc2135fbe02cb613d3c34a57f6ce9ee9fa280c18a09d7d35609d5ea0029afda1b17c2d8e52ad
7
- data.tar.gz: ca468ce6b88861174acbe45440bec83b8f9156d44b66992d9be2a186d0b09762c675733b15aa2b31760b40020b4570ce3675d91114d11609821884515d1ca9ef
6
+ metadata.gz: df3498fec8e7c6aec0e146d7614ff0b41d8c9d8386c2c08237995352b142e13c642680bd6d4250b4aab6a112de57181a6890373f52dcb515277dfbc29c3e3bff
7
+ data.tar.gz: 0daa4f3157c1349ce402ce53848e06d0b70267fe5782ffdee2bad4883fe8d149ad829900b1bd6ae41b214923e23c5f2b768d94d9d04c8f609d60700f013d89ad
@@ -3,6 +3,9 @@ module Zillabyte
3
3
  module Harness
4
4
 
5
5
  def self.load
6
+ require File.join(File.dirname(__FILE__), "harness", "app.rb")
7
+ require File.join(File.dirname(__FILE__), "harness", "base.rb")
8
+ require File.join(File.dirname(__FILE__), "harness", "common_node.rb")
6
9
  Dir[File.join(File.dirname(__FILE__), "harness", "*.rb")].sort.each do |file|
7
10
  require file
8
11
  end
@@ -1,34 +1,110 @@
1
+ # OPERATION Clump
2
+ # TAGLINE
3
+ # Clumps tuples together in the stream for faster micro-batch processing
4
+ # DESCRIPTION
5
+ # The `clump` operation will collect a small 'clump' of tuples before passing it on to your code. This allows you to
6
+ # easily handle situations that require small bursts of computation. For example, suppose you are accessing a third
7
+ # party database and wish to query it in bursts of 100 commands at a time.
8
+ #
9
+ # RUBY_SYNTAX
10
+ # # Simplified syntax:
11
+ #
12
+ # stream.clump(:size => 100) do |tuples|
13
+ # # tuples is an array
14
+ # |=block=|
15
+ # end
16
+ #
17
+ # # Custom clump:
18
+ # stream.clump do
19
+ # name "name" # the name of the operation
20
+ # size 100 # the target size of the clump
21
+ # emits "stream_1", "stream_2" # optional for single output stream
22
+ # output_format :replace # :replace or :merge, optional, defaults to :replace
23
+ # prepare |=block=| # optional if no initialization needed
24
+ # execute |=block=|
25
+ # end
26
+ #
27
+ # RUBY_NOTES
28
+ # The `size` parameter is optional. It is the target size of the clump. The system guarantees that a clump never exceeds `size`, but a clump may contain fewer tuples. If no `size` is given, it defaults to 100.
29
+ #
30
+ # The simplified syntax can be used if a prepare block and other customizations are not needed.
31
+ #
32
+ # * **Note:** this syntax only works for a single output stream.
33
+ #
34
+ # The allowed output formats are :replace and :merge.
35
+ #
36
+ # * **:replace** : discards the input tuple values and only emits the specified values. This is the default.
37
+ # * **:merge** : re-emits the input tuple values along with the specified values.
38
+ #
39
+ # The "prepare" and "execute" blocks can be in do...end format or {} format.
40
+ #
41
+ # * **"prepare"** block : where any setup is done to prepare for tuple processing in the "execute" block.
42
+ # * **"execute"** block : where the tuples are actually processed. It must take in a single argument (the array of "tuples" in the clump).
43
+ #
44
+ # RUBY_EXAMPLE
45
+ #
46
+ # # Sink data into an external sql database
47
+ # stream.clump(:size => 50) do |tuples|
48
+ # sql = "INSERT INTO table VALUES #{tuples.map{|t| t['value']}.join(',')}"
49
+ # @my_sql_service.execute(sql)
50
+ # end
51
+ #
52
+
1
53
  class Zillabyte::Harness::Clump
2
- attr_accessor :_name, :_type, :_emits, :_output_format, :_parallelism, :_prepare, :_execute
54
+ attr_accessor :_app, :_node, :_options
3
55
 
4
- def initialize()
5
- @_name = "clump_"+Zillabyte::Harness::Counter.get()
6
- @_type = "clump"
7
- @_output_format = :replace
8
- end
9
-
10
- def name(v)
11
- @_name = v
12
- end
56
+ class Node < Zillabyte::Harness::CommonNode
57
+ attr_accessor :_output_format, :_execute, :_size
58
+
59
+ def initialize(v, options = {})
60
+ @_name = v[0] || "clump_"+Zillabyte::Harness::Counter.get()
61
+ @_type = "clump"
62
+ @_size = options[:size] || 100
63
+ @_output_format = :replace
64
+ self._emits = options[:emits]
65
+ end
13
66
 
14
- def emits(*v)
15
- @_emits = *v
16
- end
67
+ def output_format(v)
68
+ @_output_format = v
69
+ end
17
70
 
18
- def output_format(v)
19
- @_output_format = v
71
+ def execute(&block)
72
+ @_execute = block
73
+ end
74
+
75
+ def size(size)
76
+ @_size = size
77
+ end
20
78
  end
21
79
 
22
- def parallelism(v)
23
- @_parallelism = v
80
+ def initialize(app, *args)
81
+ @_app = app
82
+ @_args, @_options = Zillabyte::Harness::Helper.get_vector_and_hashes(args)
24
83
  end
25
84
 
26
- def prepare(&block)
27
- @_prepare = block
28
- end
85
+ def build_node(&block)
86
+ @_node = Node.new(@_args, @_options)
87
+ # Does the block take 0 arguments? If so it's not just an execute block.
88
+ if(block.arity == 0)
89
+ @_node.instance_eval(&block)
90
+ Zillabyte::Harness::Helper.check_name("clump", @_node._name, @_app._names)
91
+ if @_node._emits
92
+ Zillabyte::Harness::Helper.check_emits("clump", @_node._emits, @_app._streams)
93
+ else
94
+ @_node._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
95
+ end
96
+ # Non-zero arity? Then it takes |tuples| and is an execute block. Give it a generated stream name.
97
+ else
98
+ @_node._emits ||= ["stream_"+Zillabyte::Harness::Counter.get()]
99
+ @_node._execute = block
100
+ end
29
101
 
30
- def execute(&block)
31
- @_execute = block
102
+ Zillabyte::Harness::Helper.check_each(@_node)
32
103
  end
33
104
 
105
+ def run_operation
106
+ c = Zillabyte::Harness::EachController.new(@_node, Zillabyte::Common::Progress.new)
107
+ c.run(@_app._options)
108
+ end
34
109
  end
110
+
@@ -2,7 +2,6 @@ require 'optparse'
2
2
 
3
3
  class Zillabyte::Harness::Helper
4
4
 
5
- META_NAMES = ["id", "confidence", "since", "source"]
6
5
  ALLOWED_OUTPUT_FORMATS = [:replace, :merge]
7
6
  ALLOWED_END_CYCLE_POLICIES = [:null_emit, :explicit, :infinite]
8
7
  ALLOWED_TYPES = [:string, :integer, :float, :double, :boolean, :array, :map]
@@ -342,10 +341,6 @@ class Zillabyte::Harness::Helper
342
341
  msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{relation_name}\". #{pp}"
343
342
  Zillabyte::Harness::Helper.print_error(msg)
344
343
  end
345
- if(cname =~ /^v[0-9]+$/i or META_NAMES.member?(cname.downcase))
346
- msg = "#{ee}\"v[number]\", \"id\", \"confidence\", \"since\" and \"source\" are special names in Zillabyte. Please name your field something else. #{pp}"
347
- Zillabyte::Harness::Helper.print_error(msg)
348
- end
349
344
  if(!ctype.instance_of?(Symbol))
350
345
  msg = "#{ee}Field data types must be SYMBOLS in \"#{relation_name}\". #{pp}"
351
346
  Zillabyte::Harness::Helper.print_error(msg)
@@ -109,7 +109,6 @@ module Zillabyte
109
109
 
110
110
  stream = nil
111
111
  tuple = {}
112
- meta = {}
113
112
 
114
113
  if (args.size == 1)
115
114
  tuple = args[0]
@@ -119,16 +118,13 @@ module Zillabyte
119
118
  tuple = args[1]
120
119
  elsif(args[0].instance_of?(Hash))
121
120
  tuple = args[0]
122
- meta = args[1]
123
121
  end
124
122
  else
125
123
  stream = args[0]
126
124
  tuple = args[1]
127
- meta = args[2]
128
125
  end
129
126
 
130
127
  if tuple.is_a?(Tuple)
131
- meta = tuple.meta || {} if meta == {}
132
128
  tuple = tuple.values
133
129
  end
134
130
 
@@ -140,7 +136,7 @@ module Zillabyte
140
136
  end
141
137
  end
142
138
 
143
- m = {:command => :emit, :tuple => tuple, :stream => stream, :meta => meta}
139
+ m = {:command => :emit, :tuple => tuple, :stream => stream}
144
140
  send_msg_to_parent m
145
141
  end
146
142
 
@@ -273,6 +269,11 @@ module Zillabyte
273
269
  if(m)
274
270
  if m['command'] && m['command'] == 'prepare'
275
271
  prepare()
272
+ elsif m['tuples']
273
+ t_ary = m['tuples'].map do |t|
274
+ Tuple.new(m)
275
+ end
276
+ execute t_ary
276
277
  else
277
278
  t = Tuple.from_hash(m)
278
279
  if(t)
@@ -34,6 +34,8 @@ class Zillabyte::Harness::OperationHandler
34
34
  @_operation = Zillabyte::Harness::ComponentInput.new(@_app, *args)
35
35
  when "component_output"
36
36
  @_operation = Zillabyte::Harness::ComponentOutput.new(@_app, *args)
37
+ when "clump"
38
+ @_operation = Zillabyte::Harness::Clump.new(@_app, *args)
37
39
  else
38
40
  throw "Unknown type"
39
41
  end
@@ -181,16 +181,27 @@ class Zillabyte::Harness::Stream
181
181
  end
182
182
 
183
183
  # Groups together N arbitrary tuples (useful for mini-batch processing)
184
- def clump(*args)
184
+ def clump(*args, &block)
185
185
  op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
186
- op.build_jvm_operation("clump", *args)
186
+ op.build_multilang_operation("clump", *args, &block)
187
187
  .add_operation_properties_to_info(:name, :type)
188
- .add_input_args_to_info_as(:clump)
189
188
  .create_arc_info_from_stream(self)
190
189
  .handle_operation
191
190
  .get_output_streams
192
191
  end
193
192
 
193
+ def group_by(*args, &block)
194
+ op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
195
+ op.build_multilang_operation("group_by", *args, &block)
196
+ .add_operation_properties_to_info(:name, :type, :group_by)
197
+ .add_optional_operation_properties_to_info(:parallelism)
198
+ .create_arc_info_from_stream(self)
199
+ .handle_operation
200
+ .get_output_streams
201
+ end
202
+
203
+
204
+
194
205
  # Renames fields..
195
206
  def rename(rename_map={})
196
207
  op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
@@ -36,10 +36,6 @@ class Tuple
36
36
  @hash.keys
37
37
  end
38
38
 
39
- def meta
40
- @hash["meta"]
41
- end
42
-
43
39
  def to_s
44
40
  values.to_s
45
41
  end
@@ -93,14 +89,6 @@ class Tuple
93
89
  fail "Missing tuple field in emitted JSON object?: #{e.message}\nJSON: #{hash}\nStacktrace:#{e.backtrace.join('\n\t')}"
94
90
  return nil
95
91
  end
96
- begin
97
- hash.fetch("meta")
98
- rescue SignalException => e
99
- raise
100
- rescue Exception => e
101
- fail "Missing meta field in emitted JSON object?: #{e.message}\nJSON: #{hash}\nStacktrace:#{e.backtrace.join('\n\t')}"
102
- return nil
103
- end
104
92
  Tuple.new(hash)
105
93
  end
106
94
  end
@@ -1,3 +1,3 @@
1
1
  module Zillabyte
2
- VERSION = "0.9.36" unless defined?(VERSION)
2
+ VERSION = "0.9.37" unless defined?(VERSION)
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zillabyte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.36
4
+ version: 0.9.37
5
5
  platform: ruby
6
6
  authors:
7
7
  - zillabyte
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-22 00:00:00.000000000 Z
11
+ date: 2014-11-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.9.36
33
+ version: 0.9.37
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.9.36
40
+ version: 0.9.37
41
41
  description: The Official Zillabyte Gem
42
42
  email:
43
43
  - gem@zillabyte.com