zillabyte 0.9.36 → 0.9.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ruby/lib/zillabyte/harness.rb +3 -0
- data/ruby/lib/zillabyte/harness/clump.rb +98 -22
- data/ruby/lib/zillabyte/harness/helper.rb +0 -5
- data/ruby/lib/zillabyte/harness/live_delegator.rb +6 -5
- data/ruby/lib/zillabyte/harness/operation_handler.rb +2 -0
- data/ruby/lib/zillabyte/harness/stream.rb +14 -3
- data/ruby/lib/zillabyte/harness/tuple.rb +0 -12
- data/ruby/lib/zillabyte/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8ee2c261c664c1674df0530fcf41f5b909998822
|
4
|
+
data.tar.gz: 06196fc228374cb2eb014c91b4b563dbe1f5f5e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df3498fec8e7c6aec0e146d7614ff0b41d8c9d8386c2c08237995352b142e13c642680bd6d4250b4aab6a112de57181a6890373f52dcb515277dfbc29c3e3bff
|
7
|
+
data.tar.gz: 0daa4f3157c1349ce402ce53848e06d0b70267fe5782ffdee2bad4883fe8d149ad829900b1bd6ae41b214923e23c5f2b768d94d9d04c8f609d60700f013d89ad
|
@@ -3,6 +3,9 @@ module Zillabyte
|
|
3
3
|
module Harness
|
4
4
|
|
5
5
|
def self.load
|
6
|
+
require File.join(File.dirname(__FILE__), "harness", "app.rb")
|
7
|
+
require File.join(File.dirname(__FILE__), "harness", "base.rb")
|
8
|
+
require File.join(File.dirname(__FILE__), "harness", "common_node.rb")
|
6
9
|
Dir[File.join(File.dirname(__FILE__), "harness", "*.rb")].sort.each do |file|
|
7
10
|
require file
|
8
11
|
end
|
@@ -1,34 +1,110 @@
|
|
1
|
+
# OPERATION Clump
|
2
|
+
# TAGLINE
|
3
|
+
# Clumps tuples together in the stream for faster micro-batch processing
|
4
|
+
# DESCRIPTION
|
5
|
+
# The `clump` operation will collect a small 'clump' of tuples before passing it on to your code. This allows you to
|
6
|
+
# easily handle situations that require small bursts of computation. For example, suppose you are accessing a third
|
7
|
+
# party database and wish to query it in bursts of 100 commands at a time.
|
8
|
+
#
|
9
|
+
# RUBY_SYNTAX
|
10
|
+
# # Simplified syntax:
|
11
|
+
#
|
12
|
+
# stream.clump(:size => 100) do |tuples|
|
13
|
+
# # tuples is an array
|
14
|
+
# |=block=|
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
# # Custom clump:
|
18
|
+
# stream.clump do
|
19
|
+
# name "name" # the name of the operation
|
20
|
+
# size 100 # the target size of the clump
|
21
|
+
# emits "stream_1", "stream_2" # optional for single output stream
|
22
|
+
# output_format :replace # :replace or :merge, optional, defaults to :replace
|
23
|
+
# prepare |=block=| # optional if no initialization needed
|
24
|
+
# execute |=block=|
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# RUBY_NOTES
|
28
|
+
# The `size` parameter is optional. It is the target size of the clump. The system is guaranteed not to exceed `size`, but there are no guarantees about smaller clumps. If no `size` is given, the default is 100.
|
29
|
+
#
|
30
|
+
# The simplified syntax can be used if a prepare block and other customizations are not needed.
|
31
|
+
#
|
32
|
+
# * **Note:** this syntax only works for a single output stream.
|
33
|
+
#
|
34
|
+
# The allowed output formats are :replace and :merge.
|
35
|
+
#
|
36
|
+
# * **:replace** : discards the input tuple values and only emits the specified values. This is the default.
|
37
|
+
# * **:merge** : re-emits the input tuple values along with the specified values.
|
38
|
+
#
|
39
|
+
# The "prepare" and "execute" blocks can be in do...end format or {} format.
|
40
|
+
#
|
41
|
+
# * **"prepare"** block : where any setup is done to prepare for tuple processing in the "execute" block.
|
42
|
+
# * **"execute"** block : where the tuples are actually processed. It must take in a single argument (the array of "tuples" in the clump).
|
43
|
+
#
|
44
|
+
# RUBY_EXAMPLE
|
45
|
+
#
|
46
|
+
# # Sink data into an external sql database
|
47
|
+
# stream.clump(:size => 50) do |tuples|
|
48
|
+
# sql = "INSERT INTO table VALUES #{tuples.map{|t| t['value']}.join(',')}"
|
49
|
+
# @my_sql_service.execute(sql)
|
50
|
+
# end
|
51
|
+
#
|
52
|
+
|
1
53
|
class Zillabyte::Harness::Clump
|
2
|
-
attr_accessor :
|
54
|
+
attr_accessor :_app, :_node, :_options
|
3
55
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
56
|
+
class Node < Zillabyte::Harness::CommonNode
|
57
|
+
attr_accessor :_output_format, :_execute, :_size
|
58
|
+
|
59
|
+
def initialize(v, options = {})
|
60
|
+
@_name = v[0] || "clump_"+Zillabyte::Harness::Counter.get()
|
61
|
+
@_type = "clump"
|
62
|
+
@_size = options[:size] || 100
|
63
|
+
@_output_format = :replace
|
64
|
+
self._emits = options[:emits]
|
65
|
+
end
|
13
66
|
|
14
|
-
|
15
|
-
|
16
|
-
|
67
|
+
def output_format(v)
|
68
|
+
@_output_format = v
|
69
|
+
end
|
17
70
|
|
18
|
-
|
19
|
-
|
71
|
+
def execute(&block)
|
72
|
+
@_execute = block
|
73
|
+
end
|
74
|
+
|
75
|
+
def size(size)
|
76
|
+
@_size = size
|
77
|
+
end
|
20
78
|
end
|
21
79
|
|
22
|
-
def
|
23
|
-
@
|
80
|
+
def initialize(app, *args)
|
81
|
+
@_app = app
|
82
|
+
@_args, @_options = Zillabyte::Harness::Helper.get_vector_and_hashes(args)
|
24
83
|
end
|
25
84
|
|
26
|
-
def
|
27
|
-
@
|
28
|
-
|
85
|
+
def build_node(&block)
|
86
|
+
@_node = Node.new(@_args, @_options)
|
87
|
+
# Does the block take 0 arguments? If so it's not just an execute block.
|
88
|
+
if(block.arity == 0)
|
89
|
+
@_node.instance_eval(&block)
|
90
|
+
Zillabyte::Harness::Helper.check_name("clump", @_node._name, @_app._names)
|
91
|
+
if @_node._emits
|
92
|
+
Zillabyte::Harness::Helper.check_emits("clump", @_node._emits, @_app._streams)
|
93
|
+
else
|
94
|
+
@_node._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
|
95
|
+
end
|
96
|
+
# Takes more than 0? Then it takes |tuple| and is an execute block. Give it a generated stream name.
|
97
|
+
else
|
98
|
+
@_node._emits ||= ["stream_"+Zillabyte::Harness::Counter.get()]
|
99
|
+
@_node._execute = block
|
100
|
+
end
|
29
101
|
|
30
|
-
|
31
|
-
@_execute = block
|
102
|
+
Zillabyte::Harness::Helper.check_each(@_node)
|
32
103
|
end
|
33
104
|
|
105
|
+
def run_operation
|
106
|
+
c = Zillabyte::Harness::EachController.new(@_node, Zillabyte::Common::Progress.new)
|
107
|
+
c.run(@_app._options)
|
108
|
+
end
|
34
109
|
end
|
110
|
+
|
@@ -2,7 +2,6 @@ require 'optparse'
|
|
2
2
|
|
3
3
|
class Zillabyte::Harness::Helper
|
4
4
|
|
5
|
-
META_NAMES = ["id", "confidence", "since", "source"]
|
6
5
|
ALLOWED_OUTPUT_FORMATS = [:replace, :merge]
|
7
6
|
ALLOWED_END_CYCLE_POLICIES = [:null_emit, :explicit, :infinite]
|
8
7
|
ALLOWED_TYPES = [:string, :integer, :float, :double, :boolean, :array, :map]
|
@@ -342,10 +341,6 @@ class Zillabyte::Harness::Helper
|
|
342
341
|
msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{relation_name}\". #{pp}"
|
343
342
|
Zillabyte::Harness::Helper.print_error(msg)
|
344
343
|
end
|
345
|
-
if(cname =~ /^v[0-9]+$/i or META_NAMES.member?(cname.downcase))
|
346
|
-
msg = "#{ee}\"v[number]\", \"id\", \"confidence\", \"since\" and \"source\" are special names in Zillabyte. Please name your field something else. #{pp}"
|
347
|
-
Zillabyte::Harness::Helper.print_error(msg)
|
348
|
-
end
|
349
344
|
if(!ctype.instance_of?(Symbol))
|
350
345
|
msg = "#{ee}Field data types must be SYMBOLS in \"#{relation_name}\". #{pp}"
|
351
346
|
Zillabyte::Harness::Helper.print_error(msg)
|
@@ -109,7 +109,6 @@ module Zillabyte
|
|
109
109
|
|
110
110
|
stream = nil
|
111
111
|
tuple = {}
|
112
|
-
meta = {}
|
113
112
|
|
114
113
|
if (args.size == 1)
|
115
114
|
tuple = args[0]
|
@@ -119,16 +118,13 @@ module Zillabyte
|
|
119
118
|
tuple = args[1]
|
120
119
|
elsif(args[0].instance_of?(Hash))
|
121
120
|
tuple = args[0]
|
122
|
-
meta = args[1]
|
123
121
|
end
|
124
122
|
else
|
125
123
|
stream = args[0]
|
126
124
|
tuple = args[1]
|
127
|
-
meta = args[2]
|
128
125
|
end
|
129
126
|
|
130
127
|
if tuple.is_a?(Tuple)
|
131
|
-
meta = tuple.meta || {} if meta == {}
|
132
128
|
tuple = tuple.values
|
133
129
|
end
|
134
130
|
|
@@ -140,7 +136,7 @@ module Zillabyte
|
|
140
136
|
end
|
141
137
|
end
|
142
138
|
|
143
|
-
m = {:command => :emit, :tuple => tuple, :stream => stream
|
139
|
+
m = {:command => :emit, :tuple => tuple, :stream => stream}
|
144
140
|
send_msg_to_parent m
|
145
141
|
end
|
146
142
|
|
@@ -273,6 +269,11 @@ module Zillabyte
|
|
273
269
|
if(m)
|
274
270
|
if m['command'] && m['command'] == 'prepare'
|
275
271
|
prepare()
|
272
|
+
elsif m['tuples']
|
273
|
+
t_ary = m['tuples'].map do |t|
|
274
|
+
Tuple.new(m)
|
275
|
+
end
|
276
|
+
execute t_ary
|
276
277
|
else
|
277
278
|
t = Tuple.from_hash(m)
|
278
279
|
if(t)
|
@@ -34,6 +34,8 @@ class Zillabyte::Harness::OperationHandler
|
|
34
34
|
@_operation = Zillabyte::Harness::ComponentInput.new(@_app, *args)
|
35
35
|
when "component_output"
|
36
36
|
@_operation = Zillabyte::Harness::ComponentOutput.new(@_app, *args)
|
37
|
+
when "clump"
|
38
|
+
@_operation = Zillabyte::Harness::Clump.new(@_app, *args)
|
37
39
|
else
|
38
40
|
throw "Unknown type"
|
39
41
|
end
|
@@ -181,16 +181,27 @@ class Zillabyte::Harness::Stream
|
|
181
181
|
end
|
182
182
|
|
183
183
|
# Groups together N arbitrary tuples (useful for mini-batch processing)
|
184
|
-
def clump(*args)
|
184
|
+
def clump(*args, &block)
|
185
185
|
op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
|
186
|
-
op.
|
186
|
+
op.build_multilang_operation("clump", *args, &block)
|
187
187
|
.add_operation_properties_to_info(:name, :type)
|
188
|
-
.add_input_args_to_info_as(:clump)
|
189
188
|
.create_arc_info_from_stream(self)
|
190
189
|
.handle_operation
|
191
190
|
.get_output_streams
|
192
191
|
end
|
193
192
|
|
193
|
+
def group_by(*args, &block)
|
194
|
+
op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
|
195
|
+
op.build_multilang_operation("group_by", *args, &block)
|
196
|
+
.add_operation_properties_to_info(:name, :type, :group_by)
|
197
|
+
.add_optional_operation_properties_to_info(:parallelism)
|
198
|
+
.create_arc_info_from_stream(self)
|
199
|
+
.handle_operation
|
200
|
+
.get_output_streams
|
201
|
+
end
|
202
|
+
|
203
|
+
|
204
|
+
|
194
205
|
# Renames fields..
|
195
206
|
def rename(rename_map={})
|
196
207
|
op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
|
@@ -36,10 +36,6 @@ class Tuple
|
|
36
36
|
@hash.keys
|
37
37
|
end
|
38
38
|
|
39
|
-
def meta
|
40
|
-
@hash["meta"]
|
41
|
-
end
|
42
|
-
|
43
39
|
def to_s
|
44
40
|
values.to_s
|
45
41
|
end
|
@@ -93,14 +89,6 @@ class Tuple
|
|
93
89
|
fail "Missing tuple field in emitted JSON object?: #{e.message}\nJSON: #{hash}\nStacktrace:#{e.backtrace.join('\n\t')}"
|
94
90
|
return nil
|
95
91
|
end
|
96
|
-
begin
|
97
|
-
hash.fetch("meta")
|
98
|
-
rescue SignalException => e
|
99
|
-
raise
|
100
|
-
rescue Exception => e
|
101
|
-
fail "Missing meta field in emitted JSON object?: #{e.message}\nJSON: #{hash}\nStacktrace:#{e.backtrace.join('\n\t')}"
|
102
|
-
return nil
|
103
|
-
end
|
104
92
|
Tuple.new(hash)
|
105
93
|
end
|
106
94
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zillabyte
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.37
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- zillabyte
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.9.
|
33
|
+
version: 0.9.37
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.9.
|
40
|
+
version: 0.9.37
|
41
41
|
description: The Official Zillabyte Gem
|
42
42
|
email:
|
43
43
|
- gem@zillabyte.com
|