zillabyte 0.9.36 → 0.9.37
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ruby/lib/zillabyte/harness.rb +3 -0
- data/ruby/lib/zillabyte/harness/clump.rb +98 -22
- data/ruby/lib/zillabyte/harness/helper.rb +0 -5
- data/ruby/lib/zillabyte/harness/live_delegator.rb +6 -5
- data/ruby/lib/zillabyte/harness/operation_handler.rb +2 -0
- data/ruby/lib/zillabyte/harness/stream.rb +14 -3
- data/ruby/lib/zillabyte/harness/tuple.rb +0 -12
- data/ruby/lib/zillabyte/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8ee2c261c664c1674df0530fcf41f5b909998822
|
4
|
+
data.tar.gz: 06196fc228374cb2eb014c91b4b563dbe1f5f5e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df3498fec8e7c6aec0e146d7614ff0b41d8c9d8386c2c08237995352b142e13c642680bd6d4250b4aab6a112de57181a6890373f52dcb515277dfbc29c3e3bff
|
7
|
+
data.tar.gz: 0daa4f3157c1349ce402ce53848e06d0b70267fe5782ffdee2bad4883fe8d149ad829900b1bd6ae41b214923e23c5f2b768d94d9d04c8f609d60700f013d89ad
|
@@ -3,6 +3,9 @@ module Zillabyte
|
|
3
3
|
module Harness
|
4
4
|
|
5
5
|
def self.load
|
6
|
+
require File.join(File.dirname(__FILE__), "harness", "app.rb")
|
7
|
+
require File.join(File.dirname(__FILE__), "harness", "base.rb")
|
8
|
+
require File.join(File.dirname(__FILE__), "harness", "common_node.rb")
|
6
9
|
Dir[File.join(File.dirname(__FILE__), "harness", "*.rb")].sort.each do |file|
|
7
10
|
require file
|
8
11
|
end
|
@@ -1,34 +1,110 @@
|
|
1
|
+
# OPERATION Clump
|
2
|
+
# TAGLINE
|
3
|
+
# Clumps tuples together in the stream for faster micro-batch processing
|
4
|
+
# DESCRIPTION
|
5
|
+
# The `clump` operation will collect a small 'clump' of tuples before passing it on to your code. This allows you to
|
6
|
+
# easily handle situations that require small bursts of computation. For example, suppose you are accessing a third
|
7
|
+
# party database and wish to query it in bursts of 100 commands at a time.
|
8
|
+
#
|
9
|
+
# RUBY_SYNTAX
|
10
|
+
# # Simplified syntax:
|
11
|
+
#
|
12
|
+
# stream.clump(:size => 100) do |tuples|
|
13
|
+
# # tuples is an array
|
14
|
+
# |=block=|
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
# # Custom clump:
|
18
|
+
# stream.clump do
|
19
|
+
# name "name" # the name of the operation
|
20
|
+
# size 100 # the target size of the clump
|
21
|
+
# emits "stream_1", "stream_2" # optional for single output stream
|
22
|
+
# output_format :replace # :replace or :merge, optional, defaults to :replace
|
23
|
+
# prepare |=block=| # optional if no initialization needed
|
24
|
+
# execute |=block=|
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# RUBY_NOTES
|
28
|
+
# The `size` parameter is optional. It is the target size of the clump. The system guarantees that a clump never exceeds `size`, but makes no guarantee about a minimum, so smaller clumps may be delivered. If no `size` is given, the default is 100.
|
29
|
+
#
|
30
|
+
# The simplified syntax can be used if a prepare block and other customizations are not needed.
|
31
|
+
#
|
32
|
+
# * **Note:** this syntax only works for a single output stream.
|
33
|
+
#
|
34
|
+
# The allowed output formats are :replace and :merge.
|
35
|
+
#
|
36
|
+
# * **:replace** : discards the input tuple values and only emits the specified values. This is the default.
|
37
|
+
# * **:merge** : re-emits the input tuple values along with the specified values.
|
38
|
+
#
|
39
|
+
# The "prepare" and "execute" blocks can be in do...end format or {} format.
|
40
|
+
#
|
41
|
+
# * **"prepare"** block : where any setup is done to prepare for tuple processing in the "execute" block.
|
42
|
+
# * **"execute"** block : where the tuples are actually processed. It must take in a single argument (the array of "tuples" in the clump).
|
43
|
+
#
|
44
|
+
# RUBY_EXAMPLE
|
45
|
+
#
|
46
|
+
# # Sink data into an external sql database
|
47
|
+
# stream.clump(:size => 50) do |tuples|
|
48
|
+
# sql = "INSERT INTO table VALUES #{tuples.map{|t| t['value']}.join(',')}"
|
49
|
+
# @my_sql_service.execute(sql)
|
50
|
+
# end
|
51
|
+
#
|
52
|
+
|
1
53
|
class Zillabyte::Harness::Clump
|
2
|
-
attr_accessor :
|
54
|
+
attr_accessor :_app, :_node, :_options
|
3
55
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
56
|
+
class Node < Zillabyte::Harness::CommonNode
|
57
|
+
attr_accessor :_output_format, :_execute, :_size
|
58
|
+
|
59
|
+
def initialize(v, options = {})
|
60
|
+
@_name = v[0] || "clump_"+Zillabyte::Harness::Counter.get()
|
61
|
+
@_type = "clump"
|
62
|
+
@_size = options[:size] || 100
|
63
|
+
@_output_format = :replace
|
64
|
+
self._emits = options[:emits]
|
65
|
+
end
|
13
66
|
|
14
|
-
|
15
|
-
|
16
|
-
|
67
|
+
def output_format(v)
|
68
|
+
@_output_format = v
|
69
|
+
end
|
17
70
|
|
18
|
-
|
19
|
-
|
71
|
+
def execute(&block)
|
72
|
+
@_execute = block
|
73
|
+
end
|
74
|
+
|
75
|
+
def size(size)
|
76
|
+
@_size = size
|
77
|
+
end
|
20
78
|
end
|
21
79
|
|
22
|
-
def
|
23
|
-
@
|
80
|
+
def initialize(app, *args)
|
81
|
+
@_app = app
|
82
|
+
@_args, @_options = Zillabyte::Harness::Helper.get_vector_and_hashes(args)
|
24
83
|
end
|
25
84
|
|
26
|
-
def
|
27
|
-
@
|
28
|
-
|
85
|
+
def build_node(&block)
|
86
|
+
@_node = Node.new(@_args, @_options)
|
87
|
+
# Does the block take 0 arguments? If so it's not just an execute block.
|
88
|
+
if(block.arity == 0)
|
89
|
+
@_node.instance_eval(&block)
|
90
|
+
Zillabyte::Harness::Helper.check_name("clump", @_node._name, @_app._names)
|
91
|
+
if @_node._emits
|
92
|
+
Zillabyte::Harness::Helper.check_emits("clump", @_node._emits, @_app._streams)
|
93
|
+
else
|
94
|
+
@_node._emits = ["stream_"+Zillabyte::Harness::Counter.get()]
|
95
|
+
end
|
96
|
+
# Takes more than 0? Then it takes |tuples| and is an execute block. Give it a generated stream name.
|
97
|
+
else
|
98
|
+
@_node._emits ||= ["stream_"+Zillabyte::Harness::Counter.get()]
|
99
|
+
@_node._execute = block
|
100
|
+
end
|
29
101
|
|
30
|
-
|
31
|
-
@_execute = block
|
102
|
+
Zillabyte::Harness::Helper.check_each(@_node)
|
32
103
|
end
|
33
104
|
|
105
|
+
def run_operation
|
106
|
+
c = Zillabyte::Harness::EachController.new(@_node, Zillabyte::Common::Progress.new)
|
107
|
+
c.run(@_app._options)
|
108
|
+
end
|
34
109
|
end
|
110
|
+
|
@@ -2,7 +2,6 @@ require 'optparse'
|
|
2
2
|
|
3
3
|
class Zillabyte::Harness::Helper
|
4
4
|
|
5
|
-
META_NAMES = ["id", "confidence", "since", "source"]
|
6
5
|
ALLOWED_OUTPUT_FORMATS = [:replace, :merge]
|
7
6
|
ALLOWED_END_CYCLE_POLICIES = [:null_emit, :explicit, :infinite]
|
8
7
|
ALLOWED_TYPES = [:string, :integer, :float, :double, :boolean, :array, :map]
|
@@ -342,10 +341,6 @@ class Zillabyte::Harness::Helper
|
|
342
341
|
msg = "#{ee}Field names must be non-empty STRINGS with only alphanumeric and underscore characters in \"#{relation_name}\". #{pp}"
|
343
342
|
Zillabyte::Harness::Helper.print_error(msg)
|
344
343
|
end
|
345
|
-
if(cname =~ /^v[0-9]+$/i or META_NAMES.member?(cname.downcase))
|
346
|
-
msg = "#{ee}\"v[number]\", \"id\", \"confidence\", \"since\" and \"source\" are special names in Zillabyte. Please name your field something else. #{pp}"
|
347
|
-
Zillabyte::Harness::Helper.print_error(msg)
|
348
|
-
end
|
349
344
|
if(!ctype.instance_of?(Symbol))
|
350
345
|
msg = "#{ee}Field data types must be SYMBOLS in \"#{relation_name}\". #{pp}"
|
351
346
|
Zillabyte::Harness::Helper.print_error(msg)
|
@@ -109,7 +109,6 @@ module Zillabyte
|
|
109
109
|
|
110
110
|
stream = nil
|
111
111
|
tuple = {}
|
112
|
-
meta = {}
|
113
112
|
|
114
113
|
if (args.size == 1)
|
115
114
|
tuple = args[0]
|
@@ -119,16 +118,13 @@ module Zillabyte
|
|
119
118
|
tuple = args[1]
|
120
119
|
elsif(args[0].instance_of?(Hash))
|
121
120
|
tuple = args[0]
|
122
|
-
meta = args[1]
|
123
121
|
end
|
124
122
|
else
|
125
123
|
stream = args[0]
|
126
124
|
tuple = args[1]
|
127
|
-
meta = args[2]
|
128
125
|
end
|
129
126
|
|
130
127
|
if tuple.is_a?(Tuple)
|
131
|
-
meta = tuple.meta || {} if meta == {}
|
132
128
|
tuple = tuple.values
|
133
129
|
end
|
134
130
|
|
@@ -140,7 +136,7 @@ module Zillabyte
|
|
140
136
|
end
|
141
137
|
end
|
142
138
|
|
143
|
-
m = {:command => :emit, :tuple => tuple, :stream => stream
|
139
|
+
m = {:command => :emit, :tuple => tuple, :stream => stream}
|
144
140
|
send_msg_to_parent m
|
145
141
|
end
|
146
142
|
|
@@ -273,6 +269,11 @@ module Zillabyte
|
|
273
269
|
if(m)
|
274
270
|
if m['command'] && m['command'] == 'prepare'
|
275
271
|
prepare()
|
272
|
+
elsif m['tuples']
|
273
|
+
t_ary = m['tuples'].map do |t|
|
274
|
+
Tuple.new(m)
|
275
|
+
end
|
276
|
+
execute t_ary
|
276
277
|
else
|
277
278
|
t = Tuple.from_hash(m)
|
278
279
|
if(t)
|
@@ -34,6 +34,8 @@ class Zillabyte::Harness::OperationHandler
|
|
34
34
|
@_operation = Zillabyte::Harness::ComponentInput.new(@_app, *args)
|
35
35
|
when "component_output"
|
36
36
|
@_operation = Zillabyte::Harness::ComponentOutput.new(@_app, *args)
|
37
|
+
when "clump"
|
38
|
+
@_operation = Zillabyte::Harness::Clump.new(@_app, *args)
|
37
39
|
else
|
38
40
|
throw "Unknown type"
|
39
41
|
end
|
@@ -181,16 +181,27 @@ class Zillabyte::Harness::Stream
|
|
181
181
|
end
|
182
182
|
|
183
183
|
# Groups together N arbitrary tuples (useful for mini-batch processing)
|
184
|
-
def clump(*args)
|
184
|
+
def clump(*args, &block)
|
185
185
|
op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
|
186
|
-
op.
|
186
|
+
op.build_multilang_operation("clump", *args, &block)
|
187
187
|
.add_operation_properties_to_info(:name, :type)
|
188
|
-
.add_input_args_to_info_as(:clump)
|
189
188
|
.create_arc_info_from_stream(self)
|
190
189
|
.handle_operation
|
191
190
|
.get_output_streams
|
192
191
|
end
|
193
192
|
|
193
|
+
def group_by(*args, &block)
|
194
|
+
op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
|
195
|
+
op.build_multilang_operation("group_by", *args, &block)
|
196
|
+
.add_operation_properties_to_info(:name, :type, :group_by)
|
197
|
+
.add_optional_operation_properties_to_info(:parallelism)
|
198
|
+
.create_arc_info_from_stream(self)
|
199
|
+
.handle_operation
|
200
|
+
.get_output_streams
|
201
|
+
end
|
202
|
+
|
203
|
+
|
204
|
+
|
194
205
|
# Renames fields..
|
195
206
|
def rename(rename_map={})
|
196
207
|
op = Zillabyte::Harness::OperationHandler.new(@_app, self.class)
|
@@ -36,10 +36,6 @@ class Tuple
|
|
36
36
|
@hash.keys
|
37
37
|
end
|
38
38
|
|
39
|
-
def meta
|
40
|
-
@hash["meta"]
|
41
|
-
end
|
42
|
-
|
43
39
|
def to_s
|
44
40
|
values.to_s
|
45
41
|
end
|
@@ -93,14 +89,6 @@ class Tuple
|
|
93
89
|
fail "Missing tuple field in emitted JSON object?: #{e.message}\nJSON: #{hash}\nStacktrace:#{e.backtrace.join('\n\t')}"
|
94
90
|
return nil
|
95
91
|
end
|
96
|
-
begin
|
97
|
-
hash.fetch("meta")
|
98
|
-
rescue SignalException => e
|
99
|
-
raise
|
100
|
-
rescue Exception => e
|
101
|
-
fail "Missing meta field in emitted JSON object?: #{e.message}\nJSON: #{hash}\nStacktrace:#{e.backtrace.join('\n\t')}"
|
102
|
-
return nil
|
103
|
-
end
|
104
92
|
Tuple.new(hash)
|
105
93
|
end
|
106
94
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zillabyte
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.37
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- zillabyte
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.9.
|
33
|
+
version: 0.9.37
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.9.
|
40
|
+
version: 0.9.37
|
41
41
|
description: The Official Zillabyte Gem
|
42
42
|
email:
|
43
43
|
- gem@zillabyte.com
|