zillabyte 0.9.32 → 0.9.33
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/ruby/lib/zillabyte/harness/component_input.rb +4 -4
- data/ruby/lib/zillabyte/harness/component_output.rb +4 -4
- data/ruby/lib/zillabyte/harness/each.rb +6 -4
- data/ruby/lib/zillabyte/harness/filter.rb +5 -4
- data/ruby/lib/zillabyte/harness/group_by.rb +7 -4
- data/ruby/lib/zillabyte/harness/injected_component.rb +6 -4
- data/ruby/lib/zillabyte/harness/join.rb +7 -4
- data/ruby/lib/zillabyte/harness/sink.rb +5 -4
- data/ruby/lib/zillabyte/harness/source.rb +7 -4
- data/ruby/lib/zillabyte/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YjU5NTNkMWYyM2NmMzcyZWI3NmQ5NTE1YzRkZDEzNzBiNzZiZjZiOQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YzQ4Y2E0Y2Y0NGMzNWNkNWZkMDdmMDRhZTdlMmVjZTNjOGY1MjRiYg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OGEzYzUwMDViMWFkMDc1NmM2M2RiYTZmNjg2YWM4ZTQ3MjY3NmZiMmQ3ZDM3
|
10
|
+
OWI2NmMzNDI4NDA4YzA0NjUxMjQ1ZTI5Zjc5ZTgyNDA2NjdkODVhYjE3MTNm
|
11
|
+
ZWUyZGNiOGE2N2JlOTRiMGEyZDBjNjQzNWIzNGJiZTY1OTA3OWM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZjlkMDk1YTk5MTU0Y2UyYWIzOGJmMGVhZDMyNGZiN2M4NjY5Mjk4ZTllODE3
|
14
|
+
YTg3MmQ0NzY1ZjRiNDA1YjVhMWU1NTdlN2QyMjg5MmMyOTZjZjNkYzFiOTlm
|
15
|
+
ZjJlODdkYTQ3MzEzZDc0ZWMzZWNhMDJmOTcyNGU5ZWIyYjEyODM=
|
@@ -1,19 +1,19 @@
|
|
1
1
|
# OPERATION Component Input
|
2
2
|
# TAGLINE
|
3
3
|
# Declare the input of a component
|
4
|
-
#
|
4
|
+
# DESCRIPTION
|
5
5
|
# Components work by declaring a “schema” of their inputs and outputs.
|
6
6
|
# The inputs are the expected fields they will look for within incoming tuples.
|
7
7
|
# In an example case, our component declares an input stream named “urls”, and will look for the “url” field of any incoming tuples.
|
8
8
|
#
|
9
|
-
#
|
9
|
+
# RUBY_SYNTAX
|
10
10
|
# component.inputs do
|
11
11
|
# name "input_stream_name"
|
12
12
|
# field "field_1", :type_1
|
13
13
|
# field "field_2", :type_2
|
14
14
|
# end
|
15
15
|
#
|
16
|
-
#
|
16
|
+
# RUBY_NOTES
|
17
17
|
# "Inputs" stream "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
|
18
18
|
#
|
19
19
|
# Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
|
@@ -22,7 +22,7 @@
|
|
22
22
|
#
|
23
23
|
# Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, :boolean, :array and :map.
|
24
24
|
#
|
25
|
-
#
|
25
|
+
# RUBY_EXAMPLE
|
26
26
|
# require 'zillabyte'
|
27
27
|
#
|
28
28
|
# comp = Zillabyte.component("ruby_component")
|
@@ -1,16 +1,16 @@
|
|
1
1
|
# OPERATION Component Output
|
2
2
|
# TAGLINE
|
3
3
|
# Declare the output of a component
|
4
|
-
#
|
4
|
+
# DESCRIPTION
|
5
5
|
# In a similar manner to component inputs, we declare the component output schema for the component.
|
6
6
|
# This determines the output of the component for other Zillabyte functions to expect. This is just like a sink in apps.
|
7
|
-
#
|
7
|
+
# RUBY_SYNTAX
|
8
8
|
# component_stream.outputs do
|
9
9
|
# name "output_stream_name"
|
10
10
|
# field "field_1", :type_1
|
11
11
|
# field "field_2", :type_2
|
12
12
|
# end
|
13
|
-
#
|
13
|
+
# RUBY_NOTES
|
14
14
|
# "Outputs" stream "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
|
15
15
|
#
|
16
16
|
# Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
|
@@ -18,7 +18,7 @@
|
|
18
18
|
# Field names cannot be "v[number]", "id", "confidence", "since" or "source" which are reserved Zillabyte names.
|
19
19
|
#
|
20
20
|
# Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, :boolean, :array and :map.
|
21
|
-
#
|
21
|
+
# RUBY_EXAMPLE
|
22
22
|
# stream.outputs do
|
23
23
|
# name "plural"
|
24
24
|
# field "word", :string
|
@@ -1,13 +1,13 @@
|
|
1
1
|
# OPERATION Each
|
2
2
|
# TAGLINE
|
3
3
|
# The each block can be thought of as a map operation that runs across multiple machines.
|
4
|
-
#
|
4
|
+
# DESCRIPTION
|
5
5
|
# The `each` operation is the simplest operation within Zillabyte. It is analogous to the inner
|
6
6
|
# block of a conventional "for" loop which is used to apply the same set of manipulations to
|
7
7
|
# each record. Similarly, Zillabyte applies the same code logic specified in the each to each
|
8
8
|
# tuple the operation receives.
|
9
9
|
#
|
10
|
-
#
|
10
|
+
# RUBY_SYNTAX
|
11
11
|
# # Simplified syntax:
|
12
12
|
#
|
13
13
|
# stream.each do |tuple|
|
@@ -23,20 +23,22 @@
|
|
23
23
|
# execute |=block=|
|
24
24
|
# end
|
25
25
|
#
|
26
|
-
#
|
26
|
+
# RUBY_NOTES
|
27
27
|
# The simplified syntax can be used if a prepare block and other customizations are not needed.
|
28
28
|
#
|
29
29
|
# * **Note:** this syntax only works for a single output stream.
|
30
30
|
#
|
31
31
|
# The allowed output formats are :replace and :merge.
|
32
|
+
#
|
32
33
|
# * **:replace** : discards the input tuple values and only emits the specified values. This is the default.
|
33
34
|
# * **:merge** : re-emits the input tuple values along with the specified values.
|
34
35
|
#
|
35
36
|
# The "prepare" and "execute" blocks can be in do...end format or {} format.
|
37
|
+
#
|
36
38
|
# * **"prepare"** block : where any setup is done to prepare for tuple processing in the "execute" block.
|
37
39
|
# * **"execute"** block : where the tuples are actually processed. It must take in a single argument (the "tuple").
|
38
40
|
#
|
39
|
-
#
|
41
|
+
# RUBY_EXAMPLE
|
40
42
|
#
|
41
43
|
# # This is a simple example which emits the url of a tuple
|
42
44
|
# s = s.each do |tuple|
|
@@ -1,11 +1,11 @@
|
|
1
1
|
# OPERATION Filter
|
2
2
|
# TAGLINE
|
3
3
|
# The filter is a conditional each.
|
4
|
-
#
|
4
|
+
# DESCRIPTION
|
5
5
|
# Conditional expressions can be used to determine if a
|
6
6
|
# tuple received by the operation is emitted.
|
7
7
|
#
|
8
|
-
#
|
8
|
+
# RUBY_SYNTAX
|
9
9
|
# # Simplified syntax:
|
10
10
|
# stream.filter do |tuple|
|
11
11
|
# |=block=|
|
@@ -19,16 +19,17 @@
|
|
19
19
|
# keep |=block=|
|
20
20
|
# end
|
21
21
|
#
|
22
|
-
#
|
22
|
+
# RUBY_NOTES
|
23
23
|
# The simplified syntax can be used if a prepare block and other customizations are not needed.
|
24
24
|
#
|
25
25
|
# * **Note:** this syntax only works for a single output stream.
|
26
26
|
#
|
27
27
|
# The "prepare" and "keep" blocks can be in do...end format or {} format.
|
28
|
+
#
|
28
29
|
# * **"prepare"** block : where any setup is done to prepare for tuple processing in the "keep" block.
|
29
30
|
# * **"keep"** block : tuples pass through the filter if "keep" returns "True". It must take in a single argument (the "tuple").
|
30
31
|
#
|
31
|
-
#
|
32
|
+
# RUBY_EXAMPLE
|
32
33
|
# # Filter for simple string inclusion
|
33
34
|
# stream = stream.filter { |tuple| tuple["url"].include? "hello world" }
|
34
35
|
#
|
@@ -1,9 +1,10 @@
|
|
1
1
|
# OPERATION Group By
|
2
|
+
# TAGLINE
|
2
3
|
# Group tuples by keys
|
3
|
-
#
|
4
|
+
# DESCRIPTION
|
4
5
|
# The `group_by` operation can be used to implement aggregation-type functions
|
5
6
|
# such as counting and summing.
|
6
|
-
#
|
7
|
+
# RUBY_SYNTAX
|
7
8
|
# stream.group_by do
|
8
9
|
# name "name" # optional
|
9
10
|
# group_by "field_1", "field_2", ...
|
@@ -12,13 +13,15 @@
|
|
12
13
|
# aggregate |=block=|
|
13
14
|
# end_group |=block=|
|
14
15
|
# end
|
15
|
-
#
|
16
|
+
# RUBY_NOTES
|
16
17
|
#
|
17
18
|
# The "begin_group", "aggregate" and "end_group" blocks can be in do...end format or {} format.
|
19
|
+
#
|
18
20
|
# * **"begin_group"** block : where the initial values for the aggregation are set. It must take in a single argument (the "grouping tuple", which is emitted at the beginning of each group and contains the values of the fields specified in "group_by").
|
19
21
|
# * **"aggregate"** block : where the aggregation is performed. It must take in a single argument (the "tuple").
|
20
22
|
# * **"end_group"** block : where the final aggregated value is emitted.
|
21
|
-
#
|
23
|
+
#
|
24
|
+
# RUBY_EXAMPLE
|
22
25
|
# # Declare the group_by, grouping on the :word field
|
23
26
|
# stream = stream.group_by(:word) do
|
24
27
|
#
|
@@ -1,12 +1,12 @@
|
|
1
1
|
# OPERATION Call Component
|
2
2
|
# TAGLINE
|
3
3
|
# Call a component
|
4
|
-
#
|
4
|
+
# DESCRIPTION
|
5
5
|
# Components can be embedded into other components or even applications.
|
6
6
|
# When running an application that has an embedded component, that component is
|
7
7
|
# joined into the same execution environment as its parent, allowing for high throughput component execution.
|
8
8
|
# To embed a component, use the "call_component" operation
|
9
|
-
#
|
9
|
+
# RUBY_SYNTAX
|
10
10
|
# stream.call_component do
|
11
11
|
# component_id "component_id"
|
12
12
|
# name "name" # optional
|
@@ -14,21 +14,23 @@
|
|
14
14
|
# outputs "output_stream_name_1", ...
|
15
15
|
# output_format :replace OR :merge # optional, defaults to :replace
|
16
16
|
# end
|
17
|
-
#
|
17
|
+
# RUBY_NOTES
|
18
18
|
# The "component_id" MUST be given and correspond to the name or id listed in the output of "zillabyte components".
|
19
19
|
#
|
20
20
|
# The allowed output formats are :replace and :merge. Note that only linear components, i.e. those with only "each" and "filter" operations support :merge.
|
21
|
+
#
|
21
22
|
# * **:replace** : discards the input tuple values and only emits the output values. This is the default.
|
22
23
|
# * **:merge** : re-emits the input tuple values along with the output values.
|
23
24
|
#
|
24
25
|
# To correctly stitch in the component, the implicit assumptions below WILL BE USED:
|
26
|
+
#
|
25
27
|
# * The "stream" that "call_component" is invoked on MUST correspond to the first listed input stream to the component.
|
26
28
|
# * The streams specified in "additional_inputs" MUST correspond to the other listed input streams in order.
|
27
29
|
# * Tuples emitted from the preceding operation MUST contain the fields listed for the corresponding component input streams.
|
28
30
|
# * The streams specified in "outputs" must correspond to the listed output streams to the component in order.
|
29
31
|
# * The number of input and output streams specified must match the number listed for the component.
|
30
32
|
#
|
31
|
-
#
|
33
|
+
# RUBY_EXAMPLE
|
32
34
|
# require 'zillabyte'
|
33
35
|
# app = Zillabyte.app("plural_app")
|
34
36
|
#
|
@@ -1,13 +1,14 @@
|
|
1
1
|
# OPERATION Join
|
2
|
+
# TAGLINE
|
2
3
|
# Join streams together
|
3
|
-
#
|
4
|
+
# DESCRIPTION
|
4
5
|
# Joins allow you to combine two streams into one in a manner similar
|
5
6
|
# to traditional relational systems. Realtime tuple data from a variety
|
6
7
|
# of sources can be combined into a single stream via joins on specified
|
7
8
|
# fields.
|
8
|
-
#
|
9
|
+
# RUBY_SYNTAX
|
9
10
|
# lhs_stream.join_with( rhs_stream_object, options )
|
10
|
-
#
|
11
|
+
# RUBY_NOTES
|
11
12
|
# Options should be specified as a hash. The following keys are recognized:
|
12
13
|
#
|
13
14
|
# Mandatory:
|
@@ -15,9 +16,11 @@
|
|
15
16
|
# * **"on"** : specifies the fields to join on. The value must be a STRING or a length-2 ARRAY. If value = a STRING, the LH and RH join fields will both be set to this STRING. If value = a length-2 ARRAY, the LH join field will be set to array[0] and the RH join field will be set to array[1].
|
16
17
|
#
|
17
18
|
# Optional:
|
19
|
+
#
|
18
20
|
# * **"type"** : specifies the join type. The default is :left. Options are [:inner, :outer, :left, :right]
|
19
21
|
# * **"emits"** : specifies the stream to emit. A "join" may only emit a single stream.
|
20
|
-
#
|
22
|
+
#
|
23
|
+
# RUBY_EXAMPLE
|
21
24
|
#
|
22
25
|
# # Left join on a shared field, in this case, an :owner_id
|
23
26
|
# joined_stream = auto_stream.join_with(repairs_stream, :on => :owner_id)
|
@@ -1,17 +1,17 @@
|
|
1
1
|
# OPERATION Sink
|
2
2
|
# TAGLINE
|
3
3
|
# The `sink` is a passive operation that defines the schema of the rows that need to be saved.
|
4
|
-
#
|
4
|
+
# DESCRIPTION
|
5
5
|
# The `sink` marks the end of a stream. It saves the data it receives according to the schema defined within it.
|
6
6
|
#
|
7
|
-
#
|
7
|
+
# RUBY_SYNTAX
|
8
8
|
# stream.sink do
|
9
9
|
# name "name_of_relation"
|
10
10
|
# column "field_1", :type_1
|
11
11
|
# column "field_2", :type_2 ...
|
12
12
|
# end
|
13
13
|
#
|
14
|
-
#
|
14
|
+
# RUBY_NOTES
|
15
15
|
# "Sink" relation "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
|
16
16
|
#
|
17
17
|
# Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
|
@@ -20,7 +20,8 @@
|
|
20
20
|
#
|
21
21
|
# Field types must be SYMBOLS. The following types are allowed :string, :integer, :float, :double, :boolean, :array and :map.
|
22
22
|
#
|
23
|
-
#
|
23
|
+
# RUBY_EXAMPLE
|
24
|
+
#
|
24
25
|
# stream.sink{
|
25
26
|
# name "patent_sink"
|
26
27
|
# column "doc_number", :integer
|
@@ -3,7 +3,7 @@
|
|
3
3
|
# A source is where the data for your app originates and is defined
|
4
4
|
# on the app object.
|
5
5
|
#
|
6
|
-
#
|
6
|
+
# DESCRIPTION
|
7
7
|
#
|
8
8
|
# ## Sourcing From Datasets
|
9
9
|
# The easiest way to stream data into a Zillabyte
|
@@ -20,7 +20,7 @@
|
|
20
20
|
# and specifying any preparatory steps such as initializing global
|
21
21
|
# values in the begin_cycle block.
|
22
22
|
#
|
23
|
-
#
|
23
|
+
# RUBY_SYNTAX
|
24
24
|
# #Sourcing from a dataset:
|
25
25
|
# app.source("dataset_name") # fetch a dataset from Zillabyte
|
26
26
|
#
|
@@ -32,16 +32,19 @@
|
|
32
32
|
# begin_cycle |=block=| # optional if no initialization needed
|
33
33
|
# next_tuple |=block=| # The code to execute on each tuple fetch
|
34
34
|
# end
|
35
|
-
#
|
35
|
+
# RUBY_NOTES
|
36
36
|
#
|
37
37
|
# The "end_cycle_policy" is used to specify when a cycle should end. Two options are available:
|
38
|
+
#
|
38
39
|
# * **:null_emit** : end the cycle when a field contains "nil" or when nothing is emitted from the "next_tuple" block.
|
39
40
|
# * **:explicit** : the end of a cycle is explicitly declared in the "next_tuple" block. This is done by including the "end_cycle" keyword in the "next_tuple" block, e.g. end_cycle if @queue.nil?.
|
40
41
|
#
|
41
42
|
# The "begin_cycle" and "next_tuple" blocks can be in do...end format or {} format.
|
43
|
+
#
|
42
44
|
# * **"begin_cycle"** block : where any setup is done to initialize the content and quantity of tuples emitted by the "next_tuple" block.
|
43
45
|
# * **"next_tuple"** block : where the tuples are actually emitted.
|
44
|
-
|
46
|
+
##
|
47
|
+
# RUBY_EXAMPLE
|
45
48
|
#
|
46
49
|
# # Source from the "homepages" relation
|
47
50
|
# stream = app.source("homepages")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zillabyte
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.32
|
4
|
+
version: 0.9.33
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- zillabyte
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.9.
|
33
|
+
version: 0.9.33
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.9.
|
40
|
+
version: 0.9.33
|
41
41
|
description: The Official Zillabyte Gem
|
42
42
|
email:
|
43
43
|
- gem@zillabyte.com
|
@@ -92,7 +92,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
92
92
|
version: '0'
|
93
93
|
requirements: []
|
94
94
|
rubyforge_project:
|
95
|
-
rubygems_version: 2.
|
95
|
+
rubygems_version: 2.2.2
|
96
96
|
signing_key:
|
97
97
|
specification_version: 4
|
98
98
|
summary: The Official Zillabyte Gem
|