zillabyte 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1af0e7dd6900cca31d707e201cfbdfcc8e4fe3f3
4
- data.tar.gz: 37532e5dcb0754700d339bf0c4863c7161098983
3
+ metadata.gz: a43d9f7b3f9e80e5258eb7aa7eadd5e737da2098
4
+ data.tar.gz: 9cd1867ebe50c7ed22659a79c52be31023e75648
5
5
  SHA512:
6
- metadata.gz: 42a4e7ecf444f7cc461d2175d2a6a7ecb8658de9452a1c1b91a202c6060fba9e561cc766fbe0edfa941d8a14dfaabf99e333449a38ebd9dbbf938b9a5700654f
7
- data.tar.gz: 137d82edc66b7950b671989b3a4631d1e39e88606442130c8ad8a5f389f0da717743725cb6e0f01160732497e04cb8eb5b39afd4b8e8928697a0709ff08a266b
6
+ metadata.gz: 0cf5964779b966dc2eb8ff59479cc994e14d76d56bb42738b51f2d2564609caecc5f552947daad3cea0f153d209c9efeae9f942f9cc734b8364e0a8114869f56
7
+ data.tar.gz: 0429f42873893ec3a0035a9c8abe63a75be5c6e2d961596445c1e8c783683f954857e1ed43f9579260fa5d02cde4a37ce2618db19c6d6b71e1556b1945ec309c
@@ -1,3 +1,53 @@
1
+ # OPERATION Each
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # The each block can be thought of as a map operation that runs across multiple machines.
4
+ # The rows of the web_pages dataset are distributed across our system and processed.
5
+ # The result of the each is another stream object that will contain the emitted rows.
6
+ # The input argument tuple contains a single row, that can be accessed like a hash object.
7
+ #
8
+ #
9
+ # LANGUAGE_SYNTAX
10
+ # Simplified syntax:
11
+ #
12
+ # stream.each do |tuple|
13
+ # |=block=|
14
+ # end
15
+ # - This is equivalent to just specifying an "execute" block below.
16
+ # - Note that this syntax only works for a single output stream.
17
+ #
18
+ # Custom each:
19
+ # stream.each do
20
+ # name "name" # the name of the operation
21
+ # emits "stream_1", "stream_2" # optional for single output stream
22
+ # output_format :replace # :replace or :merge, optional, defaults to :replace
23
+ # prepare |=block=| # optional if no initialization needed
24
+ # execute |=block=|
25
+ # end
26
+ #
27
+ # - The allowed output formats are :replace and :merge.
28
+ # * :replace - discards the input tuple values and only emits the specified values. This is the default.
29
+ # * :merge - re-emits the input tuple values along with the specified values.
30
+ # - The "prepare" and "execute" blocks can be in do...end format or {} format.
31
+ # * the "prepare" block is where any setup is done to prepare for tuple processing in the "execute" block.
32
+ # * the "execute" block is where the tuples are actually processed. It must take in a single argument (the "tuple").
33
+ #
34
+ # EXAMPLE
35
+ #
36
+ # # This is a simple example which emits the url of a tuple
37
+ # s = s.each do |tuple|
38
+ # emit :url => tuple["url"]
39
+ # end
40
+ #
41
+ #
42
+ #
43
+ # # Eaches can also be used for conditional emitting
44
+ # stream = result_stream.each{ |tuple|
45
+ #
46
+ # if tuple['html'].include? "hello_world"
47
+ # emit :url => tuple['url']
48
+ # end
49
+ #
50
+ # }
1
51
  class Zillabyte::Harness::Each
2
52
  attr_accessor :_app, :_node, :_options
3
53
 
@@ -1,3 +1,38 @@
1
+ # OPERATION Filter
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # The filter is a conditional each. Conditional expressions can be used to determine if a
4
+ # tuple received by the operation is emitted.
5
+ #
6
+ # LANGUAGE_SYNTAX
7
+ # Simplified syntax:
8
+ # stream.filter do |tuple|
9
+ # |=block=|
10
+ # end
11
+ # - The condition for keeping the tuple should be specified in the block.
12
+ # - This is equivalent to just specifying a "keep" block below.
13
+ # Custom filter:
14
+ # stream.filter do
15
+ # name "name"  => optional
16
+ # emits "stream_1", "stream_2", ...  => optional for single output stream
17
+ # prepare |=block=|  => optional if no initialization needed
18
+ # keep |=block=|
19
+ # end
20
+ # - The "prepare" and "keep" blocks can be in do...end format or {} format.
21
+ # * the "prepare" block is where any setup is done to prepare for tuple processing in the "keep" block.
22
+ # * tuples will pass through the filter if "keep" returns true. The "keep" block must take in a single argument (the "tuple").
23
+ #
24
+ # EXAMPLE
25
+ # # Filter for simple string inclusion
26
+ # stream = stream.filter { |tuple| tuple["url"].include? "hello world" }
27
+ #
28
+ # # Custom Filter
29
+ # stream = stream.filter do
30
+ # name "hello_world_filter"
31
+ # emits "hello_stream"
32
+ # keep do |tuple|
33
+ # return tuple["url"].include? "url"
34
+ # end
35
+ # end
1
36
  class Zillabyte::Harness::Filter
2
37
  attr_accessor :_app, :_node, :_options
3
38
 
@@ -1,3 +1,42 @@
1
+ # OPERATION Group By
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # The `group_by` function can be used to implement aggregate computations
4
+ # such as counting and summing. The following is a full example of a
5
+ # word count operation in Zillabyte.
6
+ #
7
+ # LANGUAGE_SYNTAX
8
+ # stream.group_by do
9
+ # name "name"  => optional
10
+ # group_by "field_1", "field_2", ...
11
+ # emits "stream_1", "stream_2", ...  => optional for single output stream
12
+ # begin_group |=block=|
13
+ # aggregate |=block=|
14
+ # end_group |=block=|
15
+ # end
16
+ # - The "begin_group", "aggregate" and "end_group" blocks can be in do...end format or {} format.
17
+ # * the "begin_group" block is where the initial values for the aggregation are set. It must take in a single argument (the "grouping tuple", which is emitted at the beginning of each group and contains the values of the fields specified in "group_by").
18
+ # * the "aggregate" block is where the aggregation is performed. It must take in a single argument (the "tuple").
19
+ # * the "end_group" block is where the final aggregated value is emitted.
20
+ # EXAMPLE
21
+ # # Declare the group_by, grouping on the :word field
22
+ # stream = stream.group_by(:word) do
23
+ #
24
+ # # Save the word being grouped, initialize any state
25
+ # begin_group do |g_tuple|
26
+ # @word = g_tuple[:word]
27
+ # @count = 0
28
+ # end
29
+ #
30
+ # # In this case, simply increment the counter associated with the word
31
+ # aggregate do |tuple|
32
+ # @count += 1
33
+ # end
34
+ #
35
+ # # Emit the grouped word, along with its count
36
+ # end_group do |g_tuple|
37
+ # emit :word => @word, :count => @count
38
+ # end
39
+ # end
1
40
  class Zillabyte::Harness::GroupBy
2
41
  attr_accessor :_app, :_node, :_fields, :_options
3
42
 
@@ -387,8 +387,9 @@ class Zillabyte::Harness::Helper
387
387
  end
388
388
  end
389
389
 
390
- def self.check_loop_back(node_name, nodes)
391
- ee = "Error in \"loop_back\": \n\t "
390
+ def self.check_loop_back(stream, node_name, nodes)
391
+ ee = "Error in \"loop_back\" to \"#{node_name}\": \n\t "
392
+ previous_node_name = stream._previous_node_name
392
393
  found = false
393
394
  nodes.each do |node|
394
395
  if node._name == node_name
@@ -398,6 +399,12 @@ class Zillabyte::Harness::Helper
398
399
  Zillabyte::Harness::Helper.print_error(msg)
399
400
  end
400
401
  end
402
+ if node._name == previous_node_name
403
+ if node._emits.size < 2
404
+ msg = "#{ee}The preceding operation does not emit multiple streams. Please make sure that it emits one stream for the loop back and another stream for downstream operations."
405
+ Zillabyte::Harness::Helper.print_error(msg)
406
+ end
407
+ end
401
408
  end
402
409
  if !found
403
410
  msg = "#{ee}The specified loop-back node \"#{node_name}\" was not found in the operations preceding it."
@@ -1,3 +1,31 @@
1
+ # OPERATION Join
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # Joins allow you to combine two streams into one in a manner similar
4
+ # to traditional relational systems. Realtime tuple data from a variety
5
+ # of sources can be combined into a single stream via joins on specified
6
+ # fields. Here is an example of a `join` in Zillabyte.
7
+ # LANGUAGE_SYNTAX
8
+ # lhs_stream.join_with( rhs_stream_object, options )
9
+ # - Options should be specified as a hash. The following keys are recognized:
10
+ # Mandatory:
11
+ # * "on" -- specifies the fields to join on. The value must be a STRING or a length-2 ARRAY.
12
+ # If value = a STRING, the LH and RH join fields will both be set to this STRING.
13
+ # If value = a length-2 ARRAY, the LH join field will be set to array[0] and the RH join field will be set to array[1].
14
+ # Optional:
15
+ # * "type" -- specifies the join type. The default is :left. Options are [:inner, :outer, :left, :right]
16
+ # * "emits" -- specifies the stream to emit. A "join" may only emit a single stream.
17
+ # EXAMPLE
18
+ #
19
+ # # Left join on a shared field, in this case, an :owner_id
20
+ # joined_stream = auto_stream.join_with(repairs_stream, :on => :owner_id)
21
+ #
22
+ # # Join with budget_stream's :average on left, employee_stream's :number on right.
23
+ # joined_stream = budget_stream.join_with(
24
+ # employee_stream,
25
+ # :on => [:average, :number],
26
+ # :type => :inner
27
+ # )
28
+ #
1
29
  class Zillabyte::Harness::Join
2
30
  attr_accessor :_app, :_node, :_args
3
31
 
@@ -1,3 +1,26 @@
1
+ # OPERATION Sink
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # The `sink` is a passive operation that defines the schema of the rows that need to be saved.
4
+ # Of all the operations where a stream is consumed, only the `sink` requires a schema to be defined.
5
+ #
6
+ # LANGUAGE_SYNTAX
7
+ # stream.sink do
8
+ # name "name_of_relation"
9
+ # column "field_1", :type_1
10
+ # column "field_2", :type_2 ...
11
+ # end
12
+ # - "Sink" relation "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
13
+ # - Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
14
+ # - Field names cannot be "v[number]", "id", "confidence", "since" or "source" which are reserved Zillabyte names.
15
+ # - Field types must be SYMBOLS. The following types are allowed [:string, :integer, :float, :double, :boolean, :array, :map]
16
+ #
17
+ # EXAMPLE
18
+ # stream.sink{
19
+ # name "patent_sink"
20
+ # column "doc_number", :integer
21
+ # column "title", :string
22
+ # column "date", :string
23
+ # }
1
24
  class Zillabyte::Harness::Sink
2
25
  attr_accessor :_app, :_node, :_options
3
26
 
@@ -1,3 +1,60 @@
1
+ # OPERATION Source
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # A `source` is where the data for your app originates and is defined
4
+ # on the app object. The easiest way to stream data into a Zillabyte
5
+ # app is to use a Zillabyte dataset. A simple `source` takes in the
6
+ # name of a dataset and produces a stream object.
7
+ #
8
+ # A `source` can also use data outside of the public datasets Zillabyte
9
+ # provides. Any external data available on the web can be streamed
10
+ # into a Zillabyte app. To do so, we use the expanded syntax of a
11
+ # `source` and include the code needed to generate the `source` data
12
+ # in the `next_tuple` block. The expanded syntax also allows for
13
+ # additional customizations such as providing a `name` for the `source`
14
+ # and specifying any preparatory steps such as initializing global
15
+ # values in the `begin_cycle` block.
16
+ #
17
+ # LANGUAGE_SYNTAX
18
+ #
19
+ # Sourcing from a dataset:
20
+ # app.source("dataset_name")
21
+ # Custom source:
22
+ # app.source do
23
+ # name "name"  => optional
24
+ # emits "stream_1", "stream_2", ...  => optional for single output stream
25
+ # end_cycle_policy :null_emit OR :explicit  => default :null_emit
26
+ # begin_cycle |=block=|  => optional if no initialization needed
27
+ # next_tuple |=block=|
28
+ # end
29
+ # - The "end_cycle_policy" is used to specify when a cycle should end. Two options are available:
30
+ # * :null_emit - end the cycle when a field contains "nil" or when nothing is emitted from the "next_tuple" block.
31
+ # * :explicit - the end of a cycle is explicitly declared in the "next_tuple" block. This is done by including the "end_cycle" keyword in the "next_tuple" block, e.g. end_cycle if @queue.nil?.
32
+ # - The "begin_cycle" and "next_tuple" blocks can be in do...end format or {} format.
33
+ # * the "begin_cycle" block is where any setup is done to initialize the content and quantity of tuples emitted by the "next_tuple" block.
34
+ # * the "next_tuple" block is where the tuples are actually emitted.
35
+ # EXAMPLE
36
+ #
37
+ # # Source from the "homepages" relation
38
+ # stream = app.source("homepages")
39
+ #
40
+ # # Custom Source
41
+ # app = Zillabyte.app(name = "small_cycle")
42
+ # stream = app.source do
43
+ # DICTIONARY = %w(apple banana cherry dairy eel) unless defined? DICTIONARY
44
+ # emits "small_cycle_relation"
45
+ # end_cycle_policy :explicit
46
+ # begin_cycle do
47
+ # @count = 0
48
+ # end
49
+ #
50
+ # next_tuple do
51
+ # url = "http://#{@count}.example.com/#{@count}.html"
52
+ # html = "<html>#{DICTIONARY.sample}</html>"
53
+ # emit("small_cycle_relation", {:url => url, :html => html})
54
+ # @count += 1
55
+ # end_cycle if @count==1000
56
+ # end
57
+ # end
1
58
  class Zillabyte::Harness::Source
2
59
  attr_accessor :_app, :_node, :_relation
3
60
 
@@ -73,9 +73,8 @@ class Zillabyte::Harness::Stream
73
73
  def loop_back(*args, &block)
74
74
  # This is not a real operation, just telling the stream to loop back to the previous operation
75
75
  loop_back_node = args[0]
76
- Zillabyte::Harness::Helper.check_loop_back(loop_back_node, @_app._nodes)
76
+ Zillabyte::Harness::Helper.check_loop_back(self, loop_back_node, @_app._nodes)
77
77
  Zillabyte::Harness::Helper.write_arc_to_file({"name" => self._name, "origin" => self._previous_node_name, "dest" => loop_back_node, "loop_back" => 1}, @_app._socket)
78
- self
79
78
  end
80
79
 
81
80
  def sink(*args, &block)
@@ -1,3 +1,3 @@
1
1
  module Zillabyte
2
- VERSION = "0.9.0" unless defined?(VERSION)
2
+ VERSION = "0.9.1" unless defined?(VERSION)
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zillabyte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - zillabyte
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-24 00:00:00.000000000 Z
11
+ date: 2014-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.9.0
33
+ version: 0.9.1
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.9.0
40
+ version: 0.9.1
41
41
  description: The Official Zillabyte Gem
42
42
  email:
43
43
  - gem@zillabyte.com