zillabyte 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1af0e7dd6900cca31d707e201cfbdfcc8e4fe3f3
4
- data.tar.gz: 37532e5dcb0754700d339bf0c4863c7161098983
3
+ metadata.gz: a43d9f7b3f9e80e5258eb7aa7eadd5e737da2098
4
+ data.tar.gz: 9cd1867ebe50c7ed22659a79c52be31023e75648
5
5
  SHA512:
6
- metadata.gz: 42a4e7ecf444f7cc461d2175d2a6a7ecb8658de9452a1c1b91a202c6060fba9e561cc766fbe0edfa941d8a14dfaabf99e333449a38ebd9dbbf938b9a5700654f
7
- data.tar.gz: 137d82edc66b7950b671989b3a4631d1e39e88606442130c8ad8a5f389f0da717743725cb6e0f01160732497e04cb8eb5b39afd4b8e8928697a0709ff08a266b
6
+ metadata.gz: 0cf5964779b966dc2eb8ff59479cc994e14d76d56bb42738b51f2d2564609caecc5f552947daad3cea0f153d209c9efeae9f942f9cc734b8364e0a8114869f56
7
+ data.tar.gz: 0429f42873893ec3a0035a9c8abe63a75be5c6e2d961596445c1e8c783683f954857e1ed43f9579260fa5d02cde4a37ce2618db19c6d6b71e1556b1945ec309c
@@ -1,3 +1,53 @@
1
+ # OPERATION Each
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # The each block can be thought of as a map operation that runs across multiple machines.
4
+ # The rows of the web_pages dataset are distributed across our system and processed.
5
+ # The result of the each is another stream object that will contain the emitted rows.
6
+ # The input argument tuple contains a single row, which can be accessed like a hash object.
7
+ #
8
+ #
9
+ # LANGUAGE_SYNTAX
10
+ # Simplified syntax:
11
+ #
12
+ # stream.each do |tuple|
13
+ # |=block=|
14
+ # end
15
+ # - This is equivalent to just specifying an "execute" block below.
16
+ # - Note that this syntax only works for a single output stream.
17
+ #
18
+ # Custom each:
19
+ # stream.each do
20
+ # name "name" # the name of the operation
21
+ # emits "stream_1", "stream_2" # optional for single output stream
22
+ # output_format :replace # :replace or :merge, optional, defaults to :replace
23
+ # prepare |=block=| # optional if no initialization needed
24
+ # execute |=block=|
25
+ # end
26
+ #
27
+ # - The allowed output formats are :replace and :merge.
28
+ # * :replace - discards the input tuple values and only emits the specified values. This is the default.
29
+ # * :merge - re-emits the input tuple values along with the specified values.
30
+ # - The "prepare" and "execute" blocks can be in do...end format or {} format.
31
+ # * the "prepare" block is where any setup is done to prepare for tuple processing in the "execute" block.
32
+ # * the "execute" block is where the tuples are actually processed. It must take in a single argument (the "tuple").
33
+ #
34
+ # EXAMPLE
35
+ #
36
+ # # This is a simple example which emits the url of a tuple
37
+ # s = s.each do |tuple|
38
+ # emit :url => tuple["url"]
39
+ # end
40
+ #
41
+ #
42
+ #
43
+ # # An each can also be used to emit conditionally
44
+ # stream = result_stream.each{ |tuple|
45
+ #
46
+ # if tuple['html'].include? "hello_world"
47
+ # emit :url => tuple['url']
48
+ # end
49
+ #
50
+ # }
1
51
  class Zillabyte::Harness::Each
2
52
  attr_accessor :_app, :_node, :_options
3
53
 
@@ -1,3 +1,38 @@
1
+ # OPERATION Filter
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # The filter is a conditional each. Conditional expressions can be used to determine if a
4
+ # tuple received by the operation is emitted.
5
+ #
6
+ # LANGUAGE_SYNTAX
7
+ # Simplified syntax:
8
+ # stream.filter do |tuple|
9
+ # |=block=|
10
+ # end
11
+ # - The condition for keeping the tuple should be specified in the block.
12
+ # - This is equivalent to just specifying a "keep" block below.
13
+ # Custom filter:
14
+ # stream.filter do
15
+ # name "name"                        => optional
16
+ # emits "stream_1", "stream_2", ...  => optional for single output stream
17
+ # prepare |=block=|                  => optional if no initialization needed
18
+ # keep |=block=|
19
+ # end
20
+ # - The "prepare" and "keep" blocks can be in do...end format or {} format.
21
+ # * the "prepare" block is where any setup is done to prepare for tuple processing in the "keep" block.
22
+ # * tuples will pass through the filter if the "keep" block returns true. The block must take in a single argument (the "tuple").
23
+ #
24
+ # EXAMPLE
25
+ # # Filter for simple string inclusion
26
+ # stream = stream.filter { |tuple| tuple["url"].include? "hello world" }
27
+ #
28
+ # # Custom Filter
29
+ # stream = stream.filter do
30
+ # name "hello_world_filter"
31
+ # emits "hello_stream"
32
+ # keep do |tuple|
33
+ # tuple["url"].include? "url"
34
+ # end
35
+ # end
1
36
  class Zillabyte::Harness::Filter
2
37
  attr_accessor :_app, :_node, :_options
3
38
 
@@ -1,3 +1,42 @@
1
+ # OPERATION Group By
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # The `group_by` function can be used to implement aggregate computations
4
+ # such as counting and summing. The following is a full example of a
5
+ # word count operation in Zillabyte.
6
+ #
7
+ # LANGUAGE_SYNTAX
8
+ # stream.group_by do
9
+ # name "name"                        => optional
10
+ # group_by "field_1", "field_2", ...
11
+ # emits "stream_1", "stream_2", ...  => optional for single output stream
12
+ # begin_group |=block=|
13
+ # aggregate |=block=|
14
+ # end_group |=block=|
15
+ # end
16
+ # - The "begin_group", "aggregate" and "end_group" blocks can be in do...end format or {} format.
17
+ # * the "begin_group" block is where the initial values for the aggregation are set. It must take in a single argument (the "grouping tuple", which is emitted at the beginning of each group and contains the values of the fields specified in "group_by").
18
+ # * the "aggregate" block is where the aggregation is performed. It must take in a single argument (the "tuple").
19
+ # * the "end_group" block is where the final aggregated value is emitted.
20
+ # EXAMPLE
21
+ # # Declare the group_by, grouping on the :word field
22
+ # stream = stream.group_by(:word) do
23
+ #
24
+ # # Save the word being grouped, initialize any state
25
+ # begin_group do |g_tuple|
26
+ # @word = g_tuple[:word]
27
+ # @count = 0
28
+ # end
29
+ #
30
+ # # In this case, simply increment the counter associated with the word
31
+ # aggregate do |tuple|
32
+ # @count += 1
33
+ # end
34
+ #
35
+ # # Emit the grouped word, along with its count
36
+ # end_group do |g_tuple|
37
+ # emit :word => @word, :count => @count
38
+ # end
39
+ # end
1
40
  class Zillabyte::Harness::GroupBy
2
41
  attr_accessor :_app, :_node, :_fields, :_options
3
42
 
@@ -387,8 +387,9 @@ class Zillabyte::Harness::Helper
387
387
  end
388
388
  end
389
389
 
390
- def self.check_loop_back(node_name, nodes)
391
- ee = "Error in \"loop_back\": \n\t "
390
+ def self.check_loop_back(stream, node_name, nodes)
391
+ ee = "Error in \"loop_back\" to \"#{node_name}\": \n\t "
392
+ previous_node_name = stream._previous_node_name
392
393
  found = false
393
394
  nodes.each do |node|
394
395
  if node._name == node_name
@@ -398,6 +399,12 @@ class Zillabyte::Harness::Helper
398
399
  Zillabyte::Harness::Helper.print_error(msg)
399
400
  end
400
401
  end
402
+ if node._name == previous_node_name
403
+ if node._emits.size < 2
404
+ msg = "#{ee}The preceding operation does not emit multiple streams. Please make sure that it emits one stream for the loop back and another stream for downstream operations."
405
+ Zillabyte::Harness::Helper.print_error(msg)
406
+ end
407
+ end
401
408
  end
402
409
  if !found
403
410
  msg = "#{ee}The specified loop-back node \"#{node_name}\" was not found in the operations preceding it."
@@ -1,3 +1,31 @@
1
+ # OPERATION Join
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # Joins allow you to combine two streams into one in a manner similar
4
+ # to traditional relational systems. Realtime tuple data from a variety
5
+ # of sources can be combined into a single stream via joins on specified
6
+ # fields. Here is an example of a `join` in Zillabyte.
7
+ # LANGUAGE_SYNTAX
8
+ # lhs_stream.join_with( rhs_stream_object, options )
9
+ # - Options should be specified as a hash. The following keys are recognized:
10
+ # Mandatory:
11
+ # * "on" -- specifies the fields to join on. The value must be a STRING or a length-2 ARRAY.
12
+ # If value = a STRING, the LH and RH join fields will both be set to this STRING.
13
+ # If value = a length-2 ARRAY, the LH join field will be set to array[0] and the RH join field will be set to array[1].
14
+ # Optional:
15
+ # * "type" -- specifies the join type. The default is :left. Options are [:inner, :outer, :left, :right]
16
+ # * "emits" -- specifies the stream to emit. A "join" may only emit a single stream.
17
+ # EXAMPLE
18
+ #
19
+ # # Left join on a shared field, in this case, an :owner_id
20
+ # joined_stream = auto_stream.join_with(repairs_stream, :on => :owner_id)
21
+ #
22
+ # # Join with budget_stream's :average on left, employee_stream's :number on right.
23
+ # joined_stream = budget_stream.join_with(
24
+ # employee_stream,
25
+ # :on => [:average, :number],
26
+ # :type => :inner
27
+ # )
28
+ #
1
29
  class Zillabyte::Harness::Join
2
30
  attr_accessor :_app, :_node, :_args
3
31
 
@@ -1,3 +1,26 @@
1
+ # OPERATION Sink
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # The `sink` is a passive operation that defines the schema of the rows that need to be saved.
4
+ # Of all the operations where a stream is consumed, only the `sink` requires a schema to be defined.
5
+ #
6
+ # LANGUAGE_SYNTAX
7
+ # stream.sink do
8
+ # name "name_of_relation"
9
+ # column "field_1", :type_1
10
+ # column "field_2", :type_2 ...
11
+ # end
12
+ # - "Sink" relation "name" must be specified as a non-empty STRING with only alphanumeric and underscore characters!
13
+ # - Field names must be non-empty STRINGS with only alphanumeric or underscore characters.
14
+ # - Field names cannot be "v[number]", "id", "confidence", "since" or "source" which are reserved Zillabyte names.
15
+ # - Field types must be SYMBOLS. The following types are allowed [:string, :integer, :float, :double, :boolean, :array, :map]
16
+ #
17
+ # EXAMPLE
18
+ # stream.sink{
19
+ # name "patent_sink"
20
+ # column "doc_number", :integer
21
+ # column "title", :string
22
+ # column "date", :string
23
+ # }
1
24
  class Zillabyte::Harness::Sink
2
25
  attr_accessor :_app, :_node, :_options
3
26
 
@@ -1,3 +1,60 @@
1
+ # OPERATION Source
2
+ # HIGH_LEVEL_DESCRIPTION
3
+ # A `source` is where the data for your app originates and is defined
4
+ # on the app object. The easiest way to stream data into a Zillabyte
5
+ # app is to use a Zillabyte dataset. A simple `source` takes in the
6
+ # name of a dataset and produces a stream object.
7
+ #
8
+ # A `source` can also use data outside of the public datasets Zillabyte
9
+ # provides. Any external data available on the web can be streamed
10
+ # into a Zillabyte app. To do so, we use the expanded syntax of a
11
+ # `source` and include the code needed to generate the `source` data
12
+ # in the `next_tuple` block. The expanded syntax also allows for
13
+ # additional customizations such as providing a `name` for the `source`
14
+ # and specifying any preparatory steps such as initializing global
15
+ # values in the `begin_cycle` block.
16
+ #
17
+ # LANGUAGE_SYNTAX
18
+ #
19
+ # Sourcing from a dataset:
20
+ # app.source("dataset_name")
21
+ # Custom source:
22
+ # app.source do
23
+ # name "name"                               => optional
24
+ # emits "stream_1", "stream_2", ...         => optional for single output stream
25
+ # end_cycle_policy :null_emit OR :explicit  => default :null_emit
26
+ # begin_cycle |=block=|                     => optional if no initialization needed
27
+ # next_tuple |=block=|
28
+ # end
29
+ # - The "end_cycle_policy" is used to specify when a cycle should end. Two options are available:
30
+ # * :null_emit - end the cycle when a field contains "nil" or when nothing is emitted from the "next_tuple" block.
31
+ # * :explicit - the end of a cycle is explicitly declared in the "next_tuple" block. This is done by including the "end_cycle" keyword in the "next_tuple" block, e.g. end_cycle if @queue.nil?.
32
+ # - The "begin_cycle" and "next_tuple" blocks can be in do...end format or {} format.
33
+ # * the "begin_cycle" block is where any setup is done to initialize the content and quantity of tuples emitted by the "next_tuple" block.
34
+ # * the "next_tuple" block is where the tuples are actually emitted.
35
+ # EXAMPLE
36
+ #
37
+ # # Source from the "homepages" relation
38
+ # stream = app.source("homepages")
39
+ #
40
+ # # Custom Source
41
+ # app = Zillabyte.app(name = "small_cycle")
42
+ # stream = app.source do
43
+ # DICTIONARY = %w(apple banana cherry dairy eel) unless defined? DICTIONARY
44
+ # emits "small_cycle_relation"
45
+ # end_cycle_policy :explicit
46
+ # begin_cycle do
47
+ # @count = 0
48
+ # end
49
+ #
50
+ # next_tuple do
51
+ # url = "http://#{@count}.example.com/#{@count}.html"
52
+ # html = "<html>#{DICTIONARY.sample}</html>"
53
+ # emit("small_cycle_relation", {:url => url, :html => html})
54
+ # @count += 1
55
+ # end_cycle if @count==1000
56
+ # end
57
+ # end
1
58
  class Zillabyte::Harness::Source
2
59
  attr_accessor :_app, :_node, :_relation
3
60
 
@@ -73,9 +73,8 @@ class Zillabyte::Harness::Stream
73
73
  def loop_back(*args, &block)
74
74
  # This is not a real operation, just telling the stream to loop back to the previous operation
75
75
  loop_back_node = args[0]
76
- Zillabyte::Harness::Helper.check_loop_back(loop_back_node, @_app._nodes)
76
+ Zillabyte::Harness::Helper.check_loop_back(self, loop_back_node, @_app._nodes)
77
77
  Zillabyte::Harness::Helper.write_arc_to_file({"name" => self._name, "origin" => self._previous_node_name, "dest" => loop_back_node, "loop_back" => 1}, @_app._socket)
78
- self
79
78
  end
80
79
 
81
80
  def sink(*args, &block)
@@ -1,3 +1,3 @@
1
1
  module Zillabyte
2
- VERSION = "0.9.0" unless defined?(VERSION)
2
+ VERSION = "0.9.1" unless defined?(VERSION)
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zillabyte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - zillabyte
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-24 00:00:00.000000000 Z
11
+ date: 2014-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 0.9.0
33
+ version: 0.9.1
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 0.9.0
40
+ version: 0.9.1
41
41
  description: The Official Zillabyte Gem
42
42
  email:
43
43
  - gem@zillabyte.com