rflow 0.0.5 → 1.0.0a1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-gemset +1 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +21 -0
  5. data/.yardopts +1 -0
  6. data/Gemfile +5 -1
  7. data/Guardfile +8 -0
  8. data/LICENSE +190 -0
  9. data/NOTES +26 -13
  10. data/README.md +448 -0
  11. data/Rakefile +5 -12
  12. data/bin/rflow +23 -20
  13. data/example/basic_config.rb +2 -2
  14. data/example/basic_extensions.rb +8 -8
  15. data/example/http_config.rb +1 -1
  16. data/example/http_extensions.rb +15 -15
  17. data/lib/rflow.rb +15 -387
  18. data/lib/rflow/component.rb +105 -50
  19. data/lib/rflow/component/port.rb +25 -24
  20. data/lib/rflow/components/raw.rb +4 -4
  21. data/lib/rflow/components/raw/extensions.rb +2 -2
  22. data/lib/rflow/configuration.rb +54 -36
  23. data/lib/rflow/configuration/component.rb +2 -3
  24. data/lib/rflow/configuration/connection.rb +9 -10
  25. data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
  26. data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
  27. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
  28. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
  29. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
  30. data/lib/rflow/configuration/port.rb +3 -4
  31. data/lib/rflow/configuration/ruby_dsl.rb +59 -35
  32. data/lib/rflow/configuration/setting.rb +8 -7
  33. data/lib/rflow/configuration/shard.rb +24 -0
  34. data/lib/rflow/configuration/uuid_keyed.rb +3 -3
  35. data/lib/rflow/connection.rb +21 -10
  36. data/lib/rflow/connections/zmq_connection.rb +45 -44
  37. data/lib/rflow/logger.rb +67 -0
  38. data/lib/rflow/master.rb +127 -0
  39. data/lib/rflow/message.rb +14 -14
  40. data/lib/rflow/pid_file.rb +84 -0
  41. data/lib/rflow/shard.rb +148 -0
  42. data/lib/rflow/version.rb +1 -1
  43. data/rflow.gemspec +22 -28
  44. data/schema/message.avsc +8 -8
  45. data/spec/fixtures/config_ints.rb +4 -4
  46. data/spec/fixtures/config_shards.rb +30 -0
  47. data/spec/fixtures/extensions_ints.rb +8 -8
  48. data/spec/rflow_component_port_spec.rb +58 -0
  49. data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
  50. data/spec/rflow_configuration_spec.rb +4 -4
  51. data/spec/rflow_message_data_raw.rb +2 -2
  52. data/spec/rflow_message_data_spec.rb +6 -6
  53. data/spec/rflow_message_spec.rb +13 -13
  54. data/spec/rflow_spec.rb +294 -71
  55. data/spec/schema_spec.rb +2 -2
  56. data/spec/spec_helper.rb +6 -4
  57. data/temp.rb +21 -21
  58. metadata +56 -65
  59. data/.rvmrc +0 -1
  60. data/README +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c322bc6cf9c3b4ccd46b13cf56d3c2460dc5f0be
4
- data.tar.gz: af2d6fb3e7051a074c56fa10b70b5a02b23bb0d7
3
+ metadata.gz: 57c6f0b7c61b30886bbf0f4b2f65821aa5b1b0f9
4
+ data.tar.gz: 62f58d281509732effeca0c1a041df2668497b80
5
5
  SHA512:
6
- metadata.gz: 5a3cd46af3c815d2cb5840d48a0e38f7e28dbe911276fac75d3466bef9d05d7d49e38baa010b5ce419a67aea5829d2f227d2ff74aa5e689f6c1e6109d882ad81
7
- data.tar.gz: 539c61aca94e84e1ccb00acba1ef87c7a6556dc65180f9f905561b09e147eab83ff0a409dd01b84a1113ba820cf89c6470ee64c1ace68736232423ba3a1d9668
6
+ metadata.gz: 8d74949a15024641aef4123ca703d2f2ccf6fb5f97dca9829a282ca53bd6d36c347c844b189255955c7fa058bf903853d0c3acf13fda4dc2e2b3f40e49129310
7
+ data.tar.gz: f6233d9cc128220c886b6ed4970b544040cace77af6701b6f7429da304ad7de00b5536455469e21988f37d2fa1faaedf3f4324f08eee343669b03c6bdaece735
@@ -0,0 +1 @@
1
+ rflow-dev
@@ -0,0 +1 @@
1
+ ruby-2.1.1
@@ -0,0 +1,21 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - 2.1.0
7
+
8
+ before_install:
9
+ - sudo apt-get install libtool autoconf automake uuid-dev build-essential
10
+ - wget http://download.zeromq.org/zeromq-3.2.4.tar.gz && tar zxvf zeromq-3.2.4.tar.gz && cd zeromq-3.2.4 && ./configure && make && sudo make install && cd ..
11
+ # Only has 4.0.4, need 3.2 version due to old em-zeromq
12
+ # - sudo add-apt-repository -y ppa:chris-lea/zeromq
13
+ # - sudo apt-get update
14
+ # - sudo apt-get install libzmq3 libzmq3-dev
15
+
16
+ script: bundle exec rspec spec
17
+
18
+ notifications:
19
+ hipchat:
20
+ rooms:
21
+ secure: a4nrCmDPwhteJA65QFRlBdnsknT+4y/JtZL/sLPCObOahFWvLOXMggPXvHAOssCaa2ydYrMMvWNliOz63nuu3qAnR90H7aOU3o+2K3zeACy0cAjF27lDonLhaYHeUz07oPwr/iDlFC8bDfFDempjIFFnXSc/LhUWaCltnJ7W5vI=
@@ -0,0 +1 @@
1
+ --output ./doc --main README.md --files schema/*.avsc lib/**/*.rb bin/*.rb - README.md LICENSE
data/Gemfile CHANGED
@@ -1,5 +1,9 @@
1
- source "http://rubygems.org"
1
+ source "https://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in rflow.gemspec
4
4
  gemspec
5
5
 
6
+ group :development do
7
+ gem 'guard'
8
+ gem 'guard-rspec'
9
+ end
@@ -0,0 +1,8 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard :rspec do
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
7
+ watch('spec/spec_helper.rb') { "spec" }
8
+ end
data/LICENSE ADDED
@@ -0,0 +1,190 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ Copyright 2014 RedJack LLC
179
+
180
+ Licensed under the Apache License, Version 2.0 (the "License");
181
+ you may not use this file except in compliance with the License.
182
+ You may obtain a copy of the License at
183
+
184
+ http://www.apache.org/licenses/LICENSE-2.0
185
+
186
+ Unless required by applicable law or agreed to in writing, software
187
+ distributed under the License is distributed on an "AS IS" BASIS,
188
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
189
+ See the License for the specific language governing permissions and
190
+ limitations under the License.
data/NOTES CHANGED
@@ -1,3 +1,16 @@
1
+ RFlow starts
2
+ read in DB
3
+ create new shards
4
+ - Create a set of workers with the shard configuration
5
+ - each worker creates a set of components
6
+
7
+ - create components
8
+
9
+
10
+
11
+
12
+
13
+
1
14
  RFlow Manager
2
15
 
3
16
  Components
@@ -20,12 +33,12 @@ rflow <config file>
20
33
  - place pid files in deployment's run directory
21
34
  Configure components via zmq
22
35
  Daemonize self
23
-
36
+
24
37
 
25
38
 
26
39
  class Component
27
40
  def self.input_port
28
- end
41
+ end
29
42
 
30
43
  def self.output_port
31
44
  end
@@ -33,11 +46,11 @@ class Component
33
46
  attr_accessor :state
34
47
 
35
48
  def initialize(config, run_directory)
36
-
49
+
37
50
  end
38
51
 
39
52
  def run
40
-
53
+
41
54
  end
42
55
 
43
56
  def configure
@@ -56,7 +69,7 @@ class PassThrough < Component
56
69
  # This will initialize the ports
57
70
  super
58
71
  # Do stuff to initialize component
59
- end
72
+ end
60
73
 
61
74
  end
62
75
 
@@ -66,7 +79,7 @@ Computation Requirements:
66
79
  - management bus connection information
67
80
  - group and instance UUID
68
81
  - beacon interval
69
- - run directory, containing
82
+ - run directory, containing
70
83
  - PID files
71
84
  - log dir + logs
72
85
  - computation-specific configuration (conf dir)
@@ -90,7 +103,7 @@ Computation Requirements:
90
103
 
91
104
  External Computations:
92
105
  - Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
93
- -
106
+ -
94
107
 
95
108
 
96
109
  RFlow
@@ -100,7 +113,7 @@ RFlow
100
113
 
101
114
  Translate
102
115
  - Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
103
-
116
+
104
117
 
105
118
 
106
119
 
@@ -112,7 +125,7 @@ Plugins:
112
125
  - necessary to tell system?
113
126
  - need a protocol for defining schema transfer
114
127
  - each message has attached schema
115
-
128
+
116
129
 
117
130
  lib/rflow/message.rb
118
131
 
@@ -122,7 +135,7 @@ RFlow::Management
122
135
  - Somewhere for external people to register new computations with running system
123
136
  - computation says that it's running and asks for Connection configuration
124
137
  - how will it specify where in the workflow it wants to run????
125
-
138
+
126
139
  RFlow::Message(complete on-the-wire Avro message format)
127
140
  data_type, provenance, external_ids, empty, data (see below)
128
141
 
@@ -142,7 +155,7 @@ RFlow::Connection::AMQP
142
155
 
143
156
  RFlow::Connection::ZMQ
144
157
 
145
-
158
+
146
159
 
147
160
 
148
161
  computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
@@ -152,12 +165,12 @@ AMQP::Topic - responsible for setting up a topic -> queue binding
152
165
  r.outgoing = amqp connection, channel, vhost, login, password, queue name
153
166
  behavior -> n x m, "round-robin" among the connected outgoing
154
167
  incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
155
-
168
+
156
169
 
157
170
  ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
158
171
  r.incoming = zmq connection string (tcp://ip:port), type pub
159
172
  r.outgoing = zmq connection string (tcp://ip:port), type sub
160
- behavior -> n x m, broadcast sending,
173
+ behavior -> n x m, broadcast sending,
161
174
 
162
175
  ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
163
176
  r.incoming = zmq connection string (tcp://ip:port), type push
@@ -0,0 +1,448 @@
1
+ # RFlow
2
+
3
+ [![Build Status](https://travis-ci.org/redjack/rflow.png?branch=master)](https://travis-ci.org/redjack/rflow)
4
+
5
+ RFlow is a Ruby framework inspired by
6
+ [flow-based programming](http://en.wikipedia.org/wiki/Flow-based_programming)
7
+ (FBP), which was previously inspired by
8
+ [Communicating Sequential Processes](http://en.wikipedia.org/wiki/Communicating_sequential_processes)
9
+ (CSP). It has some conceptual similarities to Javascript's
10
+ [NoFlo](http://noflojs.org/) system, Java's
11
+ [Storm](http://storm.incubator.apache.org/), and Clojure's
12
+ [core.async](http://clojure.github.io/core.async/) library.
13
+
14
+ In short, components communicate with each other by sending/receiving
15
+ messages via their output/input ports over connections. Ports are
16
+ "wired" together output->input with connections, and messages are
17
+ explicitly serialized before being sent over the connection. RFlow
18
+ supports generalized connection types and message serialization,
19
+ however only two are in current use, namely ZeroMQ connections and
20
+ Avro serialization.
21
+
22
+ RFlow currently runs as a single-threaded, evented system on top of
23
+ [Eventmachine](http://rubyeventmachine.com/), meaning that any code
24
+ should be coded in an asynchronous style so as to not block the
25
+ Eventmachine reactor (and thus block all the other components). There
26
+ is currently work being done to "shard" the workflow among multiple
27
+ processes and/or threads.
28
+
29
+ Some of the long-term goals of RFlow are to allow for components and
30
+ portions of the workflow to be defined in any language that supports
31
+ Avro and ZeroMQ, which are numerous.
32
+
33
+
34
+ ## Definitions
35
+
36
+ * __Component__ - the basic unit of RFlow computation. Each
37
+ component is a shared-nothing, individual computation module that
38
+ communicates with the rest of the system through explicit message
39
+ passing via input and output ports.
40
+
41
+ * __Port__ - a named entity on each component that is responsible for
42
+ receiving data (an input port) or sending data (an output port).
43
+ Ports can be "keyed" or "indexed" to allow better multiplexing of
44
+ messages out/in a single port, as well as allow a single port to be
45
+ accessed by an array.
46
+
47
+ * __Connection__ - a directed link between an output port and an input
48
+ port. RFlow supports generalized connection types, however only
49
+ ZeroMQ IPC links are currently used.
50
+
51
+ * __Message__ - a bit of serialized data that is sent out an output
52
+ port and received on an input port. Due to the serialization,
53
+ message types and schemas are explicitly defined. In a departure
54
+ from "pure" FBP, RFlow supports sending multiple message types via a
55
+ single connection.
56
+
57
+ * __Workflow__ - the common name for the digraph created when the
58
+ components (nodes) are wired together via connections to their
59
+ respective output/input ports.
60
+
61
+
62
+ ## Component Examples
63
+
64
+ The following describes the API of an RFlow component:
65
+
66
+ ```ruby
67
+ class SimpleComponent < RFlow::Component
68
+ input_port :in
69
+ output_port :out
70
+
71
+ def configure!(config); end
72
+ def run!; end
73
+ def process_message(input_port, input_port_key, connection, message); end
74
+ def shutdown!; end
75
+ def cleanup!; end
76
+ end
77
+ ```
78
+
79
+ * `input_port` and `output_port` define the named ports that will
80
+ receive data or send data, respectively. These class methods create
81
+ accessors for their respective port names, to be used later in the
82
+ `process_message` or `run!` methods. There can be multiple (or no)
83
+ input and output ports.
84
+
85
+ * `configure!` (called with a hash configuration) is called after the
86
+ component is instantiated but before the workflow has been wired or
87
+ any messages have been sent. Note that this is called outside the
88
+ Eventmachine reactor.
89
+
90
+ * `run!` is called after all the components have been wired together
91
+ with connections and the entire workflow has been created. For a
92
+ component that is a source of messages, this is where messages will
93
+ be sent. For example, if the component is reading from a file, this
94
+ is where the file will be opened, the contents read into a message,
95
+ and the message sent out the output port. `run!` is called within
96
+ the Eventmachine reactor.
97
+
98
+ * `process_message` is an evented callback that is called whenever the
99
+ component receives a message on one of its input ports.
100
+ `process_message` is called within the Eventmachine reactor.
101
+
102
+ * `shutdown!` is called when the flow is being terminated, and is
103
+ meant to allow the components to do penultimate processing and send
104
+ any final messages. All components in a flow will be told to
105
+ `shutdown!` before they are told to `cleanup!`.
106
+
107
+ * `cleanup!` is the final call to each component, and allows the
108
+ component to clean up any external resources that it might have
109
+ outstanding, such as file handles or network sockets.
110
+
111
+ "Source" components will often do all of their work within the `run!`
112
+ method, and often gather message data from an external source, such as
113
+ file, database, or network socket. The following component generates a
114
+ set of integers between a configured start/finish, incrementing by a
115
+ configured step:
116
+
117
+ ```ruby
118
+ class RFlow::Components::GenerateIntegerSequence < RFlow::Component
119
+ output_port :out
120
+
121
+ def configure!(config)
122
+ @start = config['start'].to_i
123
+ @finish = config['finish'].to_i
124
+ @step = config['step'] ? config['step'].to_i : 1
125
+ # If interval seconds is not given, it will default to 0
126
+ @interval_seconds = config['interval_seconds'].to_i
127
+ end
128
+
129
+ # Note that this uses the timer (sometimes with 0 interval) so as
130
+ # not to block the reactor
131
+ def run!
132
+ timer = EM::PeriodicTimer.new(@interval_seconds) do
133
+ message = RFlow::Message.new('RFlow::Message::Data::Integer')
134
+ message.data.data_object = @start
135
+ out.send_message message
136
+ @start += @step
137
+ timer.cancel if @start > @finish
138
+ end
139
+ end
140
+ end
141
+ ```
142
+
143
+ "Middle" components receive messages on input port(s), perform a bit
144
+ of computation, and then send a message out the output port(s). The
145
+ following component accepts a Ruby expression string via its config,
146
+ and then uses that as an expression to determine what port to send an
147
+ incoming message:
148
+
149
+ ```ruby
150
+ class RFlow::Components::RubyProcFilter < RFlow::Component
151
+ input_port :in
152
+ output_port :filtered
153
+ output_port :dropped
154
+ output_port :errored
155
+
156
+ def configure!(config)
157
+ @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
158
+ end
159
+
160
+ def process_message(input_port, input_port_key, connection, message)
161
+ begin
162
+ if @filter_proc.call(message)
163
+ filtered.send_message message
164
+ else
165
+ dropped.send_message message
166
+ end
167
+ rescue Exception => e
168
+ errored.send_message message
169
+ end
170
+ end
171
+ end
172
+ ```
173
+
174
+ "Sink" components accept messages on an input port and do not have an
175
+ output port. They often operate on external sinks, such as writing
176
+ messages to a file, database, or network socket. The following
177
+ component writes the inspected message to a file (defined via the
178
+ configuration):
179
+
180
+ ```ruby
181
+ class RFlow::Components::FileOutput < RFlow::Component
182
+ input_port :in
183
+
184
+ attr_accessor :output_file_path, :output_file
185
+
186
+ def configure!(config)
187
+ self.output_file_path = config['output_file_path']
188
+ self.output_file = File.new output_file_path, 'w+'
189
+ end
190
+
191
+ def process_message(input_port, input_port_key, connection, message)
192
+ output_file.puts message.data.data_object.inspect
193
+ output_file.flush
194
+ end
195
+
196
+ def cleanup!
197
+ output_file.close
198
+ end
199
+ end
200
+ ```
201
+
202
+ ## RFlow Messages
203
+
204
+ RFlow messages are instances of
205
+ [`RFlow::Message`](lib/rflow/message.rb), which are ultimately
206
+ serialized via an Avro [schema](schema/message.avsc).
207
+
208
+ There are two parts of the message "envelope": a provenance and the
209
+ embedded data object "payload".
210
+
211
+ The `provenance` is a way for a component to annotate a message with a
212
+ bit of data that should (by convention) be carried through the
213
+ workflow with the message, as well as being copied to derived
214
+ messages. For example, a TCP server component would spin up a TCP
215
+ server and, upon receiving a connection and packets on a session, it
216
+ would marshal the packets into `RFlow::Message`s and send them out
217
+ its output ports. Messages received on its input port, however, need
218
+ to have a way to be matched to the corresponding underlying TCP
219
+ connection. `provenance` provides a method for the TCP server
220
+ component to add a bit of metadata (namely an identifier for the TCP
221
+ connection) such that later messages that contain the same provenance
222
+ can be matched to the correct underlying TCP connection.
223
+
224
+
225
+ The other parts of the message envelope are related to the embedded
226
+ data object. In addition to the data object itself (which is encoded
227
+ with a specific Avro schema), there are a few fields that describe the
228
+ embedded data, namely the `data_type_name`, the
229
+ `data_serialization_type`, and the `data_schema`. By including all
230
+ this metadata in each message, the system is completely dynamic and
231
+ allows for multiple message types to be included on a single
232
+ connection, as well as enabling non-RFlow components to be created in
233
+ any language. This does come at the expense of larger messages which
234
+ results in greater message overhead.
235
+
236
+ For example, if we have a simple integer data type that we would like
237
+ to serialize via Avro, we can register the schema with the following
238
+ `add_available_data_type` code shown below:
239
+
240
+ ```ruby
241
+ long_integer_schema = '{"type": "long"}'
242
+ RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
243
+ ```
244
+
245
+ This will make the schema and message type available to RFlow, such
246
+ that it will be able to create a new message with:
247
+
248
+ ```ruby
249
+ message = RFlow::Message.new('RFlow::Message::Data::Integer')
250
+ ```
251
+
252
+ and will automatically reconstitute a message from the connection and
253
+ call a component's `process_message`.
254
+
255
+ The deserialized Avro Ruby object is available as the `data_object`
256
+ accessor on the `data` class, i.e.:
257
+
258
+ ```ruby
259
+ message.data.data_object = 1024
260
+ ```
261
+
262
+ The `data_object` is the deserialized Avro Ruby object and, as such,
263
+ allows the Avro object to be accessed as a Ruby object. In order to
264
+ provide a more convenient interface to the underlying Avro object,
265
+ RFlow allows modules to be dynamically mixed in to the `data` class
266
+ object.
267
+
268
+ For example, the module below provides a bit of extra functionality to
269
+ the above-mentioned `RFlow::Message::Data::Integer` message type,
270
+ namely to default the integer to 0 upon being mixed in, provide a
271
+ better named accessor, and add a `default?` method to the `data` object:
272
+
273
+ ```ruby
274
+ module SimpleDataExtension
275
+ def self.extended(base_data)
276
+ base_data.data_object = 0
277
+ end
278
+
279
+ def int; data_object; end
280
+ def int=(new_int); self.data_object = new_int; end
281
+
282
+ def default?;
283
+ data_object == 0
284
+ end
285
+ end
286
+ ```
287
+
288
+ Once a module is defined, it needs to be registered to the appropriate
289
+ message data type. Note that multiple modules can be registered for a
290
+ given message data type.
291
+
292
+ ```ruby
293
+ RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
294
+ ```
295
+
296
+ The result of this is that the following code will work:
297
+
298
+ ```ruby
299
+ message = RFlow::Message.new('RFlow::Message::Data::Integer')
300
+ message.data.int == 0 # => true
301
+ message.data.default? # => true
302
+ message.data.int = 1024
303
+ message.data.default? # => false
304
+ ```
305
+
306
+
307
+ ## RFlow Workflow Configuration
308
+
309
+ RFlow currently stores its configuration in a SQLite database which
310
+ is internally accessed via ActiveRecord. Given that SQLite is a
311
+ rather simple and standard interface, non-RFlow components could
312
+ access it and determine their respective ZMQ connections.
313
+
314
+ DB schemas for the configuration database are in
315
+ [lib/rflow/configuration/migrations](lib/rflow/configuration/migrations)
316
+ and define the complete workflow configuration. Note that each of the
317
+ tables uses a UUID primary key, and UUIDs are used within RFlow to
318
+ identify specific components.
319
+
320
+ * settings - general application settings, such as log levels, app
321
+ names, directories, etc
322
+
323
+ * components - a list of the components including its name,
324
+ specification (Ruby class), and options. Note that the options are
325
+ serialized to the database as YAML, and components should understand
326
+ that the round-trip through the database might not be perfect (e.g.
327
+ Ruby symbols might become strings). A component also has a number of
328
+ input ports and output ports.
329
+
330
+ * ports - belonging to a component (via `component_uuid` foreign key),
331
+ also has a `type` column for ActiveRecord STI, which gets set to
332
+ either a `RFlow::Configuration::InputPort` or
333
+ `RFlow::Configuration::OutputPort`.
334
+
335
+ * connections - a connection between two ports via foreign keys
336
+ `input_port_uuid` and `output_port_uuid`. Like ports, connections
337
+ are typed via AR STI (`RFlow::Configuration::ZMQConnection` or
338
+ `RFlow::Configuration::AMQPConnection`) and have a YAML serialized
339
+ `options` hash. A connection also (potentially) defines the port
340
+ keys.
341
+
342
+ RFlow also provides a RubyDSL for a configuration-like file to be used
343
+ to load the database:
344
+
345
+ ```ruby
346
+ RFlow::Configuration::RubyDSL.configure do |config|
347
+ # Configure the settings, which include paths for various files, log
348
+ # levels, and component specific stuffs
349
+ config.setting('rflow.log_level', 'DEBUG')
350
+ config.setting('rflow.application_directory_path', '../tmp')
351
+
352
+ config.setting('rflow.application_name', 'testapp')
353
+
354
+ # Instantiate components
355
+ config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', {
356
+ 'start' => 0,
357
+ 'finish' => 10,
358
+ 'step' => 3,
359
+ 'interval_seconds' => 1
360
+ }
361
+ config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', {
362
+ 'start' => 20,
363
+ 'finish' => 30
364
+ }
365
+ config.component 'filter', 'RFlow::Components::RubyProcFilter', {
366
+ 'filter_proc_string' => 'lambda {|message| true}'
367
+ }
368
+ config.component 'output1', 'RFlow::Components::FileOutput', {
369
+ 'output_file_path' => '/tmp/out1'
370
+ }
371
+ config.component 'output2', 'RFlow::Components::FileOutput', {
372
+ 'output_file_path' => '/tmp/out2'
373
+ }
374
+
375
+ # Wire components together
376
+ config.connect 'generate_ints1#out' => 'filter#in'
377
+ config.connect 'generate_ints2#out' => 'filter#in'
378
+ config.connect 'filter#filtered' => 'replicate#in'
379
+ config.connect 'filter#out' => 'output1#in'
380
+ config.connect 'filter#filtered' => 'output2#in'
381
+ end
382
+ ```
383
+
384
+ ## Command-Line Operation
385
+
386
+ RFlow includes the `rflow` binary that can load a database from a Ruby
387
+ DSL, as well as start/stop the workflow application as a daemon.
388
+ Invoking the `rflow` binary without any options will give a brief help:
389
+
390
+ ```
391
+ Usage: rflow [options] (start|stop|status|load)
392
+ -d, --database DB Config database (sqlite) path (GENERALLY REQUIRED)
393
+ -c, --config CONFIG Config file path (only valid for load)
394
+ -e, --extensions FILE1[,FILE_N] Extension file paths (will load)
395
+ -g, --gems GEM1[,GEM_N] Extension gems (will require)
396
+ -l, --log LOGFILE Initial startup log file (in addition to stdout)
397
+ -v, --verbose [LEVEL] Control the startup log (and stdout) verbosity (DEBUG, INFO, WARN) defaults to INFO
398
+ -f Run in the foreground
399
+ --version Show RFlow version and exit
400
+ -h, --help Show this message and exit
401
+ ```
402
+
403
+ In general, the process for getting started is to first create a
404
+ configuration database via `rflow load`:
405
+
406
+ ```
407
+ rflow load -d my_config.sqlite -c my_ruby_dsl.rb
408
+ ```
409
+
410
+ which will create the `my_config.sqlite` configuration database loaded
411
+ with the `my_ruby_dsl.rb` configuration DSL.
412
+
413
+ Once a config database exists, you can start up the application that
414
+ it describes with `rflow start`. The `--extensions` argument allows
415
+ loading of arbitrary Ruby code (via Ruby's `load`), which is usually
416
+ where the component implementations are stored, as well as data type
417
+ registrations.
418
+
419
+ ```
420
+ rflow start -d my_config.sqlite -e my_component.rb,my_other_component.rb,my_data_type.rb
421
+ ```
422
+
423
+ By default, RFlow will daemonize, write its pid file to
424
+ `./run/app.pid` and write its log file to `./log/app.log`. The `-f`
425
+ flag will keep RFlow in the foreground.
426
+
427
+ RFlow also supports two signals that allow for useful management of a
428
+ running RFlow daemon's log. Sending a `SIGUSR1` to the running RFlow
429
+ process will cause RFlow to close and reopen its log file, which
430
+ allows for easy log management without restarting RFlow. In addition,
431
+ sending a `SIGUSR2` will toggle RFlow's log-level to `DEBUG`, and a
432
+ subsequent `SIGUSR2` will toggle the log-level back to what was
433
+ originally set. This allows for easy debugging of a running RFlow
434
+ process.
435
+
436
+ Copyright 2014 RedJack LLC
437
+
438
+ Licensed under the Apache License, Version 2.0 (the "License");
439
+ you may not use this file except in compliance with the License.
440
+ You may obtain a copy of the License at
441
+
442
+ http://www.apache.org/licenses/LICENSE-2.0
443
+
444
+ Unless required by applicable law or agreed to in writing, software
445
+ distributed under the License is distributed on an "AS IS" BASIS,
446
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
447
+ See the License for the specific language governing permissions and
448
+ limitations under the License.