rflow 0.0.5 → 1.0.0a1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +21 -0
- data/.yardopts +1 -0
- data/Gemfile +5 -1
- data/Guardfile +8 -0
- data/LICENSE +190 -0
- data/NOTES +26 -13
- data/README.md +448 -0
- data/Rakefile +5 -12
- data/bin/rflow +23 -20
- data/example/basic_config.rb +2 -2
- data/example/basic_extensions.rb +8 -8
- data/example/http_config.rb +1 -1
- data/example/http_extensions.rb +15 -15
- data/lib/rflow.rb +15 -387
- data/lib/rflow/component.rb +105 -50
- data/lib/rflow/component/port.rb +25 -24
- data/lib/rflow/components/raw.rb +4 -4
- data/lib/rflow/components/raw/extensions.rb +2 -2
- data/lib/rflow/configuration.rb +54 -36
- data/lib/rflow/configuration/component.rb +2 -3
- data/lib/rflow/configuration/connection.rb +9 -10
- data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
- data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
- data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
- data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
- data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
- data/lib/rflow/configuration/port.rb +3 -4
- data/lib/rflow/configuration/ruby_dsl.rb +59 -35
- data/lib/rflow/configuration/setting.rb +8 -7
- data/lib/rflow/configuration/shard.rb +24 -0
- data/lib/rflow/configuration/uuid_keyed.rb +3 -3
- data/lib/rflow/connection.rb +21 -10
- data/lib/rflow/connections/zmq_connection.rb +45 -44
- data/lib/rflow/logger.rb +67 -0
- data/lib/rflow/master.rb +127 -0
- data/lib/rflow/message.rb +14 -14
- data/lib/rflow/pid_file.rb +84 -0
- data/lib/rflow/shard.rb +148 -0
- data/lib/rflow/version.rb +1 -1
- data/rflow.gemspec +22 -28
- data/schema/message.avsc +8 -8
- data/spec/fixtures/config_ints.rb +4 -4
- data/spec/fixtures/config_shards.rb +30 -0
- data/spec/fixtures/extensions_ints.rb +8 -8
- data/spec/rflow_component_port_spec.rb +58 -0
- data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
- data/spec/rflow_configuration_spec.rb +4 -4
- data/spec/rflow_message_data_raw.rb +2 -2
- data/spec/rflow_message_data_spec.rb +6 -6
- data/spec/rflow_message_spec.rb +13 -13
- data/spec/rflow_spec.rb +294 -71
- data/spec/schema_spec.rb +2 -2
- data/spec/spec_helper.rb +6 -4
- data/temp.rb +21 -21
- metadata +56 -65
- data/.rvmrc +0 -1
- data/README +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57c6f0b7c61b30886bbf0f4b2f65821aa5b1b0f9
|
4
|
+
data.tar.gz: 62f58d281509732effeca0c1a041df2668497b80
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d74949a15024641aef4123ca703d2f2ccf6fb5f97dca9829a282ca53bd6d36c347c844b189255955c7fa058bf903853d0c3acf13fda4dc2e2b3f40e49129310
|
7
|
+
data.tar.gz: f6233d9cc128220c886b6ed4970b544040cace77af6701b6f7429da304ad7de00b5536455469e21988f37d2fa1faaedf3f4324f08eee343669b03c6bdaece735
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rflow-dev
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.1.1
|
data/.travis.yml
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
language: ruby
|
2
|
+
|
3
|
+
rvm:
|
4
|
+
- 1.9.3
|
5
|
+
- 2.0.0
|
6
|
+
- 2.1.0
|
7
|
+
|
8
|
+
before_install:
|
9
|
+
- sudo apt-get install libtool autoconf automake uuid-dev build-essential
|
10
|
+
- wget http://download.zeromq.org/zeromq-3.2.4.tar.gz && tar zxvf zeromq-3.2.4.tar.gz && cd zeromq-3.2.4 && ./configure && make && sudo make install && cd ..
|
11
|
+
# Only has 4.0.4, need 3.2 version due to old em-zeromq
|
12
|
+
# - sudo add-apt-repository -y ppa:chris-lea/zeromq
|
13
|
+
# - sudo apt-get update
|
14
|
+
# - sudo apt-get install libzmq3 libzmq3-dev
|
15
|
+
|
16
|
+
script: bundle exec rspec spec
|
17
|
+
|
18
|
+
notifications:
|
19
|
+
hipchat:
|
20
|
+
rooms:
|
21
|
+
secure: a4nrCmDPwhteJA65QFRlBdnsknT+4y/JtZL/sLPCObOahFWvLOXMggPXvHAOssCaa2ydYrMMvWNliOz63nuu3qAnR90H7aOU3o+2K3zeACy0cAjF27lDonLhaYHeUz07oPwr/iDlFC8bDfFDempjIFFnXSc/LhUWaCltnJ7W5vI=
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--output ./doc --main README.md --files schema/*.avsc lib/**/*.rb bin/*.rb - README.md LICENSE
|
data/Gemfile
CHANGED
data/Guardfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,190 @@
|
|
1
|
+
Apache License
|
2
|
+
Version 2.0, January 2004
|
3
|
+
http://www.apache.org/licenses/
|
4
|
+
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6
|
+
|
7
|
+
1. Definitions.
|
8
|
+
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
11
|
+
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13
|
+
the copyright owner that is granting the License.
|
14
|
+
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
16
|
+
other entities that control, are controlled by, or are under common
|
17
|
+
control with that entity. For the purposes of this definition,
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
19
|
+
direction or management of such entity, whether by contract or
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22
|
+
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24
|
+
exercising permissions granted by this License.
|
25
|
+
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
27
|
+
including but not limited to software source code, documentation
|
28
|
+
source, and configuration files.
|
29
|
+
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
31
|
+
transformation or translation of a Source form, including but
|
32
|
+
not limited to compiled object code, generated documentation,
|
33
|
+
and conversions to other media types.
|
34
|
+
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
36
|
+
Object form, made available under the License, as indicated by a
|
37
|
+
copyright notice that is included in or attached to the work
|
38
|
+
(an example is provided in the Appendix below).
|
39
|
+
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46
|
+
the Work and Derivative Works thereof.
|
47
|
+
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
49
|
+
the original version of the Work and any modifications or additions
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
61
|
+
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
64
|
+
subsequently incorporated within the Work.
|
65
|
+
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
72
|
+
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78
|
+
where such license applies only to those patent claims licensable
|
79
|
+
by such Contributor that are necessarily infringed by their
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
82
|
+
institute patent litigation against any entity (including a
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
85
|
+
or contributory patent infringement, then any patent licenses
|
86
|
+
granted to You under this License for that Work shall terminate
|
87
|
+
as of the date such litigation is filed.
|
88
|
+
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
91
|
+
modifications, and in Source or Object form, provided that You
|
92
|
+
meet the following conditions:
|
93
|
+
|
94
|
+
(a) You must give any other recipients of the Work or
|
95
|
+
Derivative Works a copy of this License; and
|
96
|
+
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
98
|
+
stating that You changed the files; and
|
99
|
+
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
102
|
+
attribution notices from the Source form of the Work,
|
103
|
+
excluding those notices that do not pertain to any part of
|
104
|
+
the Derivative Works; and
|
105
|
+
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
108
|
+
include a readable copy of the attribution notices contained
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
111
|
+
of the following places: within a NOTICE text file distributed
|
112
|
+
as part of the Derivative Works; within the Source form or
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
114
|
+
within a display generated by the Derivative Works, if and
|
115
|
+
wherever such third-party notices normally appear. The contents
|
116
|
+
of the NOTICE file are for informational purposes only and
|
117
|
+
do not modify the License. You may add Your own attribution
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
120
|
+
that such additional attribution notices cannot be construed
|
121
|
+
as modifying the License.
|
122
|
+
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
124
|
+
may provide additional or different license terms and conditions
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
128
|
+
the conditions stated in this License.
|
129
|
+
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
133
|
+
this License, without any additional terms or conditions.
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135
|
+
the terms of any separate license agreement you may have executed
|
136
|
+
with Licensor regarding such Contributions.
|
137
|
+
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
140
|
+
except as required for reasonable and customary use in describing the
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
142
|
+
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
152
|
+
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
158
|
+
incidental, or consequential damages of any character arising as a
|
159
|
+
result of this License or out of the use or inability to use the
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
162
|
+
other commercial damages or losses), even if such Contributor
|
163
|
+
has been advised of the possibility of such damages.
|
164
|
+
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168
|
+
or other liability obligations and/or rights consistent with this
|
169
|
+
License. However, in accepting such obligations, You may act only
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
174
|
+
of your accepting any such warranty or additional liability.
|
175
|
+
|
176
|
+
END OF TERMS AND CONDITIONS
|
177
|
+
|
178
|
+
Copyright 2014 RedJack LLC
|
179
|
+
|
180
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
181
|
+
you may not use this file except in compliance with the License.
|
182
|
+
You may obtain a copy of the License at
|
183
|
+
|
184
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
185
|
+
|
186
|
+
Unless required by applicable law or agreed to in writing, software
|
187
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
188
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
189
|
+
See the License for the specific language governing permissions and
|
190
|
+
limitations under the License.
|
data/NOTES
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
RFlow starts
|
2
|
+
read in DB
|
3
|
+
create new shards
|
4
|
+
- Create a set of workers with the shard configuration
|
5
|
+
- each worker creates a set of components
|
6
|
+
|
7
|
+
- create components
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
1
14
|
RFlow Manager
|
2
15
|
|
3
16
|
Components
|
@@ -20,12 +33,12 @@ rflow <config file>
|
|
20
33
|
- place pid files in deployment's run directory
|
21
34
|
Configure components via zmq
|
22
35
|
Daemonize self
|
23
|
-
|
36
|
+
|
24
37
|
|
25
38
|
|
26
39
|
class Component
|
27
40
|
def self.input_port
|
28
|
-
end
|
41
|
+
end
|
29
42
|
|
30
43
|
def self.output_port
|
31
44
|
end
|
@@ -33,11 +46,11 @@ class Component
|
|
33
46
|
attr_accessor :state
|
34
47
|
|
35
48
|
def initialize(config, run_directory)
|
36
|
-
|
49
|
+
|
37
50
|
end
|
38
51
|
|
39
52
|
def run
|
40
|
-
|
53
|
+
|
41
54
|
end
|
42
55
|
|
43
56
|
def configure
|
@@ -56,7 +69,7 @@ class PassThrough < Component
|
|
56
69
|
# This will initialize the ports
|
57
70
|
super
|
58
71
|
# Do stuff to initialize component
|
59
|
-
end
|
72
|
+
end
|
60
73
|
|
61
74
|
end
|
62
75
|
|
@@ -66,7 +79,7 @@ Computation Requirements:
|
|
66
79
|
- management bus connection information
|
67
80
|
- group and instance UUID
|
68
81
|
- beacon interval
|
69
|
-
- run directory, containing
|
82
|
+
- run directory, containing
|
70
83
|
- PID files
|
71
84
|
- log dir + logs
|
72
85
|
- computation-specific configuration (conf dir)
|
@@ -90,7 +103,7 @@ Computation Requirements:
|
|
90
103
|
|
91
104
|
External Computations:
|
92
105
|
- Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
|
93
|
-
-
|
106
|
+
-
|
94
107
|
|
95
108
|
|
96
109
|
RFlow
|
@@ -100,7 +113,7 @@ RFlow
|
|
100
113
|
|
101
114
|
Translate
|
102
115
|
- Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
|
103
|
-
|
116
|
+
|
104
117
|
|
105
118
|
|
106
119
|
|
@@ -112,7 +125,7 @@ Plugins:
|
|
112
125
|
- necessary to tell system?
|
113
126
|
- need a protocol for defining schema transfer
|
114
127
|
- each message has attached schema
|
115
|
-
|
128
|
+
|
116
129
|
|
117
130
|
lib/rflow/message.rb
|
118
131
|
|
@@ -122,7 +135,7 @@ RFlow::Management
|
|
122
135
|
- Somewhere for external people to register new computations with running system
|
123
136
|
- computation says that its running and asks for Connection configuration
|
124
137
|
- how will it specify where in the workflow it wants to run????
|
125
|
-
|
138
|
+
|
126
139
|
RFlow::Message(complete on-the-wire Avro message format)
|
127
140
|
data_type, provenance, external_ids, empty, data (see below)
|
128
141
|
|
@@ -142,7 +155,7 @@ RFlow::Connection::AMQP
|
|
142
155
|
|
143
156
|
RFlow::Connection::ZMQ
|
144
157
|
|
145
|
-
|
158
|
+
|
146
159
|
|
147
160
|
|
148
161
|
computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
|
@@ -152,12 +165,12 @@ AMQP::Topic - responsible for setting up a topic -> queue binding
|
|
152
165
|
r.outgoing = amqp connection, channel, vhost, login, password, queue name
|
153
166
|
behavior -> n x m, "round-robin" among the connected outgoing
|
154
167
|
incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
|
155
|
-
|
168
|
+
|
156
169
|
|
157
170
|
ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
158
171
|
r.incoming = zmq connection string (tcp://ip:port), type pub
|
159
172
|
r.outgoing = zmq connection string (tcp://ip:port), type sub
|
160
|
-
behavior -> n x m, broadcast sending,
|
173
|
+
behavior -> n x m, broadcast sending,
|
161
174
|
|
162
175
|
ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
163
176
|
r.incoming = zmq connection string (tcp://ip:port), type push
|
data/README.md
ADDED
@@ -0,0 +1,448 @@
|
|
1
|
+
# RFlow
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/redjack/rflow.png?branch=master)](https://travis-ci.org/redjack/rflow)
|
4
|
+
|
5
|
+
RFlow is a Ruby framework inspired by
|
6
|
+
[flow-based programming](http://en.wikipedia.org/wiki/Flow-based_programming)
|
7
|
+
(FBP), which was previously inspired by
|
8
|
+
[Communicating Sequential Processes](http://en.wikipedia.org/wiki/Communicating_sequential_processes)
|
9
|
+
(CSP). It has some conceptual similarities to Javascript's
|
10
|
+
[NoFlo](http://noflojs.org/) system, Java's
|
11
|
+
[Storm](http://storm.incubator.apache.org/), and Clojure's
|
12
|
+
[core.async](http://clojure.github.io/core.async/) library.
|
13
|
+
|
14
|
+
In short, components communicate with each other by sending/receiving
|
15
|
+
messages via their output/input ports over connections. Ports are
|
16
|
+
"wired" together output->input with connections, and messages are
|
17
|
+
explicitly serialized before being sent over the connection. RFlow
|
18
|
+
supports generalized connection types and message serialization,
|
19
|
+
however only two are in current use, namely ZeroMQ connections and
|
20
|
+
Avro serialization.
|
21
|
+
|
22
|
+
RFlow currently runs as a single-threaded, evented system on top of
|
23
|
+
[Eventmachine](http://rubyeventmachine.com/), meaning that any code
|
24
|
+
should be coded in an asynchronous style so as to not block the
|
25
|
+
Eventmachine reactor (and thus block all the other components). There
|
26
|
+
is currently work being done to "shard" the workflow among multiple
|
27
|
+
processes and/or threads.
|
28
|
+
|
29
|
+
Some of the long-term goals of RFlow are to allow for components and
|
30
|
+
portions of the workflow to be defined in any language that supports
|
31
|
+
Avro and ZeroMQ, which are numerous.
|
32
|
+
|
33
|
+
|
34
|
+
## Definitions
|
35
|
+
|
36
|
+
* __Component__ - the basic unit of RFlow computation. Each
|
37
|
+
component is a shared-nothing, individual computation module that
|
38
|
+
communicates with the rest of the system through explicit message
|
39
|
+
passing via input and output ports.
|
40
|
+
|
41
|
+
* __Port__ - a named entity on each component that is responsible for
|
42
|
+
receiving data (an input port) or sending data (an output port).
|
43
|
+
Ports can be "keyed" or "indexed" to allow better multiplexing of
|
44
|
+
messages out/in a single port, as well as allow a single port to be
|
45
|
+
accessed by an array.
|
46
|
+
|
47
|
+
* __Connection__ - a directed link between an output port and an input
|
48
|
+
port. RFlow supports generalized connection types, however only
|
49
|
+
ZeroMQ IPC links are currently used.
|
50
|
+
|
51
|
+
* __Message__ - a bit of serialized data that is sent out an output
|
52
|
+
port and received on an input port. Due to the serialization,
|
53
|
+
message types and schemas are explicitly defined. In a departure
|
54
|
+
from "pure" FBP, RFlow supports sending multiple message types via a
|
55
|
+
single connection.
|
56
|
+
|
57
|
+
* __Workflow__ - the common name for the digraph created when the
|
58
|
+
components (nodes) are wired together via connections to their
|
59
|
+
respective output/input ports.
|
60
|
+
|
61
|
+
|
62
|
+
## Component Examples
|
63
|
+
|
64
|
+
The following describes the API of an RFlow component:
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
class SimpleComponent < RFlow::Component
|
68
|
+
input_port :in
|
69
|
+
output_port :out
|
70
|
+
|
71
|
+
def configure!(config); end
|
72
|
+
def run!; end
|
73
|
+
def process_message(input_port, input_port_key, connection, message); end
|
74
|
+
def shutdown!; end
|
75
|
+
def cleanup!; end
|
76
|
+
end
|
77
|
+
```
|
78
|
+
|
79
|
+
* `input_port` and `output_port` define the named ports that will
|
80
|
+
receive data or send data, respectively. These class methods create
|
81
|
+
accessors for their respective port names, to be used later in the
|
82
|
+
`process_message` or `run!` methods. There can be multiple (or no)
|
83
|
+
input and output ports.
|
84
|
+
|
85
|
+
* `configure!` (called with a hash configuration) is called after the
|
86
|
+
component is instantiated but before the workflow has been wired or
|
87
|
+
any messages have been sent. Note that this is called outside the
|
88
|
+
Eventmachine reactor.
|
89
|
+
|
90
|
+
* `run!` is called after all the components have been wired together
|
91
|
+
with connections and the entire workflow has been created. For a
|
92
|
+
component that is a source of messages, this is where messages will
|
93
|
+
be sent. For example, if the component is reading from a file, this
|
94
|
+
is where the file will be opened, the contents read into a message,
|
95
|
+
and the message sent out the output port. `run!` is called within
|
96
|
+
the Eventmachine reactor.
|
97
|
+
|
98
|
+
* `process_message` is an evented callback that is called whenever the
|
99
|
+
component receives a message on one of its input ports.
|
100
|
+
`process_message` is called within the Eventmachine reactor.
|
101
|
+
|
102
|
+
* `shutdown!` is called when the flow is being terminated, and is
|
103
|
+
meant to allow the components to do penultimate processing and send
|
104
|
+
any final messages. All components in a flow will be told to
|
105
|
+
`shutdown!` before they are told to `cleanup!`.
|
106
|
+
|
107
|
+
* `cleanup!` is the final call to each component, and allows the
|
108
|
+
component to clean up any external resources that it might have
|
109
|
+
outstanding, such as file handles or network sockets.
|
110
|
+
|
111
|
+
"Source" components will often do all of their work within the `run!`
|
112
|
+
method, and often gather message data from an external source, such as
|
113
|
+
file, database, or network socket. The following component generates a
|
114
|
+
set of integers between a configured start/finish, incrementing by a
|
115
|
+
configured step:
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
class RFlow::Components::GenerateIntegerSequence < RFlow::Component
|
119
|
+
output_port :out
|
120
|
+
|
121
|
+
def configure!(config)
|
122
|
+
@start = config['start'].to_i
|
123
|
+
@finish = config['finish'].to_i
|
124
|
+
@step = config['step'] ? config['step'].to_i : 1
|
125
|
+
# If interval seconds is not given, it will default to 0
|
126
|
+
@interval_seconds = config['interval_seconds'].to_i
|
127
|
+
end
|
128
|
+
|
129
|
+
# Note that this uses the timer (sometimes with 0 interval) so as
|
130
|
+
# not to block the reactor
|
131
|
+
def run!
|
132
|
+
timer = EM::PeriodicTimer.new(@interval_seconds) do
|
133
|
+
message = RFlow::Message.new('RFlow::Message::Data::Integer')
|
134
|
+
message.data.data_object = @start
|
135
|
+
out.send_message message
|
136
|
+
@start += @step
|
137
|
+
timer.cancel if @start > @finish
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
```
|
142
|
+
|
143
|
+
"Middle" components receive messages on input port(s), perform a bit
|
144
|
+
of computation, and then send a message out the output port(s). The
|
145
|
+
following component accepts a Ruby expression string via its config,
|
146
|
+
and then uses that as an expression to determine what port to send an
|
147
|
+
incoming message:
|
148
|
+
|
149
|
+
```ruby
|
150
|
+
class RFlow::Components::RubyProcFilter < RFlow::Component
|
151
|
+
input_port :in
|
152
|
+
output_port :filtered
|
153
|
+
output_port :dropped
|
154
|
+
output_port :errored
|
155
|
+
|
156
|
+
def configure!(config)
|
157
|
+
@filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
|
158
|
+
end
|
159
|
+
|
160
|
+
def process_message(input_port, input_port_key, connection, message)
|
161
|
+
begin
|
162
|
+
if @filter_proc.call(message)
|
163
|
+
filtered.send_message message
|
164
|
+
else
|
165
|
+
dropped.send_message message
|
166
|
+
end
|
167
|
+
rescue Exception => e
|
168
|
+
errored.send_message message
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
```
|
173
|
+
|
174
|
+
"Sink" components accept messages on an input port and do not have an
|
175
|
+
output port. They often operate on external sinks, such as writing
|
176
|
+
messages to a file, database, or network socket. The following
|
177
|
+
component writes the inspected message to a file (defined via the
|
178
|
+
configuration):
|
179
|
+
|
180
|
+
```ruby
|
181
|
+
class RFlow::Components::FileOutput < RFlow::Component
|
182
|
+
input_port :in
|
183
|
+
|
184
|
+
attr_accessor :output_file_path, :output_file
|
185
|
+
|
186
|
+
def configure!(config)
|
187
|
+
self.output_file_path = config['output_file_path']
|
188
|
+
self.output_file = File.new output_file_path, 'w+'
|
189
|
+
end
|
190
|
+
|
191
|
+
def process_message(input_port, input_port_key, connection, message)
|
192
|
+
output_file.puts message.data.data_object.inspect
|
193
|
+
output_file.flush
|
194
|
+
end
|
195
|
+
|
196
|
+
def cleanup!
|
197
|
+
output_file.close
|
198
|
+
end
|
199
|
+
end
|
200
|
+
```
|
201
|
+
|
202
|
+
## RFlow Messages
|
203
|
+
|
204
|
+
RFlow messages are instances of
|
205
|
+
[`RFlow::Message`](lib/rflow/message.rb), which are ultimately
|
206
|
+
serialized via an Avro [schema](schema/message.avsc).
|
207
|
+
|
208
|
+
There are two parts of the message "envelope": a provenance and the
|
209
|
+
embedded data object "payload".
|
210
|
+
|
211
|
+
The `provenance` is a way for a component to annotate a message with a
|
212
|
+
bit of data that should (by convention) be carried through the
|
213
|
+
workflow with the message, as well as being copied to derived
|
214
|
+
messages. For example, a TCP server component would spin up a TCP
|
215
|
+
server and, upon receiving a connection and packets on a session, it
|
216
|
+
would marshal the packets into `RFlow::Message`s and send them out
|
217
|
+
its output ports. Messages received on its input port, however, need
|
218
|
+
to have a way to be matched to the corresponding underlying TCP
|
219
|
+
connection. `provenance` provides a method for the TCP server
|
220
|
+
component to add a bit of metadata (namely an identifier for the TCP
|
221
|
+
connection) such that later messages that contain the same provenance
|
222
|
+
can be matched to the correct underlying TCP connection.
|
223
|
+
|
224
|
+
|
225
|
+
The other parts of the message envelope are related to the embedded
|
226
|
+
data object. In addition to the data object itself (which is encoded
|
227
|
+
with a specific Avro schema), there are a few fields that describe the
|
228
|
+
embedded data, namely the `data_type_name`, the
|
229
|
+
`data_serialization_type`, and the `data_schema`. By including all
|
230
|
+
this metadata in each message, the system is completely dynamic and
|
231
|
+
allows for multiple message types to be included on a single
|
232
|
+
connection, as well as enabling non-RFlow components to be created in
|
233
|
+
any language. This does come at the expense of larger messages which
|
234
|
+
results in greater message overhead.
|
235
|
+
|
236
|
+
For example, if we have a simple integer data type that we would like
|
237
|
+
to serialize via Avro, we can register the schema with the following
|
238
|
+
`add_available_data_type` code shown below:
|
239
|
+
|
240
|
+
```ruby
|
241
|
+
long_integer_schema = '{"type": "long"}'
|
242
|
+
RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
|
243
|
+
```
|
244
|
+
|
245
|
+
This will make the schema and message type available to RFlow, such
|
246
|
+
that it will be able to create a new message with:
|
247
|
+
|
248
|
+
```ruby
|
249
|
+
message = RFlow::Message.new('RFlow::Message::Data::Integer')
|
250
|
+
```
|
251
|
+
|
252
|
+
and will automatically reconstitute a message from the connection and
|
253
|
+
call a component's `process_message`.
|
254
|
+
|
255
|
+
The deserialized Avro Ruby object is available as the `data_object`
|
256
|
+
accessor on the `data` class, i.e.:
|
257
|
+
|
258
|
+
```ruby
|
259
|
+
message.data.data_object = 1024
|
260
|
+
```
|
261
|
+
|
262
|
+
The `data_object` is the deserialized Avro Ruby object and, as such,
|
263
|
+
allows the Avro object to be accessed as a Ruby object. In order to
|
264
|
+
provide a more convenient interface to the underlying Avro object,
|
265
|
+
RFlow allows modules to be dynamically mixed in to the `data` class
|
266
|
+
object.
|
267
|
+
|
268
|
+
For example, the module below provides a bit of extra functionality to
|
269
|
+
the above-mentioned `RFlow::Message::Data::Integer` message type,
|
270
|
+
namely to default the integer to 0 upon being mixed in, provide a
|
271
|
+
better named accessor, and add a `default?` method to the `data` object:
|
272
|
+
|
273
|
+
```ruby
|
274
|
+
module SimpleDataExtension
|
275
|
+
def self.extended(base_data)
|
276
|
+
base_data.data_object = 0
|
277
|
+
end
|
278
|
+
|
279
|
+
def int; data_object; end
|
280
|
+
def int=(new_int); self.data_object = new_int; end
|
281
|
+
|
282
|
+
def default?;
|
283
|
+
data_object == 0
|
284
|
+
end
|
285
|
+
end
|
286
|
+
```
|
287
|
+
|
288
|
+
Once a module is defined, it needs to be registered to the appropriate
|
289
|
+
message data type. Note that multiple modules can be registered for a
|
290
|
+
given message data type.
|
291
|
+
|
292
|
+
```ruby
|
293
|
+
RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
|
294
|
+
```
|
295
|
+
|
296
|
+
The result of this is that the following code will work:
|
297
|
+
|
298
|
+
```ruby
|
299
|
+
message = RFlow::Message.new('RFlow::Message::Data::Integer')
|
300
|
+
message.data.int == 0 # => true
|
301
|
+
message.data.default? # => true
|
302
|
+
message.data.int = 1024
|
303
|
+
message.data.default? # => false
|
304
|
+
```
|
305
|
+
|
306
|
+
|
307
|
+
## RFlow Workflow Configuration
|
308
|
+
|
309
|
+
RFlow currently stores its configuration in a SQLite database which
|
310
|
+
is internally accessed via ActiveRecord. Given that SQLite is a
|
311
|
+
rather simple and standard interface, non-RFlow components could
|
312
|
+
access it and determine their respective ZMQ connections.
|
313
|
+
|
314
|
+
DB schemas for the configuration database are in
|
315
|
+
[lib/rflow/configuration/migrations](lib/rflow/configuration/migrations)
|
316
|
+
and define the complete workflow configuration. Note that each of the
|
317
|
+
tables uses a UUID primary key, and UUIDs are used within RFlow to
|
318
|
+
identify specific components.
|
319
|
+
|
320
|
+
* settings - general application settings, such as log levels, app
|
321
|
+
names, directories, etc
|
322
|
+
|
323
|
+
* components - a list of the components including its name,
|
324
|
+
specification (Ruby class), and options. Note that the options are
|
325
|
+
serialized to the database as YAML, and components should understand
|
326
|
+
that the round-trip through the database might not be perfect (e.g.
|
327
|
+
Ruby symbols might become strings). A component also has a number of
|
328
|
+
input ports and output ports.
|
329
|
+
|
330
|
+
* ports - belonging to a component (via `component_uuid` foreign key),
|
331
|
+
also has a `type` column for ActiveRecord STI, which gets set to
|
332
|
+
either a `RFlow::Configuration::InputPort` or
|
333
|
+
`RFlow::Configuration::OutputPort`.
|
334
|
+
|
335
|
+
* connections - a connection between two ports via foreign keys
|
336
|
+
`input_port_uuid` and `output_port_uuid`. Like ports, connections
|
337
|
+
are typed via AR STI (`RFlow::Configuration::ZMQConnection` or
|
338
|
+
`RFlow::Configuration::AMQPConnection`) and have a YAML serialized
|
339
|
+
`options` hash. A connection also (potentially) defines the port
|
340
|
+
keys.
|
341
|
+
|
342
|
+
RFlow also provides a Ruby DSL for a configuration-like file to be used
|
343
|
+
to load the database:
|
344
|
+
|
345
|
+
```ruby
|
346
|
+
RFlow::Configuration::RubyDSL.configure do |config|
|
347
|
+
# Configure the settings, which include paths for various files, log
|
348
|
+
# levels, and component specific stuffs
|
349
|
+
config.setting('rflow.log_level', 'DEBUG')
|
350
|
+
config.setting('rflow.application_directory_path', '../tmp')
|
351
|
+
|
352
|
+
config.setting('rflow.application_name', 'testapp')
|
353
|
+
|
354
|
+
# Instantiate components
|
355
|
+
config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', {
|
356
|
+
'start' => 0,
|
357
|
+
'finish' => 10,
|
358
|
+
'step' => 3,
|
359
|
+
'interval_seconds' => 1
|
360
|
+
}
|
361
|
+
config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', {
|
362
|
+
'start' => 20,
|
363
|
+
'finish' => 30
|
364
|
+
}
|
365
|
+
config.component 'filter', 'RFlow::Components::RubyProcFilter', {
|
366
|
+
'filter_proc_string' => 'lambda {|message| true}'
|
367
|
+
}
|
368
|
+
config.component 'output1', 'RFlow::Components::FileOutput', {
|
369
|
+
'output_file_path' => '/tmp/out1'
|
370
|
+
}
|
371
|
+
config.component 'output2', 'RFlow::Components::FileOutput', {
|
372
|
+
'output_file_path' => '/tmp/out2'
|
373
|
+
}
|
374
|
+
|
375
|
+
# Wire components together
|
376
|
+
config.connect 'generate_ints1#out' => 'filter#in'
|
377
|
+
config.connect 'generate_ints2#out' => 'filter#in'
|
378
|
+
config.connect 'filter#filtered' => 'replicate#in'
|
379
|
+
config.connect 'filter#out' => 'output1#in'
|
380
|
+
config.connect 'filter#filtered' => 'output2#in'
|
381
|
+
end
|
382
|
+
```
|
383
|
+
|
384
|
+
## Command-Line Operation
|
385
|
+
|
386
|
+
RFlow includes the `rflow` binary that can load a database from a Ruby
|
387
|
+
DSL, as well as start/stop the workflow application as a daemon.
|
388
|
+
Invoking the `rflow` binary without any options will give a brief help:
|
389
|
+
|
390
|
+
```
|
391
|
+
Usage: rflow [options] (start|stop|status|load)
|
392
|
+
-d, --database DB Config database (sqlite) path (GENERALLY REQUIRED)
|
393
|
+
-c, --config CONFIG Config file path (only valid for load)
|
394
|
+
-e, --extensions FILE1[,FILE_N] Extension file paths (will load)
|
395
|
+
-g, --gems GEM1[,GEM_N] Extension gems (will require)
|
396
|
+
-l, --log LOGFILE Initial startup log file (in addition to stdout)
|
397
|
+
-v, --verbose [LEVEL] Control the startup log (and stdout) verbosity (DEBUG, INFO, WARN) defaults to INFO
|
398
|
+
-f Run in the foreground
|
399
|
+
--version Show RFlow version and exit
|
400
|
+
-h, --help Show this message and exit
|
401
|
+
```
|
402
|
+
|
403
|
+
In general, the process for getting started is to first create a
|
404
|
+
configuration database via `rflow load`:
|
405
|
+
|
406
|
+
```
|
407
|
+
rflow load -d my_config.sqlite -c my_ruby_dsl.rb
|
408
|
+
```
|
409
|
+
|
410
|
+
which will create the `my_config.sqlite` configuration database loaded
|
411
|
+
with the `my_ruby_dsl.rb` configuration DSL.
|
412
|
+
|
413
|
+
Once a config database exists, you can start up the application that
|
414
|
+
it describes with `rflow start`. The `--extensions` argument allows
|
415
|
+
loading of arbitrary Ruby code (via Ruby's `load`), which is usually
|
416
|
+
where the component implementations are stored, as well as data type
|
417
|
+
registrations.
|
418
|
+
|
419
|
+
```
|
420
|
+
rflow start -d my_config.sqlite -e my_component.rb,my_other_component.rb,my_data_type.rb
|
421
|
+
```
|
422
|
+
|
423
|
+
By default, RFlow will daemonize, write its pid file to
|
424
|
+
`./run/app.pid` and write its log file to `./log/app.log`. The `-f`
|
425
|
+
flag will keep RFlow in the foreground.
|
426
|
+
|
427
|
+
RFlow also supports two signals that allow for useful management of a
|
428
|
+
running RFlow daemon's log. Sending a `SIGUSR1` to the running RFlow
|
429
|
+
process will cause RFlow to close and reopen its log file, which
|
430
|
+
allows for easy log management without restarting RFlow. In addition,
|
431
|
+
sending a `SIGUSR2` will toggle RFlow's log-level to `DEBUG`, and a
|
432
|
+
subsequent `SIGUSR2` will toggle the log-level back to what was
|
433
|
+
originally set. This allows for easy debugging of a running RFlow
|
434
|
+
process.
|
435
|
+
|
436
|
+
Copyright 2014 RedJack LLC
|
437
|
+
|
438
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
439
|
+
you may not use this file except in compliance with the License.
|
440
|
+
You may obtain a copy of the License at
|
441
|
+
|
442
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
443
|
+
|
444
|
+
Unless required by applicable law or agreed to in writing, software
|
445
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
446
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
447
|
+
See the License for the specific language governing permissions and
|
448
|
+
limitations under the License.
|