rflow 0.0.5 → 1.0.0a1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +21 -0
- data/.yardopts +1 -0
- data/Gemfile +5 -1
- data/Guardfile +8 -0
- data/LICENSE +190 -0
- data/NOTES +26 -13
- data/README.md +448 -0
- data/Rakefile +5 -12
- data/bin/rflow +23 -20
- data/example/basic_config.rb +2 -2
- data/example/basic_extensions.rb +8 -8
- data/example/http_config.rb +1 -1
- data/example/http_extensions.rb +15 -15
- data/lib/rflow.rb +15 -387
- data/lib/rflow/component.rb +105 -50
- data/lib/rflow/component/port.rb +25 -24
- data/lib/rflow/components/raw.rb +4 -4
- data/lib/rflow/components/raw/extensions.rb +2 -2
- data/lib/rflow/configuration.rb +54 -36
- data/lib/rflow/configuration/component.rb +2 -3
- data/lib/rflow/configuration/connection.rb +9 -10
- data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
- data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
- data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
- data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
- data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
- data/lib/rflow/configuration/port.rb +3 -4
- data/lib/rflow/configuration/ruby_dsl.rb +59 -35
- data/lib/rflow/configuration/setting.rb +8 -7
- data/lib/rflow/configuration/shard.rb +24 -0
- data/lib/rflow/configuration/uuid_keyed.rb +3 -3
- data/lib/rflow/connection.rb +21 -10
- data/lib/rflow/connections/zmq_connection.rb +45 -44
- data/lib/rflow/logger.rb +67 -0
- data/lib/rflow/master.rb +127 -0
- data/lib/rflow/message.rb +14 -14
- data/lib/rflow/pid_file.rb +84 -0
- data/lib/rflow/shard.rb +148 -0
- data/lib/rflow/version.rb +1 -1
- data/rflow.gemspec +22 -28
- data/schema/message.avsc +8 -8
- data/spec/fixtures/config_ints.rb +4 -4
- data/spec/fixtures/config_shards.rb +30 -0
- data/spec/fixtures/extensions_ints.rb +8 -8
- data/spec/rflow_component_port_spec.rb +58 -0
- data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
- data/spec/rflow_configuration_spec.rb +4 -4
- data/spec/rflow_message_data_raw.rb +2 -2
- data/spec/rflow_message_data_spec.rb +6 -6
- data/spec/rflow_message_spec.rb +13 -13
- data/spec/rflow_spec.rb +294 -71
- data/spec/schema_spec.rb +2 -2
- data/spec/spec_helper.rb +6 -4
- data/temp.rb +21 -21
- metadata +56 -65
- data/.rvmrc +0 -1
- data/README +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57c6f0b7c61b30886bbf0f4b2f65821aa5b1b0f9
|
4
|
+
data.tar.gz: 62f58d281509732effeca0c1a041df2668497b80
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d74949a15024641aef4123ca703d2f2ccf6fb5f97dca9829a282ca53bd6d36c347c844b189255955c7fa058bf903853d0c3acf13fda4dc2e2b3f40e49129310
|
7
|
+
data.tar.gz: f6233d9cc128220c886b6ed4970b544040cace77af6701b6f7429da304ad7de00b5536455469e21988f37d2fa1faaedf3f4324f08eee343669b03c6bdaece735
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rflow-dev
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.1.1
|
data/.travis.yml
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
language: ruby
|
2
|
+
|
3
|
+
rvm:
|
4
|
+
- 1.9.3
|
5
|
+
- 2.0.0
|
6
|
+
- 2.1.0
|
7
|
+
|
8
|
+
before_install:
|
9
|
+
- sudo apt-get install libtool autoconf automake uuid-dev build-essential
|
10
|
+
- wget http://download.zeromq.org/zeromq-3.2.4.tar.gz && tar zxvf zeromq-3.2.4.tar.gz && cd zeromq-3.2.4 && ./configure && make && sudo make install && cd ..
|
11
|
+
# Only has 4.0.4, need 3.2 version due to old em-zeromq
|
12
|
+
# - sudo add-apt-repository -y ppa:chris-lea/zeromq
|
13
|
+
# - sudo apt-get update
|
14
|
+
# - sudo apt-get install libzmq3 libzmq3-dev
|
15
|
+
|
16
|
+
script: bundle exec rspec spec
|
17
|
+
|
18
|
+
notifications:
|
19
|
+
hipchat:
|
20
|
+
rooms:
|
21
|
+
secure: a4nrCmDPwhteJA65QFRlBdnsknT+4y/JtZL/sLPCObOahFWvLOXMggPXvHAOssCaa2ydYrMMvWNliOz63nuu3qAnR90H7aOU3o+2K3zeACy0cAjF27lDonLhaYHeUz07oPwr/iDlFC8bDfFDempjIFFnXSc/LhUWaCltnJ7W5vI=
|
data/.yardopts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--output ./doc --main README.md --files schema/*.avsc lib/**/*.rb bin/*.rb - README.md LICENSE
|
data/Gemfile
CHANGED
data/Guardfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,190 @@
|
|
1
|
+
Apache License
|
2
|
+
Version 2.0, January 2004
|
3
|
+
http://www.apache.org/licenses/
|
4
|
+
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6
|
+
|
7
|
+
1. Definitions.
|
8
|
+
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
11
|
+
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13
|
+
the copyright owner that is granting the License.
|
14
|
+
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
16
|
+
other entities that control, are controlled by, or are under common
|
17
|
+
control with that entity. For the purposes of this definition,
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
19
|
+
direction or management of such entity, whether by contract or
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22
|
+
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24
|
+
exercising permissions granted by this License.
|
25
|
+
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
27
|
+
including but not limited to software source code, documentation
|
28
|
+
source, and configuration files.
|
29
|
+
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
31
|
+
transformation or translation of a Source form, including but
|
32
|
+
not limited to compiled object code, generated documentation,
|
33
|
+
and conversions to other media types.
|
34
|
+
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
36
|
+
Object form, made available under the License, as indicated by a
|
37
|
+
copyright notice that is included in or attached to the work
|
38
|
+
(an example is provided in the Appendix below).
|
39
|
+
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46
|
+
the Work and Derivative Works thereof.
|
47
|
+
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
49
|
+
the original version of the Work and any modifications or additions
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
61
|
+
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
64
|
+
subsequently incorporated within the Work.
|
65
|
+
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
72
|
+
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78
|
+
where such license applies only to those patent claims licensable
|
79
|
+
by such Contributor that are necessarily infringed by their
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
82
|
+
institute patent litigation against any entity (including a
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
85
|
+
or contributory patent infringement, then any patent licenses
|
86
|
+
granted to You under this License for that Work shall terminate
|
87
|
+
as of the date such litigation is filed.
|
88
|
+
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
91
|
+
modifications, and in Source or Object form, provided that You
|
92
|
+
meet the following conditions:
|
93
|
+
|
94
|
+
(a) You must give any other recipients of the Work or
|
95
|
+
Derivative Works a copy of this License; and
|
96
|
+
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
98
|
+
stating that You changed the files; and
|
99
|
+
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
102
|
+
attribution notices from the Source form of the Work,
|
103
|
+
excluding those notices that do not pertain to any part of
|
104
|
+
the Derivative Works; and
|
105
|
+
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
108
|
+
include a readable copy of the attribution notices contained
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
111
|
+
of the following places: within a NOTICE text file distributed
|
112
|
+
as part of the Derivative Works; within the Source form or
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
114
|
+
within a display generated by the Derivative Works, if and
|
115
|
+
wherever such third-party notices normally appear. The contents
|
116
|
+
of the NOTICE file are for informational purposes only and
|
117
|
+
do not modify the License. You may add Your own attribution
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
120
|
+
that such additional attribution notices cannot be construed
|
121
|
+
as modifying the License.
|
122
|
+
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
124
|
+
may provide additional or different license terms and conditions
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
128
|
+
the conditions stated in this License.
|
129
|
+
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
133
|
+
this License, without any additional terms or conditions.
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135
|
+
the terms of any separate license agreement you may have executed
|
136
|
+
with Licensor regarding such Contributions.
|
137
|
+
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
140
|
+
except as required for reasonable and customary use in describing the
|
141
|
+
origin of the Work and reproducing the content of the NOTICE file.
|
142
|
+
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
152
|
+
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
158
|
+
incidental, or consequential damages of any character arising as a
|
159
|
+
result of this License or out of the use or inability to use the
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
162
|
+
other commercial damages or losses), even if such Contributor
|
163
|
+
has been advised of the possibility of such damages.
|
164
|
+
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166
|
+
the Work or Derivative Works thereof, You may choose to offer,
|
167
|
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168
|
+
or other liability obligations and/or rights consistent with this
|
169
|
+
License. However, in accepting such obligations, You may act only
|
170
|
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171
|
+
of any other Contributor, and only if You agree to indemnify,
|
172
|
+
defend, and hold each Contributor harmless for any liability
|
173
|
+
incurred by, or claims asserted against, such Contributor by reason
|
174
|
+
of your accepting any such warranty or additional liability.
|
175
|
+
|
176
|
+
END OF TERMS AND CONDITIONS
|
177
|
+
|
178
|
+
Copyright 2014 RedJack LLC
|
179
|
+
|
180
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
181
|
+
you may not use this file except in compliance with the License.
|
182
|
+
You may obtain a copy of the License at
|
183
|
+
|
184
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
185
|
+
|
186
|
+
Unless required by applicable law or agreed to in writing, software
|
187
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
188
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
189
|
+
See the License for the specific language governing permissions and
|
190
|
+
limitations under the License.
|
data/NOTES
CHANGED
@@ -1,3 +1,16 @@
|
|
1
|
+
RFlow starts
|
2
|
+
read in DB
|
3
|
+
create new shards
|
4
|
+
- Create a set of workers with the shard configuration
|
5
|
+
- each worker creates a set of components
|
6
|
+
|
7
|
+
- create components
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
|
1
14
|
RFlow Manager
|
2
15
|
|
3
16
|
Components
|
@@ -20,12 +33,12 @@ rflow <config file>
|
|
20
33
|
- place pid files in deployment's run directory
|
21
34
|
Configure components via zmq
|
22
35
|
Daemonize self
|
23
|
-
|
36
|
+
|
24
37
|
|
25
38
|
|
26
39
|
class Component
|
27
40
|
def self.input_port
|
28
|
-
end
|
41
|
+
end
|
29
42
|
|
30
43
|
def self.output_port
|
31
44
|
end
|
@@ -33,11 +46,11 @@ class Component
|
|
33
46
|
attr_accessor :state
|
34
47
|
|
35
48
|
def initialize(config, run_directory)
|
36
|
-
|
49
|
+
|
37
50
|
end
|
38
51
|
|
39
52
|
def run
|
40
|
-
|
53
|
+
|
41
54
|
end
|
42
55
|
|
43
56
|
def configure
|
@@ -56,7 +69,7 @@ class PassThrough < Component
|
|
56
69
|
# This will initialize the ports
|
57
70
|
super
|
58
71
|
# Do stuff to initialize component
|
59
|
-
end
|
72
|
+
end
|
60
73
|
|
61
74
|
end
|
62
75
|
|
@@ -66,7 +79,7 @@ Computation Requirements:
|
|
66
79
|
- management bus connection information
|
67
80
|
- group and instance UUID
|
68
81
|
- beacon interval
|
69
|
-
- run directory, containing
|
82
|
+
- run directory, containing
|
70
83
|
- PID files
|
71
84
|
- log dir + logs
|
72
85
|
- computation-specific configuration (conf dir)
|
@@ -90,7 +103,7 @@ Computation Requirements:
|
|
90
103
|
|
91
104
|
External Computations:
|
92
105
|
- Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
|
93
|
-
-
|
106
|
+
-
|
94
107
|
|
95
108
|
|
96
109
|
RFlow
|
@@ -100,7 +113,7 @@ RFlow
|
|
100
113
|
|
101
114
|
Translate
|
102
115
|
- Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
|
103
|
-
|
116
|
+
|
104
117
|
|
105
118
|
|
106
119
|
|
@@ -112,7 +125,7 @@ Plugins:
|
|
112
125
|
- necessary to tell system?
|
113
126
|
- need a protocol for defining schema transfer
|
114
127
|
- each message has attached schema
|
115
|
-
|
128
|
+
|
116
129
|
|
117
130
|
lib/rflow/message.rb
|
118
131
|
|
@@ -122,7 +135,7 @@ RFlow::Management
|
|
122
135
|
- Somewhere for external people to register new computations with running system
|
123
136
|
- computation says that its running and asks for Connection configuration
|
124
137
|
- how will it specify where in the workflow it wants to run????
|
125
|
-
|
138
|
+
|
126
139
|
RFlow::Message(complete on-the-wire Avro message format)
|
127
140
|
data_type, provenance, external_ids, empty, data (see below)
|
128
141
|
|
@@ -142,7 +155,7 @@ RFlow::Connection::AMQP
|
|
142
155
|
|
143
156
|
RFlow::Connection::ZMQ
|
144
157
|
|
145
|
-
|
158
|
+
|
146
159
|
|
147
160
|
|
148
161
|
computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
|
@@ -152,12 +165,12 @@ AMQP::Topic - responsible for setting up a topic -> queue binding
|
|
152
165
|
r.outgoing = amqp connection, channel, vhost, login, password, queue name
|
153
166
|
behavior -> n x m, "round-robin" among the connected outgoing
|
154
167
|
incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
|
155
|
-
|
168
|
+
|
156
169
|
|
157
170
|
ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
158
171
|
r.incoming = zmq connection string (tcp://ip:port), type pub
|
159
172
|
r.outgoing = zmq connection string (tcp://ip:port), type sub
|
160
|
-
behavior -> n x m, broadcast sending,
|
173
|
+
behavior -> n x m, broadcast sending,
|
161
174
|
|
162
175
|
ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
163
176
|
r.incoming = zmq connection string (tcp://ip:port), type push
|
data/README.md
ADDED
@@ -0,0 +1,448 @@
|
|
1
|
+
# RFlow
|
2
|
+
|
3
|
+
[](https://travis-ci.org/redjack/rflow)
|
4
|
+
|
5
|
+
RFlow is a Ruby framework inspired by
|
6
|
+
[flow-based programming](http://en.wikipedia.org/wiki/Flow-based_programming)
|
7
|
+
(FBP), which was previously inspired by
|
8
|
+
[Communicating Sequential Processes](http://en.wikipedia.org/wiki/Communicating_sequential_processes)
|
9
|
+
(CSP). It has some conceptual similarities to Javascript's
|
10
|
+
[NoFlo](http://noflojs.org/) system, Java's
|
11
|
+
[Storm](http://storm.incubator.apache.org/), and Clojure's
|
12
|
+
[core.async](http://clojure.github.io/core.async/) library.
|
13
|
+
|
14
|
+
In short, components communicate with each other by sending/receiving
|
15
|
+
messages via their output/input ports over connections. Ports are
|
16
|
+
"wired" together output->input with connections, and messages are
|
17
|
+
explicitly serialized before being sent over the connection. RFlow
|
18
|
+
supports generalized connection types and message serialization,
|
19
|
+
however only two are in current use, namely ZeroMQ connections and
|
20
|
+
Avro serialization.
|
21
|
+
|
22
|
+
RFlow currently runs as a single-threaded, evented system on top of
|
23
|
+
[Eventmachine](http://rubyeventmachine.com/), meaning that any code
|
24
|
+
should be coded in an asynchronous style so as to not block the
|
25
|
+
Eventmachine reactor (and thus block all the other components). There
|
26
|
+
is currently work being done to "shard" the workflow among multiple
|
27
|
+
processes and/or threads.
|
28
|
+
|
29
|
+
Some of the long-term goals of RFlow are to allow for components and
|
30
|
+
portions of the workflow to be defined in any language that supports
|
31
|
+
Avro and ZeroMQ, which are numerous.
|
32
|
+
|
33
|
+
|
34
|
+
## Definitions
|
35
|
+
|
36
|
+
* __Component__ - the basic unit of RFlow computation. Each
|
37
|
+
component is a shared-nothing, individual computation module that
|
38
|
+
communicates with the rest of the system through explicit message
|
39
|
+
passing via input and output ports.
|
40
|
+
|
41
|
+
* __Port__ - a named entity on each component that is responsible for
|
42
|
+
receiving data (an input port) or sending data (an output port).
|
43
|
+
Ports can be "keyed" or "indexed" to allow better multiplexing of
|
44
|
+
messages out/in a single port, as well as allow a single port to be
|
45
|
+
accessed by an array.
|
46
|
+
|
47
|
+
* __Connection__ - a directed link between an output port and an input
|
48
|
+
port. RFlow supports generalized connection types, however only
|
49
|
+
ZeroMQ IPC links are currently used.
|
50
|
+
|
51
|
+
* __Message__ - a bit of serialized data that is sent out an output
|
52
|
+
port and received on an input port. Due to the serialization,
|
53
|
+
message types and schemas are explicitly defined. In a departure
|
54
|
+
from "pure" FBP, RFlow supports sending multiple message types via a
|
55
|
+
single connection.
|
56
|
+
|
57
|
+
* __Workflow__ - the common name for the digraph created when the
|
58
|
+
components (nodes) are wired together via connections to their
|
59
|
+
respective output/input ports.
|
60
|
+
|
61
|
+
|
62
|
+
## Component Examples
|
63
|
+
|
64
|
+
The following describes the API of an RFlow component:
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
class SimpleComponent < RFlow::Component
|
68
|
+
input_port :in
|
69
|
+
output_port :out
|
70
|
+
|
71
|
+
def configure!(config); end
|
72
|
+
def run!; end
|
73
|
+
def process_message(input_port, input_port_key, connection, message); end
|
74
|
+
def shutdown!; end
|
75
|
+
def cleanup!; end
|
76
|
+
end
|
77
|
+
```
|
78
|
+
|
79
|
+
* `input_port` and `output_port` define the named ports that will
|
80
|
+
receive data or send data, respectively. These class methods create
|
81
|
+
accessors for their respective port names, to be used later in the
|
82
|
+
`process_message` or `run!` methods. There can be multiple (or no)
|
83
|
+
input and output ports.
|
84
|
+
|
85
|
+
* `configure!` (called with a hash configuration) is called after the
|
86
|
+
component is instantiated but before the workflow has been wired or
|
87
|
+
any messages have been sent. Note that this is called outside the
|
88
|
+
Eventmachine reactor.
|
89
|
+
|
90
|
+
* `run!` is called after all the components have been wired together
|
91
|
+
with connections and the entire workflow has been created. For a
|
92
|
+
component that is a source of messages, this is where messages will
|
93
|
+
be sent. For example, if the component is reading from a file, this
|
94
|
+
is where the file will be opened, the contents read into a message,
|
95
|
+
and the message sent out the output port. `run!` is called within
|
96
|
+
the Eventmachine reactor.
|
97
|
+
|
98
|
+
* `process_message` is an evented callback that is called whenever the
|
99
|
+
component receives a message on one of its input ports.
|
100
|
+
`process_message` is called within the Eventmachine reactor.
|
101
|
+
|
102
|
+
* `shutdown!` is called when the flow is being terminated, and is
|
103
|
+
meant to allow the components to do penultimate processing and send
|
104
|
+
any final messages. All components in a flow will be told to
|
105
|
+
`shutdown!` before they are told to `cleanup!`.
|
106
|
+
|
107
|
+
* `cleanup!` is the final call to each component, and allows the
|
108
|
+
component to clean up any external resources that it might have
|
109
|
+
outstanding, such as file handles or network sockets.
|
110
|
+
|
111
|
+
"Source" components will often do all of their work within the `run!`
|
112
|
+
method, and often gather message data from an external source, such as
|
113
|
+
a file, database, or network socket. The following component generates a
|
114
|
+
set of integers between a configured start/finish, incrementing by a
|
115
|
+
configured step:
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
class RFlow::Components::GenerateIntegerSequence < RFlow::Component
|
119
|
+
output_port :out
|
120
|
+
|
121
|
+
def configure!(config)
|
122
|
+
@start = config['start'].to_i
|
123
|
+
@finish = config['finish'].to_i
|
124
|
+
@step = config['step'] ? config['step'].to_i : 1
|
125
|
+
# If interval seconds is not given, it will default to 0
|
126
|
+
@interval_seconds = config['interval_seconds'].to_i
|
127
|
+
end
|
128
|
+
|
129
|
+
# Note that this uses the timer (sometimes with 0 interval) so as
|
130
|
+
# not to block the reactor
|
131
|
+
def run!
|
132
|
+
timer = EM::PeriodicTimer.new(@interval_seconds) do
|
133
|
+
message = RFlow::Message.new('RFlow::Message::Data::Integer')
|
134
|
+
message.data.data_object = @start
|
135
|
+
out.send_message message
|
136
|
+
@start += @step
|
137
|
+
timer.cancel if @start > @finish
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
```
|
142
|
+
|
143
|
+
"Middle" components receive messages on input port(s), perform a bit
|
144
|
+
of computation, and then send a message out the output port(s). The
|
145
|
+
following component accepts a Ruby expression string via its config,
|
146
|
+
and then uses that as an expression to determine what port to send an
|
147
|
+
incoming message:
|
148
|
+
|
149
|
+
```ruby
|
150
|
+
class RFlow::Components::RubyProcFilter < RFlow::Component
|
151
|
+
input_port :in
|
152
|
+
output_port :filtered
|
153
|
+
output_port :dropped
|
154
|
+
output_port :errored
|
155
|
+
|
156
|
+
def configure!(config)
|
157
|
+
@filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
|
158
|
+
end
|
159
|
+
|
160
|
+
def process_message(input_port, input_port_key, connection, message)
|
161
|
+
begin
|
162
|
+
if @filter_proc.call(message)
|
163
|
+
filtered.send_message message
|
164
|
+
else
|
165
|
+
dropped.send_message message
|
166
|
+
end
|
167
|
+
rescue Exception => e
|
168
|
+
errored.send_message message
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
```
|
173
|
+
|
174
|
+
"Sink" components accept messages on an input port and do not have an
|
175
|
+
output port. They often operate on external sinks, such as writing
|
176
|
+
messages to a file, database, or network socket. The following
|
177
|
+
component writes the inspected message to a file (defined via the
|
178
|
+
configuration):
|
179
|
+
|
180
|
+
```ruby
|
181
|
+
class RFlow::Components::FileOutput < RFlow::Component
|
182
|
+
input_port :in
|
183
|
+
|
184
|
+
attr_accessor :output_file_path, :output_file
|
185
|
+
|
186
|
+
def configure!(config)
|
187
|
+
self.output_file_path = config['output_file_path']
|
188
|
+
self.output_file = File.new output_file_path, 'w+'
|
189
|
+
end
|
190
|
+
|
191
|
+
def process_message(input_port, input_port_key, connection, message)
|
192
|
+
output_file.puts message.data.data_object.inspect
|
193
|
+
output_file.flush
|
194
|
+
end
|
195
|
+
|
196
|
+
def cleanup!
|
197
|
+
output_file.close
|
198
|
+
end
|
199
|
+
end
|
200
|
+
```
|
201
|
+
|
202
|
+
## RFlow Messages
|
203
|
+
|
204
|
+
RFlow messages are instances of
|
205
|
+
[`RFlow::Message`](lib/rflow/message.rb), which are ultimately
|
206
|
+
serialized via an Avro [schema](schema/message.avsc).
|
207
|
+
|
208
|
+
There are two parts of the message "envelope": a provenance and the
|
209
|
+
embedded data object "payload".
|
210
|
+
|
211
|
+
The `provenance` is a way for a component to annotate a message with a
|
212
|
+
bit of data that should (by convention) be carried through the
|
213
|
+
workflow with the message, as well as being copied to derived
|
214
|
+
messages. For example, a TCP server component would spin up a TCP
|
215
|
+
server and, upon receiving a connection and packets on a session, it
|
216
|
+
would marshal the packets into `RFlow::Message`s and send them out
|
217
|
+
its output ports. Messages received on its input port, however, need
|
218
|
+
to have a way to be matched to the corresponding underlying TCP
|
219
|
+
connection. `provenance` provides a method for the TCP server
|
220
|
+
component to add a bit of metadata (namely an identifier for the TCP
|
221
|
+
connection) such that later messages that contain the same provenance
|
222
|
+
can be matched to the correct underlying TCP connection.
|
223
|
+
|
224
|
+
|
225
|
+
The other parts of the message envelope are related to the embedded
|
226
|
+
data object. In addition to the data object itself (which is encoded
|
227
|
+
with a specific Avro schema), there are a few fields that describe the
|
228
|
+
embedded data, namely the `data_type_name`, the
|
229
|
+
`data_serialization_type`, and the `data_schema`. By including all
|
230
|
+
this metadata in each message, the system is completely dynamic and
|
231
|
+
allows for multiple message types to be included on a single
|
232
|
+
connection, as well as enabling non-RFlow components to be created in
|
233
|
+
any language. This does come at the expense of larger messages which
|
234
|
+
results in greater message overhead.
|
235
|
+
|
236
|
+
For example, if we have a simple integer data type that we would like
|
237
|
+
to serialize via Avro, we can register the schema with the following
|
238
|
+
`add_available_data_type` code shown below:
|
239
|
+
|
240
|
+
```ruby
|
241
|
+
long_integer_schema = '{"type": "long"}'
|
242
|
+
RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
|
243
|
+
```
|
244
|
+
|
245
|
+
This will make the schema and message type available to RFlow, such
|
246
|
+
that it will be able to create a new message with:
|
247
|
+
|
248
|
+
```ruby
|
249
|
+
message = RFlow::Message.new('RFlow::Message::Data::Integer')
|
250
|
+
```
|
251
|
+
|
252
|
+
and will automatically reconstitute a message from the connection and
|
253
|
+
call a component's `process_message`.
|
254
|
+
|
255
|
+
The deserialized Avro Ruby object is available as the `data_object`
|
256
|
+
accessor on the `data` class, i.e.:
|
257
|
+
|
258
|
+
```ruby
|
259
|
+
message.data.data_object = 1024
|
260
|
+
```
|
261
|
+
|
262
|
+
The `data_object` is the deserialized Avro Ruby object and, as such,
|
263
|
+
allows the Avro object to be accessed as a Ruby object. In order to
|
264
|
+
provide a more convenient interface to the underlying Avro object,
|
265
|
+
RFlow allows modules to be dynamically mixed in to the `data` class
|
266
|
+
object.
|
267
|
+
|
268
|
+
For example, the module below provides a bit of extra functionality to
|
269
|
+
the above-mentioned `RFlow::Message::Data::Integer` message type,
|
270
|
+
namely to default the integer to 0 upon being mixed in, provide a
|
271
|
+
better named accessor, and add a `default?` method to the `data` object:
|
272
|
+
|
273
|
+
```ruby
|
274
|
+
module SimpleDataExtension
|
275
|
+
def self.extended(base_data)
|
276
|
+
base_data.data_object = 0
|
277
|
+
end
|
278
|
+
|
279
|
+
def int; data_object; end
|
280
|
+
def int=(new_int); self.data_object = new_int; end
|
281
|
+
|
282
|
+
def default?;
|
283
|
+
data_object == 0
|
284
|
+
end
|
285
|
+
end
|
286
|
+
```
|
287
|
+
|
288
|
+
Once a module is defined, it needs to be registered to the appropriate
|
289
|
+
message data type. Note that multiple modules can be registered for a
|
290
|
+
given message data type.
|
291
|
+
|
292
|
+
```ruby
|
293
|
+
RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
|
294
|
+
```
|
295
|
+
|
296
|
+
The result of this is that the following code will work:
|
297
|
+
|
298
|
+
```ruby
|
299
|
+
message = RFlow::Message.new('RFlow::Message::Data::Integer')
|
300
|
+
message.data.int == 0 # => true
|
301
|
+
message.data.default? # => true
|
302
|
+
message.data.int = 1024
|
303
|
+
message.data.default? # => false
|
304
|
+
```
|
305
|
+
|
306
|
+
|
307
|
+
## RFlow Workflow Configuration
|
308
|
+
|
309
|
+
RFlow currently stores its configuration in a SQLite database which
|
310
|
+
is internally accessed via ActiveRecord. Given that SQLite is a
|
311
|
+
rather simple and standard interface, non-RFlow components could
|
312
|
+
access it and determine their respective ZMQ connections.
|
313
|
+
|
314
|
+
DB schemas for the configuration database are in
|
315
|
+
[lib/rflow/configuration/migrations](lib/rflow/configuration/migrations)
|
316
|
+
and define the complete workflow configuration. Note that each of the
|
317
|
+
tables uses a UUID primary key, and UUIDs are used within RFlow to
|
318
|
+
identify specific components.
|
319
|
+
|
320
|
+
* settings - general application settings, such as log levels, app
|
321
|
+
names, directories, etc
|
322
|
+
|
323
|
+
* components - a list of the components, each including its name,
|
324
|
+
specification (Ruby class), and options. Note that the options are
|
325
|
+
serialized to the database as YAML, and components should understand
|
326
|
+
that the round-trip through the database might not be perfect (e.g.
|
327
|
+
Ruby symbols might become strings). A component also has a number of
|
328
|
+
input ports and output ports.
|
329
|
+
|
330
|
+
* ports - belonging to a component (via `component_uuid` foreign key),
|
331
|
+
also has a `type` column for ActiveRecord STI, which gets set to
|
332
|
+
either a `RFlow::Configuration::InputPort` or
|
333
|
+
`RFlow::Configuration::OutputPort`.
|
334
|
+
|
335
|
+
* connections - a connection between two ports via foreign keys
|
336
|
+
`input_port_uuid` and `output_port_uuid`. Like ports, connections
|
337
|
+
are typed via AR STI (`RFlow::Configuration::ZMQConnection` or
|
338
|
+
`RFlow::Configuration::AMQPConnection`) and have a YAML serialized
|
339
|
+
`options` hash. A connection also (potentially) defines the port
|
340
|
+
keys.
|
341
|
+
|
342
|
+
RFlow also provides a Ruby DSL for a configuration-like file to be used
|
343
|
+
to load the database:
|
344
|
+
|
345
|
+
```ruby
|
346
|
+
RFlow::Configuration::RubyDSL.configure do |config|
|
347
|
+
# Configure the settings, which include paths for various files, log
|
348
|
+
# levels, and component specific stuffs
|
349
|
+
config.setting('rflow.log_level', 'DEBUG')
|
350
|
+
config.setting('rflow.application_directory_path', '../tmp')
|
351
|
+
|
352
|
+
config.setting('rflow.application_name', 'testapp')
|
353
|
+
|
354
|
+
# Instantiate components
|
355
|
+
config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', {
|
356
|
+
'start' => 0,
|
357
|
+
'finish' => 10,
|
358
|
+
'step' => 3,
|
359
|
+
'interval_seconds' => 1
|
360
|
+
}
|
361
|
+
config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', {
|
362
|
+
'start' => 20,
|
363
|
+
'finish' => 30
|
364
|
+
}
|
365
|
+
config.component 'filter', 'RFlow::Components::RubyProcFilter', {
|
366
|
+
'filter_proc_string' => 'lambda {|message| true}'
|
367
|
+
}
|
368
|
+
config.component 'output1', 'RFlow::Components::FileOutput', {
|
369
|
+
'output_file_path' => '/tmp/out1'
|
370
|
+
}
|
371
|
+
config.component 'output2', 'RFlow::Components::FileOutput', {
|
372
|
+
'output_file_path' => '/tmp/out2'
|
373
|
+
}
|
374
|
+
|
375
|
+
# Wire components together
|
376
|
+
config.connect 'generate_ints1#out' => 'filter#in'
|
377
|
+
config.connect 'generate_ints2#out' => 'filter#in'
|
378
|
+
config.connect 'filter#filtered' => 'replicate#in'
|
379
|
+
config.connect 'filter#out' => 'output1#in'
|
380
|
+
config.connect 'filter#filtered' => 'output2#in'
|
381
|
+
end
|
382
|
+
```
|
383
|
+
|
384
|
+
## Command-Line Operation
|
385
|
+
|
386
|
+
RFlow includes the `rflow` binary that can load a database from a Ruby
|
387
|
+
DSL, as well as start/stop the workflow application as a daemon.
|
388
|
+
Invoking the `rflow` binary without any options will give a brief help:
|
389
|
+
|
390
|
+
```
|
391
|
+
Usage: rflow [options] (start|stop|status|load)
|
392
|
+
-d, --database DB Config database (sqlite) path (GENERALLY REQUIRED)
|
393
|
+
-c, --config CONFIG Config file path (only valid for load)
|
394
|
+
-e, --extensions FILE1[,FILE_N] Extension file paths (will load)
|
395
|
+
-g, --gems GEM1[,GEM_N] Extension gems (will require)
|
396
|
+
-l, --log LOGFILE Initial startup log file (in addition to stdout)
|
397
|
+
-v, --verbose [LEVEL] Control the startup log (and stdout) verbosity (DEBUG, INFO, WARN) defaults to INFO
|
398
|
+
-f Run in the foreground
|
399
|
+
--version Show RFlow version and exit
|
400
|
+
-h, --help Show this message and exit
|
401
|
+
```
|
402
|
+
|
403
|
+
In general, the process for getting started is to first create a
|
404
|
+
configuration database via `rflow load`:
|
405
|
+
|
406
|
+
```
|
407
|
+
rflow load -d my_config.sqlite -c my_ruby_dsl.rb
|
408
|
+
```
|
409
|
+
|
410
|
+
which will create the `my_config.sqlite` configuration database loaded
|
411
|
+
with the `my_ruby_dsl.rb` configuration DSL.
|
412
|
+
|
413
|
+
Once a config database exists, you can start up the application that
|
414
|
+
it describes with `rflow start`. The `--extensions` argument allows
|
415
|
+
loading of arbitrary Ruby code (via Ruby's `load`), which is usually
|
416
|
+
where the component implementations are stored, as well as data type
|
417
|
+
registrations.
|
418
|
+
|
419
|
+
```
|
420
|
+
rflow start -d my_config.sqlite -e my_component.rb,my_other_component.rb,my_data_type.rb
|
421
|
+
```
|
422
|
+
|
423
|
+
By default, RFlow will daemonize, write its pid file to
|
424
|
+
`./run/app.pid` and write its log file to `./log/app.log`. The `-f`
|
425
|
+
flag will keep RFlow in the foreground.
|
426
|
+
|
427
|
+
RFlow also supports two signals that allow for useful management of a
|
428
|
+
running RFlow daemon's log. Sending a `SIGUSR1` to the running RFlow
|
429
|
+
process will cause RFlow to close and reopen its log file, which
|
430
|
+
allows for easy log management without restarting RFlow. In addition,
|
431
|
+
sending a `SIGUSR2` will toggle RFlow's log-level to `DEBUG`, and a
|
432
|
+
subsequent `SIGUSR2` will toggle the log-level back to what was
|
433
|
+
originally set. This allows for easy debugging of a running RFlow
|
434
|
+
process.
|
435
|
+
|
436
|
+
Copyright 2014 RedJack LLC
|
437
|
+
|
438
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
439
|
+
you may not use this file except in compliance with the License.
|
440
|
+
You may obtain a copy of the License at
|
441
|
+
|
442
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
443
|
+
|
444
|
+
Unless required by applicable law or agreed to in writing, software
|
445
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
446
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
447
|
+
See the License for the specific language governing permissions and
|
448
|
+
limitations under the License.
|