rflow 0.0.5 → 1.0.0a1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. checksums.yaml +4 -4
  2. data/.ruby-gemset +1 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +21 -0
  5. data/.yardopts +1 -0
  6. data/Gemfile +5 -1
  7. data/Guardfile +8 -0
  8. data/LICENSE +190 -0
  9. data/NOTES +26 -13
  10. data/README.md +448 -0
  11. data/Rakefile +5 -12
  12. data/bin/rflow +23 -20
  13. data/example/basic_config.rb +2 -2
  14. data/example/basic_extensions.rb +8 -8
  15. data/example/http_config.rb +1 -1
  16. data/example/http_extensions.rb +15 -15
  17. data/lib/rflow.rb +15 -387
  18. data/lib/rflow/component.rb +105 -50
  19. data/lib/rflow/component/port.rb +25 -24
  20. data/lib/rflow/components/raw.rb +4 -4
  21. data/lib/rflow/components/raw/extensions.rb +2 -2
  22. data/lib/rflow/configuration.rb +54 -36
  23. data/lib/rflow/configuration/component.rb +2 -3
  24. data/lib/rflow/configuration/connection.rb +9 -10
  25. data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
  26. data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
  27. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
  28. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
  29. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
  30. data/lib/rflow/configuration/port.rb +3 -4
  31. data/lib/rflow/configuration/ruby_dsl.rb +59 -35
  32. data/lib/rflow/configuration/setting.rb +8 -7
  33. data/lib/rflow/configuration/shard.rb +24 -0
  34. data/lib/rflow/configuration/uuid_keyed.rb +3 -3
  35. data/lib/rflow/connection.rb +21 -10
  36. data/lib/rflow/connections/zmq_connection.rb +45 -44
  37. data/lib/rflow/logger.rb +67 -0
  38. data/lib/rflow/master.rb +127 -0
  39. data/lib/rflow/message.rb +14 -14
  40. data/lib/rflow/pid_file.rb +84 -0
  41. data/lib/rflow/shard.rb +148 -0
  42. data/lib/rflow/version.rb +1 -1
  43. data/rflow.gemspec +22 -28
  44. data/schema/message.avsc +8 -8
  45. data/spec/fixtures/config_ints.rb +4 -4
  46. data/spec/fixtures/config_shards.rb +30 -0
  47. data/spec/fixtures/extensions_ints.rb +8 -8
  48. data/spec/rflow_component_port_spec.rb +58 -0
  49. data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
  50. data/spec/rflow_configuration_spec.rb +4 -4
  51. data/spec/rflow_message_data_raw.rb +2 -2
  52. data/spec/rflow_message_data_spec.rb +6 -6
  53. data/spec/rflow_message_spec.rb +13 -13
  54. data/spec/rflow_spec.rb +294 -71
  55. data/spec/schema_spec.rb +2 -2
  56. data/spec/spec_helper.rb +6 -4
  57. data/temp.rb +21 -21
  58. metadata +56 -65
  59. data/.rvmrc +0 -1
  60. data/README +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c322bc6cf9c3b4ccd46b13cf56d3c2460dc5f0be
4
- data.tar.gz: af2d6fb3e7051a074c56fa10b70b5a02b23bb0d7
3
+ metadata.gz: 57c6f0b7c61b30886bbf0f4b2f65821aa5b1b0f9
4
+ data.tar.gz: 62f58d281509732effeca0c1a041df2668497b80
5
5
  SHA512:
6
- metadata.gz: 5a3cd46af3c815d2cb5840d48a0e38f7e28dbe911276fac75d3466bef9d05d7d49e38baa010b5ce419a67aea5829d2f227d2ff74aa5e689f6c1e6109d882ad81
7
- data.tar.gz: 539c61aca94e84e1ccb00acba1ef87c7a6556dc65180f9f905561b09e147eab83ff0a409dd01b84a1113ba820cf89c6470ee64c1ace68736232423ba3a1d9668
6
+ metadata.gz: 8d74949a15024641aef4123ca703d2f2ccf6fb5f97dca9829a282ca53bd6d36c347c844b189255955c7fa058bf903853d0c3acf13fda4dc2e2b3f40e49129310
7
+ data.tar.gz: f6233d9cc128220c886b6ed4970b544040cace77af6701b6f7429da304ad7de00b5536455469e21988f37d2fa1faaedf3f4324f08eee343669b03c6bdaece735
@@ -0,0 +1 @@
1
+ rflow-dev
@@ -0,0 +1 @@
1
+ ruby-2.1.1
@@ -0,0 +1,21 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - 2.1.0
7
+
8
+ before_install:
9
+ - sudo apt-get install libtool autoconf automake uuid-dev build-essential
10
+ - wget http://download.zeromq.org/zeromq-3.2.4.tar.gz && tar zxvf zeromq-3.2.4.tar.gz && cd zeromq-3.2.4 && ./configure && make && sudo make install && cd ..
11
+ # Only has 4.0.4, need 3.2 version due to old em-zeromq
12
+ # - sudo add-apt-repository -y ppa:chris-lea/zeromq
13
+ # - sudo apt-get update
14
+ # - sudo apt-get install libzmq3 libzmq3-dev
15
+
16
+ script: bundle exec rspec spec
17
+
18
+ notifications:
19
+ hipchat:
20
+ rooms:
21
+ secure: a4nrCmDPwhteJA65QFRlBdnsknT+4y/JtZL/sLPCObOahFWvLOXMggPXvHAOssCaa2ydYrMMvWNliOz63nuu3qAnR90H7aOU3o+2K3zeACy0cAjF27lDonLhaYHeUz07oPwr/iDlFC8bDfFDempjIFFnXSc/LhUWaCltnJ7W5vI=
@@ -0,0 +1 @@
1
+ --output ./doc --main README.md --files schema/*.avsc lib/**/*.rb bin/*.rb - README.md LICENSE
data/Gemfile CHANGED
@@ -1,5 +1,9 @@
1
- source "http://rubygems.org"
1
+ source "https://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in rflow.gemspec
4
4
  gemspec
5
5
 
6
+ group :development do
7
+ gem 'guard'
8
+ gem 'guard-rspec'
9
+ end
@@ -0,0 +1,8 @@
1
+ # A sample Guardfile
2
+ # More info at https://github.com/guard/guard#readme
3
+
4
+ guard :rspec do
5
+ watch(%r{^spec/.+_spec\.rb$})
6
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
7
+ watch('spec/spec_helper.rb') { "spec" }
8
+ end
data/LICENSE ADDED
@@ -0,0 +1,190 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ Copyright 2014 RedJack LLC
179
+
180
+ Licensed under the Apache License, Version 2.0 (the "License");
181
+ you may not use this file except in compliance with the License.
182
+ You may obtain a copy of the License at
183
+
184
+ http://www.apache.org/licenses/LICENSE-2.0
185
+
186
+ Unless required by applicable law or agreed to in writing, software
187
+ distributed under the License is distributed on an "AS IS" BASIS,
188
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
189
+ See the License for the specific language governing permissions and
190
+ limitations under the License.
data/NOTES CHANGED
@@ -1,3 +1,16 @@
1
+ RFlow starts
2
+ read in DB
3
+ create new shards
4
+ - Create a set of workers with the shard configuration
5
+ - each worker creates a set of components
6
+
7
+ - create components
8
+
9
+
10
+
11
+
12
+
13
+
1
14
  RFlow Manager
2
15
 
3
16
  Components
@@ -20,12 +33,12 @@ rflow <config file>
20
33
  - place pid files in deployment's run directory
21
34
  Configure components via zmq
22
35
  Daemonize self
23
-
36
+
24
37
 
25
38
 
26
39
  class Component
27
40
  def self.input_port
28
- end
41
+ end
29
42
 
30
43
  def self.output_port
31
44
  end
@@ -33,11 +46,11 @@ class Component
33
46
  attr_accessor :state
34
47
 
35
48
  def initialize(config, run_directory)
36
-
49
+
37
50
  end
38
51
 
39
52
  def run
40
-
53
+
41
54
  end
42
55
 
43
56
  def configure
@@ -56,7 +69,7 @@ class PassThrough < Component
56
69
  # This will initialize the ports
57
70
  super
58
71
  # Do stuff to initialize component
59
- end
72
+ end
60
73
 
61
74
  end
62
75
 
@@ -66,7 +79,7 @@ Computation Requirements:
66
79
  - management bus connection information
67
80
  - group and instance UUID
68
81
  - beacon interval
69
- - run directory, containing
82
+ - run directory, containing
70
83
  - PID files
71
84
  - log dir + logs
72
85
  - computation-specific configuration (conf dir)
@@ -90,7 +103,7 @@ Computation Requirements:
90
103
 
91
104
  External Computations:
92
105
  - Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
93
- -
106
+ -
94
107
 
95
108
 
96
109
  RFlow
@@ -100,7 +113,7 @@ RFlow
100
113
 
101
114
  Translate
102
115
  - Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
103
-
116
+
104
117
 
105
118
 
106
119
 
@@ -112,7 +125,7 @@ Plugins:
112
125
  - necessary to tell system?
113
126
  - need a protocol for defining schema transfer
114
127
  - each message has attached schema
115
-
128
+
116
129
 
117
130
  lib/rflow/message.rb
118
131
 
@@ -122,7 +135,7 @@ RFlow::Management
122
135
  - Somewhere for external people to register new computations with running system
123
136
  - computation says that its running and asks for Connection configuration
124
137
  - how will it specify where in the workflow it wants to run????
125
-
138
+
126
139
  RFlow::Message(complete on-the-wire Avro message format)
127
140
  data_type, provenance, external_ids, empty, data (see below)
128
141
 
@@ -142,7 +155,7 @@ RFlow::Connection::AMQP
142
155
 
143
156
  RFlow::Connection::ZMQ
144
157
 
145
-
158
+
146
159
 
147
160
 
148
161
  computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
@@ -152,12 +165,12 @@ AMQP::Topic - responsible for setting up a topic -> queue binding
152
165
  r.outgoing = amqp connection, channel, vhost, login, password, queue name
153
166
  behavior -> n x m, "round-robin" among the connected outgoing
154
167
  incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
155
-
168
+
156
169
 
157
170
  ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
158
171
  r.incoming = zmq connection string (tcp://ip:port), type pub
159
172
  r.outgoing = zmq connection string (tcp://ip:port), type sub
160
- behavior -> n x m, broadcast sending,
173
+ behavior -> n x m, broadcast sending,
161
174
 
162
175
  ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
163
176
  r.incoming = zmq connection string (tcp://ip:port), type push
@@ -0,0 +1,448 @@
1
+ # RFlow
2
+
3
+ [![Build Status](https://travis-ci.org/redjack/rflow.png?branch=master)](https://travis-ci.org/redjack/rflow)
4
+
5
+ RFlow is a Ruby framework inspired by
6
+ [flow-based programming](http://en.wikipedia.org/wiki/Flow-based_programming)
7
+ (FBP), which was previously inspired by
8
+ [Communicating Sequential Processes](http://en.wikipedia.org/wiki/Communicating_sequential_processes)
9
+ (CSP). It has some conceptual similarities to Javascript's
10
+ [NoFlo](http://noflojs.org/) system, Java's
11
+ [Storm](http://storm.incubator.apache.org/), and Clojure's
12
+ [core.async](http://clojure.github.io/core.async/) library.
13
+
14
+ In short, components communicate with each other by sending/receiving
15
+ messages via their output/input ports over connections. Ports are
16
+ "wired" together output->input with connections, and messages are
17
+ explicitly serialized before being sent over the connection. RFlow
18
+ supports generalized connection types and message serialization,
19
+ however only two are in current use, namely ZeroMQ connections and
20
+ Avro serialization.
21
+
22
+ RFlow currently runs as a single-threaded, evented system on top of
23
+ [Eventmachine](http://rubyeventmachine.com/), meaning that any code
24
+ should be coded in an asynchronous style so as to not block the
25
+ Eventmachine reactor (and thus block all the other components). There
26
+ is currently work being done to "shard" the workflow among multiple
27
+ processes and/or threads.
28
+
29
+ Some of the long-term goals of RFlow are to allow for components and
30
+ portions of the workflow to be defined in any language that supports
31
+ Avro and ZeroMQ, which are numerous.
32
+
33
+
34
+ ## Definitions
35
+
36
+ * __Component__ - the basic unit of RFlow computation. Each
37
+ component is a shared-nothing, individual computation module that
38
+ communicates with the rest of the system through explicit message
39
+ passing via input and output ports.
40
+
41
+ * __Port__ - a named entity on each component that is responsible for
42
+ receiving data (an input port) or sending data (an output port).
43
+ Ports can be "keyed" or "indexed" to allow better multiplexing of
44
+ messages out/in a single port, as well as allow a single port to be
45
+ accessed by an array.
46
+
47
+ * __Connection__ - a directed link between an output port and an input
48
+ port. RFlow supports generalized connection types, however only
49
+ ZeroMQ IPC links are currently used.
50
+
51
+ * __Message__ - a bit of serialized data that is sent out an output
52
+ port and received on an input port. Due to the serialization,
53
+ message types and schemas are explicitly defined. In a departure
54
+ from "pure" FBP, RFlow supports sending multiple message types via a
55
+ single connection.
56
+
57
+ * __Workflow__ - the common name for the digraph created when the
58
+ components (nodes) are wired together via connections to their
59
+ respective output/input ports.
60
+
61
+
62
+ ## Component Examples
63
+
64
+ The following describes the API of an RFlow component:
65
+
66
+ ```ruby
67
+ class SimpleComponent < RFlow::Component
68
+ input_port :in
69
+ output_port :out
70
+
71
+ def configure!(config); end
72
+ def run!; end
73
+ def process_message(input_port, input_port_key, connection, message); end
74
+ def shutdown!; end
75
+ def cleanup!; end
76
+ end
77
+ ```
78
+
79
+ * `input_port` and `output_port` define the named ports that will
80
+ receive data or send data, respectively. These class methods create
81
+ accessors for their respective port names, to be used later in the
82
+ `process_message` or `run!` methods. There can be multiple (or no)
83
+ input and output ports.
84
+
85
+ * `configure!` (called with a hash configuration) is called after the
86
+ component is instantiated but before the workflow has been wired or
87
+ any messages have been sent. Note that this is called outside the
88
+ Eventmachine reactor.
89
+
90
+ * `run!` is called after all the components have been wired together
91
+ with connections and the entire workflow has been created. For a
92
+ component that is a source of messages, this is where messages will
93
+ be sent. For example, if the component is reading from a file, this
94
+ is where the file will be opened, the contents read into a message,
95
+ and the message sent out the output port. `run!` is called within
96
+ the Eventmachine reactor.
97
+
98
+ * `process_message` is an evented callback that is called whenever the
99
+ component receives a message on one of its input ports.
100
+ `process_message` is called within the Eventmachine reactor.
101
+
102
+ * `shutdown!` is called when the flow is being terminated, and is
103
+ meant to allow the components to do penultimate processing and send
104
+ any final messages. All components in a flow will be told to
105
+ `shutdown!` before they are told to `cleanup!`.
106
+
107
+ * `cleanup!` is the final call to each component, and allows the
108
+ component to clean up any external resources that it might have
109
+ outstanding, such as file handles or network sockets.
110
+
111
+ "Source" components will often do all of their work within the `run!`
112
+ method, and often gather message data from an external source, such as
113
+ file, database, or network socket. The following component generates a
114
+ set of integers between a configured start/finish, incrementing by a
115
+ configured step:
116
+
117
+ ```ruby
118
+ class RFlow::Components::GenerateIntegerSequence < RFlow::Component
119
+ output_port :out
120
+
121
+ def configure!(config)
122
+ @start = config['start'].to_i
123
+ @finish = config['finish'].to_i
124
+ @step = config['step'] ? config['step'].to_i : 1
125
+ # If interval seconds is not given, it will default to 0
126
+ @interval_seconds = config['interval_seconds'].to_i
127
+ end
128
+
129
+ # Note that this uses the timer (sometimes with 0 interval) so as
130
+ # not to block the reactor
131
+ def run!
132
+ timer = EM::PeriodicTimer.new(@interval_seconds) do
133
+ message = RFlow::Message.new('RFlow::Message::Data::Integer')
134
+ message.data.data_object = @start
135
+ out.send_message message
136
+ @start += @step
137
+ timer.cancel if @start > @finish
138
+ end
139
+ end
140
+ end
141
+ ```
142
+
143
+ "Middle" components receive messages on input port(s), perform a bit
144
+ of computation, and then send a message out the output port(s). The
145
+ following component accepts a Ruby expression string via its config,
146
+ and then uses that as an expression to determine what port to send an
147
+ incoming message:
148
+
149
+ ```ruby
150
+ class RFlow::Components::RubyProcFilter < RFlow::Component
151
+ input_port :in
152
+ output_port :filtered
153
+ output_port :dropped
154
+ output_port :errored
155
+
156
+ def configure!(config)
157
+ @filter_proc = eval("lambda {|message| #{config['filter_proc_string']} }")
158
+ end
159
+
160
+ def process_message(input_port, input_port_key, connection, message)
161
+ begin
162
+ if @filter_proc.call(message)
163
+ filtered.send_message message
164
+ else
165
+ dropped.send_message message
166
+ end
167
+ rescue Exception => e
168
+ errored.send_message message
169
+ end
170
+ end
171
+ end
172
+ ```
173
+
174
+ "Sink" components accept messages on an input port and do not have an
175
+ output port. They often operate on external sinks, such as writing
176
+ messages to a file, database, or network socket. The following
177
+ component writes the inspected message to a file (defined via the
178
+ configuration):
179
+
180
+ ```ruby
181
+ class RFlow::Components::FileOutput < RFlow::Component
182
+ input_port :in
183
+
184
+ attr_accessor :output_file_path, :output_file
185
+
186
+ def configure!(config)
187
+ self.output_file_path = config['output_file_path']
188
+ self.output_file = File.new output_file_path, 'w+'
189
+ end
190
+
191
+ def process_message(input_port, input_port_key, connection, message)
192
+ output_file.puts message.data.data_object.inspect
193
+ output_file.flush
194
+ end
195
+
196
+ def cleanup
197
+ output_file.close
198
+ end
199
+ end
200
+ ```
201
+
202
+ ## RFlow Messages
203
+
204
+ RFlow messages are instances of
205
+ [`RFlow::Message`](lib/rflow/message.rb), which are ultimately
206
+ serialized via an Avro [schema](schema/message.avsc).
207
+
208
+ There are two parts of the message "envelope": a provenance and the
209
+ embedded data object "payload".
210
+
211
+ The `provenance` is a way for a component to annotate a message with a
212
+ bit of data that should (by convention) be carried through the
213
+ workflow with the message, as well as being copied to derived
214
+ messages. For example, a TCP server component would spin up a TCP
215
+ server and, upon receiving a connection and packets on a session, it
216
+ would marshal the packets into `RFlow::Message`s and send them out
217
+ its output ports. Messages received on its input port, however, need
218
+ to have a way to be matched to the corresponding underlying TCP
219
+ connection. `provenance` provides a method for the TCP server
220
+ component to add a bit of metadata (namely an identifier for the TCP
221
+ connection) such that later messages that contain the same provenance
222
+ can be matched to the correct underlying TCP connection.
223
+
224
+
225
+ The other parts of the message envelope are related to the embedded
226
+ data object. In addition to the data object itself (which is encoded
227
+ with a specific Avro schema), there are a few fields that describe the
228
+ embedded data, namely the `data_type_name`, the
229
+ `data_serialization_type`, and the `data_schema`. By including all
230
+ this metadata in each message, the system is completely dynamic and
231
+ allows for multiple message types to be included on a single
232
+ connection, as well as enabling non-RFlow components to be created in
233
+ any language. This does come at the expense of larger messages which
234
+ results in greater message overhead.
235
+
236
+ For example, if we have a simple integer data type that we would like
237
+ to serialize via Avro, we can register the schema with the following
238
+ `add_available_data_type` code shown below:
239
+
240
+ ```ruby
241
+ long_integer_schema = '{"type": "long"}'
242
+ RFlow::Configuration.add_available_data_type('RFlow::Message::Data::Integer', 'avro', long_integer_schema)
243
+ ```
244
+
245
+ This will make the schema and message type available to RFlow, such
246
+ that it will be able to create a new message with:
247
+
248
+ ```ruby
249
+ message = RFlow::Message.new('RFlow::Message::Data::Integer')
250
+ ```
251
+
252
+ and will automatically reconstitute a message from the connection and
253
+ call a component's `process_message`.
254
+
255
+ The deserialized Avro Ruby object is available as the `data_object`
256
+ accessor on the `data` class, i.e.:
257
+
258
+ ```ruby
259
+ message.data.data_object = 1024
260
+ ```
261
+
262
+ The `data_object` is the deserialized Avro Ruby object and, as such,
263
+ allows the Avro object to be accessed as a Ruby object. In order to
264
+ provide a more convenient interface to the underlying Avro object,
265
+ RFlow allows modules to be dynamically mixed in to the `data` class
266
+ object.
267
+
268
+ For example, the module below provides a bit of extra functionality to
269
+ the above-mentioned `RFlow::Message::Data::Integer` message type,
270
+ namely to default the integer to 0 upon being mixed in, provide a
271
+ better named accessor, and add a `default?` method to the `data` object:
272
+
273
+ ```ruby
274
+ module SimpleDataExtension
275
+ def self.extended(base_data)
276
+ base_data.data_object = 0
277
+ end
278
+
279
+ def int; data_object; end
280
+ def int=(new_int); data_object = new_int; end
281
+
282
+ def default?;
283
+ data_object == 0
284
+ end
285
+ end
286
+ ```
287
+
288
+ Once a module is defined, it needs to be registered to the appropriate
289
+ message data type. Note that multiple modules can be registered for a
290
+ given message data type.
291
+
292
+ ```ruby
293
+ RFlow::Configuration.add_available_data_extension('RFlow::Message::Data::Integer', SimpleDataExtension)
294
+ ```
295
+
296
+ The result of this is that the following code will work:
297
+
298
+ ```ruby
299
+ message = RFlow::Message.new('RFlow::Message::Data::Integer')
300
+ message.data.int == 0 # => true
301
+ message.data.default? # => true
302
+ message.data.int = 1024
303
+ message.data.default? # => false
304
+ ```
305
+
306
+
307
+ ## RFlow Workflow Configuration
308
+
309
+ RFlow currently stores its configuration in a SQLite database which
310
+ is internally accessed via ActiveRecord. Given that SQLite is a
311
+ rather simple and standard interface, non-RFlow components could
312
+ access it and determine their respective ZMQ connections.
313
+
314
+ DB schemas for the configuration database are in
315
+ [lib/rflow/configuration/migrations](lib/rflow/configuration/migrations)
316
+ and define the complete workflow configuration. Note that each of the
317
+ tables uses a UUID primary key, and UUIDs are used within RFlow to
318
+ identify specific components.
319
+
320
+ * settings - general application settings, such as log levels, app
321
+ names, directories, etc
322
+
323
+ * components - a list of the components, each including its name,
324
+ specification (Ruby class), and options. Note that the options are
325
+ serialized to the database as YAML, and components should understand
326
+ that the round-trip through the database might not be perfect (e.g.
327
+ Ruby symbols might become strings). A component also has a number of
328
+ input ports and output ports.
329
+
330
+ * ports - belonging to a component (via `component_uuid` foreign key),
331
+ also has a `type` column for ActiveRecord STI, which gets set to
332
+ either a `RFlow::Configuration::InputPort` or
333
+ `RFlow::Configuration::OutputPort`.
334
+
335
+ * connections - a connection between two ports via foreign keys
336
+ `input_port_uuid` and `output_port_uuid`. Like ports, connections
337
+ are typed via AR STI (`RFlow::Configuration::ZMQConnection` or
338
+ `RFlow::Configuration::AMQPConnection`) and have a YAML serialized
339
+ `options` hash. A connection also (potentially) defines the port
340
+ keys.
341
+
342
+ RFlow also provides a Ruby DSL for a configuration-like file to be used
343
+ to load the database:
344
+
345
+ ```ruby
346
+ RFlow::Configuration::RubyDSL.configure do |config|
347
+ # Configure the settings, which include paths for various files, log
348
+ # levels, and component specific stuffs
349
+ config.setting('rflow.log_level', 'DEBUG')
350
+ config.setting('rflow.application_directory_path', '../tmp')
351
+
352
+ config.setting('rflow.application_name', 'testapp')
353
+
354
+ # Instantiate components
355
+ config.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', {
356
+ 'start' => 0,
357
+ 'finish' => 10,
358
+ 'step' => 3,
359
+ 'interval_seconds' => 1
360
+ }
361
+ config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', {
362
+ 'start' => 20,
363
+ 'finish' => 30
364
+ }
365
+ config.component 'filter', 'RFlow::Components::RubyProcFilter', {
366
+ 'filter_proc_string' => 'lambda {|message| true}'
367
+ }
368
+ config.component 'output1', 'RFlow::Components::FileOutput', {
369
+ 'output_file_path' => '/tmp/out1'
370
+ }
371
+ config.component 'output2', 'RFlow::Components::FileOutput', {
372
+ 'output_file_path' => '/tmp/out2'
373
+ }
374
+
375
+ # Wire components together
376
+ config.connect 'generate_ints1#out' => 'filter#in'
377
+ config.connect 'generate_ints2#out' => 'filter#in'
378
+ config.connect 'filter#filtered' => 'replicate#in'
379
+ config.connect 'filter#out' => 'output1#in'
380
+ config.connect 'filter#filtered' => 'output2#in'
381
+ end
382
+ ```
383
+
384
+ ## Command-Line Operation
385
+
386
+ RFlow includes the `rflow` binary that can load a database from a Ruby
387
+ DSL, as well as start/stop the workflow application as a daemon.
388
+ Invoking the `rflow` binary without any options will give a brief help:
389
+
390
+ ```
391
+ Usage: rflow [options] (start|stop|status|load)
392
+ -d, --database DB Config database (sqlite) path (GENERALLY REQUIRED)
393
+ -c, --config CONFIG Config file path (only valid for load)
394
+ -e, --extensions FILE1[,FILE_N] Extension file paths (will load)
395
+ -g, --gems GEM1[,GEM_N] Extension gems (will require)
396
+ -l, --log LOGFILE Initial startup log file (in addition to stdout)
397
+ -v, --verbose [LEVEL] Control the startup log (and stdout) verbosity (DEBUG, INFO, WARN) defaults to INFO
398
+ -f Run in the foreground
399
+ --version Show RFlow version and exit
400
+ -h, --help Show this message and exit
401
+ ```
402
+
403
+ In general, the process for getting started is to first create a
404
+ configuration database via `rflow load`:
405
+
406
+ ```
407
+ rflow load -d my_config.sqlite -c my_ruby_dsl.rb
408
+ ```
409
+
410
+ which will create the `my_config.sqlite` configuration database loaded
411
+ with the `my_ruby_dsl.rb` configuration DSL.
412
+
413
+ Once a config database exists, you can start up the application that
414
+ it describes with `rflow start`. The `--extensions` argument allows
415
+ loading of arbitrary Ruby code (via Ruby's `load`), which is usually
416
+ where the component implementations are stored, as well as data type
417
+ registrations.
418
+
419
+ ```
420
+ rflow start -d my_config.sqlite -e my_component.rb,my_other_component.rb,my_data_type.rb
421
+ ```
422
+
423
+ By default, RFlow will daemonize, write its pid file to
424
+ `./run/app.pid` and write its log file to `./log/app.log`. The `-f`
425
+ flag will keep RFlow in the foreground.
426
+
427
+ RFlow also supports two signals that allow for useful management of a
428
+ running RFlow daemon's log. Sending a `SIGUSR1` to the running RFlow
429
+ process will cause RFlow to close and reopen its log file, which
430
+ allows for easy log management without restarting RFlow. In addition,
431
+ sending a `SIGUSR2` will toggle RFlow's log-level to `DEBUG`, and a
432
+ subsequent `SIGUSR2` will toggle the log-level back to what was
433
+ originally set. This allows for easy debugging of a running RFlow
434
+ process.
435
+
436
+ Copyright 2014 RedJack LLC
437
+
438
+ Licensed under the Apache License, Version 2.0 (the "License");
439
+ you may not use this file except in compliance with the License.
440
+ You may obtain a copy of the License at
441
+
442
+ http://www.apache.org/licenses/LICENSE-2.0
443
+
444
+ Unless required by applicable law or agreed to in writing, software
445
+ distributed under the License is distributed on an "AS IS" BASIS,
446
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
447
+ See the License for the specific language governing permissions and
448
+ limitations under the License.