rflow 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +3 -0
- data/Gemfile +2 -1
- data/Guardfile +1 -1
- data/README.md +9 -9
- data/Vagrantfile +9 -9
- data/bin/rflow +9 -9
- data/example/http_extensions.rb +7 -7
- data/lib/rflow.rb +24 -3
- data/lib/rflow/child_process.rb +2 -0
- data/lib/rflow/component.rb +2 -2
- data/lib/rflow/component/port.rb +51 -26
- data/lib/rflow/configuration.rb +4 -4
- data/lib/rflow/configuration/ruby_dsl.rb +4 -4
- data/lib/rflow/connection.rb +4 -4
- data/lib/rflow/connections/zmq_connection.rb +5 -7
- data/lib/rflow/daemon_process.rb +12 -2
- data/lib/rflow/logger.rb +1 -1
- data/lib/rflow/master.rb +2 -20
- data/lib/rflow/message.rb +28 -18
- data/lib/rflow/pid_file.rb +17 -5
- data/lib/rflow/shard.rb +6 -6
- data/lib/rflow/version.rb +1 -1
- data/rflow.gemspec +20 -20
- data/schema/message.avsc +6 -1
- data/spec/rflow/component/port_spec.rb +17 -17
- data/spec/rflow/components/clock_spec.rb +1 -1
- data/spec/rflow/configuration/ruby_dsl_spec.rb +47 -47
- data/spec/rflow/configuration_spec.rb +8 -8
- data/spec/rflow/forward_to_input_port_spec.rb +38 -9
- data/spec/rflow/forward_to_output_port_spec.rb +5 -4
- data/spec/rflow/logger_spec.rb +5 -5
- data/spec/rflow/message/data/raw_spec.rb +2 -2
- data/spec/rflow/message/data_spec.rb +8 -8
- data/spec/rflow/message_spec.rb +63 -29
- data/spec/rflow_spec.rb +18 -18
- metadata +14 -13
- data/NOTES +0 -187
data/NOTES
DELETED
@@ -1,187 +0,0 @@
|
|
1
|
-
RFlow Manager
|
2
|
-
|
3
|
-
Components
|
4
|
-
Input Ports
|
5
|
-
Output Ports
|
6
|
-
|
7
|
-
Connections
|
8
|
-
Input Ports
|
9
|
-
Output Ports
|
10
|
-
|
11
|
-
rflow <config file>
|
12
|
-
figure out a work directory
|
13
|
-
make sure that it has the right subdirectories (can be overridden)
|
14
|
-
run tmp logs schemas components
|
15
|
-
Set up logging to logs/rflow.log
|
16
|
-
Load all schemas
|
17
|
-
Verify all component installation
|
18
|
-
Initialize components
|
19
|
-
Start components running and make sure that they "daemonize" correctly
|
20
|
-
- place pid files in deployment's run directory
|
21
|
-
Configure components via zmq
|
22
|
-
Daemonize self
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
class Component
|
27
|
-
def self.input_port
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.output_port
|
31
|
-
end
|
32
|
-
|
33
|
-
attr_accessor :state
|
34
|
-
|
35
|
-
def initialize(config, run_directory)
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
def run
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
def configure
|
44
|
-
|
45
|
-
end
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
class PassThrough < Component
|
50
|
-
input_port [:in]
|
51
|
-
input_port :another_in
|
52
|
-
output_port :out
|
53
|
-
output_port :another_out
|
54
|
-
|
55
|
-
def initialize(config, run_directory)
|
56
|
-
# This will initialize the ports
|
57
|
-
super
|
58
|
-
# Do stuff to initialize component
|
59
|
-
end
|
60
|
-
|
61
|
-
end
|
62
|
-
|
63
|
-
|
64
|
-
Computation Requirements:
|
65
|
-
Initial startup with:
|
66
|
-
- management bus connection information
|
67
|
-
- group and instance UUID
|
68
|
-
- beacon interval
|
69
|
-
- run directory, containing
|
70
|
-
- PID files
|
71
|
-
- log dir + logs
|
72
|
-
- computation-specific configuration (conf dir)
|
73
|
-
Needs to process the following messages from mgmt bus:
|
74
|
-
- CONFIGURE (ports)
|
75
|
-
- RUN
|
76
|
-
- SHUTDOWN
|
77
|
-
Needs to send the following messages to mgmt bus:
|
78
|
-
- LOG
|
79
|
-
- BEACON (state machine of the below submessages)
|
80
|
-
- STARTED
|
81
|
-
- CONFIGURED
|
82
|
-
- RUNNING
|
83
|
-
- STOPPING
|
84
|
-
- STOPPED
|
85
|
-
- ERROR
|
86
|
-
On startup:
|
87
|
-
- listen to mgmt bus
|
88
|
-
- publish BEACON + state to mgmt bus every (beacon interval) seconds (default to 1 sec)
|
89
|
-
|
90
|
-
|
91
|
-
External Computations:
|
92
|
-
- Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
|
93
|
-
-
|
94
|
-
|
95
|
-
|
96
|
-
RFlow
|
97
|
-
- Will need a DB for config
|
98
|
-
- Initial startup will need to resolve all remaining outstanding items (ports, UUIDs, etc) and store in config DB
|
99
|
-
- MVC, Mongrel2-like?
|
100
|
-
|
101
|
-
Translate
|
102
|
-
- Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
----------------
|
110
|
-
Plugins:
|
111
|
-
an externally defined plugin needs access to all current data types, as well as being able to define its own and tell the system about that.
|
112
|
-
- necessary to tell system?
|
113
|
-
- need a protocol for defining schema transfer
|
114
|
-
- each message has attached schema
|
115
|
-
|
116
|
-
|
117
|
-
lib/rflow/message.rb
|
118
|
-
|
119
|
-
RFlow::Config
|
120
|
-
|
121
|
-
RFlow::Management
|
122
|
-
- Somewhere for external people to register new computations with running system
|
123
|
-
- computation says that it's running and asks for Connection configuration
|
124
|
-
- how will it specify where in the workflow it wants to run????
|
125
|
-
|
126
|
-
RFlow::Message(complete on-the-wire Avro message format)
|
127
|
-
data_type, provenance, external_ids, empty, data (see below)
|
128
|
-
|
129
|
-
RFlow::Data::(various message data blocks)
|
130
|
-
|
131
|
-
RFlow::Computation
|
132
|
-
uuid, name, class, input_ports, output_ports
|
133
|
-
|
134
|
-
|
135
|
-
RFlow::Connection
|
136
|
-
encapsulates link knowledge and provides an API for sending and receiving
|
137
|
-
each computation will have one for each port
|
138
|
-
each computation will call into the connection to send (possibly via a Port object) and receive
|
139
|
-
|
140
|
-
RFlow::Connection::AMQP
|
141
|
-
will manage connections to an AMQP server
|
142
|
-
|
143
|
-
RFlow::Connection::ZMQ
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
|
149
|
-
|
150
|
-
AMQP::Topic - responsible for setting up a topic -> queue binding
|
151
|
-
r.incoming = amqp connection, channel, vhost, login, password, topic
|
152
|
-
r.outgoing = amqp connection, channel, vhost, login, password, queue name
|
153
|
-
behavior -> n x m, "round-robin" among the connected outgoing
|
154
|
-
incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
|
155
|
-
|
156
|
-
|
157
|
-
ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
158
|
-
r.incoming = zmq connection string (tcp://ip:port), type pub
|
159
|
-
r.outgoing = zmq connection string (tcp://ip:port), type sub
|
160
|
-
behavior -> n x m, broadcast sending,
|
161
|
-
|
162
|
-
ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
163
|
-
r.incoming = zmq connection string (tcp://ip:port), type push
|
164
|
-
r.outgoing = zmq connection string (tcp://ip:port), type pull
|
165
|
-
|
166
|
-
|
167
|
-
Startup
|
168
|
-
|
169
|
-
RFlow.run is the management process for the workflow
|
170
|
-
|
171
|
-
computations = config.computations.map do |c|
|
172
|
-
instantiate_computation(c)
|
173
|
-
# Check for errors here, which would be evident if a computation couldn't be found/created
|
174
|
-
# Just creating single process ruby objects here to check for errors
|
175
|
-
end
|
176
|
-
|
177
|
-
computations.each do |c|
|
178
|
-
c.configure # with what????
|
179
|
-
# Still single ruby process to set and deconflict all the configuration parameters
|
180
|
-
end
|
181
|
-
|
182
|
-
computations.each do |c|
|
183
|
-
c.run
|
184
|
-
end
|
185
|
-
|
186
|
-
listen_for_management_events_from_old_computations
|
187
|
-
listen_for_new_computation_registration
|