rflow 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rvmrc +1 -0
- data/Gemfile +5 -0
- data/NOTES +187 -0
- data/README +0 -0
- data/Rakefile +16 -0
- data/bin/rflow +215 -0
- data/example/basic_config.rb +49 -0
- data/example/basic_extensions.rb +142 -0
- data/example/http_config.rb +21 -0
- data/example/http_extensions.rb +262 -0
- data/lib/rflow.rb +440 -0
- data/lib/rflow/component.rb +192 -0
- data/lib/rflow/component/port.rb +150 -0
- data/lib/rflow/components.rb +10 -0
- data/lib/rflow/components/raw.rb +26 -0
- data/lib/rflow/components/raw/extensions.rb +18 -0
- data/lib/rflow/configuration.rb +290 -0
- data/lib/rflow/configuration/component.rb +27 -0
- data/lib/rflow/configuration/connection.rb +98 -0
- data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
- data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
- data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
- data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
- data/lib/rflow/configuration/port.rb +30 -0
- data/lib/rflow/configuration/ruby_dsl.rb +183 -0
- data/lib/rflow/configuration/setting.rb +67 -0
- data/lib/rflow/configuration/uuid_keyed.rb +18 -0
- data/lib/rflow/connection.rb +59 -0
- data/lib/rflow/connections.rb +2 -0
- data/lib/rflow/connections/zmq_connection.rb +101 -0
- data/lib/rflow/message.rb +191 -0
- data/lib/rflow/port.rb +4 -0
- data/lib/rflow/util.rb +19 -0
- data/lib/rflow/version.rb +3 -0
- data/rflow.gemspec +42 -0
- data/schema/message.avsc +36 -0
- data/schema/raw.avsc +9 -0
- data/spec/fixtures/config_ints.rb +61 -0
- data/spec/fixtures/extensions_ints.rb +141 -0
- data/spec/rflow_configuration_spec.rb +73 -0
- data/spec/rflow_message_data_raw.rb +26 -0
- data/spec/rflow_message_data_spec.rb +60 -0
- data/spec/rflow_message_spec.rb +182 -0
- data/spec/rflow_spec.rb +100 -0
- data/spec/schema_spec.rb +28 -0
- data/spec/spec_helper.rb +37 -0
- data/temp.rb +295 -0
- metadata +270 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c322bc6cf9c3b4ccd46b13cf56d3c2460dc5f0be
|
4
|
+
data.tar.gz: af2d6fb3e7051a074c56fa10b70b5a02b23bb0d7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5a3cd46af3c815d2cb5840d48a0e38f7e28dbe911276fac75d3466bef9d05d7d49e38baa010b5ce419a67aea5829d2f227d2ff74aa5e689f6c1e6109d882ad81
|
7
|
+
data.tar.gz: 539c61aca94e84e1ccb00acba1ef87c7a6556dc65180f9f905561b09e147eab83ff0a409dd01b84a1113ba820cf89c6470ee64c1ace68736232423ba3a1d9668
|
data/.gitignore
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm 1.9.2@rflow-devel
|
data/Gemfile
ADDED
data/NOTES
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
RFlow Manager
|
2
|
+
|
3
|
+
Components
|
4
|
+
Input Ports
|
5
|
+
Output Ports
|
6
|
+
|
7
|
+
Connections
|
8
|
+
Input Ports
|
9
|
+
Output Ports
|
10
|
+
|
11
|
+
rflow <config file>
|
12
|
+
figure out a work directory
|
13
|
+
make sure that it has the right subdirectories (can be overridden)
|
14
|
+
run tmp logs schemas components
|
15
|
+
Set up logging to logs/rflow.log
|
16
|
+
Load all schemas
|
17
|
+
Verify all component installation
|
18
|
+
Initialize components
|
19
|
+
Start components running and make sure that they "daemonize" correctly
|
20
|
+
- place pid files in deployment's run directory
|
21
|
+
Configure components via zmq
|
22
|
+
Daemonize self
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
class Component
|
27
|
+
def self.input_port
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.output_port
|
31
|
+
end
|
32
|
+
|
33
|
+
attr_accessor :state
|
34
|
+
|
35
|
+
def initialize(config, run_directory)
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
def run
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
def configure
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
class PassThrough < Component
|
50
|
+
input_port [:in]
|
51
|
+
input_port :another_in
|
52
|
+
output_port :out
|
53
|
+
output_port :another_out
|
54
|
+
|
55
|
+
def initialize(config, run_directory)
|
56
|
+
# This will initialize the ports
|
57
|
+
super
|
58
|
+
# Do stuff to initialize component
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
Computation Requirements:
|
65
|
+
Initial startup with:
|
66
|
+
- management bus connection information
|
67
|
+
- group and instance UUID
|
68
|
+
- beacon interval
|
69
|
+
- run directory, containing
|
70
|
+
- PID files
|
71
|
+
- log dir + logs
|
72
|
+
- computation-specific configuration (conf dir)
|
73
|
+
Needs to process the following messages from mgmt bus:
|
74
|
+
- CONFIGURE (ports)
|
75
|
+
- RUN
|
76
|
+
- SHUTDOWN
|
77
|
+
Needs to send the following messages to mgmt bus:
|
78
|
+
- LOG
|
79
|
+
- BEACON (state machine of the below submessages)
|
80
|
+
- STARTED
|
81
|
+
- CONFIGURED
|
82
|
+
- RUNNING
|
83
|
+
- STOPPING
|
84
|
+
- STOPPED
|
85
|
+
- ERROR
|
86
|
+
On startup:
|
87
|
+
- listen to mgmt bus
|
88
|
+
- publish BEACON + state to mgmt bus every (beacon interval) seconds (default to 1 sec)
|
89
|
+
|
90
|
+
|
91
|
+
External Computations:
|
92
|
+
- Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
|
93
|
+
-
|
94
|
+
|
95
|
+
|
96
|
+
RFlow
|
97
|
+
- Will need a DB for config
|
98
|
+
- Initial startup will need to resolve all remaining outstanding items (ports, UUIDs, etc) and store in config DB
|
99
|
+
- MVC, Mongrel2-like?
|
100
|
+
|
101
|
+
Translate
|
102
|
+
- Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
----------------
|
110
|
+
Plugins:
|
111
|
+
an externally defined plugin needs access to all current data types, as well as being able to define its own and tell the system about that.
|
112
|
+
- necessary to tell system?
|
113
|
+
- need a protocol for defining schema transfer
|
114
|
+
- each message has attached schema
|
115
|
+
|
116
|
+
|
117
|
+
lib/rflow/message.rb
|
118
|
+
|
119
|
+
RFlow::Config
|
120
|
+
|
121
|
+
RFlow::Management
|
122
|
+
- Somewhere for external people to register new computations with running system
|
123
|
+
- computation says that it's running and asks for Connection configuration
|
124
|
+
- how will it specify where in the workflow it wants to run????
|
125
|
+
|
126
|
+
RFlow::Message(complete on-the-wire Avro message format)
|
127
|
+
data_type, provenance, external_ids, empty, data (see below)
|
128
|
+
|
129
|
+
RFlow::Data::(various message data blocks)
|
130
|
+
|
131
|
+
RFlow::Computation
|
132
|
+
uuid, name, class, input_ports, output_ports
|
133
|
+
|
134
|
+
|
135
|
+
RFlow::Connection
|
136
|
+
encapsulates link knowledge and provides an API for sending and receiving
|
137
|
+
each computation will have one for each port
|
138
|
+
each computation will call into the connection to send (possibly via a Port object) and receive
|
139
|
+
|
140
|
+
RFlow::Connection::AMQP
|
141
|
+
will manage connections to an AMQP server
|
142
|
+
|
143
|
+
RFlow::Connection::ZMQ
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
|
149
|
+
|
150
|
+
AMQP::Topic - responsible for setting up a topic -> queue binding
|
151
|
+
r.incoming = amqp connection, channel, vhost, login, password, topic
|
152
|
+
r.outgoing = amqp connection, channel, vhost, login, password, queue name
|
153
|
+
behavior -> n x m, "round-robin" among the connected outgoing
|
154
|
+
incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
|
155
|
+
|
156
|
+
|
157
|
+
ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
158
|
+
r.incoming = zmq connection string (tcp://ip:port), type pub
|
159
|
+
r.outgoing = zmq connection string (tcp://ip:port), type sub
|
160
|
+
behavior -> n x m, broadcast sending,
|
161
|
+
|
162
|
+
ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
163
|
+
r.incoming = zmq connection string (tcp://ip:port), type push
|
164
|
+
r.outgoing = zmq connection string (tcp://ip:port), type pull
|
165
|
+
|
166
|
+
|
167
|
+
Startup
|
168
|
+
|
169
|
+
RFlow.run is the management process for the workflow
|
170
|
+
|
171
|
+
computations = config.computations.map do |c|
|
172
|
+
instantiate_computation(c)
|
173
|
+
# Check for errors here, which would be evident if a computation couldn't be found/created
|
174
|
+
# Just creating single process ruby objects here to check for errors
|
175
|
+
end
|
176
|
+
|
177
|
+
computations.each do |c|
|
178
|
+
c.configure # with what????
|
179
|
+
# Still single ruby process to set and deconflict all the configuration parameters
|
180
|
+
end
|
181
|
+
|
182
|
+
computations.each do |c|
|
183
|
+
c.run
|
184
|
+
end
|
185
|
+
|
186
|
+
listen_for_management_events_from_old_computations
|
187
|
+
listen_for_new_computation_registration
|
data/README
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
require 'rdoc/task'
|
4
|
+
Bundler::GemHelper.install_tasks
|
5
|
+
|
6
|
+
RSpec::Core::RakeTask.new(:spec) do |t|
|
7
|
+
t.verbose = true
|
8
|
+
t.rspec_opts = '--tty --color'
|
9
|
+
end
|
10
|
+
|
11
|
+
RDoc::Task.new do |rd|
|
12
|
+
rd.main = "README"
|
13
|
+
rd.rdoc_files.include("README", "lib/**/*.rb")
|
14
|
+
rd.rdoc_dir = File.join('doc', 'html')
|
15
|
+
end
|
16
|
+
|
data/bin/rflow
ADDED
@@ -0,0 +1,215 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Will require rflow after option parsing to speed up a couple of
|
4
|
+
# startup cases (version and help) that don't need it
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
options = {
|
8
|
+
:daemonize => true,
|
9
|
+
:startup_log_level => :INFO,
|
10
|
+
:extensions_file_paths => [],
|
11
|
+
:gems => []
|
12
|
+
}
|
13
|
+
|
14
|
+
option_parser = OptionParser.new do |opts|
|
15
|
+
opts.banner = <<EOB
|
16
|
+
Usage: #{File.basename $0} [options] (start|stop|status|load)
|
17
|
+
EOB
|
18
|
+
|
19
|
+
opts.on("-d", "--database DB", "Config database (sqlite) path (GENERALLY REQUIRED)") do |db|
|
20
|
+
options[:config_database_path] = File.expand_path(db)
|
21
|
+
end
|
22
|
+
|
23
|
+
opts.on("-c", "--config CONFIG", "Config file path (only valid for load)") do |config|
|
24
|
+
options[:config_file_path] = File.expand_path(config)
|
25
|
+
end
|
26
|
+
|
27
|
+
opts.on("-e", "--extensions FILE1[,FILE_N]", Array, "Extension file paths (will load)") do |extensions|
|
28
|
+
options[:extensions_file_paths] += extensions.map {|extension| File.expand_path(extension)}
|
29
|
+
end
|
30
|
+
|
31
|
+
opts.on("-g", "--gems GEM1[,GEM_N]", Array, "Extension gems (will require)") do |gems|
|
32
|
+
options[:gems] += gems
|
33
|
+
end
|
34
|
+
|
35
|
+
opts.on("-l", "--log LOGFILE", "Initial startup log file (in addition to stdout)") do |log|
|
36
|
+
options[:startup_log_file_path] = File.expand_path(log)
|
37
|
+
end
|
38
|
+
|
39
|
+
opts.on("-v", "--verbose [LEVEL]", [:DEBUG, :INFO, :WARN], "Control the startup log (and stdout) verbosity (DEBUG, INFO, WARN) defaults to INFO") do |level|
|
40
|
+
options[:startup_log_level] = level || :DEBUG
|
41
|
+
end
|
42
|
+
|
43
|
+
opts.on("-f", "Run in the foreground") do |f|
|
44
|
+
options[:daemonize] = false
|
45
|
+
end
|
46
|
+
|
47
|
+
opts.on_tail("--version", "Show RFlow version and exit") do
|
48
|
+
require 'rflow/version'
|
49
|
+
puts RFlow::VERSION
|
50
|
+
exit 0
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on_tail("-h", "--help", "Show this message and exit") do
|
54
|
+
puts opts
|
55
|
+
exit 0
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
begin # Parse ARGV in place; recognised switches are removed, leaving the command word
|
61
|
+
option_parser.parse!
|
62
|
+
rescue StandardError => e # not Exception: that would also trap the SystemExit raised by `exit 0` in --help/--version
|
63
|
+
STDERR.puts "Error processing arguments: #{e.class}: #{e.message}"
|
64
|
+
exit 1
|
65
|
+
end
|
66
|
+
|
67
|
+
# Now require rflow because the following parts of the startup require
|
68
|
+
# pieces (usually RFlow::Configuration or RFlow.logger)
|
69
|
+
require 'rflow'
|
70
|
+
|
71
|
+
# Set up the startup logging, which is distinct from the runtime
|
72
|
+
# logging that is defined in the config database. The startup logging
|
73
|
+
# will always go to STDOUT, as well as to the file specified with the
|
74
|
+
# '-l' parameter
|
75
|
+
startup_logger = Log4r::Logger.new 'startup'
|
76
|
+
startup_logger.add Log4r::StdoutOutputter.new('startup_stdout', :formatter => RFlow::LOG_PATTERN_FORMATTER)
|
77
|
+
startup_logger.level = Log4r::LNAMES.index options[:startup_log_level].to_s
|
78
|
+
|
79
|
+
if options[:startup_log_file_path]
|
80
|
+
begin
|
81
|
+
startup_logger.add Log4r::FileOutputter.new('startup_file', :filename => options[:startup_log_file_path], :formatter => RFlow::LOG_PATTERN_FORMATTER)
|
82
|
+
rescue Exception => e
|
83
|
+
startup_logger.fatal "Log file '#{options[:startup_log_file_path]}' problem: #{e.message}"
|
84
|
+
exit 1
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
command = ARGV[0]
|
89
|
+
unless ['start', 'stop', 'status', 'load'].include? command
|
90
|
+
startup_logger.fatal "Command needs to be one of [start|stop|status|load]\n#{option_parser.help}"
|
91
|
+
exit 1
|
92
|
+
end
|
93
|
+
|
94
|
+
if options[:config_file_path] && command != 'load'
|
95
|
+
startup_logger.fatal "Config file only valid for 'load' command"
|
96
|
+
exit 1
|
97
|
+
end
|
98
|
+
|
99
|
+
|
100
|
+
unless options[:config_database_path]
|
101
|
+
startup_logger.warn "Config database not specified, using default 'config.sqlite'"
|
102
|
+
options[:config_database_path] = File.expand_path(File.join(Dir.getwd, 'config.sqlite'))
|
103
|
+
end
|
104
|
+
|
105
|
+
|
106
|
+
# Set the standard logger to the startup one in the case that we need
|
107
|
+
# to call into RFlow to check on or setup things, like the config
|
108
|
+
# database. We want those log messages to go to the startup log when
|
109
|
+
# setting up. The running log will transition to what is specified in
|
110
|
+
# the config database
|
111
|
+
RFlow.logger = startup_logger
|
112
|
+
|
113
|
+
|
114
|
+
case command
|
115
|
+
when 'load'
|
116
|
+
# Load the database with the config file, if it exists. Will
|
117
|
+
# otherwise use default values (not very useful)
|
118
|
+
if options[:config_file_path]
|
119
|
+
unless File.exist? options[:config_file_path]
|
120
|
+
startup_logger.fatal "Config file '#{options[:config_file_path]}' not found\n#{option_parser.help}"
|
121
|
+
exit 1
|
122
|
+
end
|
123
|
+
|
124
|
+
unless File.readable? options[:config_file_path]
|
125
|
+
startup_logger.fatal "Config file '#{options[:config_file_path]}' not readable\n#{option_parser.help}"
|
126
|
+
exit 1
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
if File.exist? options[:config_database_path]
|
131
|
+
startup_logger.fatal "Config database '#{options[:config_database_path]}' exists, exiting to prevent accidental overwrite from config file '#{options[:config_file_path]}'"
|
132
|
+
exit 1
|
133
|
+
end
|
134
|
+
|
135
|
+
startup_logger.warn "Config database '#{options[:config_database_path]}' not found, creating"
|
136
|
+
begin
|
137
|
+
RFlow::Configuration::initialize_database(options[:config_database_path], options[:config_file_path])
|
138
|
+
rescue Exception => e
|
139
|
+
startup_logger.fatal "Error initializing configuration database: #{e.message}: #{e.backtrace.join "\n"}"
|
140
|
+
exit 1
|
141
|
+
end
|
142
|
+
|
143
|
+
startup_logger.warn "Successfully initialized database '#{options[:config_database_path]}' with '#{options[:config_file_path]}'"
|
144
|
+
exit 0
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
# Load the database config and start setting up environment
|
149
|
+
begin
|
150
|
+
config = RFlow::Configuration.new(options[:config_database_path])
|
151
|
+
rescue Exception => e
|
152
|
+
startup_logger.fatal "Error loading config database: #{e.class} - #{e.message}"
|
153
|
+
exit 1
|
154
|
+
end
|
155
|
+
|
156
|
+
Dir.chdir(File.dirname(options[:config_database_path]))
|
157
|
+
Dir.chdir(config['rflow.application_directory_path'])
|
158
|
+
pid = RFlow.running_pid_file_path?(config['rflow.pid_file_path'])
|
159
|
+
|
160
|
+
case command
|
161
|
+
when 'stop'
|
162
|
+
if pid
|
163
|
+
startup_logger.info "#{config['rflow.application_name']} running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}, terminating"
|
164
|
+
# TODO: check if it actually shut down
|
165
|
+
Process.kill 'INT', pid
|
166
|
+
else
|
167
|
+
startup_logger.warn "#{config['rflow.application_name']} process not found in #{File.expand_path(config['rflow.pid_file_path'])}"
|
168
|
+
exit 1
|
169
|
+
end
|
170
|
+
exit 0
|
171
|
+
|
172
|
+
when 'status'
|
173
|
+
unless pid
|
174
|
+
startup_logger.error "#{config['rflow.application_name']} process not found in #{File.expand_path(config['rflow.pid_file_path'])}"
|
175
|
+
exit 1
|
176
|
+
end
|
177
|
+
startup_logger.info "#{config['rflow.application_name']} running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}"
|
178
|
+
exit 0
|
179
|
+
|
180
|
+
when 'start'
|
181
|
+
if pid
|
182
|
+
startup_logger.error "#{config['rflow.application_name']} already running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}"
|
183
|
+
exit 1
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
|
188
|
+
# We should have eliminated all commands but 'start' at this point
|
189
|
+
|
190
|
+
# require all the gem extensions
|
191
|
+
options[:gems].each do |extension_gem|
|
192
|
+
startup_logger.info "Requiring #{extension_gem}"
|
193
|
+
require extension_gem
|
194
|
+
end
|
195
|
+
|
196
|
+
|
197
|
+
# load all the file extensions
|
198
|
+
options[:extensions_file_paths].each do |extensions_file_path|
|
199
|
+
startup_logger.info "Loading #{extensions_file_path}"
|
200
|
+
unless File.readable? extensions_file_path
|
201
|
+
startup_logger.fatal "Extensions file ('#{Dir.getwd}') '#{extensions_file_path}' not reabable\n#{option_parser.help}"
|
202
|
+
exit 1
|
203
|
+
end
|
204
|
+
load extensions_file_path
|
205
|
+
end
|
206
|
+
|
207
|
+
|
208
|
+
# Start the flow
|
209
|
+
begin
|
210
|
+
RFlow.run options[:config_database_path], options[:daemonize]
|
211
|
+
rescue Exception => e
|
212
|
+
startup_logger.fatal "Error running rflow: #{e.class}: #{e.message}"
|
213
|
+
end
|
214
|
+
|
215
|
+
__END__
|