rflow 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rvmrc +1 -0
- data/Gemfile +5 -0
- data/NOTES +187 -0
- data/README +0 -0
- data/Rakefile +16 -0
- data/bin/rflow +215 -0
- data/example/basic_config.rb +49 -0
- data/example/basic_extensions.rb +142 -0
- data/example/http_config.rb +21 -0
- data/example/http_extensions.rb +262 -0
- data/lib/rflow.rb +440 -0
- data/lib/rflow/component.rb +192 -0
- data/lib/rflow/component/port.rb +150 -0
- data/lib/rflow/components.rb +10 -0
- data/lib/rflow/components/raw.rb +26 -0
- data/lib/rflow/components/raw/extensions.rb +18 -0
- data/lib/rflow/configuration.rb +290 -0
- data/lib/rflow/configuration/component.rb +27 -0
- data/lib/rflow/configuration/connection.rb +98 -0
- data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
- data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
- data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
- data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
- data/lib/rflow/configuration/port.rb +30 -0
- data/lib/rflow/configuration/ruby_dsl.rb +183 -0
- data/lib/rflow/configuration/setting.rb +67 -0
- data/lib/rflow/configuration/uuid_keyed.rb +18 -0
- data/lib/rflow/connection.rb +59 -0
- data/lib/rflow/connections.rb +2 -0
- data/lib/rflow/connections/zmq_connection.rb +101 -0
- data/lib/rflow/message.rb +191 -0
- data/lib/rflow/port.rb +4 -0
- data/lib/rflow/util.rb +19 -0
- data/lib/rflow/version.rb +3 -0
- data/rflow.gemspec +42 -0
- data/schema/message.avsc +36 -0
- data/schema/raw.avsc +9 -0
- data/spec/fixtures/config_ints.rb +61 -0
- data/spec/fixtures/extensions_ints.rb +141 -0
- data/spec/rflow_configuration_spec.rb +73 -0
- data/spec/rflow_message_data_raw.rb +26 -0
- data/spec/rflow_message_data_spec.rb +60 -0
- data/spec/rflow_message_spec.rb +182 -0
- data/spec/rflow_spec.rb +100 -0
- data/spec/schema_spec.rb +28 -0
- data/spec/spec_helper.rb +37 -0
- data/temp.rb +295 -0
- metadata +270 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c322bc6cf9c3b4ccd46b13cf56d3c2460dc5f0be
|
4
|
+
data.tar.gz: af2d6fb3e7051a074c56fa10b70b5a02b23bb0d7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5a3cd46af3c815d2cb5840d48a0e38f7e28dbe911276fac75d3466bef9d05d7d49e38baa010b5ce419a67aea5829d2f227d2ff74aa5e689f6c1e6109d882ad81
|
7
|
+
data.tar.gz: 539c61aca94e84e1ccb00acba1ef87c7a6556dc65180f9f905561b09e147eab83ff0a409dd01b84a1113ba820cf89c6470ee64c1ace68736232423ba3a1d9668
|
data/.gitignore
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm 1.9.2@rflow-devel
|
data/Gemfile
ADDED
data/NOTES
ADDED
@@ -0,0 +1,187 @@
|
|
1
|
+
RFlow Manager
|
2
|
+
|
3
|
+
Components
|
4
|
+
Input Ports
|
5
|
+
Output Ports
|
6
|
+
|
7
|
+
Connections
|
8
|
+
Input Ports
|
9
|
+
Output Ports
|
10
|
+
|
11
|
+
rflow <config file>
|
12
|
+
figure out a work directory
|
13
|
+
make sure that it has the right subdirectories (can be overridden)
|
14
|
+
run tmp logs schemas components
|
15
|
+
Set up logging to logs/rflow.log
|
16
|
+
Load all schemas
|
17
|
+
Verify all component installation
|
18
|
+
Initialize components
|
19
|
+
Start components running and make sure that they "daemonize" correctly
|
20
|
+
- place pid files in deployment's run directory
|
21
|
+
Configure components via zmq
|
22
|
+
Daemonize self
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
class Component
|
27
|
+
def self.input_port
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.output_port
|
31
|
+
end
|
32
|
+
|
33
|
+
attr_accessor :state
|
34
|
+
|
35
|
+
def initialize(config, run_directory)
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
def run
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
def configure
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
class PassThrough < Component
|
50
|
+
input_port [:in]
|
51
|
+
input_port :another_in
|
52
|
+
output_port :out
|
53
|
+
output_port :another_out
|
54
|
+
|
55
|
+
def initialize(config, run_directory)
|
56
|
+
# This will initialize the ports
|
57
|
+
super
|
58
|
+
# Do stuff to initialize component
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
Computation Requirements:
|
65
|
+
Initial startup with:
|
66
|
+
- management bus connection information
|
67
|
+
- group and instance UUID
|
68
|
+
- beacon interval
|
69
|
+
- run directory, containing
|
70
|
+
- PID files
|
71
|
+
- log dir + logs
|
72
|
+
- computation-specific configuration (conf dir)
|
73
|
+
Needs to process the following messages from mgmt bus:
|
74
|
+
- CONFIGURE (ports)
|
75
|
+
- RUN
|
76
|
+
- SHUTDOWN
|
77
|
+
Needs to send the following messages to mgmt bus:
|
78
|
+
- LOG
|
79
|
+
- BEACON (state machine of the below submessages)
|
80
|
+
- STARTED
|
81
|
+
- CONFIGURED
|
82
|
+
- RUNNING
|
83
|
+
- STOPPING
|
84
|
+
- STOPPED
|
85
|
+
- ERROR
|
86
|
+
On startup:
|
87
|
+
- listen to mgmt bus
|
88
|
+
- publish BEACON + state to mgmt bus every (beacon interval) seconds (default to 1 sec)
|
89
|
+
|
90
|
+
|
91
|
+
External Computations:
|
92
|
+
- Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
|
93
|
+
-
|
94
|
+
|
95
|
+
|
96
|
+
RFlow
|
97
|
+
- Will need a DB for config
|
98
|
+
- Initial startup will need to resolve all remaining outstanding items (ports, UUIDs, etc) and store in config DB
|
99
|
+
- MVC, Mongrel2-like?
|
100
|
+
|
101
|
+
Translate
|
102
|
+
- Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
----------------
|
110
|
+
Plugins:
|
111
|
+
an externally defined plugin needs access to all current data types, as well as being able to define its own and tell the system about that.
|
112
|
+
- necessary to tell system?
|
113
|
+
- need a protocol for defining schema transfer
|
114
|
+
- each message has attached schema
|
115
|
+
|
116
|
+
|
117
|
+
lib/rflow/message.rb
|
118
|
+
|
119
|
+
RFlow::Config
|
120
|
+
|
121
|
+
RFlow::Management
|
122
|
+
- Somewhere for external people to register new computations with running system
|
123
|
+
- computation says that it's running and asks for Connection configuration
|
124
|
+
- how will it specify where in the workflow it wants to run????
|
125
|
+
|
126
|
+
RFlow::Message(complete on-the-wire Avro message format)
|
127
|
+
data_type, provenance, external_ids, empty, data (see below)
|
128
|
+
|
129
|
+
RFlow::Data::(various message data blocks)
|
130
|
+
|
131
|
+
RFlow::Computation
|
132
|
+
uuid, name, class, input_ports, output_ports
|
133
|
+
|
134
|
+
|
135
|
+
RFlow::Connection
|
136
|
+
encapsulates link knowledge and provides an API for sending and receiving
|
137
|
+
each computation will have one for each port
|
138
|
+
each computation will call into the connection to send (possibly via a Port object) and receive
|
139
|
+
|
140
|
+
RFlow::Connection::AMQP
|
141
|
+
will manage connections to an AMQP server
|
142
|
+
|
143
|
+
RFlow::Connection::ZMQ
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
|
149
|
+
|
150
|
+
AMQP::Topic - responsible for setting up a topic -> queue binding
|
151
|
+
r.incoming = amqp connection, channel, vhost, login, password, topic
|
152
|
+
r.outgoing = amqp connection, channel, vhost, login, password, queue name
|
153
|
+
behavior -> n x m, "round-robin" among the connected outgoing
|
154
|
+
incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
|
155
|
+
|
156
|
+
|
157
|
+
ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
158
|
+
r.incoming = zmq connection string (tcp://ip:port), type pub
|
159
|
+
r.outgoing = zmq connection string (tcp://ip:port), type sub
|
160
|
+
behavior -> n x m, broadcast sending,
|
161
|
+
|
162
|
+
ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
|
163
|
+
r.incoming = zmq connection string (tcp://ip:port), type push
|
164
|
+
r.outgoing = zmq connection string (tcp://ip:port), type pull
|
165
|
+
|
166
|
+
|
167
|
+
Startup
|
168
|
+
|
169
|
+
RFlow.run is the management process for the workflow
|
170
|
+
|
171
|
+
computations = config.computations.map do |c|
|
172
|
+
instantiate_computation(c)
|
173
|
+
# Check for errors here, which would be evident if a computation couldn't be found/created
|
174
|
+
# Just creating single process ruby objects here to check for errors
|
175
|
+
end
|
176
|
+
|
177
|
+
computations.each do |c|
|
178
|
+
c.configure # with what????
|
179
|
+
# Still single ruby process to set and deconflict all the configuration parameters
|
180
|
+
end
|
181
|
+
|
182
|
+
computations.each do |c|
|
183
|
+
c.run
|
184
|
+
end
|
185
|
+
|
186
|
+
listen_for_management_events_from_old_computations
|
187
|
+
listen_for_new_computation_registration
|
data/README
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
require 'rdoc/task'
|
4
|
+
Bundler::GemHelper.install_tasks
|
5
|
+
|
6
|
+
RSpec::Core::RakeTask.new(:spec) do |t|
|
7
|
+
t.verbose = true
|
8
|
+
t.rspec_opts = '--tty --color'
|
9
|
+
end
|
10
|
+
|
11
|
+
RDoc::Task.new do |rd|
|
12
|
+
rd.main = "README"
|
13
|
+
rd.rdoc_files.include("README", "lib/**/*.rb")
|
14
|
+
rd.rdoc_dir = File.join('doc', 'html')
|
15
|
+
end
|
16
|
+
|
data/bin/rflow
ADDED
@@ -0,0 +1,215 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Will require rflow after option parsing to speed up a couple of
|
4
|
+
# startup cases (version and help) that don't need it
|
5
|
+
require 'optparse'
|
6
|
+
|
7
|
+
options = {
|
8
|
+
:daemonize => true,
|
9
|
+
:startup_log_level => :INFO,
|
10
|
+
:extensions_file_paths => [],
|
11
|
+
:gems => []
|
12
|
+
}
|
13
|
+
|
14
|
+
option_parser = OptionParser.new do |opts|
|
15
|
+
opts.banner = <<EOB
|
16
|
+
Usage: #{File.basename $0} [options] (start|stop|status|load)
|
17
|
+
EOB
|
18
|
+
|
19
|
+
opts.on("-d", "--database DB", "Config database (sqlite) path (GENERALLY REQUIRED)") do |db|
|
20
|
+
options[:config_database_path] = File.expand_path(db)
|
21
|
+
end
|
22
|
+
|
23
|
+
opts.on("-c", "--config CONFIG", "Config file path (only valid for load)") do |config|
|
24
|
+
options[:config_file_path] = File.expand_path(config)
|
25
|
+
end
|
26
|
+
|
27
|
+
opts.on("-e", "--extensions FILE1[,FILE_N]", Array, "Extension file paths (will load)") do |extensions|
|
28
|
+
options[:extensions_file_paths] += extensions.map {|extension| File.expand_path(extension)}
|
29
|
+
end
|
30
|
+
|
31
|
+
opts.on("-g", "--gems GEM1[,GEM_N]", Array, "Extension gems (will require)") do |gems|
|
32
|
+
options[:gems] += gems
|
33
|
+
end
|
34
|
+
|
35
|
+
opts.on("-l", "--log LOGFILE", "Initial startup log file (in addition to stdout)") do |log|
|
36
|
+
options[:startup_log_file_path] = File.expand_path(log)
|
37
|
+
end
|
38
|
+
|
39
|
+
opts.on("-v", "--verbose [LEVEL]", [:DEBUG, :INFO, :WARN], "Control the startup log (and stdout) verbosity (DEBUG, INFO, WARN) defaults to INFO") do |level|
|
40
|
+
options[:startup_log_level] = level || :DEBUG
|
41
|
+
end
|
42
|
+
|
43
|
+
opts.on("-f", "Run in the foreground") do |f|
|
44
|
+
options[:daemonize] = false
|
45
|
+
end
|
46
|
+
|
47
|
+
opts.on_tail("--version", "Show RFlow version and exit") do
|
48
|
+
require 'rflow/version'
|
49
|
+
puts RFlow::VERSION
|
50
|
+
exit 0
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on_tail("-h", "--help", "Show this message and exit") do
|
54
|
+
puts opts
|
55
|
+
exit 0
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
begin
|
61
|
+
option_parser.parse!
|
62
|
+
rescue Exception => e
|
63
|
+
STDERR.puts "Error processing arguments: #{e.class}: #{e.message}"
|
64
|
+
exit 1
|
65
|
+
end
|
66
|
+
|
67
|
+
# Now require rflow because the following parts of the startup require
|
68
|
+
# pieces (usually RFlow::Configuration or RFlow.logger)
|
69
|
+
require 'rflow'
|
70
|
+
|
71
|
+
# Set up the startup logging, which is distinct from the runtime
|
72
|
+
# logging that is defined in the config database. The startup logging
|
73
|
+
# will always go to STDOUT, as well as to the file specified with the
|
74
|
+
# '-l' parameter
|
75
|
+
startup_logger = Log4r::Logger.new 'startup'
|
76
|
+
startup_logger.add Log4r::StdoutOutputter.new('startup_stdout', :formatter => RFlow::LOG_PATTERN_FORMATTER)
|
77
|
+
startup_logger.level = Log4r::LNAMES.index options[:startup_log_level].to_s
|
78
|
+
|
79
|
+
if options[:startup_log_file_path]
|
80
|
+
begin
|
81
|
+
startup_logger.add Log4r::FileOutputter.new('startup_file', :filename => options[:startup_log_file_path], :formatter => RFlow::LOG_PATTERN_FORMATTER)
|
82
|
+
rescue Exception => e
|
83
|
+
startup_logger.fatal "Log file '#{options[:startup_log_file_path]}' problem: #{e.message}"
|
84
|
+
exit 1
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
command = ARGV[0]
|
89
|
+
unless ['start', 'stop', 'status', 'load'].include? command
|
90
|
+
startup_logger.fatal "Command needs to be one of [start|stop|status|load]\n#{option_parser.help}"
|
91
|
+
exit 1
|
92
|
+
end
|
93
|
+
|
94
|
+
if options[:config_file_path] && command != 'load'
|
95
|
+
startup_logger.fatal "Config file only valid for 'load' command"
|
96
|
+
exit 1
|
97
|
+
end
|
98
|
+
|
99
|
+
|
100
|
+
unless options[:config_database_path]
|
101
|
+
startup_logger.warn "Config database not specified, using default 'config.sqlite'"
|
102
|
+
options[:config_database_path] = File.expand_path(File.join(Dir.getwd, 'config.sqlite'))
|
103
|
+
end
|
104
|
+
|
105
|
+
|
106
|
+
# Set the standard logger to the startup one in the case that we need
|
107
|
+
# to call into RFlow to check on or setup things, like the config
|
108
|
+
# database. We want those log messages to go to the startup log when
|
109
|
+
# setting up. The running log will transition to what is specified in
|
110
|
+
# the config database
|
111
|
+
RFlow.logger = startup_logger
|
112
|
+
|
113
|
+
|
114
|
+
case command
|
115
|
+
when 'load'
|
116
|
+
# Load the database with the config file, if it exists. Will
|
117
|
+
# otherwise use default values (not very useful)
|
118
|
+
if options[:config_file_path]
|
119
|
+
unless File.exist? options[:config_file_path]
|
120
|
+
startup_logger.fatal "Config file '#{options[:config_file_path]}' not found\n#{option_parser.help}"
|
121
|
+
exit 1
|
122
|
+
end
|
123
|
+
|
124
|
+
unless File.readable? options[:config_file_path]
|
125
|
+
startup_logger.fatal "Config file '#{options[:config_file_path]}' not readable\n#{option_parser.help}"
|
126
|
+
exit 1
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
if File.exist? options[:config_database_path]
|
131
|
+
startup_logger.fatal "Config database '#{options[:config_database_path]}' exists, exiting to prevent accidental overwrite from config file '#{options[:config_file_path]}'"
|
132
|
+
exit 1
|
133
|
+
end
|
134
|
+
|
135
|
+
startup_logger.warn "Config database '#{options[:config_database_path]}' not found, creating"
|
136
|
+
begin
|
137
|
+
RFlow::Configuration::initialize_database(options[:config_database_path], options[:config_file_path])
|
138
|
+
rescue Exception => e
|
139
|
+
startup_logger.fatal "Error initializing configuration database: #{e.message}: #{e.backtrace.join "\n"}"
|
140
|
+
exit 1
|
141
|
+
end
|
142
|
+
|
143
|
+
startup_logger.warn "Successfully initialized database '#{options[:config_database_path]}' with '#{options[:config_file_path]}'"
|
144
|
+
exit 0
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
# Load the database config and start setting up environment
|
149
|
+
begin
|
150
|
+
config = RFlow::Configuration.new(options[:config_database_path])
|
151
|
+
rescue Exception => e
|
152
|
+
startup_logger.fatal "Error loading config database: #{e.class} - #{e.message}"
|
153
|
+
exit 1
|
154
|
+
end
|
155
|
+
|
156
|
+
Dir.chdir(File.dirname(options[:config_database_path]))
|
157
|
+
Dir.chdir(config['rflow.application_directory_path'])
|
158
|
+
pid = RFlow.running_pid_file_path?(config['rflow.pid_file_path'])
|
159
|
+
|
160
|
+
case command
|
161
|
+
when 'stop'
|
162
|
+
if pid
|
163
|
+
startup_logger.info "#{config['rflow.application_name']} running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}, terminating"
|
164
|
+
# TODO: check if it actually shut down
|
165
|
+
Process.kill 'INT', pid
|
166
|
+
else
|
167
|
+
startup_logger.warn "#{config['rflow.application_name']} process not found in #{File.expand_path(config['rflow.pid_file_path'])}"
|
168
|
+
exit 1
|
169
|
+
end
|
170
|
+
exit 0
|
171
|
+
|
172
|
+
when 'status'
|
173
|
+
unless pid
|
174
|
+
startup_logger.error "#{config['rflow.application_name']} process not found in #{File.expand_path(config['rflow.pid_file_path'])}"
|
175
|
+
exit 1
|
176
|
+
end
|
177
|
+
startup_logger.info "#{config['rflow.application_name']} running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}"
|
178
|
+
exit 0
|
179
|
+
|
180
|
+
when 'start'
|
181
|
+
if pid
|
182
|
+
startup_logger.error "#{config['rflow.application_name']} already running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}"
|
183
|
+
exit 1
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
|
188
|
+
# We should have eliminated all commands but 'start' at this point
|
189
|
+
|
190
|
+
# require all the gem extensions
|
191
|
+
options[:gems].each do |extension_gem|
|
192
|
+
startup_logger.info "Requiring #{extension_gem}"
|
193
|
+
require extension_gem
|
194
|
+
end
|
195
|
+
|
196
|
+
|
197
|
+
# load all the file extensions
|
198
|
+
options[:extensions_file_paths].each do |extensions_file_path|
|
199
|
+
startup_logger.info "Loading #{extensions_file_path}"
|
200
|
+
unless File.readable? extensions_file_path
|
201
|
+
startup_logger.fatal "Extensions file ('#{Dir.getwd}') '#{extensions_file_path}' not reabable\n#{option_parser.help}"
|
202
|
+
exit 1
|
203
|
+
end
|
204
|
+
load extensions_file_path
|
205
|
+
end
|
206
|
+
|
207
|
+
|
208
|
+
# Start the flow
|
209
|
+
begin
|
210
|
+
RFlow.run options[:config_database_path], options[:daemonize]
|
211
|
+
rescue Exception => e
|
212
|
+
startup_logger.fatal "Error running rflow: #{e.class}: #{e.message}"
|
213
|
+
end
|
214
|
+
|
215
|
+
__END__
|