rflow 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c322bc6cf9c3b4ccd46b13cf56d3c2460dc5f0be
4
+ data.tar.gz: af2d6fb3e7051a074c56fa10b70b5a02b23bb0d7
5
+ SHA512:
6
+ metadata.gz: 5a3cd46af3c815d2cb5840d48a0e38f7e28dbe911276fac75d3466bef9d05d7d49e38baa010b5ce419a67aea5829d2f227d2ff74aa5e689f6c1e6109d882ad81
7
+ data.tar.gz: 539c61aca94e84e1ccb00acba1ef87c7a6556dc65180f9f905561b09e147eab83ff0a409dd01b84a1113ba820cf89c6470ee64c1ace68736232423ba3a1d9668
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ \#*
6
+ .\#*
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm 1.9.2@rflow-devel
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in rflow.gemspec
4
+ gemspec
5
+
data/NOTES ADDED
@@ -0,0 +1,187 @@
1
+ RFlow Manager
2
+
3
+ Components
4
+ Input Ports
5
+ Output Ports
6
+
7
+ Connections
8
+ Input Ports
9
+ Output Ports
10
+
11
+ rflow <config file>
12
+ figure out a work directory
13
+ make sure that it has the right subdirectories (can be overridden)
14
+ run tmp logs schemas components
15
+ Set up logging to logs/rflow.log
16
+ Load all schemas
17
+ Verify all component installation
18
+ Initialize components
19
+ Start components running and make sure that they "daemonize" correctly
20
+ - place pid files in deployment's run directory
21
+ Configure components via zmq
22
+ Daemonize self
23
+
24
+
25
+
26
+ class Component
27
+ def self.input_port
28
+ end
29
+
30
+ def self.output_port
31
+ end
32
+
33
+ attr_accessor :state
34
+
35
+ def initialize(config, run_directory)
36
+
37
+ end
38
+
39
+ def run
40
+
41
+ end
42
+
43
+ def configure
44
+
45
+ end
46
+
47
+
48
+
49
+ class PassThrough < Component
50
+ input_port [:in]
51
+ input_port :another_in
52
+ output_port :out
53
+ output_port :another_out
54
+
55
+ def initialize(config, run_directory)
56
+ # This will initialize the ports
57
+ super
58
+ # Do stuff to initialize component
59
+ end
60
+
61
+ end
62
+
63
+
64
+ Computation Requirements:
65
+ Initial startup with:
66
+ - management bus connection information
67
+ - group and instance UUID
68
+ - beacon interval
69
+ - run directory, containing
70
+ - PID files
71
+ - log dir + logs
72
+ - computation-specific configuration (conf dir)
73
+ Needs to process the following messages from mgmt bus:
74
+ - CONFIGURE (ports)
75
+ - RUN
76
+ - SHUTDOWN
77
+ Needs to send the following messages to mgmt bus:
78
+ - LOG
79
+ - BEACON (state machine of the below submessages)
80
+ - STARTED
81
+ - CONFIGURED
82
+ - RUNNING
83
+ - STOPPING
84
+ - STOPPED
85
+ - ERROR
86
+ On startup:
87
+ - listen to mgmt bus
88
+ - publish BEACON + state to mgmt bus every (beacon interval) seconds (default to 1 sec)
89
+
90
+
91
+ External Computations:
92
+ - Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
93
+ -
94
+
95
+
96
+ RFlow
97
+ - Will need a DB for config
98
+ - Initial startup will need to resolve all remaining outstanding items (ports, UUIDs, etc) and store in config DB
99
+ - MVC, Mongrel2-like?
100
+
101
+ Translate
102
+ - Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
103
+
104
+
105
+
106
+
107
+
108
+
109
+ ----------------
110
+ Plugins:
111
+ an externally defined plugin needs access to all current data types, as well as being able to define its own and tell the system about that.
112
+ - necessary to tell system?
113
+ - need a protocol for defining schema transfer
114
+ - each message has attached schema
115
+
116
+
117
+ lib/rflow/message.rb
118
+
119
+ RFlow::Config
120
+
121
+ RFlow::Management
122
+ - Somewhere for external people to register new computations with running system
123
+ - computation says that it's running and asks for Connection configuration
124
+ - how will it specify where in the workflow it wants to run????
125
+
126
+ RFlow::Message(complete on-the-wire Avro message format)
127
+ data_type, provenance, external_ids, empty, data (see below)
128
+
129
+ RFlow::Data::(various message data blocks)
130
+
131
+ RFlow::Computation
132
+ uuid, name, class, input_ports, output_ports
133
+
134
+
135
+ RFlow::Connection
136
+ encapsulates link knowledge and provides an API for sending and receiving
137
+ each computation will have one for each port
138
+ each computation will call into the connection to send (possibly via a Port object) and receive
139
+
140
+ RFlow::Connection::AMQP
141
+ will manage connections to an AMQP server
142
+
143
+ RFlow::Connection::ZMQ
144
+
145
+
146
+
147
+
148
+ computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
149
+
150
+ AMQP::Topic - responsible for setting up a topic -> queue binding
151
+ r.incoming = amqp connection, channel, vhost, login, password, topic
152
+ r.outgoing = amqp connection, channel, vhost, login, password, queue name
153
+ behavior -> n x m, "round-robin" among the connected outgoing
154
+ incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
155
+
156
+
157
+ ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
158
+ r.incoming = zmq connection string (tcp://ip:port), type pub
159
+ r.outgoing = zmq connection string (tcp://ip:port), type sub
160
+ behavior -> n x m, broadcast sending,
161
+
162
+ ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
163
+ r.incoming = zmq connection string (tcp://ip:port), type push
164
+ r.outgoing = zmq connection string (tcp://ip:port), type pull
165
+
166
+
167
+ Startup
168
+
169
+ RFlow.run is the management process for the workflow
170
+
171
+ computations = config.computations.map do |c|
172
+ instantiate_computation(c)
173
+ # Check for errors here, which would be evident if a computation couldn't be found/created
174
+ # Just creating single process ruby objects here to check for errors
175
+ end
176
+
177
+ computations.each do |c|
178
+ c.configure # with what????
179
+ # Still single ruby process to set and deconflict all the configuration parameters
180
+ end
181
+
182
+ computations.each do |c|
183
+ c.run
184
+ end
185
+
186
+ listen_for_management_events_from_old_computations
187
+ listen_for_new_computation_registration
data/README ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ require 'bundler'
2
+ require 'rspec/core/rake_task'
3
+ require 'rdoc/task'
4
+ Bundler::GemHelper.install_tasks
5
+
6
+ RSpec::Core::RakeTask.new(:spec) do |t|
7
+ t.verbose = true
8
+ t.rspec_opts = '--tty --color'
9
+ end
10
+
11
+ RDoc::Task.new do |rd|
12
+ rd.main = "README"
13
+ rd.rdoc_files.include("README", "lib/**/*.rb")
14
+ rd.rdoc_dir = File.join('doc', 'html')
15
+ end
16
+
data/bin/rflow ADDED
@@ -0,0 +1,215 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Will require rflow after option parsing to speed up a couple of
4
+ # startup cases (version and help) that don't need it
5
+ require 'optparse'
6
+
7
+ options = {
8
+ :daemonize => true,
9
+ :startup_log_level => :INFO,
10
+ :extensions_file_paths => [],
11
+ :gems => []
12
+ }
13
+
14
+ option_parser = OptionParser.new do |opts|
15
+ opts.banner = <<EOB
16
+ Usage: #{File.basename $0} [options] (start|stop|status|load)
17
+ EOB
18
+
19
+ opts.on("-d", "--database DB", "Config database (sqlite) path (GENERALLY REQUIRED)") do |db|
20
+ options[:config_database_path] = File.expand_path(db)
21
+ end
22
+
23
+ opts.on("-c", "--config CONFIG", "Config file path (only valid for load)") do |config|
24
+ options[:config_file_path] = File.expand_path(config)
25
+ end
26
+
27
+ opts.on("-e", "--extensions FILE1[,FILE_N]", Array, "Extension file paths (will load)") do |extensions|
28
+ options[:extensions_file_paths] += extensions.map {|extension| File.expand_path(extension)}
29
+ end
30
+
31
+ opts.on("-g", "--gems GEM1[,GEM_N]", Array, "Extension gems (will require)") do |gems|
32
+ options[:gems] += gems
33
+ end
34
+
35
+ opts.on("-l", "--log LOGFILE", "Initial startup log file (in addition to stdout)") do |log|
36
+ options[:startup_log_file_path] = File.expand_path(log)
37
+ end
38
+
39
+ opts.on("-v", "--verbose [LEVEL]", [:DEBUG, :INFO, :WARN], "Control the startup log (and stdout) verbosity (DEBUG, INFO, WARN) defaults to INFO") do |level|
40
+ options[:startup_log_level] = level || :DEBUG
41
+ end
42
+
43
+ opts.on("-f", "Run in the foreground") do |f|
44
+ options[:daemonize] = false
45
+ end
46
+
47
+ opts.on_tail("--version", "Show RFlow version and exit") do
48
+ require 'rflow/version'
49
+ puts RFlow::VERSION
50
+ exit 0
51
+ end
52
+
53
+ opts.on_tail("-h", "--help", "Show this message and exit") do
54
+ puts opts
55
+ exit 0
56
+ end
57
+
58
+ end
59
+
60
+ begin
61
+ option_parser.parse!
62
+ rescue Exception => e
63
+ STDERR.puts "Error processing arguments: #{e.class}: #{e.message}"
64
+ exit 1
65
+ end
66
+
67
+ # Now require rflow because the following parts of the startup require
68
+ # pieces (usually RFlow::Configuration or RFlow.logger)
69
+ require 'rflow'
70
+
71
+ # Set up the startup logging, which is distinct from the runtime
72
+ # logging that is defined in the config database. The startup logging
73
+ # will always go to STDOUT, as well as to the file specified with the
74
+ # '-l' parameter
75
+ startup_logger = Log4r::Logger.new 'startup'
76
+ startup_logger.add Log4r::StdoutOutputter.new('startup_stdout', :formatter => RFlow::LOG_PATTERN_FORMATTER)
77
+ startup_logger.level = Log4r::LNAMES.index options[:startup_log_level].to_s
78
+
79
+ if options[:startup_log_file_path]
80
+ begin
81
+ startup_logger.add Log4r::FileOutputter.new('startup_file', :filename => options[:startup_log_file_path], :formatter => RFlow::LOG_PATTERN_FORMATTER)
82
+ rescue Exception => e
83
+ startup_logger.fatal "Log file '#{options[:startup_log_file_path]}' problem: #{e.message}"
84
+ exit 1
85
+ end
86
+ end
87
+
88
+ command = ARGV[0]
89
+ unless ['start', 'stop', 'status', 'load'].include? command
90
+ startup_logger.fatal "Command needs to be one of [start|stop|status|load]\n#{option_parser.help}"
91
+ exit 1
92
+ end
93
+
94
+ if options[:config_file_path] && command != 'load'
95
+ startup_logger.fatal "Config file only valid for 'load' command"
96
+ exit 1
97
+ end
98
+
99
+
100
+ unless options[:config_database_path]
101
+ startup_logger.warn "Config database not specified, using default 'config.sqlite'"
102
+ options[:config_database_path] = File.expand_path(File.join(Dir.getwd, 'config.sqlite'))
103
+ end
104
+
105
+
106
+ # Set the standard logger to the startup one in the case that we need
107
+ # to call into RFlow to check on or setup things, like the config
108
+ # database. We want those log messages to go to the startup log when
109
+ # setting up. The running log will transition to what is specified in
110
+ # the config database
111
+ RFlow.logger = startup_logger
112
+
113
+
114
+ case command
115
+ when 'load'
116
+ # Load the database with the config file, if it exists. Will
117
+ # otherwise use default values (not very useful)
118
+ if options[:config_file_path]
119
+ unless File.exist? options[:config_file_path]
120
+ startup_logger.fatal "Config file '#{options[:config_file_path]}' not found\n#{option_parser.help}"
121
+ exit 1
122
+ end
123
+
124
+ unless File.readable? options[:config_file_path]
125
+ startup_logger.fatal "Config file '#{options[:config_file_path]}' not readable\n#{option_parser.help}"
126
+ exit 1
127
+ end
128
+ end
129
+
130
+ if File.exist? options[:config_database_path]
131
+ startup_logger.fatal "Config database '#{options[:config_database_path]}' exists, exiting to prevent accidental overwrite from config file '#{options[:config_file_path]}'"
132
+ exit 1
133
+ end
134
+
135
+ startup_logger.warn "Config database '#{options[:config_database_path]}' not found, creating"
136
+ begin
137
+ RFlow::Configuration::initialize_database(options[:config_database_path], options[:config_file_path])
138
+ rescue Exception => e
139
+ startup_logger.fatal "Error initializing configuration database: #{e.message}: #{e.backtrace.join "\n"}"
140
+ exit 1
141
+ end
142
+
143
+ startup_logger.warn "Successfully initialized database '#{options[:config_database_path]}' with '#{options[:config_file_path]}'"
144
+ exit 0
145
+ end
146
+
147
+
148
+ # Load the database config and start setting up environment
149
+ begin
150
+ config = RFlow::Configuration.new(options[:config_database_path])
151
+ rescue Exception => e
152
+ startup_logger.fatal "Error loading config database: #{e.class} - #{e.message}"
153
+ exit 1
154
+ end
155
+
156
+ Dir.chdir(File.dirname(options[:config_database_path]))
157
+ Dir.chdir(config['rflow.application_directory_path'])
158
+ pid = RFlow.running_pid_file_path?(config['rflow.pid_file_path'])
159
+
160
+ case command
161
+ when 'stop'
162
+ if pid
163
+ startup_logger.info "#{config['rflow.application_name']} running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}, terminating"
164
+ # TODO: check if it actually shut down
165
+ Process.kill 'INT', pid
166
+ else
167
+ startup_logger.warn "#{config['rflow.application_name']} process not found in #{File.expand_path(config['rflow.pid_file_path'])}"
168
+ exit 1
169
+ end
170
+ exit 0
171
+
172
+ when 'status'
173
+ unless pid
174
+ startup_logger.error "#{config['rflow.application_name']} process not found in #{File.expand_path(config['rflow.pid_file_path'])}"
175
+ exit 1
176
+ end
177
+ startup_logger.info "#{config['rflow.application_name']} running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}"
178
+ exit 0
179
+
180
+ when 'start'
181
+ if pid
182
+ startup_logger.error "#{config['rflow.application_name']} already running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}"
183
+ exit 1
184
+ end
185
+ end
186
+
187
+
188
+ # We should have eliminated all commands but 'start' at this point
189
+
190
+ # require all the gem extensions
191
+ options[:gems].each do |extension_gem|
192
+ startup_logger.info "Requiring #{extension_gem}"
193
+ require extension_gem
194
+ end
195
+
196
+
197
+ # load all the file extensions
198
+ options[:extensions_file_paths].each do |extensions_file_path|
199
+ startup_logger.info "Loading #{extensions_file_path}"
200
+ unless File.readable? extensions_file_path
201
+ startup_logger.fatal "Extensions file ('#{Dir.getwd}') '#{extensions_file_path}' not reabable\n#{option_parser.help}"
202
+ exit 1
203
+ end
204
+ load extensions_file_path
205
+ end
206
+
207
+
208
+ # Start the flow
209
+ begin
210
+ RFlow.run options[:config_database_path], options[:daemonize]
211
+ rescue Exception => e
212
+ startup_logger.fatal "Error running rflow: #{e.class}: #{e.message}"
213
+ end
214
+
215
+ __END__