rflow 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +5 -0
  5. data/NOTES +187 -0
  6. data/README +0 -0
  7. data/Rakefile +16 -0
  8. data/bin/rflow +215 -0
  9. data/example/basic_config.rb +49 -0
  10. data/example/basic_extensions.rb +142 -0
  11. data/example/http_config.rb +21 -0
  12. data/example/http_extensions.rb +262 -0
  13. data/lib/rflow.rb +440 -0
  14. data/lib/rflow/component.rb +192 -0
  15. data/lib/rflow/component/port.rb +150 -0
  16. data/lib/rflow/components.rb +10 -0
  17. data/lib/rflow/components/raw.rb +26 -0
  18. data/lib/rflow/components/raw/extensions.rb +18 -0
  19. data/lib/rflow/configuration.rb +290 -0
  20. data/lib/rflow/configuration/component.rb +27 -0
  21. data/lib/rflow/configuration/connection.rb +98 -0
  22. data/lib/rflow/configuration/migrations/20010101000001_create_settings.rb +14 -0
  23. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +19 -0
  24. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +24 -0
  25. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +27 -0
  26. data/lib/rflow/configuration/port.rb +30 -0
  27. data/lib/rflow/configuration/ruby_dsl.rb +183 -0
  28. data/lib/rflow/configuration/setting.rb +67 -0
  29. data/lib/rflow/configuration/uuid_keyed.rb +18 -0
  30. data/lib/rflow/connection.rb +59 -0
  31. data/lib/rflow/connections.rb +2 -0
  32. data/lib/rflow/connections/zmq_connection.rb +101 -0
  33. data/lib/rflow/message.rb +191 -0
  34. data/lib/rflow/port.rb +4 -0
  35. data/lib/rflow/util.rb +19 -0
  36. data/lib/rflow/version.rb +3 -0
  37. data/rflow.gemspec +42 -0
  38. data/schema/message.avsc +36 -0
  39. data/schema/raw.avsc +9 -0
  40. data/spec/fixtures/config_ints.rb +61 -0
  41. data/spec/fixtures/extensions_ints.rb +141 -0
  42. data/spec/rflow_configuration_spec.rb +73 -0
  43. data/spec/rflow_message_data_raw.rb +26 -0
  44. data/spec/rflow_message_data_spec.rb +60 -0
  45. data/spec/rflow_message_spec.rb +182 -0
  46. data/spec/rflow_spec.rb +100 -0
  47. data/spec/schema_spec.rb +28 -0
  48. data/spec/spec_helper.rb +37 -0
  49. data/temp.rb +295 -0
  50. metadata +270 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c322bc6cf9c3b4ccd46b13cf56d3c2460dc5f0be
4
+ data.tar.gz: af2d6fb3e7051a074c56fa10b70b5a02b23bb0d7
5
+ SHA512:
6
+ metadata.gz: 5a3cd46af3c815d2cb5840d48a0e38f7e28dbe911276fac75d3466bef9d05d7d49e38baa010b5ce419a67aea5829d2f227d2ff74aa5e689f6c1e6109d882ad81
7
+ data.tar.gz: 539c61aca94e84e1ccb00acba1ef87c7a6556dc65180f9f905561b09e147eab83ff0a409dd01b84a1113ba820cf89c6470ee64c1ace68736232423ba3a1d9668
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ \#*
6
+ .\#*
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm 1.9.2@rflow-devel
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in rflow.gemspec
4
+ gemspec
5
+
data/NOTES ADDED
@@ -0,0 +1,187 @@
1
+ RFlow Manager
2
+
3
+ Components
4
+ Input Ports
5
+ Output Ports
6
+
7
+ Connections
8
+ Input Ports
9
+ Output Ports
10
+
11
+ rflow <config file>
12
+ figure out a work directory
13
+ make sure that it has the right subdirectories (can be overridden)
14
+ run tmp logs schemas components
15
+ Set up logging to logs/rflow.log
16
+ Load all schemas
17
+ Verify all component installation
18
+ Initialize components
19
+ Start components running and make sure that they "daemonize" correctly
20
+ - place pid files in deployment's run directory
21
+ Configure components via zmq
22
+ Daemonize self
23
+
24
+
25
+
26
+ class Component
27
+ def self.input_port
28
+ end
29
+
30
+ def self.output_port
31
+ end
32
+
33
+ attr_accessor :state
34
+
35
+ def initialize(config, run_directory)
36
+
37
+ end
38
+
39
+ def run
40
+
41
+ end
42
+
43
+ def configure
44
+
45
+ end
46
+
47
+
48
+
49
+ class PassThrough < Component
50
+ input_port [:in]
51
+ input_port :another_in
52
+ output_port :out
53
+ output_port :another_out
54
+
55
+ def initialize(config, run_directory)
56
+ # This will initialize the ports
57
+ super
58
+ # Do stuff to initialize component
59
+ end
60
+
61
+ end
62
+
63
+
64
+ Computation Requirements:
65
+ Initial startup with:
66
+ - management bus connection information
67
+ - group and instance UUID
68
+ - beacon interval
69
+ - run directory, containing
70
+ - PID files
71
+ - log dir + logs
72
+ - computation-specific configuration (conf dir)
73
+ Needs to process the following messages from mgmt bus:
74
+ - CONFIGURE (ports)
75
+ - RUN
76
+ - SHUTDOWN
77
+ Needs to send the following messages to mgmt bus:
78
+ - LOG
79
+ - BEACON (state machine of the below submessages)
80
+ - STARTED
81
+ - CONFIGURED
82
+ - RUNNING
83
+ - STOPPING
84
+ - STOPPED
85
+ - ERROR
86
+ On startup:
87
+ - listen to mgmt bus
88
+ - publish BEACON + state to mgmt bus every (beacon interval) seconds (default to 1 sec)
89
+
90
+
91
+ External Computations:
92
+ - Given (out-of-band) startup info (mgmt bus, UUIDs, beacon interval)
93
+ -
94
+
95
+
96
+ RFlow
97
+ - Will need a DB for config
98
+ - Initial startup will need to resolve all remaining outstanding items (ports, UUIDs, etc) and store in config DB
99
+ - MVC, Mongrel2-like?
100
+
101
+ Translate
102
+ - Need to add <associated type="objtype" name="myname"> where name attr can be used in later XML templates
103
+
104
+
105
+
106
+
107
+
108
+
109
+ ----------------
110
+ Plugins:
111
+ an externally defined plugin needs access to all current data types, as well as being able to define its own and tell the system about that.
112
+ - necessary to tell system?
113
+ - need a protocol for defining schema transfer
114
+ - each message has attached schema
115
+
116
+
117
+ lib/rflow/message.rb
118
+
119
+ RFlow::Config
120
+
121
+ RFlow::Management
122
+ - Somewhere for external people to register new computations with running system
123
+ - computation says that it's running and asks for Connection configuration
124
+ - how will it specify where in the workflow it wants to run????
125
+
126
+ RFlow::Message(complete on-the-wire Avro message format)
127
+ data_type, provenance, external_ids, empty, data (see below)
128
+
129
+ RFlow::Data::(various message data blocks)
130
+
131
+ RFlow::Computation
132
+ uuid, name, class, input_ports, output_ports
133
+
134
+
135
+ RFlow::Connection
136
+ encapsulates link knowledge and provides an API for sending and receiving
137
+ each computation will have one for each port
138
+ each computation will call into the connection to send (possibly via a Port object) and receive
139
+
140
+ RFlow::Connection::AMQP
141
+ will manage connections to an AMQP server
142
+
143
+ RFlow::Connection::ZMQ
144
+
145
+
146
+
147
+
148
+ computation_a.output_port -> (connection.incoming -> connection.outgoing) -> computation_b.input_port
149
+
150
+ AMQP::Topic - responsible for setting up a topic -> queue binding
151
+ r.incoming = amqp connection, channel, vhost, login, password, topic
152
+ r.outgoing = amqp connection, channel, vhost, login, password, queue name
153
+ behavior -> n x m, "round-robin" among the connected outgoing
154
+ incoming behavior will need to set topic/key, uses the data type in the RFlow::Message
155
+
156
+
157
+ ZMQ::PubSub - device-less, responsible for assigning ip/port and assigning one client to bind the port
158
+ r.incoming = zmq connection string (tcp://ip:port), type pub
159
+ r.outgoing = zmq connection string (tcp://ip:port), type sub
160
+ behavior -> n x m, broadcast sending,
161
+
162
+ ZMQ::PushPull - device-less, responsible for assigning ip/port and assigning one client to bind the port
163
+ r.incoming = zmq connection string (tcp://ip:port), type push
164
+ r.outgoing = zmq connection string (tcp://ip:port), type pull
165
+
166
+
167
+ Startup
168
+
169
+ RFlow.run is the management process for the workflow
170
+
171
+ computations = config.computations.map do |c|
172
+ instantiate_computation(c)
173
+ # Check for errors here, which would be evident if a computation couldn't be found/created
174
+ # Just creating single process ruby objects here to check for errors
175
+ end
176
+
177
+ computations.each do |c|
178
+ c.configure # with what????
179
+ # Still single ruby process to set and deconflict all the configuration parameters
180
+ end
181
+
182
+ computations.each do |c|
183
+ c.run
184
+ end
185
+
186
+ listen_for_management_events_from_old_computations
187
+ listen_for_new_computation_registration
data/README ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ require 'bundler'
2
+ require 'rspec/core/rake_task'
3
+ require 'rdoc/task'
4
+ Bundler::GemHelper.install_tasks
5
+
6
+ RSpec::Core::RakeTask.new(:spec) do |t|
7
+ t.verbose = true
8
+ t.rspec_opts = '--tty --color'
9
+ end
10
+
11
+ RDoc::Task.new do |rd|
12
+ rd.main = "README"
13
+ rd.rdoc_files.include("README", "lib/**/*.rb")
14
+ rd.rdoc_dir = File.join('doc', 'html')
15
+ end
16
+
data/bin/rflow ADDED
@@ -0,0 +1,215 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Will require rflow after option parsing to speed up a couple of
4
+ # startup cases (version and help) that don't need it
5
+ require 'optparse'
6
+
7
+ options = {
8
+ :daemonize => true,
9
+ :startup_log_level => :INFO,
10
+ :extensions_file_paths => [],
11
+ :gems => []
12
+ }
13
+
14
+ option_parser = OptionParser.new do |opts|
15
+ opts.banner = <<EOB
16
+ Usage: #{File.basename $0} [options] (start|stop|status|load)
17
+ EOB
18
+
19
+ opts.on("-d", "--database DB", "Config database (sqlite) path (GENERALLY REQUIRED)") do |db|
20
+ options[:config_database_path] = File.expand_path(db)
21
+ end
22
+
23
+ opts.on("-c", "--config CONFIG", "Config file path (only valid for load)") do |config|
24
+ options[:config_file_path] = File.expand_path(config)
25
+ end
26
+
27
+ opts.on("-e", "--extensions FILE1[,FILE_N]", Array, "Extension file paths (will load)") do |extensions|
28
+ options[:extensions_file_paths] += extensions.map {|extension| File.expand_path(extension)}
29
+ end
30
+
31
+ opts.on("-g", "--gems GEM1[,GEM_N]", Array, "Extension gems (will require)") do |gems|
32
+ options[:gems] += gems
33
+ end
34
+
35
+ opts.on("-l", "--log LOGFILE", "Initial startup log file (in addition to stdout)") do |log|
36
+ options[:startup_log_file_path] = File.expand_path(log)
37
+ end
38
+
39
+ opts.on("-v", "--verbose [LEVEL]", [:DEBUG, :INFO, :WARN], "Control the startup log (and stdout) verbosity (DEBUG, INFO, WARN) defaults to INFO") do |level|
40
+ options[:startup_log_level] = level || :DEBUG
41
+ end
42
+
43
+ opts.on("-f", "Run in the foreground") do |f|
44
+ options[:daemonize] = false
45
+ end
46
+
47
+ opts.on_tail("--version", "Show RFlow version and exit") do
48
+ require 'rflow/version'
49
+ puts RFlow::VERSION
50
+ exit 0
51
+ end
52
+
53
+ opts.on_tail("-h", "--help", "Show this message and exit") do
54
+ puts opts
55
+ exit 0
56
+ end
57
+
58
+ end
59
+
60
+ begin
61
+ option_parser.parse!
62
+ rescue Exception => e
63
+ STDERR.puts "Error processing arguments: #{e.class}: #{e.message}"
64
+ exit 1
65
+ end
66
+
67
+ # Now require rflow because the following parts of the startup require
68
+ # pieces (usually RFlow::Configuration or RFlow.logger)
69
+ require 'rflow'
70
+
71
+ # Set up the startup logging, which is distinct from the runtime
72
+ # logging that is defined in the config database. The startup logging
73
+ # will always go to STDOUT, as well as to the file specified with the
74
+ # '-l' parameter
75
+ startup_logger = Log4r::Logger.new 'startup'
76
+ startup_logger.add Log4r::StdoutOutputter.new('startup_stdout', :formatter => RFlow::LOG_PATTERN_FORMATTER)
77
+ startup_logger.level = Log4r::LNAMES.index options[:startup_log_level].to_s
78
+
79
+ if options[:startup_log_file_path]
80
+ begin
81
+ startup_logger.add Log4r::FileOutputter.new('startup_file', :filename => options[:startup_log_file_path], :formatter => RFlow::LOG_PATTERN_FORMATTER)
82
+ rescue Exception => e
83
+ startup_logger.fatal "Log file '#{options[:startup_log_file_path]}' problem: #{e.message}"
84
+ exit 1
85
+ end
86
+ end
87
+
88
+ command = ARGV[0]
89
+ unless ['start', 'stop', 'status', 'load'].include? command
90
+ startup_logger.fatal "Command needs to be one of [start|stop|status|load]\n#{option_parser.help}"
91
+ exit 1
92
+ end
93
+
94
+ if options[:config_file_path] && command != 'load'
95
+ startup_logger.fatal "Config file only valid for 'load' command"
96
+ exit 1
97
+ end
98
+
99
+
100
+ unless options[:config_database_path]
101
+ startup_logger.warn "Config database not specified, using default 'config.sqlite'"
102
+ options[:config_database_path] = File.expand_path(File.join(Dir.getwd, 'config.sqlite'))
103
+ end
104
+
105
+
106
+ # Set the standard logger to the startup one in the case that we need
107
+ # to call into RFlow to check on or setup things, like the config
108
+ # database. We want those log messages to go to the startup log when
109
+ # setting up. The running log will transition to what is specified in
110
+ # the config database
111
+ RFlow.logger = startup_logger
112
+
113
+
114
+ case command
115
+ when 'load'
116
+ # Load the database with the config file, if it exists. Will
117
+ # otherwise use default values (not very useful)
118
+ if options[:config_file_path]
119
+ unless File.exist? options[:config_file_path]
120
+ startup_logger.fatal "Config file '#{options[:config_file_path]}' not found\n#{option_parser.help}"
121
+ exit 1
122
+ end
123
+
124
+ unless File.readable? options[:config_file_path]
125
+ startup_logger.fatal "Config file '#{options[:config_file_path]}' not readable\n#{option_parser.help}"
126
+ exit 1
127
+ end
128
+ end
129
+
130
+ if File.exist? options[:config_database_path]
131
+ startup_logger.fatal "Config database '#{options[:config_database_path]}' exists, exiting to prevent accidental overwrite from config file '#{options[:config_file_path]}'"
132
+ exit 1
133
+ end
134
+
135
+ startup_logger.warn "Config database '#{options[:config_database_path]}' not found, creating"
136
+ begin
137
+ RFlow::Configuration::initialize_database(options[:config_database_path], options[:config_file_path])
138
+ rescue Exception => e
139
+ startup_logger.fatal "Error initializing configuration database: #{e.message}: #{e.backtrace.join "\n"}"
140
+ exit 1
141
+ end
142
+
143
+ startup_logger.warn "Successfully initialized database '#{options[:config_database_path]}' with '#{options[:config_file_path]}'"
144
+ exit 0
145
+ end
146
+
147
+
148
+ # Load the database config and start setting up environment
149
+ begin
150
+ config = RFlow::Configuration.new(options[:config_database_path])
151
+ rescue Exception => e
152
+ startup_logger.fatal "Error loading config database: #{e.class} - #{e.message}"
153
+ exit 1
154
+ end
155
+
156
+ Dir.chdir(File.dirname(options[:config_database_path]))
157
+ Dir.chdir(config['rflow.application_directory_path'])
158
+ pid = RFlow.running_pid_file_path?(config['rflow.pid_file_path'])
159
+
160
+ case command
161
+ when 'stop'
162
+ if pid
163
+ startup_logger.info "#{config['rflow.application_name']} running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}, terminating"
164
+ # TODO: check if it actually shut down
165
+ Process.kill 'INT', pid
166
+ else
167
+ startup_logger.warn "#{config['rflow.application_name']} process not found in #{File.expand_path(config['rflow.pid_file_path'])}"
168
+ exit 1
169
+ end
170
+ exit 0
171
+
172
+ when 'status'
173
+ unless pid
174
+ startup_logger.error "#{config['rflow.application_name']} process not found in #{File.expand_path(config['rflow.pid_file_path'])}"
175
+ exit 1
176
+ end
177
+ startup_logger.info "#{config['rflow.application_name']} running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}"
178
+ exit 0
179
+
180
+ when 'start'
181
+ if pid
182
+ startup_logger.error "#{config['rflow.application_name']} already running, process #{pid} found in #{File.expand_path(config['rflow.pid_file_path'])}"
183
+ exit 1
184
+ end
185
+ end
186
+
187
+
188
+ # We should have eliminated all commands but 'start' at this point
189
+
190
+ # require all the gem extensions
191
+ options[:gems].each do |extension_gem|
192
+ startup_logger.info "Requiring #{extension_gem}"
193
+ require extension_gem
194
+ end
195
+
196
+
197
+ # load all the file extensions
198
+ options[:extensions_file_paths].each do |extensions_file_path|
199
+ startup_logger.info "Loading #{extensions_file_path}"
200
+ unless File.readable? extensions_file_path
201
+ startup_logger.fatal "Extensions file ('#{Dir.getwd}') '#{extensions_file_path}' not reabable\n#{option_parser.help}"
202
+ exit 1
203
+ end
204
+ load extensions_file_path
205
+ end
206
+
207
+
208
+ # Start the flow
209
+ begin
210
+ RFlow.run options[:config_database_path], options[:daemonize]
211
+ rescue Exception => e
212
+ startup_logger.fatal "Error running rflow: #{e.class}: #{e.message}"
213
+ end
214
+
215
+ __END__