rflow 1.0.0a1 → 1.0.0a2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +1 -0
  4. data/Gemfile +0 -1
  5. data/NOTES +0 -13
  6. data/README.md +6 -1
  7. data/bin/rflow +2 -9
  8. data/example/basic_config.rb +1 -33
  9. data/example/basic_extensions.rb +0 -98
  10. data/example/http_config.rb +2 -3
  11. data/example/http_extensions.rb +6 -63
  12. data/lib/rflow.rb +31 -39
  13. data/lib/rflow/child_process.rb +112 -0
  14. data/lib/rflow/component.rb +77 -148
  15. data/lib/rflow/component/port.rb +38 -41
  16. data/lib/rflow/components.rb +4 -8
  17. data/lib/rflow/components/clock.rb +49 -0
  18. data/lib/rflow/components/integer.rb +39 -0
  19. data/lib/rflow/components/raw.rb +10 -6
  20. data/lib/rflow/components/replicate.rb +20 -0
  21. data/lib/rflow/components/ruby_proc_filter.rb +27 -0
  22. data/lib/rflow/configuration.rb +105 -184
  23. data/lib/rflow/configuration/component.rb +1 -4
  24. data/lib/rflow/configuration/connection.rb +11 -16
  25. data/lib/rflow/configuration/port.rb +3 -5
  26. data/lib/rflow/configuration/ruby_dsl.rb +105 -119
  27. data/lib/rflow/configuration/setting.rb +19 -25
  28. data/lib/rflow/configuration/shard.rb +1 -3
  29. data/lib/rflow/connection.rb +47 -10
  30. data/lib/rflow/connections.rb +0 -1
  31. data/lib/rflow/connections/zmq_connection.rb +34 -38
  32. data/lib/rflow/daemon_process.rb +155 -0
  33. data/lib/rflow/logger.rb +41 -25
  34. data/lib/rflow/master.rb +23 -105
  35. data/lib/rflow/message.rb +78 -108
  36. data/lib/rflow/pid_file.rb +37 -37
  37. data/lib/rflow/shard.rb +33 -100
  38. data/lib/rflow/version.rb +2 -2
  39. data/rflow.gemspec +2 -2
  40. data/schema/tick.avsc +10 -0
  41. data/spec/fixtures/config_ints.rb +4 -40
  42. data/spec/fixtures/config_shards.rb +1 -2
  43. data/spec/fixtures/extensions_ints.rb +0 -98
  44. data/spec/rflow/component/port_spec.rb +61 -0
  45. data/spec/rflow/components/clock_spec.rb +72 -0
  46. data/spec/rflow/configuration/ruby_dsl_spec.rb +150 -0
  47. data/spec/rflow/configuration_spec.rb +54 -0
  48. data/spec/rflow/forward_to_input_port_spec.rb +48 -0
  49. data/spec/rflow/forward_to_output_port_spec.rb +40 -0
  50. data/spec/rflow/logger_spec.rb +48 -0
  51. data/spec/rflow/message/data/raw_spec.rb +29 -0
  52. data/spec/rflow/message/data_spec.rb +58 -0
  53. data/spec/rflow/message_spec.rb +154 -0
  54. data/spec/rflow_spec.rb +94 -124
  55. data/spec/spec_helper.rb +8 -12
  56. metadata +46 -22
  57. data/lib/rflow/components/raw/extensions.rb +0 -18
  58. data/lib/rflow/port.rb +0 -4
  59. data/lib/rflow/util.rb +0 -19
  60. data/spec/rflow_component_port_spec.rb +0 -58
  61. data/spec/rflow_configuration_ruby_dsl_spec.rb +0 -148
  62. data/spec/rflow_configuration_spec.rb +0 -73
  63. data/spec/rflow_message_data_raw.rb +0 -26
  64. data/spec/rflow_message_data_spec.rb +0 -60
  65. data/spec/rflow_message_spec.rb +0 -182
  66. data/spec/schema_spec.rb +0 -28
  67. data/temp.rb +0 -295
data/lib/rflow.rb CHANGED
@@ -1,68 +1,60 @@
1
1
  require "rubygems"
2
2
  require "bundler/setup"
3
-
4
3
  require 'time'
5
-
6
4
  require 'active_record'
7
5
  require 'eventmachine'
8
6
  require 'sqlite3'
9
-
10
7
  require 'rflow/configuration'
11
-
12
8
  require 'rflow/master'
13
9
  require 'rflow/message'
14
-
15
10
  require 'rflow/components'
16
11
  require 'rflow/connections'
17
-
18
12
  require 'rflow/logger'
19
13
 
20
14
  class RFlow
21
15
  include Log4r
22
16
 
23
- class Error < StandardError; end
24
-
25
17
  class << self
26
- attr_accessor :config_database_path
27
18
  attr_accessor :logger
28
- attr_accessor :configuration
29
- attr_accessor :master
19
+ attr_reader :configuration, :master
30
20
  end
31
21
 
32
- def self.run(config_database_path=nil, daemonize=nil)
33
- self.configuration = Configuration.new(config_database_path)
22
+ def self.run!(config_database_path = nil, daemonize = false)
23
+ @config_database_path = config_database_path
24
+ @daemonize = daemonize
34
25
 
35
- if config_database_path
36
- # First change to the config database directory, which might hold
37
- # relative paths for the other files/directories, such as the
38
- # application_directory_path
39
- Dir.chdir File.dirname(config_database_path)
40
- end
26
+ establish_configuration
27
+ chdir_application_directory
28
+ setup_logger
29
+ start_master_node
30
+ rescue SystemExit => e
31
+ # Do nothing, just prevent a normal exit from causing an unsightly
32
+ # error in the logs
33
+ end
41
34
 
42
- # Bail unless you have some of the basic information. TODO:
43
- # rethink this when things get more dynamic
35
+ private
36
+ def self.establish_configuration
37
+ @configuration = Configuration.new(@config_database_path)
44
38
  unless configuration['rflow.application_directory_path']
45
- error_message = "Empty configuration database! Use a view/controller (such as the RubyDSL) to create a configuration"
46
- RFlow.logger.error "Empty configuration database! Use a view/controller (such as the RubyDSL) to create a configuration"
47
- raise ArgumentError, error_message
39
+ raise ArgumentError, "Empty configuration database! Use a view/controller (such as the RubyDSL) to create a configuration"
48
40
  end
41
+ end
49
42
 
43
+ def self.chdir_application_directory
44
+ # First change to the config db directory, which might hold
45
+ # relative paths for the other files/directories
46
+ Dir.chdir(File.dirname(@config_database_path)) if @config_database_path
50
47
  Dir.chdir configuration['rflow.application_directory_path']
48
+ end
51
49
 
52
- self.logger = RFlow::Logger.new(configuration, !daemonize)
53
- @master = Master.new(configuration)
54
-
55
- master.daemonize! if daemonize
56
- master.run # Runs EM and doesn't return
57
-
58
- # Should never get here
59
- logger.warn "going down"
60
- rescue SystemExit => e
61
- # Do nothing, just prevent a normal exit from causing an unsightly
62
- # error in the logs
63
- rescue Exception => e
64
- logger.fatal "Exception caught: #{e.class} - #{e.message}\n#{e.backtrace.join "\n"}"
65
- exit 1
50
+ def self.setup_logger
51
+ include_stdout = !@daemonize
52
+ self.logger = RFlow::Logger.new(configuration, include_stdout)
66
53
  end
67
54
 
68
- end # class RFlow
55
+ def self.start_master_node
56
+ @master = Master.new(configuration)
57
+ master.daemonize! if @daemonize
58
+ master.run! # blocks until EventMachine stops
59
+ end
60
+ end
data/lib/rflow/child_process.rb ADDED
@@ -0,0 +1,112 @@
1
+ class RFlow
2
+ class ChildProcess
3
+ attr_reader :pid, :name
4
+
5
+ def initialize(name, role = name)
6
+ @name = name
7
+ @role = role
8
+ end
9
+
10
+ # Launch another process to execute the child. The parent
11
+ # process retains the original worker object (with pid and IPC
12
+ # pipe) to allow for process management
13
+ def spawn!
14
+ establish_child_pipe
15
+ drop_database_connections
16
+
17
+ @pid = fork
18
+ if @pid
19
+ return_after_child_starts
20
+ else
21
+ run_child_process
22
+ end
23
+ end
24
+
25
+ def run_child_process
26
+ @child_pipe_w.close
27
+ register_logging_context
28
+ update_process_name
29
+ handle_signals
30
+
31
+ RFlow.logger.info "#{@role} started"
32
+ run_process
33
+ exit 0
34
+ ensure
35
+ unhandle_signals
36
+ end
37
+
38
+ def shutdown!(signal)
39
+ RFlow.logger.info "Shutting down #{@name} due to #{signal}"
40
+ unhandle_signals
41
+ end
42
+
43
+ private
44
+ def establish_child_pipe
45
+ @child_pipe_r, @child_pipe_w = IO.pipe
46
+ [@child_pipe_r, @child_pipe_w].each {|io| io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
47
+ end
48
+
49
+ # Holding database connections over the fork causes problems. Instead,
50
+ # let them be automatically restored after the fork.
51
+ def drop_database_connections
52
+ ::ActiveRecord::Base.clear_all_connections!
53
+ end
54
+
55
+ def return_after_child_starts
56
+ @child_pipe_r.close
57
+ self
58
+ end
59
+
60
+ def register_logging_context
61
+ # arrange for child's name to appear in log messages
62
+ Log4r::NDC.push @name
63
+ end
64
+
65
+ def clone_logging_context
66
+ Log4r::NDC.clone_stack
67
+ end
68
+
69
+ def apply_logging_context(context)
70
+ Log4r::NDC.inherit(context)
71
+ end
72
+
73
+ def update_process_name
74
+ # set the visible process name to match the child's name
75
+ $0 += " #{@name}"
76
+ end
77
+
78
+ def handle_signals
79
+ ['SIGTERM', 'SIGINT', 'SIGQUIT'].each do |signal|
80
+ trap_signal(signal) do
81
+ shutdown! signal
82
+ exit! 0
83
+ end
84
+ end
85
+
86
+ trap_signal 'SIGUSR1' do
87
+ RFlow.logger.reopen
88
+ end
89
+
90
+ trap_signal 'SIGUSR2' do
91
+ RFlow.logger.toggle_log_level
92
+ end
93
+ end
94
+
95
+ def unhandle_signals
96
+ ['SIGTERM', 'SIGINT', 'SIGQUIT', 'SIGCHLD', 'SIGUSR1', 'SIGUSR2'].each do |signal|
97
+ Signal.trap signal, 'DEFAULT'
98
+ end
99
+ end
100
+
101
+ def trap_signal(signal)
102
+ # Log4r and traps don't mix, so we need to put it in another thread
103
+ context = clone_logging_context
104
+ Signal.trap signal do
105
+ Thread.new do
106
+ apply_logging_context context
107
+ yield
108
+ end.join
109
+ end
110
+ end
111
+ end
112
+ end
data/lib/rflow/component.rb CHANGED
@@ -1,64 +1,42 @@
1
1
  require 'ostruct'
2
-
3
2
  require 'rflow/message'
4
3
  require 'rflow/component/port'
5
4
 
6
5
  class RFlow
7
6
  class Component
8
- # Keep track of available component subclasses
9
- def self.inherited(subclass)
10
- RFlow::Configuration.add_available_component(subclass)
11
- end
12
-
13
-
14
- # The Component class methods used in the creation of a component
15
7
  class << self
16
- def defined_input_ports
17
- @defined_input_ports ||= Hash.new
8
+ # Keep track of available component subclasses
9
+ def inherited(subclass)
10
+ RFlow::Configuration.add_available_component(subclass)
18
11
  end
19
12
 
20
- def defined_output_ports
21
- @defined_output_ports ||= Hash.new
22
- end
23
-
24
- # TODO: Update the class vs instance stuffs here to be correct
25
- # Port defintions only have names
26
-
27
- # TODO: consider class-based UUIDs to identify component types
28
-
29
13
  # Define an input port with a given name
30
- def input_port(port_name)
31
- define_port(defined_input_ports, port_name)
32
- end
14
+ def input_port(name); define_port(defined_input_ports, name); end
33
15
 
34
16
  # Define an output port with a given name
35
- def output_port(port_name)
36
- define_port(defined_output_ports, port_name)
37
- end
17
+ def output_port(name); define_port(defined_output_ports, name); end
18
+
19
+ def defined_input_ports; @defined_input_ports ||= {}; end
20
+ def defined_output_ports; @defined_output_ports ||= {}; end
38
21
 
39
- # Helper method to keep things DRY for standard component
40
- # definition methods input_port and output_port
41
- def define_port(collection, port_name)
42
- collection[port_name.to_s] = true
22
+ def define_port(collection, name)
23
+ collection[name.to_s] = true
43
24
 
44
25
  # Create the port accessor method based on the port name
45
- define_method port_name.to_s.to_sym do
46
- port = ports.by_name[port_name.to_s]
26
+ define_method name.to_s.to_sym do
27
+ port = ports.by_name[name.to_s]
47
28
  return port if port
48
29
 
49
30
  # If the port was not connected, return a port-like object
50
31
  # that can respond/log but doesn't send any data. Note,
51
32
  # it won't be available in the 'by_uuid' collection, as it
52
33
  # doesn't have a configured uuid
53
- RFlow.logger.debug "'#{self.name}##{port_name}' not connected, creating a disconnected port"
34
+ RFlow.logger.debug "'#{self.name}##{name}' not connected, creating a disconnected port"
54
35
 
55
- disconnected_port = DisconnectedPort.new(OpenStruct.new(:name => port_name, :uuid => 0))
56
- ports << disconnected_port
57
- disconnected_port
36
+ DisconnectedPort.new(OpenStruct.new(:name => name, :uuid => 0)).tap {|d| ports << d }
58
37
  end
59
38
  end
60
39
 
61
-
62
40
  # Attempt to instantiate a component described by the config
63
41
  # specification. This assumes that the specification of a
64
42
  # component is a fully qualified Ruby class that has already
@@ -68,37 +46,28 @@ class RFlow
68
46
  # class. Future releases will support external (i.e. non-managed
69
47
  # components), but the current stuff only supports Ruby classes
70
48
  def build(config)
71
- if config.managed?
72
- RFlow.logger.debug "Instantiating component '#{config.name}' as '#{config.specification}' (#{config.uuid})"
73
- begin
74
- RFlow.logger.debug RFlow.configuration.available_components.inspect
75
- instantiated_component = if RFlow.configuration.available_components.include? config.specification
76
- RFlow.logger.debug "Component found in configuration.available_components['#{config.specification}']"
77
- RFlow.configuration.available_components[config.specification].new(config)
78
- else
79
- RFlow.logger.debug "Component not found in configuration.available_components, constantizing component '#{config.specification}'"
80
- config.specification.constantize.new(config)
81
- end
82
- rescue NameError => e
83
- error_message = "Could not instantiate component '#{config.name}' as '#{config.specification}' (#{config.uuid}): the class '#{config.specification}' was not found"
84
- RFlow.logger.error error_message
85
- raise RuntimeError, error_message
86
- rescue Exception => e
87
- error_message = "Could not instantiate component '#{config.name}' as '#{config.specification}' (#{config.uuid}): #{e.class} #{e.message}"
88
- RFlow.logger.error error_message
89
- raise RuntimeError, error_message
49
+ raise NotImplementedError, "Non-managed components not yet implemented for component '#{config.name}' as '#{config.specification}' (#{config.uuid})" unless config.managed?
50
+
51
+ RFlow.logger.debug "Instantiating component '#{config.name}' as '#{config.specification}' (#{config.uuid})"
52
+ begin
53
+ component = RFlow.configuration.available_components[config.specification]
54
+
55
+ if component
56
+ RFlow.logger.debug "Component found in configuration.available_components['#{config.specification}']"
57
+ component.new(config)
58
+ else
59
+ RFlow.logger.debug "Component not found in configuration.available_components, constantizing component '#{config.specification}'"
60
+ config.specification.constantize.new(config)
90
61
  end
91
- else
92
- error_message = "Non-managed components not yet implemented for component '#{config.name}' as '#{config.specification}' (#{config.uuid})"
93
- RFlow.logger.error error_message
94
- raise NotImplementedError, error_message
62
+ rescue NameError => e
63
+ raise RuntimeError, "Could not instantiate component '#{config.name}' as '#{config.specification}' (#{config.uuid}): the class '#{config.specification}' was not found"
64
+ rescue Exception => e
65
+ raise RuntimeError, "Could not instantiate component '#{config.name}' as '#{config.specification}' (#{config.uuid}): #{e.class} #{e.message}"
95
66
  end
96
-
97
- instantiated_component
98
67
  end
99
68
  end
100
69
 
101
- attr_reader :config, :uuid, :name, :ports
70
+ attr_reader :uuid, :name, :ports
102
71
 
103
72
  def initialize(config)
104
73
  @config = config
@@ -108,114 +77,40 @@ class RFlow
108
77
 
109
78
  configure_ports!
110
79
  configure_connections!
111
- configure!(config.options)
112
80
  end
113
81
 
114
-
115
82
  # Returns a list of connected input ports. Each port will have
116
83
  # one or more keys associated with a particular connection.
117
- def input_ports
118
- ports.by_type["RFlow::Component::InputPort"]
119
- end
120
-
84
+ def input_ports; ports.by_type["RFlow::Component::InputPort"]; end
121
85
 
122
86
  # Returns a list of connected output ports. Each port will have
123
87
  # one or more keys associated with the particular connection.
124
- def output_ports
125
- ports.by_type["RFlow::Component::OutputPort"]
126
- end
127
-
88
+ def output_ports; ports.by_type["RFlow::Component::OutputPort"]; end
128
89
 
129
90
  # Returns a list of disconnected output ports.
130
- def disconnected_ports
131
- ports.by_type["RFlow::Component::DisconnectedPort"]
132
- end
133
-
134
-
135
- def configure_ports!
136
- # Send the port configuration to each component
137
- config.input_ports.each do |input_port_config|
138
- RFlow.logger.debug "Configuring component '#{name}' (#{uuid}) with input port '#{input_port_config.name}' (#{input_port_config.uuid})"
139
- configure_input_port!(input_port_config)
140
- end
141
-
142
- config.output_ports.each do |output_port_config|
143
- RFlow.logger.debug "Configuring component '#{name}' (#{uuid}) with output port '#{output_port_config.name}' (#{output_port_config.uuid})"
144
- configure_output_port!(output_port_config)
145
- end
146
- end
147
-
148
-
149
- def configure_input_port!(port_config)
150
- unless self.class.defined_input_ports.include? port_config.name
151
- raise ArgumentError, "Input port '#{port_config.name}' not defined on component '#{self.class}'"
152
- end
153
- ports << InputPort.new(port_config)
154
- end
155
-
156
-
157
- def configure_output_port!(port_config)
158
- unless self.class.defined_output_ports.include? port_config.name
159
- raise ArgumentError, "Output port '#{port_config.name}' not defined on component '#{self.class}'"
160
- end
161
- ports << OutputPort.new(port_config)
162
- end
163
-
91
+ def disconnected_ports; ports.by_type["RFlow::Component::DisconnectedPort"]; end
164
92
 
165
- def configure_connections!
166
- config.input_ports.each do |input_port_config|
167
- input_port_config.input_connections.each do |input_connection_config|
168
- RFlow.logger.debug "Configuring input port '#{input_port_config.name}' (#{input_port_config.uuid}) key '#{input_connection_config.input_port_key}' with #{input_connection_config.type.to_s} connection '#{input_connection_config.name}' (#{input_connection_config.uuid})"
169
- ports.by_uuid[input_port_config.uuid].add_connection(input_connection_config.input_port_key, Connection.build(input_connection_config))
170
- end
171
- end
172
-
173
- config.output_ports.each do |output_port_config|
174
- output_port_config.output_connections.each do |output_connection_config|
175
- RFlow.logger.debug "Configuring output port '#{output_port_config.name}' (#{output_port_config.uuid}) key '#{output_connection_config.output_port_key}' with #{output_connection_config.type.to_s} connection '#{output_connection_config.name}' (#{output_connection_config.uuid})"
176
- ports.by_uuid[output_port_config.uuid].add_connection(output_connection_config.output_port_key, Connection.build(output_connection_config))
177
- end
178
- end
179
- end
180
-
181
-
182
- # Tell the component to establish it's ports' connections, i.e. make
93
+ # Tell the component to establish its ports' connections, i.e. make
183
94
  # the connection. Uses the underlying connection object. Also
184
95
  # establishes the callbacks for each of the input ports
185
96
  def connect!
186
- input_ports.each do |input_port|
187
- input_port.connect!
188
-
189
- # Create the callbacks for recieving messages as a proc
190
- input_port.keys.each do |input_port_key|
191
- keyed_connections = input_port[input_port_key]
192
- keyed_connections.each do |connection|
193
- connection.recv_callback = Proc.new do |message|
194
- process_message(input_port, input_port_key, connection, message)
195
- end
196
- end
197
- end
198
- end
199
-
200
- output_ports.each do |output_port|
201
- output_port.connect!
202
- end
97
+ input_ports.each {|port| port.recv_callback = method(:process_message) }
98
+ input_ports.each(&:connect!)
99
+ output_ports.each(&:connect!)
203
100
  end
204
101
 
205
-
206
102
  def to_s
207
103
  string = "Component '#{name}' (#{uuid})\n"
208
104
  ports.each do |port|
209
- port.keys.each do |port_key|
210
- port[port_key].each do |connection|
211
- string << "\t#{port.class.to_s} '#{port.name}' (#{port.uuid}) key '#{port_key}' connection '#{connection.name}' (#{connection.uuid})\n"
105
+ port.keys.each do |key|
106
+ port[key].each do |connection|
107
+ string << "\t#{port.class.to_s} '#{port.name}' (#{port.uuid}) key '#{key}' connection '#{connection.name}' (#{connection.uuid})\n"
212
108
  end
213
109
  end
214
110
  end
215
111
  string
216
112
  end
217
113
 
218
-
219
114
  # Method that should be overridden by a subclass to provide for
220
115
  # component-specific configuration. The subclass should use the
221
116
  # self.configuration attribute (@configuration) to store its
@@ -243,5 +138,39 @@ class RFlow
243
138
  # cleanup any leftover state, e.g. flush file handles, etc
244
139
  def cleanup!; end
245
140
 
246
- end # class Component
247
- end # class RFlow
141
+ private
142
+ def configure_ports!
143
+ @config.input_ports.each do |p|
144
+ RFlow.logger.debug "Configuring component '#{name}' (#{uuid}) with input port '#{p.name}' (#{p.uuid})"
145
+ unless self.class.defined_input_ports.include? p.name
146
+ raise ArgumentError, "Input port '#{p.name}' not defined on component '#{self.class}'"
147
+ end
148
+ ports << InputPort.new(p)
149
+ end
150
+
151
+ @config.output_ports.each do |p|
152
+ RFlow.logger.debug "Configuring component '#{name}' (#{uuid}) with output port '#{p.name}' (#{p.uuid})"
153
+ unless self.class.defined_output_ports.include? p.name
154
+ raise ArgumentError, "Output port '#{p.name}' not defined on component '#{self.class}'"
155
+ end
156
+ ports << OutputPort.new(p)
157
+ end
158
+ end
159
+
160
+ def configure_connections!
161
+ @config.input_ports.each do |p|
162
+ p.input_connections.each do |c|
163
+ RFlow.logger.debug "Configuring input port '#{p.name}' (#{p.uuid}) key '#{c.input_port_key}' with #{c.type.to_s} connection '#{c.name}' (#{c.uuid})"
164
+ ports.by_uuid[p.uuid].add_connection c.input_port_key, Connection.build(c)
165
+ end
166
+ end
167
+
168
+ @config.output_ports.each do |p|
169
+ p.output_connections.each do |c|
170
+ RFlow.logger.debug "Configuring output port '#{p.name}' (#{p.uuid}) key '#{c.output_port_key}' with #{c.type.to_s} connection '#{c.name}' (#{c.uuid})"
171
+ ports.by_uuid[p.uuid].add_connection c.output_port_key, Connection.build(c)
172
+ end
173
+ end
174
+ end
175
+ end
176
+ end