rflow 1.0.0a2 → 1.0.0a3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ require 'rflow/child_process'
2
+
3
+ class RFlow
4
+ # A message broker to mediate messages along a connection.
5
+ # The broker runs in a child process and will not return from spawn!.
6
+ class Broker < ChildProcess
7
+ class << self
8
+ def build(config)
9
+ case config.class.name
10
+ when 'RFlow::Configuration::ZMQStreamer'
11
+ RFlow::Connections::ZMQStreamer.new(config)
12
+ else
13
+ raise ArgumentError, 'Only ZMQ brokers currently supported'
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -35,8 +35,10 @@ class RFlow
35
35
  unhandle_signals
36
36
  end
37
37
 
38
+ def run_process; end
39
+
38
40
  def shutdown!(signal)
39
- RFlow.logger.info "Shutting down #{@name} due to #{signal}"
41
+ RFlow.logger.info "Shutting down due to #{signal}"
40
42
  unhandle_signals
41
43
  end
42
44
 
@@ -24,16 +24,7 @@ class RFlow
24
24
 
25
25
  # Create the port accessor method based on the port name
26
26
  define_method name.to_s.to_sym do
27
- port = ports.by_name[name.to_s]
28
- return port if port
29
-
30
- # If the port was not connected, return a port-like object
31
- # that can respond/log but doesn't send any data. Note,
32
- # it won't be available in the 'by_uuid' collection, as it
33
- # doesn't have a configured uuid
34
- RFlow.logger.debug "'#{self.name}##{name}' not connected, creating a disconnected port"
35
-
36
- DisconnectedPort.new(OpenStruct.new(:name => name, :uuid => 0)).tap {|d| ports << d }
27
+ ports.by_name[name.to_s]
37
28
  end
38
29
  end
39
30
 
@@ -50,33 +41,49 @@ class RFlow
50
41
 
51
42
  RFlow.logger.debug "Instantiating component '#{config.name}' as '#{config.specification}' (#{config.uuid})"
52
43
  begin
53
- component = RFlow.configuration.available_components[config.specification]
44
+ component_class = RFlow.configuration.available_components[config.specification]
54
45
 
55
- if component
46
+ if component_class
56
47
  RFlow.logger.debug "Component found in configuration.available_components['#{config.specification}']"
57
- component.new(config)
58
48
  else
59
49
  RFlow.logger.debug "Component not found in configuration.available_components, constantizing component '#{config.specification}'"
60
- config.specification.constantize.new(config)
50
+ component_class = config.specification.constantize
51
+ end
52
+
53
+ component_class.new(uuid: config.uuid, name: config.name).tap do |component|
54
+ config.input_ports.each {|p| component.configure_input_port! p.name, uuid: p.uuid }
55
+ config.output_ports.each {|p| component.configure_output_port! p.name, uuid: p.uuid }
56
+
57
+ config.input_ports.each do |p|
58
+ p.input_connections.each do |c|
59
+ component.send(p.name.to_sym).add_connection c.input_port_key, Connection.build(c)
60
+ end
61
+ end
62
+
63
+ config.output_ports.each do |p|
64
+ p.output_connections.each do |c|
65
+ component.send(p.name.to_sym).add_connection c.output_port_key, Connection.build(c)
66
+ end
67
+ end
61
68
  end
62
69
  rescue NameError => e
63
- raise RuntimeError, "Could not instantiate component '#{config.name}' as '#{config.specification}' (#{config.uuid}): the class '#{config.specification}' was not found"
70
+ raise RuntimeError, "Could not instantiate component '#{config.name}' as '#{config.specification}' (#{config.uuid}): the class '#{config.specification}' could not be loaded (#{e.message})"
64
71
  rescue Exception => e
65
- raise RuntimeError, "Could not instantiate component '#{config.name}' as '#{config.specification}' (#{config.uuid}): #{e.class} #{e.message}"
72
+ raise RuntimeError, "Could not instantiate component '#{config.name}' as '#{config.specification}' (#{config.uuid}): #{e.class} #{e.message}, because: #{e.backtrace.inspect}"
66
73
  end
67
74
  end
68
75
  end
69
76
 
70
- attr_reader :uuid, :name, :ports
77
+ attr_accessor :uuid, :name
78
+ attr_reader :ports
71
79
 
72
- def initialize(config)
73
- @config = config
74
- @uuid = config.uuid
75
- @name = config.name
80
+ def initialize(args = {})
81
+ @name = args[:name]
82
+ @uuid = args[:uuid]
76
83
  @ports = PortCollection.new
77
84
 
78
- configure_ports!
79
- configure_connections!
85
+ self.class.defined_input_ports.each {|name, _| ports << InputPort.new(self, name: name) }
86
+ self.class.defined_output_ports.each {|name, _| ports << OutputPort.new(self, name: name) }
80
87
  end
81
88
 
82
89
  # Returns a list of connected input ports. Each port will have
@@ -87,15 +94,33 @@ class RFlow
87
94
  # one or more keys associated with the particular connection.
88
95
  def output_ports; ports.by_type["RFlow::Component::OutputPort"]; end
89
96
 
90
- # Returns a list of disconnected output ports.
91
- def disconnected_ports; ports.by_type["RFlow::Component::DisconnectedPort"]; end
97
+ def configure_input_port!(port_name, options = {})
98
+ RFlow.logger.debug "Configuring component '#{name}' (#{uuid}) input port '#{port_name}' (#{options[:uuid]})"
99
+ unless self.class.defined_input_ports.include? port_name
100
+ raise ArgumentError, "Input port '#{port_name}' not defined on component '#{self.class}'"
101
+ end
102
+ ports.by_name[port_name].uuid = options[:uuid]
103
+ end
104
+
105
+ def configure_output_port!(port_name, options = {})
106
+ RFlow.logger.debug "Configuring component '#{name}' (#{uuid}) output port '#{port_name}' (#{options[:uuid]})"
107
+ unless self.class.defined_output_ports.include? port_name
108
+ raise ArgumentError, "Output port '#{port_name}' not defined on component '#{self.class}'"
109
+ end
110
+ ports.by_name[port_name].uuid = options[:uuid]
111
+ end
92
112
 
93
113
  # Tell the component to establish its ports' connections, i.e. make
94
114
  # the connection. Uses the underlying connection object. Also
95
115
  # establishes the callbacks for each of the input ports
96
- def connect!
116
+ def connect_inputs!
97
117
  input_ports.each {|port| port.recv_callback = method(:process_message) }
98
118
  input_ports.each(&:connect!)
119
+ end
120
+
121
+ # Tell the component to establish its ports' connections, i.e. make
122
+ # the connection. Uses the underlying connection object.
123
+ def connect_outputs!
99
124
  output_ports.each(&:connect!)
100
125
  end
101
126
 
@@ -137,40 +162,5 @@ class RFlow
137
162
  # before the global RFlow exit. Subclasses should implement to
138
163
  # clean up any leftover state, e.g. flush file handles, etc
139
164
  def cleanup!; end
140
-
141
- private
142
- def configure_ports!
143
- @config.input_ports.each do |p|
144
- RFlow.logger.debug "Configuring component '#{name}' (#{uuid}) with input port '#{p.name}' (#{p.uuid})"
145
- unless self.class.defined_input_ports.include? p.name
146
- raise ArgumentError, "Input port '#{p.name}' not defined on component '#{self.class}'"
147
- end
148
- ports << InputPort.new(p)
149
- end
150
-
151
- @config.output_ports.each do |p|
152
- RFlow.logger.debug "Configuring component '#{name}' (#{uuid}) with output port '#{p.name}' (#{p.uuid})"
153
- unless self.class.defined_output_ports.include? p.name
154
- raise ArgumentError, "Output port '#{p.name}' not defined on component '#{self.class}'"
155
- end
156
- ports << OutputPort.new(p)
157
- end
158
- end
159
-
160
- def configure_connections!
161
- @config.input_ports.each do |p|
162
- p.input_connections.each do |c|
163
- RFlow.logger.debug "Configuring input port '#{p.name}' (#{p.uuid}) key '#{c.input_port_key}' with #{c.type.to_s} connection '#{c.name}' (#{c.uuid})"
164
- ports.by_uuid[p.uuid].add_connection c.input_port_key, Connection.build(c)
165
- end
166
- end
167
-
168
- @config.output_ports.each do |p|
169
- p.output_connections.each do |c|
170
- RFlow.logger.debug "Configuring output port '#{p.name}' (#{p.uuid}) key '#{c.output_port_key}' with #{c.type.to_s} connection '#{c.name}' (#{c.uuid})"
171
- ports.by_uuid[p.uuid].add_connection c.output_port_key, Connection.build(c)
172
- end
173
- end
174
- end
175
165
  end
176
166
  end
@@ -9,28 +9,25 @@ class RFlow
9
9
  end
10
10
  end
11
11
 
12
- # Collection class to make it easier to index by both names,
13
- # UUIDs, and types.
12
+ # Collection class to make it easier to index by both names
13
+ # and types.
14
14
  class PortCollection
15
- attr_reader :ports, :by_uuid, :by_name, :by_type
15
+ attr_reader :ports, :by_name, :by_type
16
16
 
17
17
  def initialize
18
18
  @ports = []
19
- @by_uuid = {}
20
19
  @by_name = {}
21
20
  @by_type = Hash.new {|hash, key| hash[key.to_s] = []}
22
21
  end
23
22
 
24
23
  def <<(port)
25
- by_uuid[port.uuid.to_s] = port
26
24
  by_name[port.name.to_s] = port
27
25
  by_type[port.class.to_s] << port
28
26
  ports << port
29
27
  self
30
28
  end
31
29
 
32
- # Enumerate through each connected (or disconnected but
33
- # referenced) port
30
+ # Enumerate through each port
34
31
  # TODO: simplify with enumerators and procs
35
32
  def each
36
33
  ports.each {|port| yield port }
@@ -38,7 +35,12 @@ class RFlow
38
35
  end
39
36
 
40
37
  class Port
41
- attr_reader :connected
38
+ attr_reader :connected, :component
39
+
40
+ def initialize(component)
41
+ @component = component
42
+ end
43
+
42
44
  def connected?; connected; end
43
45
  end
44
46
 
@@ -49,16 +51,16 @@ class RFlow
49
51
  # result in the same message being sent to all indexed
50
52
  # connections.
51
53
  class HashPort < Port
52
- attr_reader :config, :name, :uuid
54
+ attr_accessor :name, :uuid
53
55
 
54
56
  protected
55
57
  attr_reader :connections_for
56
58
 
57
59
  public
58
- def initialize(config)
59
- @config = config
60
- @name = config.name
61
- @uuid = config.uuid
60
+ def initialize(component, args = {})
61
+ super(component)
62
+ self.uuid = args[:uuid]
63
+ self.name = args[:name]
62
64
  @connections_for = Hash.new {|hash, key| hash[key] = [].extend(ConnectionCollection)}
63
65
  end
64
66
 
@@ -77,9 +79,18 @@ class RFlow
77
79
 
78
80
  # Adds a connection for a given key
79
81
  def add_connection(key, connection)
82
+ RFlow.logger.debug "Attaching #{connection.class.name} connection '#{connection.name}' (#{connection.uuid}) to port '#{name}' (#{uuid}), key '#{connection.input_port_key}'"
80
83
  connections_for[key] << connection
81
84
  end
82
85
 
86
+ def direct_connect(other_port)
87
+ case other_port
88
+ when InputPort; add_connection nil, ForwardToInputPort.new(other_port)
89
+ when OutputPort; add_connection nil, ForwardToOutputPort.new(other_port)
90
+ else raise ArgumentError, "Unknown port type #{other_port.class.name}"
91
+ end
92
+ end
93
+
83
94
  # Return a list of connected keys
84
95
  def keys
85
96
  connections_for.keys
@@ -142,7 +153,5 @@ class RFlow
142
153
  all_connections.send_message(message)
143
154
  end
144
155
  end
145
-
146
- class DisconnectedPort < HashPort; end
147
156
  end
148
157
  end
@@ -211,6 +211,7 @@ class RFlow
211
211
  def [](name); Setting.find_by_name(name).value rescue nil; end
212
212
  def settings; Setting.all; end
213
213
  def shards; Shard.all; end
214
+ def connections; Connection.all; end
214
215
  def shard(uuid); Shard.find_by_uuid uuid; end
215
216
  def components; Component.all; end
216
217
  def component(uuid); Component.find_by_uuid uuid; end
@@ -46,6 +46,9 @@ class RFlow
46
46
  # allow defaults to use other parameters in the connection to
47
47
  # construct the appropriate default value.
48
48
  def self.default_options; {}; end
49
+
50
+ # By default, no broker processes are required to manage a connection.
51
+ def brokers; []; end
49
52
  end
50
53
 
51
54
  # STI Subclass for ZMQ connections and their required options
@@ -62,31 +65,46 @@ class RFlow
62
65
  end
63
66
  end
64
67
 
65
- # STI Subclass for AMQP connections and their required options
66
- class AMQPConnection < Connection
68
+ # STI Subclass for brokered ZMQ connections and their required options
69
+ #
70
+ # We name the IPCs to resemble a quasi-component. Outputting to this
71
+ # connection goes to the 'in' of the IPC pair. Reading input from this
72
+ # connection comes from the 'out' of the IPC pair.
73
+ #
74
+ # The broker shuttles messages between the two to support the many-to-many
75
+ # delivery pattern.
76
+ class BrokeredZMQConnection < Connection
67
77
  def self.default_options
68
78
  {
69
- 'host' => 'localhost',
70
- 'port' => 5672,
71
- 'insist' => true,
72
- 'vhost' => '/',
73
- 'username' => 'guest',
74
- 'password' => 'guest',
75
-
76
- # If a queue is created, these are the default parameters
77
- # for said queue type
78
- 'queue_passive' => false,
79
- 'queue_durable' => true,
80
- 'queue_exclusive' => false,
81
- 'queue_auto_delete' => false,
82
- 'queue_nowait' => true,
79
+ 'output_socket_type' => 'PUSH',
80
+ 'output_address' => lambda{|conn| "ipc://rflow.#{conn.uuid}.in"},
81
+ 'output_responsibility' => 'connect',
82
+ 'input_socket_type' => 'PULL',
83
+ 'input_address' => lambda{|conn| "ipc://rflow.#{conn.uuid}.out"},
84
+ 'input_responsibility' => 'connect',
83
85
  }
84
86
  end
87
+
88
+ # A brokered ZMQ connection requires one broker process.
89
+ def brokers
90
+ @brokers ||= [ZMQStreamer.new(self)]
91
+ end
92
+ end
93
+
94
+ # Represents the broker process configuration. No special parameters
95
+ # that can't be derived from the connection. Not persisted in the database -
96
+ # it's encapsulated in the nature of the connection.
97
+ class ZMQStreamer
98
+ attr_reader :connection
99
+
100
+ def initialize(connection)
101
+ @connection = connection
102
+ end
85
103
  end
86
104
 
87
105
  # for testing purposes
88
106
  class NullConfiguration
89
- attr_accessor :name, :uuid, :options
107
+ attr_accessor :name, :uuid, :options, :input_port_key, :output_port_key
90
108
  end
91
109
  end
92
110
  end
@@ -42,6 +42,16 @@ class RFlow
42
42
  @current_shard = default_shard
43
43
  end
44
44
 
45
+ # shortcut
46
+ def process(name, options = {}, &block)
47
+ shard(name, options.merge(:type => :process), &block)
48
+ end
49
+
50
+ # shortcut
51
+ def thread(name, options = {}, &block)
52
+ shard(name, options.merge(:type => :thread), &block)
53
+ end
54
+
45
55
  # DSL method to specify a component. Expects a name,
46
56
  # specification, and set of component specific options, that
47
57
  # must be marshallable into the database (i.e. should all be strings)
@@ -85,12 +95,12 @@ class RFlow
85
95
  def self.configure
86
96
  config_file = self.new
87
97
  yield config_file
88
- config_file.process
98
+ config_file.process_objects
89
99
  end
90
100
 
91
101
  # Method to process the 'DSL' objects into the config database
92
102
  # via ActiveRecord
93
- def process
103
+ def process_objects
94
104
  process_setting_specs
95
105
  process_shard_specs
96
106
  process_connection_specs
@@ -154,8 +164,8 @@ class RFlow
154
164
 
155
165
  # For each given connection, break up each input/output
156
166
  # component/port specification, ensure that the component
157
- # already exists in the database (by name). Also, only supports
158
- # ZeroMQ ipc sockets
167
+ # already exists in the database (by name). Chooses the best
168
+ # connection type for any pair of components.
159
169
  def process_connection_specs
160
170
  connection_specs.each do |spec|
161
171
  begin
@@ -175,11 +185,39 @@ class RFlow
175
185
  input_port = input_component.input_ports.find_or_initialize_by_name :name => spec[:input_port_name]
176
186
  input_port.save!
177
187
 
178
- RFlow::Configuration::ZMQConnection.create!(:name => spec[:name],
179
- :output_port_key => spec[:output_port_key],
180
- :input_port_key => spec[:input_port_key],
181
- :output_port => output_port,
182
- :input_port => input_port)
188
+ output_shards = output_component.shard.count
189
+ input_shards = input_component.shard.count
190
+
191
+ in_shard_connection = output_component.shard == input_component.shard
192
+ one_to_one = output_shards == 1 && input_shards == 1
193
+ one_to_many = output_shards == 1 && input_shards > 1
194
+ many_to_one = output_shards > 1 && input_shards == 1
195
+ many_to_many = output_shards > 1 && input_shards > 1
196
+
197
+ connection_type = many_to_many ? RFlow::Configuration::BrokeredZMQConnection : RFlow::Configuration::ZMQConnection
198
+
199
+ conn = connection_type.create!(:name => spec[:name],
200
+ :output_port_key => spec[:output_port_key],
201
+ :input_port_key => spec[:input_port_key],
202
+ :output_port => output_port,
203
+ :input_port => input_port)
204
+
205
+ # bind on the cardinality-1 side, connect on the cardinality-n side
206
+ if in_shard_connection
207
+ conn.options['output_responsibility'] = 'connect'
208
+ conn.options['input_responsibility'] = 'bind'
209
+ conn.options['output_address'] = "inproc://rflow.#{conn.uuid}"
210
+ conn.options['input_address'] = "inproc://rflow.#{conn.uuid}"
211
+ elsif many_to_one
212
+ conn.options['output_responsibility'] = 'connect'
213
+ conn.options['input_responsibility'] = 'bind'
214
+ elsif one_to_many
215
+ conn.options['output_responsibility'] = 'bind'
216
+ conn.options['input_responsibility'] = 'connect'
217
+ end
218
+
219
+ conn.save!
220
+ conn
183
221
  rescue Exception => e
184
222
  # TODO: Figure out why an ArgumentError doesn't put the
185
223
  # offending message into e.message, even though it is printed