rflow 1.0.0a3 → 1.0.0a4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 82d81c66d6a26aa814d88893e294951247434585
4
- data.tar.gz: 03edc9684d65ffb4e0729000f8948f8dbe69c16c
3
+ metadata.gz: 91749755100dd4bfa1eb64f8b455e42cbb9e6b1a
4
+ data.tar.gz: 537aa8a39d393a99e8b4f8bf674eb8d38eb87169
5
5
  SHA512:
6
- metadata.gz: 7ca15c17aa77a9e15d5309c5e9dd22b533b0108ca6f8c29031899f8c824f3c4939c5c37e57bbe51db5834eefc40f0f52a840874af9660fbbe07729b8a66ea3d0
7
- data.tar.gz: a8f7a76ca8eba6aec45cdf56433792938610e11332dd0523917795dd4a59edc84660d80b08a48a67c222f3a312ba6837b238a36e9db9e982b62fac2742f93417
6
+ metadata.gz: f107c920c6255585fe9508c50cdea49dc0c3f39f8e8cbd321cb3ec04f3997c1e733923d0fcc153dc632a134af650ec37fe55402a266f87a093ae19b431d5dcde
7
+ data.tar.gz: 537459d7fed23e2f025b8b06f1fa22738b39fc1235bd54376edef8d880683915769e47d5c8b94f90979bd0add24726c1749cd010f02241f3385a17d2d4ca14aa
data/bin/rflow CHANGED
@@ -135,7 +135,7 @@ when 'load'
135
135
  exit 1
136
136
  end
137
137
 
138
- startup_logger.warn "Config database '#{options[:config_database_path]}' not found, creating"
138
+ startup_logger.info "Creating config database '#{options[:config_database_path]}'"
139
139
  begin
140
140
  config = RFlow::Configuration::initialize_database(options[:config_database_path], options[:config_file_path])
141
141
  rescue Exception => e
@@ -30,14 +30,3 @@ class RFlow::Components::FileOutput < RFlow::Component
30
30
  end
31
31
  end
32
32
  end
33
-
34
- class SimpleComponent < RFlow::Component
35
- input_port :in
36
- output_port :out
37
-
38
- def configure!(config); end
39
- def run!; end
40
- def process_message(input_port, input_port_key, connection, message); end
41
- def shutdown!; end
42
- def cleanup!; end
43
- end
@@ -22,17 +22,6 @@ class RFlow::Components::FileOutput < RFlow::Component
22
22
  end
23
23
  end
24
24
 
25
- class SimpleComponent < RFlow::Component
26
- input_port :in
27
- output_port :out
28
-
29
- def configure!(config); end
30
- def run!; end
31
- def process_message(input_port, input_port_key, connection, message); end
32
- def shutdown!; end
33
- def cleanup!; end
34
- end
35
-
36
25
  http_request_schema =<<EOS
37
26
  {
38
27
  "type": "record",
data/lib/rflow.rb CHANGED
@@ -53,6 +53,7 @@ class RFlow
53
53
  end
54
54
 
55
55
  def self.start_master_node
56
+ RFlow.logger.info "#{configuration['rflow.application_name']} starting"
56
57
  @master = Master.new(configuration)
57
58
  master.daemonize! if @daemonize
58
59
  master.run! # blocks until EventMachine stops
@@ -78,6 +78,8 @@ class RFlow
78
78
  end
79
79
 
80
80
  def handle_signals
81
+ Signal.trap 'SIGCHLD', 'DEFAULT' # make sure child process can run subshells
82
+
81
83
  ['SIGTERM', 'SIGINT', 'SIGQUIT'].each do |signal|
82
84
  trap_signal(signal) do
83
85
  shutdown! signal
@@ -36,7 +36,7 @@ class RFlow
36
36
  # attempt to constantize the specification into a different
37
37
  # class. Future releases will support external (i.e. non-managed
38
38
  # components), but the current stuff only supports Ruby classes
39
- def build(config)
39
+ def build(worker, config)
40
40
  raise NotImplementedError, "Non-managed components not yet implemented for component '#{config.name}' as '#{config.specification}' (#{config.uuid})" unless config.managed?
41
41
 
42
42
  RFlow.logger.debug "Instantiating component '#{config.name}' as '#{config.specification}' (#{config.uuid})"
@@ -50,7 +50,7 @@ class RFlow
50
50
  component_class = config.specification.constantize
51
51
  end
52
52
 
53
- component_class.new(uuid: config.uuid, name: config.name).tap do |component|
53
+ component_class.new(worker: worker, uuid: config.uuid, name: config.name).tap do |component|
54
54
  config.input_ports.each {|p| component.configure_input_port! p.name, uuid: p.uuid }
55
55
  config.output_ports.each {|p| component.configure_output_port! p.name, uuid: p.uuid }
56
56
 
@@ -75,17 +75,20 @@ class RFlow
75
75
  end
76
76
 
77
77
  attr_accessor :uuid, :name
78
- attr_reader :ports
78
+ attr_reader :ports, :worker
79
79
 
80
80
  def initialize(args = {})
81
81
  @name = args[:name]
82
82
  @uuid = args[:uuid]
83
+ @worker = args[:worker]
83
84
  @ports = PortCollection.new
84
85
 
85
86
  self.class.defined_input_ports.each {|name, _| ports << InputPort.new(self, name: name) }
86
87
  self.class.defined_output_ports.each {|name, _| ports << OutputPort.new(self, name: name) }
87
88
  end
88
89
 
90
+ def shard; worker.shard if worker; end
91
+
89
92
  # Returns a list of connected input ports. Each port will have
90
93
  # one or more keys associated with a particular connection.
91
94
  def input_ports; ports.by_type["RFlow::Component::InputPort"]; end
@@ -104,7 +104,7 @@ class RFlow
104
104
 
105
105
  # for testing purposes
106
106
  class NullConfiguration
107
- attr_accessor :name, :uuid, :options, :input_port_key, :output_port_key
107
+ attr_accessor :name, :uuid, :options, :input_port_key, :output_port_key, :delivery
108
108
  end
109
109
  end
110
110
  end
@@ -7,6 +7,9 @@ class CreateConnections < ActiveRecord::Migration
7
7
  # To allow for multiple types of connections
8
8
  t.string :type
9
9
 
10
+ # round-robin or broadcast
11
+ t.string :delivery
12
+
10
13
  # Data flows from an output port to an input port
11
14
  t.string :output_port_uuid
12
15
  t.string :output_port_key, :default => '0'
@@ -74,12 +74,14 @@ class RFlow
74
74
  # connect 'componentA#arrayport[2]' => 'componentB#in[1]'
75
75
  # Uses the model to assign random UUIDs
76
76
  def connect(hash)
77
- hash.each do |output_string, input_string|
77
+ delivery = hash[:delivery] || 'round-robin'
78
+ hash.except(:delivery).each do |output_string, input_string|
78
79
  output_component_name, output_port_name, output_port_key = parse_connection_string(output_string)
79
80
  input_component_name, input_port_name, input_port_key = parse_connection_string(input_string)
80
81
 
81
82
  connection_specs << {
82
83
  :name => output_string + '=>' + input_string,
84
+ :delivery => delivery,
83
85
  :output_component_name => output_component_name,
84
86
  :output_port_name => output_port_name, :output_port_key => output_port_key,
85
87
  :output_string => output_string,
@@ -197,6 +199,7 @@ class RFlow
197
199
  connection_type = many_to_many ? RFlow::Configuration::BrokeredZMQConnection : RFlow::Configuration::ZMQConnection
198
200
 
199
201
  conn = connection_type.create!(:name => spec[:name],
202
+ :delivery => spec[:delivery],
200
203
  :output_port_key => spec[:output_port_key],
201
204
  :input_port_key => spec[:input_port_key],
202
205
  :output_port => output_port,
@@ -216,6 +219,18 @@ class RFlow
216
219
  conn.options['input_responsibility'] = 'connect'
217
220
  end
218
221
 
222
+ case spec[:delivery]
223
+ when 'broadcast'
224
+ conn.options['output_socket_type'] = 'PUB'
225
+ conn.options['input_socket_type'] = 'SUB'
226
+ when 'round-robin'
227
+ conn.options['output_socket_type'] = 'PUSH'
228
+ conn.options['input_socket_type'] = 'PULL'
229
+ else
230
+ raise RFlow::Configuration::Connection::ConnectionInvalid,
231
+ "Delivery type '#{spec[:delivery]}' unknown at #{spec[:config_line]}"
232
+ end
233
+
219
234
  conn.save!
220
235
  conn
221
236
  rescue Exception => e
@@ -6,6 +6,7 @@ end
6
6
  require 'rflow/connection'
7
7
  require 'rflow/message'
8
8
  require 'rflow/broker'
9
+ require 'sys/filesystem'
9
10
 
10
11
  class RFlow
11
12
  module Connections
@@ -42,8 +43,13 @@ class RFlow
42
43
 
43
44
  def connect_input!
44
45
  RFlow.logger.debug "Connecting input #{uuid} with #{options.find_all {|k, v| k.to_s =~ /input/}}"
45
- self.input_socket = zmq_context.socket(ZMQ.const_get(options['input_socket_type'].to_sym))
46
+ check_address(options['input_address'])
47
+
48
+ self.input_socket = zmq_context.socket(ZMQ.const_get(options['input_socket_type']))
46
49
  input_socket.send(options['input_responsibility'].to_sym, options['input_address'])
50
+ if config.delivery == 'broadcast'
51
+ input_socket.setsockopt(ZMQ::SUBSCRIBE, '') # request all messages
52
+ end
47
53
 
48
54
  input_socket.on(:message) do |*message_parts|
49
55
  begin
@@ -61,7 +67,9 @@ class RFlow
61
67
 
62
68
  def connect_output!
63
69
  RFlow.logger.debug "Connecting output #{uuid} with #{options.find_all {|k, v| k.to_s =~ /output/}}"
64
- self.output_socket = zmq_context.socket(ZMQ.const_get(options['output_socket_type'].to_sym))
70
+ check_address(options['output_address'])
71
+
72
+ self.output_socket = zmq_context.socket(ZMQ.const_get(options['output_socket_type']))
65
73
  output_socket.send(options['output_responsibility'].to_sym, options['output_address'].to_s)
66
74
  output_socket
67
75
  end
@@ -99,6 +107,23 @@ class RFlow
99
107
 
100
108
  true
101
109
  end
110
+
111
+ def check_address(address)
112
+ # make sure we're not trying to create IPC sockets in an NFS share
113
+ # because that works poorly
114
+ if address.start_with?('ipc://')
115
+ filename = address[6..-1]
116
+ mount_point = Sys::Filesystem.mount_point(File.dirname(filename))
117
+ return unless mount_point
118
+ mount_type = Sys::Filesystem.mounts.find {|m| m.mount_point == mount_point }.mount_type
119
+ return unless mount_type
120
+
121
+ case mount_type
122
+ when 'vmhgfs', 'vboxsf', 'nfs' # vmware, virtualbox, nfs
123
+ raise ArgumentError, "Cannot safely create IPC sockets in network filesystem '#{mount_point}' of type #{mount_type}"
124
+ end
125
+ end
126
+ end
102
127
  end
103
128
 
104
129
  class BrokeredZMQConnection < ZMQConnection
@@ -122,11 +147,20 @@ class RFlow
122
147
  RFlow.logger.debug { "Creating a new ZeroMQ context; ZeroMQ version is #{version[:major]}.#{version[:minor]}.#{version[:patch]}" }
123
148
  @context = ZMQ::Context.new
124
149
  RFlow.logger.debug { "Connecting message broker to route from #{connection.options['output_address']} to #{connection.options['input_address']}" }
125
- @back = context.socket(ZMQ::PULL)
126
- back.bind(connection.options['output_address'])
127
- @front = context.socket(ZMQ::PUSH)
128
- front.bind(connection.options['input_address'])
129
- ZMQ::Proxy.new(back, front)
150
+
151
+ @front = case connection.options['output_socket_type']
152
+ when 'PUSH'; context.socket(ZMQ::PULL)
153
+ when 'PUB'; context.socket(ZMQ::XSUB)
154
+ else raise ArgumentError, "Unknown output socket type #{connection.options['output_socket_type']}"
155
+ end
156
+ @back = case connection.options['input_socket_type']
157
+ when 'PULL'; context.socket(ZMQ::PUSH)
158
+ when 'SUB'; context.socket(ZMQ::XPUB)
159
+ else raise ArgumentError, "Unknown input socket type #{connection.options['input_socket_type']}"
160
+ end
161
+ front.bind(connection.options['output_address'])
162
+ back.bind(connection.options['input_address'])
163
+ ZMQ::Proxy.new(front, back)
130
164
  back.close
131
165
  front.close
132
166
  rescue Exception => e
data/lib/rflow/shard.rb CHANGED
@@ -9,22 +9,29 @@ class RFlow
9
9
  # start an EventMachine reactor.
10
10
  class Shard
11
11
  class Worker < ChildProcess
12
+ attr_reader :shard, :index
13
+
12
14
  def initialize(shard, index = 1)
13
15
  super("#{shard.name}-#{index}", 'Worker')
14
16
  @shard = shard
17
+ @index = index
15
18
 
16
19
  # build at initialize time to fail fast
17
- @components = shard.config.components.map {|config| Component.build(config) }
20
+ @components = shard.config.components.map {|config| Component.build(self, config) }
18
21
  end
19
22
 
20
23
  def run_process
21
24
  EM.run do
22
- # TODO: Monitor the master
23
- configure_components!
24
- connect_components!
25
- # TODO: need to do proper node synchronization for ZMQ to remove sleep
26
- sleep 1
27
- run_components!
25
+ begin
26
+ # TODO: Monitor the master
27
+ configure_components!
28
+ connect_components!
29
+ # TODO: need to do proper node synchronization for ZMQ to remove sleep
30
+ sleep 1
31
+ run_components!
32
+ rescue Exception => e
33
+ RFlow.logger.error "Error in worker, shutting down: #{e.class.name}: #{e.message}, because: #{e.backtrace.inspect}"
34
+ end
28
35
  end
29
36
 
30
37
  RFlow.logger.info "Shutting down worker after EM stopped"
@@ -32,7 +39,7 @@ class RFlow
32
39
 
33
40
  def configure_components!
34
41
  RFlow.logger.debug "Configuring components"
35
- @components.zip(@shard.config.components.map(&:options)).each do |(component, config)|
42
+ @components.zip(shard.config.components.map(&:options)).each do |(component, config)|
36
43
  RFlow.logger.debug "Configuring component '#{component.name}' (#{component.uuid})"
37
44
  component.configure! config
38
45
  end
data/lib/rflow/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class RFlow
2
- VERSION = "1.0.0a3"
2
+ VERSION = "1.0.0a4"
3
3
  end
data/rflow.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
 
22
22
  s.add_dependency "uuidtools", "~> 2.1"
23
23
  s.add_dependency "log4r", "~> 1.1"
24
+ s.add_dependency "sys-filesystem", "~> 1.1.2"
24
25
 
25
26
  s.add_dependency "sqlite3", "~> 1.3"
26
27
  s.add_dependency "activerecord", "~> 3.2"
@@ -30,13 +30,19 @@ class RFlow::Components::FileOutput < RFlow::Component
30
30
  end
31
31
  end
32
32
 
33
- class SimpleComponent < RFlow::Component
33
+ class RFlow::Components::DateShellComponent < RFlow::Component
34
34
  input_port :in
35
35
  output_port :out
36
36
 
37
37
  def configure!(config); end
38
38
  def run!; end
39
- def process_message(input_port, input_port_key, connection, message); end
39
+ def process_message(input_port, input_port_key, connection, message)
40
+ out.send_message(
41
+ RFlow::Message.new('RFlow::Message::Data::Raw').tap do |m|
42
+ m.provenance = message.provenance
43
+ m.data.raw = `date`
44
+ end)
45
+ end
40
46
  def shutdown!; end
41
47
  def cleanup!; end
42
48
  end
@@ -39,25 +39,30 @@ class RFlow
39
39
  described_class.configure do |c|
40
40
  c.component 'first', 'First'
41
41
  c.component 'second', 'Second'
42
+ c.component 'third', 'Third'
42
43
  c.connect 'first#out' => 'second#in'
43
44
  c.connect 'first#out' => 'second#in[inkey]'
44
45
  c.connect 'first#out[outkey]' => 'second#in'
45
46
  c.connect 'first#out[outkey]' => 'second#in[inkey]'
47
+ c.connect 'second#out' => 'third#in', :delivery => 'broadcast'
46
48
  end
47
49
 
48
50
  expect(Shard).to have(1).shard
49
- expect(Component).to have(2).components
50
- expect(Port).to have(2).ports
51
- expect(Connection).to have(4).connections
51
+ expect(Component).to have(3).components
52
+ expect(Port).to have(4).ports
53
+ expect(Connection).to have(5).connections
54
+
55
+ first_component = Component.find_by_name('first').tap do |c|
56
+ expect(c.specification).to eq('First')
57
+ expect(c).to have(0).input_ports
58
+ expect(c).to have(1).output_port
52
59
 
53
- first_component = Component.where(name: 'first').first.tap do |component|
54
- expect(component.specification).to eq('First')
55
- expect(component).to have(0).input_ports
56
- expect(component).to have(1).output_port
57
- expect(component.output_ports.first.name).to eq('out')
60
+ out_port = c.output_ports.first
61
+ expect(out_port.name).to eq('out')
58
62
 
59
- expect(component.output_ports.first).to have(4).connections
60
- component.output_ports.first.connections.tap do |connections|
63
+ expect(out_port).to have(4).connections
64
+ out_port.connections.tap do |connections|
65
+ connections.each {|c| expect(c.delivery).to eq 'round-robin' }
61
66
  expect(connections[0].input_port_key).to be_nil
62
67
  expect(connections[0].output_port_key).to be_nil
63
68
  expect(connections[1].input_port_key).to eq('inkey')
@@ -69,14 +74,38 @@ class RFlow
69
74
  end
70
75
  end
71
76
 
72
- Component.where(name: 'second').first.tap do |component|
73
- expect(component.specification).to eq('Second')
74
- expect(component).to have(1).input_port
75
- expect(component.input_ports.first.name).to eq('in')
76
- expect(component).to have(0).output_ports
77
+ second_component = Component.find_by_name('second').tap do |c|
78
+ expect(c.specification).to eq('Second')
79
+ expect(c).to have(1).input_port
80
+ expect(c).to have(1).output_port
81
+
82
+ in_port = c.input_ports.first
83
+ expect(in_port.name).to eq('in')
84
+
85
+ out_port = c.output_ports.first
86
+ expect(out_port.name).to eq('out')
77
87
 
78
- expect(component.input_ports.first).to have(4).connections
79
- expect(component.input_ports.first.connections).to eq(first_component.output_ports.first.connections)
88
+ expect(in_port).to have(4).connections
89
+ expect(in_port.connections).to eq(first_component.output_ports.first.connections)
90
+
91
+ expect(out_port).to have(1).connections
92
+ out_port.connections.first.tap do |connection|
93
+ expect(connection.delivery).to eq 'broadcast'
94
+ expect(connection.input_port_key).to be_nil
95
+ expect(connection.output_port_key).to be_nil
96
+ end
97
+ end
98
+
99
+ Component.find_by_name('third').tap do |c|
100
+ expect(c.specification).to eq('Third')
101
+ expect(c).to have(1).input_port
102
+ expect(c).to have(0).output_ports
103
+
104
+ in_port = c.input_ports.first
105
+ expect(in_port.name).to eq('in')
106
+
107
+ expect(in_port).to have(1).connections
108
+ expect(in_port.connections).to eq(second_component.output_ports.first.connections)
80
109
  end
81
110
  end
82
111
 
@@ -316,6 +345,76 @@ class RFlow
316
345
  end
317
346
  end
318
347
 
348
+ it "should generate PUB-SUB ipc ZeroMQ connections for one-to-many broadcast connections" do
349
+ described_class.configure do |c|
350
+
351
+ c.shard "s1", :process => 1 do |s|
352
+ s.component 'first', 'First', :opt1 => 'opt1'
353
+ end
354
+
355
+ c.shard "s2", :process => 3 do |s|
356
+ s.component 'second', 'Second', :opt1 => 'opt1', "opt2" => "opt2"
357
+ end
358
+
359
+ c.connect 'first#out' => 'second#in', :delivery => 'broadcast'
360
+ end
361
+
362
+ expect(Shard).to have(2).shards
363
+ expect(Component).to have(2).components
364
+ expect(Port).to have(2).ports
365
+ expect(Connection).to have(1).connections
366
+
367
+ Connection.first.tap do |conn|
368
+ expect(conn.type).to eq('RFlow::Configuration::ZMQConnection')
369
+ expect(conn.name).to eq('first#out=>second#in')
370
+ expect(conn.output_port_key).to be_nil
371
+ expect(conn.input_port_key).to be_nil
372
+ conn.options.tap do |opts|
373
+ expect(opts['output_socket_type']).to eq('PUB')
374
+ expect(opts['output_address']).to eq("ipc://rflow.#{conn.uuid}")
375
+ expect(opts['output_responsibility']).to eq('bind')
376
+ expect(opts['input_socket_type']).to eq('SUB')
377
+ expect(opts['input_address']).to eq("ipc://rflow.#{conn.uuid}")
378
+ expect(opts['input_responsibility']).to eq('connect')
379
+ end
380
+ end
381
+ end
382
+
383
+ it "should generate PUB-SUB brokered ZeroMQ connections for many-to-many broadcast connections" do
384
+ described_class.configure do |c|
385
+
386
+ c.shard "s1", :process => 3 do |s|
387
+ s.component 'first', 'First', :opt1 => 'opt1'
388
+ end
389
+
390
+ c.shard "s2", :process => 3 do |s|
391
+ s.component 'second', 'Second', :opt1 => 'opt1', "opt2" => "opt2"
392
+ end
393
+
394
+ c.connect 'first#out' => 'second#in', :delivery => 'broadcast'
395
+ end
396
+
397
+ expect(Shard).to have(2).shards
398
+ expect(Component).to have(2).components
399
+ expect(Port).to have(2).ports
400
+ expect(Connection).to have(1).connections
401
+
402
+ Connection.first.tap do |conn|
403
+ expect(conn.type).to eq('RFlow::Configuration::BrokeredZMQConnection')
404
+ expect(conn.name).to eq('first#out=>second#in')
405
+ expect(conn.output_port_key).to be_nil
406
+ expect(conn.input_port_key).to be_nil
407
+ conn.options.tap do |opts|
408
+ expect(opts['output_socket_type']).to eq('PUB')
409
+ expect(opts['output_address']).to eq("ipc://rflow.#{conn.uuid}.in")
410
+ expect(opts['output_responsibility']).to eq('connect')
411
+ expect(opts['input_socket_type']).to eq('SUB')
412
+ expect(opts['input_address']).to eq("ipc://rflow.#{conn.uuid}.out")
413
+ expect(opts['input_responsibility']).to eq('connect')
414
+ end
415
+ end
416
+ end
417
+
319
418
  it "should not allow two components with the same name" do
320
419
  expect {
321
420
  described_class.configure do |c|
data/spec/rflow_spec.rb CHANGED
@@ -3,8 +3,84 @@ require 'open3'
3
3
  require 'rflow'
4
4
 
5
5
  describe RFlow do
6
+ def write_config_file(content)
7
+ File.open(config_file_name, 'w+') {|file| file.write content }
8
+ end
9
+
10
+ def execute_rflow(args)
11
+ stdout, stderr, status = Open3.capture3("bundle exec rflow #{args}")
12
+ {:stdout => stdout, :stderr => stderr, :status => status}
13
+ end
14
+
15
+ def load_database
16
+ execute_rflow("load -d #{db_file_name} -c #{config_file_name}").tap do |result|
17
+ expect(result[:status].exitstatus).to eq(0)
18
+ expect(result[:stderr]).to eq('')
19
+ expect(result[:stdout]).to match /Successfully initialized database.*#{db_file_name}/
20
+ end
21
+ end
22
+
23
+ def start_rflow
24
+ execute_rflow("start -d #{db_file_name} -e #{@extensions_file_name}").tap do |result|
25
+ expect(result[:status].exitstatus).to eq(0)
26
+ expect(result[:stderr]).to eq('')
27
+ expect(result[:stdout]).not_to match /error/i
28
+ end
29
+ end
30
+
31
+ def get_log_pids(logfile)
32
+ log_contents = File.read(logfile).chomp
33
+ log_lines = log_contents.split("\n")
34
+
35
+ log_lines.each {|line| expect(line).not_to match /^ERROR/ }
36
+ log_lines.each {|line| expect(line).not_to match /^DEBUG/ }
37
+
38
+ # Grab all the pids from the log, which seems to be the only
39
+ # reliable way to get them
40
+ log_lines.map {|line| /\((\d+)\)/.match(line)[1].to_i }.uniq
41
+ end
42
+
43
+ def run_and_shutdown(app_name, expected_worker_count)
44
+ r = start_rflow
45
+ sleep 2 # give the daemon a chance to finish
46
+
47
+ log_pids = get_log_pids("log/#{app_name}.log")
48
+
49
+ initial_pid = r[:status].pid
50
+ master_pid = File.read("run/#{app_name}.pid").chomp.to_i
51
+ worker_pids = log_pids - [initial_pid, master_pid]
52
+
53
+ expect(log_pids).to include initial_pid
54
+ expect(log_pids).to include master_pid
55
+
56
+ expect(worker_pids).to have(expected_worker_count).pids
57
+ expect(worker_pids).not_to include 0
58
+
59
+ expect { Process.kill(0, initial_pid) }.to raise_error(Errno::ESRCH)
60
+ ([master_pid] + worker_pids).each do |pid|
61
+ expect(Process.kill(0, pid)).to eq(1)
62
+ end
63
+
64
+ yield # verify output
65
+
66
+ # Terminate the master
67
+ expect(Process.kill("TERM", master_pid)).to eq(1)
68
+
69
+ # Make sure everything is dead after a second
70
+ sleep 2
71
+ ([master_pid] + worker_pids).each do |pid|
72
+ expect { Process.kill(0, pid) }.to raise_error(Errno::ESRCH)
73
+ end
74
+ rescue Exception => e
75
+ Process.kill("TERM", master_pid) if master_pid
76
+ raise
77
+ end
78
+
79
+ let(:config_file_name) { 'input_config' }
80
+ let(:db_file_name) { 'outdb' }
81
+
6
82
  before(:all) do
7
- @extensions_file_name = File.join(File.dirname(__FILE__), 'fixtures', 'extensions_ints.rb')
83
+ @extensions_file_name = File.join(File.dirname(__FILE__), 'fixtures', 'extensions.rb')
8
84
  end
9
85
 
10
86
  before(:each) do
@@ -43,7 +119,7 @@ describe RFlow do
43
119
 
44
120
  it "should run a non-sharded workflow" do
45
121
  run_rflow_with_dsl do |c|
46
- c.setting 'rflow.log_level', 'FATAL'
122
+ c.setting 'rflow.log_level', 'ERROR'
47
123
  c.setting 'rflow.application_directory_path', @temp_directory_path
48
124
  c.setting 'rflow.application_name', 'nonsharded_test'
49
125
 
@@ -84,7 +160,7 @@ describe RFlow do
84
160
 
85
161
  it "should run a sharded workflow" do
86
162
  run_rflow_with_dsl do |c|
87
- c.setting 'rflow.log_level', 'FATAL'
163
+ c.setting 'rflow.log_level', 'ERROR'
88
164
  c.setting 'rflow.application_directory_path', @temp_directory_path
89
165
  c.setting 'rflow.application_name', 'sharded_test'
90
166
 
@@ -130,38 +206,62 @@ describe RFlow do
130
206
  expect(File.readlines(file_name).map(&:to_i).sort).to eq(expected_contents.sort)
131
207
  end
132
208
  end
209
+
210
+ it "should deliver broadcast messages to every copy of a shard" do
211
+ run_rflow_with_dsl do |c|
212
+ c.setting 'rflow.log_level', 'FATAL'
213
+ c.setting 'rflow.application_directory_path', @temp_directory_path
214
+ c.setting 'rflow.application_name', 'sharded_broadcast_test'
215
+
216
+ c.shard 's1', :process => 1 do |s|
217
+ s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
218
+ end
219
+
220
+ c.shard 's2', :process => 2 do |s|
221
+ s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 1, 'finish' => 11, 'step' => 3
222
+ end
223
+
224
+ c.shard 's3', :type => :process, :count => 3 do |s|
225
+ s.component 'broadcast_output', 'RFlow::Components::FileOutput', 'output_file_path' => 'broadcast'
226
+ s.component 'roundrobin_output', 'RFlow::Components::FileOutput', 'output_file_path' => 'round-robin'
227
+ end
228
+
229
+ c.connect 'generate_ints1#out' => 'broadcast_output#in', :delivery => 'broadcast'
230
+ c.connect 'generate_ints2#out' => 'broadcast_output#in', :delivery => 'broadcast'
231
+ c.connect 'generate_ints1#out' => 'roundrobin_output#in'
232
+ c.connect 'generate_ints2#out' => 'roundrobin_output#in'
233
+ end
234
+
235
+ output_files = {
236
+ 'broadcast' => ([0, 3, 6, 9] * 3) + ([1, 4, 7, 10] * 6),
237
+ 'round-robin' => [0, 3, 6, 9] + ([1, 4, 7, 10] * 2)
238
+ }
239
+
240
+ expect(RFlow.master).to have(3).shards
241
+ expect(RFlow.master.shards.map(&:count)).to eq([1, 2, 3])
242
+ expect(RFlow.master.shards.map(&:workers).map(&:count)).to eq([1, 2, 3])
243
+
244
+ output_files.each do |file_name, expected_contents|
245
+ expect(File.exist?(File.join(@temp_directory_path, file_name))).to be true
246
+ expect(File.readlines(file_name).map(&:to_i).sort).to eq(expected_contents.sort)
247
+ end
248
+ end
133
249
  end
134
250
  end
135
251
 
136
252
  context "when executing via the rflow binary" do
137
- def execute_rflow(args)
138
- stdout, stderr, status = Open3.capture3("bundle exec rflow #{args}")
139
- {:stdout => stdout, :stderr => stderr, :status => status}
140
- end
141
-
142
253
  context "with a simple ruby DSL config file" do
143
- let(:config_file_name) { 'input_config' }
144
- let(:db_file_name) { 'outdb' }
145
-
146
254
  before(:each) do
147
- File.open(config_file_name, 'w+') do |file|
148
- file.write <<-EOF
149
- RFlow::Configuration::RubyDSL.configure do |c|
150
- c.setting 'mysetting', 'myvalue'
151
- end
152
- EOF
153
- end
255
+ write_config_file <<-EOF
256
+ RFlow::Configuration::RubyDSL.configure do |c|
257
+ c.setting 'mysetting', 'myvalue'
258
+ end
259
+ EOF
154
260
  end
155
261
 
156
262
  it "should load a ruby dsl file into a sqlite DB" do
157
- r = execute_rflow("load -d #{db_file_name} -c #{config_file_name}")
158
-
159
- # Make sure that the process execution worked
160
- expect(r[:status].exitstatus).to eq(0)
161
- expect(r[:stderr]).to eq('')
162
- expect(r[:stdout]).to match /Successfully initialized database.*#{db_file_name}/
263
+ load_database
163
264
 
164
- # Make sure the config actually got loaded
165
265
  ActiveRecord::Base.establish_connection adapter: "sqlite3", database: db_file_name
166
266
  expect(RFlow::Configuration::Setting.where(:name => 'mysetting').first.value).to eq('myvalue')
167
267
  end
@@ -178,46 +278,70 @@ describe RFlow do
178
278
  end
179
279
  end
180
280
 
281
+ context "with a component that runs subshells" do
282
+ let(:app_name) { 'sharded_subshell_test' }
283
+
284
+ before(:each) do
285
+ write_config_file <<-EOF
286
+ RFlow::Configuration::RubyDSL.configure do |c|
287
+ c.setting('rflow.log_level', 'INFO')
288
+ c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
289
+ c.setting('rflow.application_name', '#{app_name}')
290
+
291
+ c.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
292
+ c.component 'subshell_date', 'RFlow::Components::DateShellComponent'
293
+ c.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
294
+
295
+ c.connect 'generate_ints#out' => 'subshell_date#in'
296
+ c.connect 'subshell_date#out' => 'output#in'
297
+ end
298
+ EOF
299
+
300
+ load_database
301
+ end
302
+
303
+ it "should run successfully daemonize and run in the background" do
304
+ run_and_shutdown app_name, 1 do # 1 default worker
305
+ expect(File.exist?(File.join(@temp_directory_path, 'out1'))).to be true
306
+ File.readlines('out1').each {|line| expect(line).to match /\w+ \w+ \d+ \d+:\d+:\d+ \w+ \d+/ }
307
+ end
308
+ end
309
+ end
310
+
181
311
  context "with a complex, sharded ruby DSL config file" do
182
- let(:config_file_name) { 'input_config' }
183
- let(:db_file_name) { 'config_db' }
184
312
  let(:app_name) { 'sharded_bin_test' }
185
313
 
186
314
  before(:each) do
187
- File.open(config_file_name, 'w+') do |file|
188
- file.write <<-EOF
189
- RFlow::Configuration::RubyDSL.configure do |c|
190
- c.setting('rflow.log_level', 'INFO')
191
- c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
192
- c.setting('rflow.application_name', '#{app_name}')
193
-
194
- c.shard 's1', :process => 3 do |s|
195
- s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
196
- end
197
- c.shard 's2', :type => :process, :count => 2 do |s|
198
- s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
199
- end
200
- c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
201
- c.shard 's3', :process => 2 do |s|
202
- s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
203
- s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
204
- end
205
- c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
206
- c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
207
-
208
- c.connect 'generate_ints1#out' => 'output1#in'
209
- c.connect 'generate_ints2#out' => 'output2#in'
210
- c.connect 'generate_ints3#out' => 'output3#in'
211
- c.connect 'generate_ints1#out' => 'output_all#in'
212
- c.connect 'generate_ints2#out' => 'output_all#in'
213
- c.connect 'generate_ints3#out' => 'output_all#in'
315
+ write_config_file <<-EOF
316
+ RFlow::Configuration::RubyDSL.configure do |c|
317
+ c.setting('rflow.log_level', 'INFO')
318
+ c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
319
+ c.setting('rflow.application_name', '#{app_name}')
320
+
321
+ c.shard 's1', :process => 3 do |s|
322
+ s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
214
323
  end
215
- EOF
216
- end
217
- r = execute_rflow("load -d #{db_file_name} -c #{config_file_name}")
218
- expect(r[:status].exitstatus).to eq(0)
219
- expect(r[:stderr]).to eq('')
220
- expect(r[:stdout]).to match /Successfully initialized database.*#{db_file_name}/
324
+ c.shard 's2', :type => :process, :count => 2 do |s|
325
+ s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
326
+ end
327
+ c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
328
+ c.shard 's3', :process => 2 do |s|
329
+ s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
330
+ s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
331
+ end
332
+ c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
333
+ c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
334
+
335
+ c.connect 'generate_ints1#out' => 'output1#in'
336
+ c.connect 'generate_ints2#out' => 'output2#in'
337
+ c.connect 'generate_ints3#out' => 'output3#in'
338
+ c.connect 'generate_ints1#out' => 'output_all#in'
339
+ c.connect 'generate_ints2#out' => 'output_all#in'
340
+ c.connect 'generate_ints3#out' => 'output_all#in'
341
+ end
342
+ EOF
343
+
344
+ load_database
221
345
  end
222
346
 
223
347
  it "should not start if the components aren't loaded" do
@@ -229,63 +353,18 @@ describe RFlow do
229
353
  end
230
354
 
231
355
  it "should daemonize and run in the background" do
232
- begin
233
- r = execute_rflow("start -d #{db_file_name} -e #{@extensions_file_name}")
234
-
235
- expect(r[:status].exitstatus).to eq(0)
236
- expect(r[:stderr]).to eq('')
237
- expect(r[:stdout]).not_to match /error/i
238
-
239
- sleep 2 # give the daemon a chance to finish
240
-
241
- log_contents = File.read("log/#{app_name}.log").chomp
242
- log_lines = log_contents.split("\n")
243
-
244
- log_lines.each {|line| expect(line).not_to match /^ERROR/ }
245
- log_lines.each {|line| expect(line).not_to match /^DEBUG/ }
246
-
247
- # Grab all the pids from the log, which seems to be the only
248
- # reliable way to get them
249
- log_pids = log_lines.map {|line| /\((\d+)\)/.match(line)[1].to_i }.uniq
250
-
251
- initial_pid = r[:status].pid
252
- master_pid = File.read("run/#{app_name}.pid").chomp.to_i
253
- worker_pids = log_pids - [initial_pid, master_pid]
254
-
255
- expect(log_pids).to include initial_pid
256
- expect(log_pids).to include master_pid
257
-
258
- expect(worker_pids).to have(10).pids # 1+3+2+2 workers, 2 brokers
259
- expect(worker_pids).not_to include 0
260
-
261
- expect { Process.kill(0, initial_pid) }.to raise_error(Errno::ESRCH)
262
- ([master_pid] + worker_pids).each do |pid|
263
- expect(Process.kill(0, pid)).to eq(1)
264
- end
265
-
266
- output_files = {
267
- 'out1' => [0, 3, 6, 9] * 3,
268
- 'out2' => (20..30).to_a * 2,
269
- 'out3' => (100..105).to_a,
270
- 'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
271
- }
356
+ output_files = {
357
+ 'out1' => [0, 3, 6, 9] * 3,
358
+ 'out2' => (20..30).to_a * 2,
359
+ 'out3' => (100..105).to_a,
360
+ 'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
361
+ }
272
362
 
363
+ run_and_shutdown app_name, 10 do # 1+3+2+2 workers, 2 brokers
273
364
  output_files.each do |file_name, expected_contents|
274
365
  expect(File.exist?(File.join(@temp_directory_path, file_name))).to be true
275
366
  expect(File.readlines(file_name).map(&:to_i).sort).to eq(expected_contents.sort)
276
367
  end
277
-
278
- # Terminate the master
279
- expect(Process.kill("TERM", master_pid)).to eq(1)
280
-
281
- # Make sure everything is dead after a second
282
- sleep 2
283
- ([master_pid] + worker_pids).each do |pid|
284
- expect { Process.kill(0, pid) }.to raise_error(Errno::ESRCH)
285
- end
286
- rescue Exception => e
287
- Process.kill("TERM", master_pid) if master_pid
288
- raise
289
368
  end
290
369
  end
291
370
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rflow
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0a3
4
+ version: 1.0.0a4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael L. Artz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-16 00:00:00.000000000 Z
11
+ date: 2014-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: uuidtools
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: sys-filesystem
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.1.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.1.2
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: sqlite3
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -230,9 +244,7 @@ files:
230
244
  - schema/message.avsc
231
245
  - schema/raw.avsc
232
246
  - schema/tick.avsc
233
- - spec/fixtures/config_ints.rb
234
- - spec/fixtures/config_shards.rb
235
- - spec/fixtures/extensions_ints.rb
247
+ - spec/fixtures/extensions.rb
236
248
  - spec/rflow/component/port_spec.rb
237
249
  - spec/rflow/components/clock_spec.rb
238
250
  - spec/rflow/configuration/ruby_dsl_spec.rb
@@ -270,9 +282,7 @@ signing_key:
270
282
  specification_version: 4
271
283
  summary: A Ruby flow-based programming framework
272
284
  test_files:
273
- - spec/fixtures/config_ints.rb
274
- - spec/fixtures/config_shards.rb
275
- - spec/fixtures/extensions_ints.rb
285
+ - spec/fixtures/extensions.rb
276
286
  - spec/rflow/component/port_spec.rb
277
287
  - spec/rflow/components/clock_spec.rb
278
288
  - spec/rflow/configuration/ruby_dsl_spec.rb
@@ -1,25 +0,0 @@
1
- RFlow::Configuration::RubyDSL.configure do |config|
2
- # Configure the settings, which include paths for various files, log
3
- # levels, and component specific stuffs
4
- config.setting('rflow.log_level', 'FATAL')
5
- config.setting('rflow.application_directory_path', '../tmp')
6
- config.setting('rflow.application_name', 'testapp')
7
-
8
- # Instantiate components
9
- config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
10
- config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
11
- config.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out'
12
- config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out2'
13
- config.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even'
14
- config.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_odd'
15
- config.component 'output_even_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd'
16
- config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'
17
-
18
- # Hook components together
19
- config.connect 'generate_ints#out' => 'output#in'
20
- config.connect 'generate_ints#out' => 'output2#in'
21
- config.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
22
- config.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
23
- config.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
24
- config.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
25
- end
@@ -1,29 +0,0 @@
1
- RFlow::Configuration::RubyDSL.configure do |config|
2
- config.setting('rflow.log_level', 'FATAL')
3
- config.setting('rflow.application_directory_path', '.')
4
- config.setting('rflow.application_name', 'shardapp')
5
-
6
- # Instantiate components
7
- config.shard 's1', :process => 1 do |shard|
8
- shard.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
9
- end
10
-
11
- config.shard 's2', :type => :process, :count => 2 do |shard|
12
- shard.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
13
- end
14
-
15
- config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
16
- config.component 'replicate', 'RFlow::Components::Replicate'
17
-
18
- config.shard 's3', :process => 2 do |shard|
19
- shard.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
20
- shard.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
21
- end
22
-
23
- # Hook components together
24
- config.connect 'generate_ints1#out' => 'filter#in'
25
- config.connect 'generate_ints2#out' => 'filter#in'
26
- config.connect 'filter#filtered' => 'replicate#in'
27
- config.connect 'replicate#out' => 'output1#in'
28
- config.connect 'replicate#out' => 'output2#in'
29
- end