rflow 1.0.0a3 → 1.0.0a4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 82d81c66d6a26aa814d88893e294951247434585
4
- data.tar.gz: 03edc9684d65ffb4e0729000f8948f8dbe69c16c
3
+ metadata.gz: 91749755100dd4bfa1eb64f8b455e42cbb9e6b1a
4
+ data.tar.gz: 537aa8a39d393a99e8b4f8bf674eb8d38eb87169
5
5
  SHA512:
6
- metadata.gz: 7ca15c17aa77a9e15d5309c5e9dd22b533b0108ca6f8c29031899f8c824f3c4939c5c37e57bbe51db5834eefc40f0f52a840874af9660fbbe07729b8a66ea3d0
7
- data.tar.gz: a8f7a76ca8eba6aec45cdf56433792938610e11332dd0523917795dd4a59edc84660d80b08a48a67c222f3a312ba6837b238a36e9db9e982b62fac2742f93417
6
+ metadata.gz: f107c920c6255585fe9508c50cdea49dc0c3f39f8e8cbd321cb3ec04f3997c1e733923d0fcc153dc632a134af650ec37fe55402a266f87a093ae19b431d5dcde
7
+ data.tar.gz: 537459d7fed23e2f025b8b06f1fa22738b39fc1235bd54376edef8d880683915769e47d5c8b94f90979bd0add24726c1749cd010f02241f3385a17d2d4ca14aa
data/bin/rflow CHANGED
@@ -135,7 +135,7 @@ when 'load'
135
135
  exit 1
136
136
  end
137
137
 
138
- startup_logger.warn "Config database '#{options[:config_database_path]}' not found, creating"
138
+ startup_logger.info "Creating config database '#{options[:config_database_path]}'"
139
139
  begin
140
140
  config = RFlow::Configuration::initialize_database(options[:config_database_path], options[:config_file_path])
141
141
  rescue Exception => e
@@ -30,14 +30,3 @@ class RFlow::Components::FileOutput < RFlow::Component
30
30
  end
31
31
  end
32
32
  end
33
-
34
- class SimpleComponent < RFlow::Component
35
- input_port :in
36
- output_port :out
37
-
38
- def configure!(config); end
39
- def run!; end
40
- def process_message(input_port, input_port_key, connection, message); end
41
- def shutdown!; end
42
- def cleanup!; end
43
- end
@@ -22,17 +22,6 @@ class RFlow::Components::FileOutput < RFlow::Component
22
22
  end
23
23
  end
24
24
 
25
- class SimpleComponent < RFlow::Component
26
- input_port :in
27
- output_port :out
28
-
29
- def configure!(config); end
30
- def run!; end
31
- def process_message(input_port, input_port_key, connection, message); end
32
- def shutdown!; end
33
- def cleanup!; end
34
- end
35
-
36
25
  http_request_schema =<<EOS
37
26
  {
38
27
  "type": "record",
data/lib/rflow.rb CHANGED
@@ -53,6 +53,7 @@ class RFlow
53
53
  end
54
54
 
55
55
  def self.start_master_node
56
+ RFlow.logger.info "#{configuration['rflow.application_name']} starting"
56
57
  @master = Master.new(configuration)
57
58
  master.daemonize! if @daemonize
58
59
  master.run! # blocks until EventMachine stops
@@ -78,6 +78,8 @@ class RFlow
78
78
  end
79
79
 
80
80
  def handle_signals
81
+ Signal.trap 'SIGCHLD', 'DEFAULT' # make sure child process can run subshells
82
+
81
83
  ['SIGTERM', 'SIGINT', 'SIGQUIT'].each do |signal|
82
84
  trap_signal(signal) do
83
85
  shutdown! signal
@@ -36,7 +36,7 @@ class RFlow
36
36
  # attempt to constantize the specification into a different
37
37
  # class. Future releases will support external (i.e. non-managed
38
38
  # components), but the current stuff only supports Ruby classes
39
- def build(config)
39
+ def build(worker, config)
40
40
  raise NotImplementedError, "Non-managed components not yet implemented for component '#{config.name}' as '#{config.specification}' (#{config.uuid})" unless config.managed?
41
41
 
42
42
  RFlow.logger.debug "Instantiating component '#{config.name}' as '#{config.specification}' (#{config.uuid})"
@@ -50,7 +50,7 @@ class RFlow
50
50
  component_class = config.specification.constantize
51
51
  end
52
52
 
53
- component_class.new(uuid: config.uuid, name: config.name).tap do |component|
53
+ component_class.new(worker: worker, uuid: config.uuid, name: config.name).tap do |component|
54
54
  config.input_ports.each {|p| component.configure_input_port! p.name, uuid: p.uuid }
55
55
  config.output_ports.each {|p| component.configure_output_port! p.name, uuid: p.uuid }
56
56
 
@@ -75,17 +75,20 @@ class RFlow
75
75
  end
76
76
 
77
77
  attr_accessor :uuid, :name
78
- attr_reader :ports
78
+ attr_reader :ports, :worker
79
79
 
80
80
  def initialize(args = {})
81
81
  @name = args[:name]
82
82
  @uuid = args[:uuid]
83
+ @worker = args[:worker]
83
84
  @ports = PortCollection.new
84
85
 
85
86
  self.class.defined_input_ports.each {|name, _| ports << InputPort.new(self, name: name) }
86
87
  self.class.defined_output_ports.each {|name, _| ports << OutputPort.new(self, name: name) }
87
88
  end
88
89
 
90
+ def shard; worker.shard if worker; end
91
+
89
92
  # Returns a list of connected input ports. Each port will have
90
93
  # one or more keys associated with a particular connection.
91
94
  def input_ports; ports.by_type["RFlow::Component::InputPort"]; end
@@ -104,7 +104,7 @@ class RFlow
104
104
 
105
105
  # for testing purposes
106
106
  class NullConfiguration
107
- attr_accessor :name, :uuid, :options, :input_port_key, :output_port_key
107
+ attr_accessor :name, :uuid, :options, :input_port_key, :output_port_key, :delivery
108
108
  end
109
109
  end
110
110
  end
@@ -7,6 +7,9 @@ class CreateConnections < ActiveRecord::Migration
7
7
  # To allow for multiple types of connections
8
8
  t.string :type
9
9
 
10
+ # round-robin or broadcast
11
+ t.string :delivery
12
+
10
13
  # Data flows from an output port to an input port
11
14
  t.string :output_port_uuid
12
15
  t.string :output_port_key, :default => '0'
@@ -74,12 +74,14 @@ class RFlow
74
74
  # connect 'componentA#arrayport[2]' => 'componentB#in[1]'
75
75
  # Uses the model to assign random UUIDs
76
76
  def connect(hash)
77
- hash.each do |output_string, input_string|
77
+ delivery = hash[:delivery] || 'round-robin'
78
+ hash.except(:delivery).each do |output_string, input_string|
78
79
  output_component_name, output_port_name, output_port_key = parse_connection_string(output_string)
79
80
  input_component_name, input_port_name, input_port_key = parse_connection_string(input_string)
80
81
 
81
82
  connection_specs << {
82
83
  :name => output_string + '=>' + input_string,
84
+ :delivery => delivery,
83
85
  :output_component_name => output_component_name,
84
86
  :output_port_name => output_port_name, :output_port_key => output_port_key,
85
87
  :output_string => output_string,
@@ -197,6 +199,7 @@ class RFlow
197
199
  connection_type = many_to_many ? RFlow::Configuration::BrokeredZMQConnection : RFlow::Configuration::ZMQConnection
198
200
 
199
201
  conn = connection_type.create!(:name => spec[:name],
202
+ :delivery => spec[:delivery],
200
203
  :output_port_key => spec[:output_port_key],
201
204
  :input_port_key => spec[:input_port_key],
202
205
  :output_port => output_port,
@@ -216,6 +219,18 @@ class RFlow
216
219
  conn.options['input_responsibility'] = 'connect'
217
220
  end
218
221
 
222
+ case spec[:delivery]
223
+ when 'broadcast'
224
+ conn.options['output_socket_type'] = 'PUB'
225
+ conn.options['input_socket_type'] = 'SUB'
226
+ when 'round-robin'
227
+ conn.options['output_socket_type'] = 'PUSH'
228
+ conn.options['input_socket_type'] = 'PULL'
229
+ else
230
+ raise RFlow::Configuration::Connection::ConnectionInvalid,
231
+ "Delivery type '#{spec[:delivery]}' unknown at #{spec[:config_line]}"
232
+ end
233
+
219
234
  conn.save!
220
235
  conn
221
236
  rescue Exception => e
@@ -6,6 +6,7 @@ end
6
6
  require 'rflow/connection'
7
7
  require 'rflow/message'
8
8
  require 'rflow/broker'
9
+ require 'sys/filesystem'
9
10
 
10
11
  class RFlow
11
12
  module Connections
@@ -42,8 +43,13 @@ class RFlow
42
43
 
43
44
  def connect_input!
44
45
  RFlow.logger.debug "Connecting input #{uuid} with #{options.find_all {|k, v| k.to_s =~ /input/}}"
45
- self.input_socket = zmq_context.socket(ZMQ.const_get(options['input_socket_type'].to_sym))
46
+ check_address(options['input_address'])
47
+
48
+ self.input_socket = zmq_context.socket(ZMQ.const_get(options['input_socket_type']))
46
49
  input_socket.send(options['input_responsibility'].to_sym, options['input_address'])
50
+ if config.delivery == 'broadcast'
51
+ input_socket.setsockopt(ZMQ::SUBSCRIBE, '') # request all messages
52
+ end
47
53
 
48
54
  input_socket.on(:message) do |*message_parts|
49
55
  begin
@@ -61,7 +67,9 @@ class RFlow
61
67
 
62
68
  def connect_output!
63
69
  RFlow.logger.debug "Connecting output #{uuid} with #{options.find_all {|k, v| k.to_s =~ /output/}}"
64
- self.output_socket = zmq_context.socket(ZMQ.const_get(options['output_socket_type'].to_sym))
70
+ check_address(options['output_address'])
71
+
72
+ self.output_socket = zmq_context.socket(ZMQ.const_get(options['output_socket_type']))
65
73
  output_socket.send(options['output_responsibility'].to_sym, options['output_address'].to_s)
66
74
  output_socket
67
75
  end
@@ -99,6 +107,23 @@ class RFlow
99
107
 
100
108
  true
101
109
  end
110
+
111
+ def check_address(address)
112
+ # make sure we're not trying to create IPC sockets in an NFS share
113
+ # because that works poorly
114
+ if address.start_with?('ipc://')
115
+ filename = address[6..-1]
116
+ mount_point = Sys::Filesystem.mount_point(File.dirname(filename))
117
+ return unless mount_point
118
+ mount_type = Sys::Filesystem.mounts.find {|m| m.mount_point == mount_point }.mount_type
119
+ return unless mount_type
120
+
121
+ case mount_type
122
+ when 'vmhgfs', 'vboxsf', 'nfs' # vmware, virtualbox, nfs
123
+ raise ArgumentError, "Cannot safely create IPC sockets in network filesystem '#{mount_point}' of type #{mount_type}"
124
+ end
125
+ end
126
+ end
102
127
  end
103
128
 
104
129
  class BrokeredZMQConnection < ZMQConnection
@@ -122,11 +147,20 @@ class RFlow
122
147
  RFlow.logger.debug { "Creating a new ZeroMQ context; ZeroMQ version is #{version[:major]}.#{version[:minor]}.#{version[:patch]}" }
123
148
  @context = ZMQ::Context.new
124
149
  RFlow.logger.debug { "Connecting message broker to route from #{connection.options['output_address']} to #{connection.options['input_address']}" }
125
- @back = context.socket(ZMQ::PULL)
126
- back.bind(connection.options['output_address'])
127
- @front = context.socket(ZMQ::PUSH)
128
- front.bind(connection.options['input_address'])
129
- ZMQ::Proxy.new(back, front)
150
+
151
+ @front = case connection.options['output_socket_type']
152
+ when 'PUSH'; context.socket(ZMQ::PULL)
153
+ when 'PUB'; context.socket(ZMQ::XSUB)
154
+ else raise ArgumentError, "Unknown output socket type #{connection.options['output_socket_type']}"
155
+ end
156
+ @back = case connection.options['input_socket_type']
157
+ when 'PULL'; context.socket(ZMQ::PUSH)
158
+ when 'SUB'; context.socket(ZMQ::XPUB)
159
+ else raise ArgumentError, "Unknown input socket type #{connection.options['input_socket_type']}"
160
+ end
161
+ front.bind(connection.options['output_address'])
162
+ back.bind(connection.options['input_address'])
163
+ ZMQ::Proxy.new(front, back)
130
164
  back.close
131
165
  front.close
132
166
  rescue Exception => e
data/lib/rflow/shard.rb CHANGED
@@ -9,22 +9,29 @@ class RFlow
9
9
  # start an EventMachine reactor.
10
10
  class Shard
11
11
  class Worker < ChildProcess
12
+ attr_reader :shard, :index
13
+
12
14
  def initialize(shard, index = 1)
13
15
  super("#{shard.name}-#{index}", 'Worker')
14
16
  @shard = shard
17
+ @index = index
15
18
 
16
19
  # build at initialize time to fail fast
17
- @components = shard.config.components.map {|config| Component.build(config) }
20
+ @components = shard.config.components.map {|config| Component.build(self, config) }
18
21
  end
19
22
 
20
23
  def run_process
21
24
  EM.run do
22
- # TODO: Monitor the master
23
- configure_components!
24
- connect_components!
25
- # TODO: need to do proper node synchronization for ZMQ to remove sleep
26
- sleep 1
27
- run_components!
25
+ begin
26
+ # TODO: Monitor the master
27
+ configure_components!
28
+ connect_components!
29
+ # TODO: need to do proper node synchronization for ZMQ to remove sleep
30
+ sleep 1
31
+ run_components!
32
+ rescue Exception => e
33
+ RFlow.logger.error "Error in worker, shutting down: #{e.class.name}: #{e.message}, because: #{e.backtrace.inspect}"
34
+ end
28
35
  end
29
36
 
30
37
  RFlow.logger.info "Shutting down worker after EM stopped"
@@ -32,7 +39,7 @@ class RFlow
32
39
 
33
40
  def configure_components!
34
41
  RFlow.logger.debug "Configuring components"
35
- @components.zip(@shard.config.components.map(&:options)).each do |(component, config)|
42
+ @components.zip(shard.config.components.map(&:options)).each do |(component, config)|
36
43
  RFlow.logger.debug "Configuring component '#{component.name}' (#{component.uuid})"
37
44
  component.configure! config
38
45
  end
data/lib/rflow/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class RFlow
2
- VERSION = "1.0.0a3"
2
+ VERSION = "1.0.0a4"
3
3
  end
data/rflow.gemspec CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
 
22
22
  s.add_dependency "uuidtools", "~> 2.1"
23
23
  s.add_dependency "log4r", "~> 1.1"
24
+ s.add_dependency "sys-filesystem", "~> 1.1.2"
24
25
 
25
26
  s.add_dependency "sqlite3", "~> 1.3"
26
27
  s.add_dependency "activerecord", "~> 3.2"
@@ -30,13 +30,19 @@ class RFlow::Components::FileOutput < RFlow::Component
30
30
  end
31
31
  end
32
32
 
33
- class SimpleComponent < RFlow::Component
33
+ class RFlow::Components::DateShellComponent < RFlow::Component
34
34
  input_port :in
35
35
  output_port :out
36
36
 
37
37
  def configure!(config); end
38
38
  def run!; end
39
- def process_message(input_port, input_port_key, connection, message); end
39
+ def process_message(input_port, input_port_key, connection, message)
40
+ out.send_message(
41
+ RFlow::Message.new('RFlow::Message::Data::Raw').tap do |m|
42
+ m.provenance = message.provenance
43
+ m.data.raw = `date`
44
+ end)
45
+ end
40
46
  def shutdown!; end
41
47
  def cleanup!; end
42
48
  end
@@ -39,25 +39,30 @@ class RFlow
39
39
  described_class.configure do |c|
40
40
  c.component 'first', 'First'
41
41
  c.component 'second', 'Second'
42
+ c.component 'third', 'Third'
42
43
  c.connect 'first#out' => 'second#in'
43
44
  c.connect 'first#out' => 'second#in[inkey]'
44
45
  c.connect 'first#out[outkey]' => 'second#in'
45
46
  c.connect 'first#out[outkey]' => 'second#in[inkey]'
47
+ c.connect 'second#out' => 'third#in', :delivery => 'broadcast'
46
48
  end
47
49
 
48
50
  expect(Shard).to have(1).shard
49
- expect(Component).to have(2).components
50
- expect(Port).to have(2).ports
51
- expect(Connection).to have(4).connections
51
+ expect(Component).to have(3).components
52
+ expect(Port).to have(4).ports
53
+ expect(Connection).to have(5).connections
54
+
55
+ first_component = Component.find_by_name('first').tap do |c|
56
+ expect(c.specification).to eq('First')
57
+ expect(c).to have(0).input_ports
58
+ expect(c).to have(1).output_port
52
59
 
53
- first_component = Component.where(name: 'first').first.tap do |component|
54
- expect(component.specification).to eq('First')
55
- expect(component).to have(0).input_ports
56
- expect(component).to have(1).output_port
57
- expect(component.output_ports.first.name).to eq('out')
60
+ out_port = c.output_ports.first
61
+ expect(out_port.name).to eq('out')
58
62
 
59
- expect(component.output_ports.first).to have(4).connections
60
- component.output_ports.first.connections.tap do |connections|
63
+ expect(out_port).to have(4).connections
64
+ out_port.connections.tap do |connections|
65
+ connections.each {|c| expect(c.delivery).to eq 'round-robin' }
61
66
  expect(connections[0].input_port_key).to be_nil
62
67
  expect(connections[0].output_port_key).to be_nil
63
68
  expect(connections[1].input_port_key).to eq('inkey')
@@ -69,14 +74,38 @@ class RFlow
69
74
  end
70
75
  end
71
76
 
72
- Component.where(name: 'second').first.tap do |component|
73
- expect(component.specification).to eq('Second')
74
- expect(component).to have(1).input_port
75
- expect(component.input_ports.first.name).to eq('in')
76
- expect(component).to have(0).output_ports
77
+ second_component = Component.find_by_name('second').tap do |c|
78
+ expect(c.specification).to eq('Second')
79
+ expect(c).to have(1).input_port
80
+ expect(c).to have(1).output_port
81
+
82
+ in_port = c.input_ports.first
83
+ expect(in_port.name).to eq('in')
84
+
85
+ out_port = c.output_ports.first
86
+ expect(out_port.name).to eq('out')
77
87
 
78
- expect(component.input_ports.first).to have(4).connections
79
- expect(component.input_ports.first.connections).to eq(first_component.output_ports.first.connections)
88
+ expect(in_port).to have(4).connections
89
+ expect(in_port.connections).to eq(first_component.output_ports.first.connections)
90
+
91
+ expect(out_port).to have(1).connections
92
+ out_port.connections.first.tap do |connection|
93
+ expect(connection.delivery).to eq 'broadcast'
94
+ expect(connection.input_port_key).to be_nil
95
+ expect(connection.output_port_key).to be_nil
96
+ end
97
+ end
98
+
99
+ Component.find_by_name('third').tap do |c|
100
+ expect(c.specification).to eq('Third')
101
+ expect(c).to have(1).input_port
102
+ expect(c).to have(0).output_ports
103
+
104
+ in_port = c.input_ports.first
105
+ expect(in_port.name).to eq('in')
106
+
107
+ expect(in_port).to have(1).connections
108
+ expect(in_port.connections).to eq(second_component.output_ports.first.connections)
80
109
  end
81
110
  end
82
111
 
@@ -316,6 +345,76 @@ class RFlow
316
345
  end
317
346
  end
318
347
 
348
+ it "should generate PUB-SUB ipc ZeroMQ connections for one-to-many broadcast connections" do
349
+ described_class.configure do |c|
350
+
351
+ c.shard "s1", :process => 1 do |s|
352
+ s.component 'first', 'First', :opt1 => 'opt1'
353
+ end
354
+
355
+ c.shard "s2", :process => 3 do |s|
356
+ s.component 'second', 'Second', :opt1 => 'opt1', "opt2" => "opt2"
357
+ end
358
+
359
+ c.connect 'first#out' => 'second#in', :delivery => 'broadcast'
360
+ end
361
+
362
+ expect(Shard).to have(2).shards
363
+ expect(Component).to have(2).components
364
+ expect(Port).to have(2).ports
365
+ expect(Connection).to have(1).connections
366
+
367
+ Connection.first.tap do |conn|
368
+ expect(conn.type).to eq('RFlow::Configuration::ZMQConnection')
369
+ expect(conn.name).to eq('first#out=>second#in')
370
+ expect(conn.output_port_key).to be_nil
371
+ expect(conn.input_port_key).to be_nil
372
+ conn.options.tap do |opts|
373
+ expect(opts['output_socket_type']).to eq('PUB')
374
+ expect(opts['output_address']).to eq("ipc://rflow.#{conn.uuid}")
375
+ expect(opts['output_responsibility']).to eq('bind')
376
+ expect(opts['input_socket_type']).to eq('SUB')
377
+ expect(opts['input_address']).to eq("ipc://rflow.#{conn.uuid}")
378
+ expect(opts['input_responsibility']).to eq('connect')
379
+ end
380
+ end
381
+ end
382
+
383
+ it "should generate PUB-SUB brokered ZeroMQ connections for many-to-many broadcast connections" do
384
+ described_class.configure do |c|
385
+
386
+ c.shard "s1", :process => 3 do |s|
387
+ s.component 'first', 'First', :opt1 => 'opt1'
388
+ end
389
+
390
+ c.shard "s2", :process => 3 do |s|
391
+ s.component 'second', 'Second', :opt1 => 'opt1', "opt2" => "opt2"
392
+ end
393
+
394
+ c.connect 'first#out' => 'second#in', :delivery => 'broadcast'
395
+ end
396
+
397
+ expect(Shard).to have(2).shards
398
+ expect(Component).to have(2).components
399
+ expect(Port).to have(2).ports
400
+ expect(Connection).to have(1).connections
401
+
402
+ Connection.first.tap do |conn|
403
+ expect(conn.type).to eq('RFlow::Configuration::BrokeredZMQConnection')
404
+ expect(conn.name).to eq('first#out=>second#in')
405
+ expect(conn.output_port_key).to be_nil
406
+ expect(conn.input_port_key).to be_nil
407
+ conn.options.tap do |opts|
408
+ expect(opts['output_socket_type']).to eq('PUB')
409
+ expect(opts['output_address']).to eq("ipc://rflow.#{conn.uuid}.in")
410
+ expect(opts['output_responsibility']).to eq('connect')
411
+ expect(opts['input_socket_type']).to eq('SUB')
412
+ expect(opts['input_address']).to eq("ipc://rflow.#{conn.uuid}.out")
413
+ expect(opts['input_responsibility']).to eq('connect')
414
+ end
415
+ end
416
+ end
417
+
319
418
  it "should not allow two components with the same name" do
320
419
  expect {
321
420
  described_class.configure do |c|
data/spec/rflow_spec.rb CHANGED
@@ -3,8 +3,84 @@ require 'open3'
3
3
  require 'rflow'
4
4
 
5
5
  describe RFlow do
6
+ def write_config_file(content)
7
+ File.open(config_file_name, 'w+') {|file| file.write content }
8
+ end
9
+
10
+ def execute_rflow(args)
11
+ stdout, stderr, status = Open3.capture3("bundle exec rflow #{args}")
12
+ {:stdout => stdout, :stderr => stderr, :status => status}
13
+ end
14
+
15
+ def load_database
16
+ execute_rflow("load -d #{db_file_name} -c #{config_file_name}").tap do |result|
17
+ expect(result[:status].exitstatus).to eq(0)
18
+ expect(result[:stderr]).to eq('')
19
+ expect(result[:stdout]).to match /Successfully initialized database.*#{db_file_name}/
20
+ end
21
+ end
22
+
23
+ def start_rflow
24
+ execute_rflow("start -d #{db_file_name} -e #{@extensions_file_name}").tap do |result|
25
+ expect(result[:status].exitstatus).to eq(0)
26
+ expect(result[:stderr]).to eq('')
27
+ expect(result[:stdout]).not_to match /error/i
28
+ end
29
+ end
30
+
31
+ def get_log_pids(logfile)
32
+ log_contents = File.read(logfile).chomp
33
+ log_lines = log_contents.split("\n")
34
+
35
+ log_lines.each {|line| expect(line).not_to match /^ERROR/ }
36
+ log_lines.each {|line| expect(line).not_to match /^DEBUG/ }
37
+
38
+ # Grab all the pids from the log, which seems to be the only
39
+ # reliable way to get them
40
+ log_lines.map {|line| /\((\d+)\)/.match(line)[1].to_i }.uniq
41
+ end
42
+
43
+ def run_and_shutdown(app_name, expected_worker_count)
44
+ r = start_rflow
45
+ sleep 2 # give the daemon a chance to finish
46
+
47
+ log_pids = get_log_pids("log/#{app_name}.log")
48
+
49
+ initial_pid = r[:status].pid
50
+ master_pid = File.read("run/#{app_name}.pid").chomp.to_i
51
+ worker_pids = log_pids - [initial_pid, master_pid]
52
+
53
+ expect(log_pids).to include initial_pid
54
+ expect(log_pids).to include master_pid
55
+
56
+ expect(worker_pids).to have(expected_worker_count).pids
57
+ expect(worker_pids).not_to include 0
58
+
59
+ expect { Process.kill(0, initial_pid) }.to raise_error(Errno::ESRCH)
60
+ ([master_pid] + worker_pids).each do |pid|
61
+ expect(Process.kill(0, pid)).to eq(1)
62
+ end
63
+
64
+ yield # verify output
65
+
66
+ # Terminate the master
67
+ expect(Process.kill("TERM", master_pid)).to eq(1)
68
+
69
+ # Make sure everything is dead after a second
70
+ sleep 2
71
+ ([master_pid] + worker_pids).each do |pid|
72
+ expect { Process.kill(0, pid) }.to raise_error(Errno::ESRCH)
73
+ end
74
+ rescue Exception => e
75
+ Process.kill("TERM", master_pid) if master_pid
76
+ raise
77
+ end
78
+
79
+ let(:config_file_name) { 'input_config' }
80
+ let(:db_file_name) { 'outdb' }
81
+
6
82
  before(:all) do
7
- @extensions_file_name = File.join(File.dirname(__FILE__), 'fixtures', 'extensions_ints.rb')
83
+ @extensions_file_name = File.join(File.dirname(__FILE__), 'fixtures', 'extensions.rb')
8
84
  end
9
85
 
10
86
  before(:each) do
@@ -43,7 +119,7 @@ describe RFlow do
43
119
 
44
120
  it "should run a non-sharded workflow" do
45
121
  run_rflow_with_dsl do |c|
46
- c.setting 'rflow.log_level', 'FATAL'
122
+ c.setting 'rflow.log_level', 'ERROR'
47
123
  c.setting 'rflow.application_directory_path', @temp_directory_path
48
124
  c.setting 'rflow.application_name', 'nonsharded_test'
49
125
 
@@ -84,7 +160,7 @@ describe RFlow do
84
160
 
85
161
  it "should run a sharded workflow" do
86
162
  run_rflow_with_dsl do |c|
87
- c.setting 'rflow.log_level', 'FATAL'
163
+ c.setting 'rflow.log_level', 'ERROR'
88
164
  c.setting 'rflow.application_directory_path', @temp_directory_path
89
165
  c.setting 'rflow.application_name', 'sharded_test'
90
166
 
@@ -130,38 +206,62 @@ describe RFlow do
130
206
  expect(File.readlines(file_name).map(&:to_i).sort).to eq(expected_contents.sort)
131
207
  end
132
208
  end
209
+
210
+ it "should deliver broadcast messages to every copy of a shard" do
211
+ run_rflow_with_dsl do |c|
212
+ c.setting 'rflow.log_level', 'FATAL'
213
+ c.setting 'rflow.application_directory_path', @temp_directory_path
214
+ c.setting 'rflow.application_name', 'sharded_broadcast_test'
215
+
216
+ c.shard 's1', :process => 1 do |s|
217
+ s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
218
+ end
219
+
220
+ c.shard 's2', :process => 2 do |s|
221
+ s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 1, 'finish' => 11, 'step' => 3
222
+ end
223
+
224
+ c.shard 's3', :type => :process, :count => 3 do |s|
225
+ s.component 'broadcast_output', 'RFlow::Components::FileOutput', 'output_file_path' => 'broadcast'
226
+ s.component 'roundrobin_output', 'RFlow::Components::FileOutput', 'output_file_path' => 'round-robin'
227
+ end
228
+
229
+ c.connect 'generate_ints1#out' => 'broadcast_output#in', :delivery => 'broadcast'
230
+ c.connect 'generate_ints2#out' => 'broadcast_output#in', :delivery => 'broadcast'
231
+ c.connect 'generate_ints1#out' => 'roundrobin_output#in'
232
+ c.connect 'generate_ints2#out' => 'roundrobin_output#in'
233
+ end
234
+
235
+ output_files = {
236
+ 'broadcast' => ([0, 3, 6, 9] * 3) + ([1, 4, 7, 10] * 6),
237
+ 'round-robin' => [0, 3, 6, 9] + ([1, 4, 7, 10] * 2)
238
+ }
239
+
240
+ expect(RFlow.master).to have(3).shards
241
+ expect(RFlow.master.shards.map(&:count)).to eq([1, 2, 3])
242
+ expect(RFlow.master.shards.map(&:workers).map(&:count)).to eq([1, 2, 3])
243
+
244
+ output_files.each do |file_name, expected_contents|
245
+ expect(File.exist?(File.join(@temp_directory_path, file_name))).to be true
246
+ expect(File.readlines(file_name).map(&:to_i).sort).to eq(expected_contents.sort)
247
+ end
248
+ end
133
249
  end
134
250
  end
135
251
 
136
252
  context "when executing via the rflow binary" do
137
- def execute_rflow(args)
138
- stdout, stderr, status = Open3.capture3("bundle exec rflow #{args}")
139
- {:stdout => stdout, :stderr => stderr, :status => status}
140
- end
141
-
142
253
  context "with a simple ruby DSL config file" do
143
- let(:config_file_name) { 'input_config' }
144
- let(:db_file_name) { 'outdb' }
145
-
146
254
  before(:each) do
147
- File.open(config_file_name, 'w+') do |file|
148
- file.write <<-EOF
149
- RFlow::Configuration::RubyDSL.configure do |c|
150
- c.setting 'mysetting', 'myvalue'
151
- end
152
- EOF
153
- end
255
+ write_config_file <<-EOF
256
+ RFlow::Configuration::RubyDSL.configure do |c|
257
+ c.setting 'mysetting', 'myvalue'
258
+ end
259
+ EOF
154
260
  end
155
261
 
156
262
  it "should load a ruby dsl file into a sqlite DB" do
157
- r = execute_rflow("load -d #{db_file_name} -c #{config_file_name}")
158
-
159
- # Make sure that the process execution worked
160
- expect(r[:status].exitstatus).to eq(0)
161
- expect(r[:stderr]).to eq('')
162
- expect(r[:stdout]).to match /Successfully initialized database.*#{db_file_name}/
263
+ load_database
163
264
 
164
- # Make sure the config actually got loaded
165
265
  ActiveRecord::Base.establish_connection adapter: "sqlite3", database: db_file_name
166
266
  expect(RFlow::Configuration::Setting.where(:name => 'mysetting').first.value).to eq('myvalue')
167
267
  end
@@ -178,46 +278,70 @@ describe RFlow do
178
278
  end
179
279
  end
180
280
 
281
+ context "with a component that runs subshells" do
282
+ let(:app_name) { 'sharded_subshell_test' }
283
+
284
+ before(:each) do
285
+ write_config_file <<-EOF
286
+ RFlow::Configuration::RubyDSL.configure do |c|
287
+ c.setting('rflow.log_level', 'INFO')
288
+ c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
289
+ c.setting('rflow.application_name', '#{app_name}')
290
+
291
+ c.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
292
+ c.component 'subshell_date', 'RFlow::Components::DateShellComponent'
293
+ c.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
294
+
295
+ c.connect 'generate_ints#out' => 'subshell_date#in'
296
+ c.connect 'subshell_date#out' => 'output#in'
297
+ end
298
+ EOF
299
+
300
+ load_database
301
+ end
302
+
303
+ it "should run successfully daemonize and run in the background" do
304
+ run_and_shutdown app_name, 1 do # 1 default worker
305
+ expect(File.exist?(File.join(@temp_directory_path, 'out1'))).to be true
306
+ File.readlines('out1').each {|line| expect(line).to match /\w+ \w+ \d+ \d+:\d+:\d+ \w+ \d+/ }
307
+ end
308
+ end
309
+ end
310
+
181
311
  context "with a complex, sharded ruby DSL config file" do
182
- let(:config_file_name) { 'input_config' }
183
- let(:db_file_name) { 'config_db' }
184
312
  let(:app_name) { 'sharded_bin_test' }
185
313
 
186
314
  before(:each) do
187
- File.open(config_file_name, 'w+') do |file|
188
- file.write <<-EOF
189
- RFlow::Configuration::RubyDSL.configure do |c|
190
- c.setting('rflow.log_level', 'INFO')
191
- c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
192
- c.setting('rflow.application_name', '#{app_name}')
193
-
194
- c.shard 's1', :process => 3 do |s|
195
- s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
196
- end
197
- c.shard 's2', :type => :process, :count => 2 do |s|
198
- s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
199
- end
200
- c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
201
- c.shard 's3', :process => 2 do |s|
202
- s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
203
- s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
204
- end
205
- c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
206
- c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
207
-
208
- c.connect 'generate_ints1#out' => 'output1#in'
209
- c.connect 'generate_ints2#out' => 'output2#in'
210
- c.connect 'generate_ints3#out' => 'output3#in'
211
- c.connect 'generate_ints1#out' => 'output_all#in'
212
- c.connect 'generate_ints2#out' => 'output_all#in'
213
- c.connect 'generate_ints3#out' => 'output_all#in'
315
+ write_config_file <<-EOF
316
+ RFlow::Configuration::RubyDSL.configure do |c|
317
+ c.setting('rflow.log_level', 'INFO')
318
+ c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
319
+ c.setting('rflow.application_name', '#{app_name}')
320
+
321
+ c.shard 's1', :process => 3 do |s|
322
+ s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
214
323
  end
215
- EOF
216
- end
217
- r = execute_rflow("load -d #{db_file_name} -c #{config_file_name}")
218
- expect(r[:status].exitstatus).to eq(0)
219
- expect(r[:stderr]).to eq('')
220
- expect(r[:stdout]).to match /Successfully initialized database.*#{db_file_name}/
324
+ c.shard 's2', :type => :process, :count => 2 do |s|
325
+ s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
326
+ end
327
+ c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
328
+ c.shard 's3', :process => 2 do |s|
329
+ s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
330
+ s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
331
+ end
332
+ c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
333
+ c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
334
+
335
+ c.connect 'generate_ints1#out' => 'output1#in'
336
+ c.connect 'generate_ints2#out' => 'output2#in'
337
+ c.connect 'generate_ints3#out' => 'output3#in'
338
+ c.connect 'generate_ints1#out' => 'output_all#in'
339
+ c.connect 'generate_ints2#out' => 'output_all#in'
340
+ c.connect 'generate_ints3#out' => 'output_all#in'
341
+ end
342
+ EOF
343
+
344
+ load_database
221
345
  end
222
346
 
223
347
  it "should not start if the components aren't loaded" do
@@ -229,63 +353,18 @@ describe RFlow do
229
353
  end
230
354
 
231
355
  it "should daemonize and run in the background" do
232
- begin
233
- r = execute_rflow("start -d #{db_file_name} -e #{@extensions_file_name}")
234
-
235
- expect(r[:status].exitstatus).to eq(0)
236
- expect(r[:stderr]).to eq('')
237
- expect(r[:stdout]).not_to match /error/i
238
-
239
- sleep 2 # give the daemon a chance to finish
240
-
241
- log_contents = File.read("log/#{app_name}.log").chomp
242
- log_lines = log_contents.split("\n")
243
-
244
- log_lines.each {|line| expect(line).not_to match /^ERROR/ }
245
- log_lines.each {|line| expect(line).not_to match /^DEBUG/ }
246
-
247
- # Grab all the pids from the log, which seems to be the only
248
- # reliable way to get them
249
- log_pids = log_lines.map {|line| /\((\d+)\)/.match(line)[1].to_i }.uniq
250
-
251
- initial_pid = r[:status].pid
252
- master_pid = File.read("run/#{app_name}.pid").chomp.to_i
253
- worker_pids = log_pids - [initial_pid, master_pid]
254
-
255
- expect(log_pids).to include initial_pid
256
- expect(log_pids).to include master_pid
257
-
258
- expect(worker_pids).to have(10).pids # 1+3+2+2 workers, 2 brokers
259
- expect(worker_pids).not_to include 0
260
-
261
- expect { Process.kill(0, initial_pid) }.to raise_error(Errno::ESRCH)
262
- ([master_pid] + worker_pids).each do |pid|
263
- expect(Process.kill(0, pid)).to eq(1)
264
- end
265
-
266
- output_files = {
267
- 'out1' => [0, 3, 6, 9] * 3,
268
- 'out2' => (20..30).to_a * 2,
269
- 'out3' => (100..105).to_a,
270
- 'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
271
- }
356
+ output_files = {
357
+ 'out1' => [0, 3, 6, 9] * 3,
358
+ 'out2' => (20..30).to_a * 2,
359
+ 'out3' => (100..105).to_a,
360
+ 'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
361
+ }
272
362
 
363
+ run_and_shutdown app_name, 10 do # 1+3+2+2 workers, 2 brokers
273
364
  output_files.each do |file_name, expected_contents|
274
365
  expect(File.exist?(File.join(@temp_directory_path, file_name))).to be true
275
366
  expect(File.readlines(file_name).map(&:to_i).sort).to eq(expected_contents.sort)
276
367
  end
277
-
278
- # Terminate the master
279
- expect(Process.kill("TERM", master_pid)).to eq(1)
280
-
281
- # Make sure everything is dead after a second
282
- sleep 2
283
- ([master_pid] + worker_pids).each do |pid|
284
- expect { Process.kill(0, pid) }.to raise_error(Errno::ESRCH)
285
- end
286
- rescue Exception => e
287
- Process.kill("TERM", master_pid) if master_pid
288
- raise
289
368
  end
290
369
  end
291
370
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rflow
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0a3
4
+ version: 1.0.0a4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael L. Artz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-16 00:00:00.000000000 Z
11
+ date: 2014-06-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: uuidtools
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: '1.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: sys-filesystem
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.1.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.1.2
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: sqlite3
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -230,9 +244,7 @@ files:
230
244
  - schema/message.avsc
231
245
  - schema/raw.avsc
232
246
  - schema/tick.avsc
233
- - spec/fixtures/config_ints.rb
234
- - spec/fixtures/config_shards.rb
235
- - spec/fixtures/extensions_ints.rb
247
+ - spec/fixtures/extensions.rb
236
248
  - spec/rflow/component/port_spec.rb
237
249
  - spec/rflow/components/clock_spec.rb
238
250
  - spec/rflow/configuration/ruby_dsl_spec.rb
@@ -270,9 +282,7 @@ signing_key:
270
282
  specification_version: 4
271
283
  summary: A Ruby flow-based programming framework
272
284
  test_files:
273
- - spec/fixtures/config_ints.rb
274
- - spec/fixtures/config_shards.rb
275
- - spec/fixtures/extensions_ints.rb
285
+ - spec/fixtures/extensions.rb
276
286
  - spec/rflow/component/port_spec.rb
277
287
  - spec/rflow/components/clock_spec.rb
278
288
  - spec/rflow/configuration/ruby_dsl_spec.rb
@@ -1,25 +0,0 @@
1
- RFlow::Configuration::RubyDSL.configure do |config|
2
- # Configure the settings, which include paths for various files, log
3
- # levels, and component specific stuffs
4
- config.setting('rflow.log_level', 'FATAL')
5
- config.setting('rflow.application_directory_path', '../tmp')
6
- config.setting('rflow.application_name', 'testapp')
7
-
8
- # Instantiate components
9
- config.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
10
- config.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
11
- config.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out'
12
- config.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out2'
13
- config.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even'
14
- config.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_odd'
15
- config.component 'output_even_odd', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd'
16
- config.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => '../tmp/out_even_odd2'
17
-
18
- # Hook components together
19
- config.connect 'generate_ints#out' => 'output#in'
20
- config.connect 'generate_ints#out' => 'output2#in'
21
- config.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
22
- config.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
23
- config.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
24
- config.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
25
- end
@@ -1,29 +0,0 @@
1
- RFlow::Configuration::RubyDSL.configure do |config|
2
- config.setting('rflow.log_level', 'FATAL')
3
- config.setting('rflow.application_directory_path', '.')
4
- config.setting('rflow.application_name', 'shardapp')
5
-
6
- # Instantiate components
7
- config.shard 's1', :process => 1 do |shard|
8
- shard.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
9
- end
10
-
11
- config.shard 's2', :type => :process, :count => 2 do |shard|
12
- shard.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
13
- end
14
-
15
- config.component 'filter', 'RFlow::Components::RubyProcFilter', 'filter_proc_string' => 'lambda {|message| true}'
16
- config.component 'replicate', 'RFlow::Components::Replicate'
17
-
18
- config.shard 's3', :process => 2 do |shard|
19
- shard.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
20
- shard.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
21
- end
22
-
23
- # Hook components together
24
- config.connect 'generate_ints1#out' => 'filter#in'
25
- config.connect 'generate_ints2#out' => 'filter#in'
26
- config.connect 'filter#filtered' => 'replicate#in'
27
- config.connect 'replicate#out' => 'output1#in'
28
- config.connect 'replicate#out' => 'output2#in'
29
- end