rflow 0.0.5 → 1.0.0a1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.ruby-gemset +1 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +21 -0
  5. data/.yardopts +1 -0
  6. data/Gemfile +5 -1
  7. data/Guardfile +8 -0
  8. data/LICENSE +190 -0
  9. data/NOTES +26 -13
  10. data/README.md +448 -0
  11. data/Rakefile +5 -12
  12. data/bin/rflow +23 -20
  13. data/example/basic_config.rb +2 -2
  14. data/example/basic_extensions.rb +8 -8
  15. data/example/http_config.rb +1 -1
  16. data/example/http_extensions.rb +15 -15
  17. data/lib/rflow.rb +15 -387
  18. data/lib/rflow/component.rb +105 -50
  19. data/lib/rflow/component/port.rb +25 -24
  20. data/lib/rflow/components/raw.rb +4 -4
  21. data/lib/rflow/components/raw/extensions.rb +2 -2
  22. data/lib/rflow/configuration.rb +54 -36
  23. data/lib/rflow/configuration/component.rb +2 -3
  24. data/lib/rflow/configuration/connection.rb +9 -10
  25. data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
  26. data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
  27. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
  28. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
  29. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
  30. data/lib/rflow/configuration/port.rb +3 -4
  31. data/lib/rflow/configuration/ruby_dsl.rb +59 -35
  32. data/lib/rflow/configuration/setting.rb +8 -7
  33. data/lib/rflow/configuration/shard.rb +24 -0
  34. data/lib/rflow/configuration/uuid_keyed.rb +3 -3
  35. data/lib/rflow/connection.rb +21 -10
  36. data/lib/rflow/connections/zmq_connection.rb +45 -44
  37. data/lib/rflow/logger.rb +67 -0
  38. data/lib/rflow/master.rb +127 -0
  39. data/lib/rflow/message.rb +14 -14
  40. data/lib/rflow/pid_file.rb +84 -0
  41. data/lib/rflow/shard.rb +148 -0
  42. data/lib/rflow/version.rb +1 -1
  43. data/rflow.gemspec +22 -28
  44. data/schema/message.avsc +8 -8
  45. data/spec/fixtures/config_ints.rb +4 -4
  46. data/spec/fixtures/config_shards.rb +30 -0
  47. data/spec/fixtures/extensions_ints.rb +8 -8
  48. data/spec/rflow_component_port_spec.rb +58 -0
  49. data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
  50. data/spec/rflow_configuration_spec.rb +4 -4
  51. data/spec/rflow_message_data_raw.rb +2 -2
  52. data/spec/rflow_message_data_spec.rb +6 -6
  53. data/spec/rflow_message_spec.rb +13 -13
  54. data/spec/rflow_spec.rb +294 -71
  55. data/spec/schema_spec.rb +2 -2
  56. data/spec/spec_helper.rb +6 -4
  57. data/temp.rb +21 -21
  58. metadata +56 -65
  59. data/.rvmrc +0 -1
  60. data/README +0 -0
@@ -0,0 +1,58 @@
1
+ require 'spec_helper.rb'
2
+
3
+ describe RFlow::Component::Port do
4
+ it "should not be connected" do
5
+ described_class.new.connected?.should be_false
6
+ end
7
+ end
8
+
9
+ describe RFlow::Component::HashPort do
10
+ it "should not be connected" do
11
+ port_config = double('Port Config')
12
+ port_config.should_receive(:name).and_return('port')
13
+ port_config.should_receive(:uuid).and_return('1')
14
+
15
+ port = described_class.new(port_config)
16
+ port.connected?.should be_false
17
+ end
18
+ end
19
+
20
+ describe RFlow::Component::InputPort do
21
+ context ".connect!" do
22
+ it "should be connected" do
23
+ connection_double = double('connection')
24
+ connection_double.should_receive(:connect_input!)
25
+
26
+ port_config = double('Port Config')
27
+ port_config.should_receive(:name).and_return('port')
28
+ port_config.should_receive(:uuid).and_return('1')
29
+
30
+ port = described_class.new(port_config)
31
+ port.add_connection(nil, connection_double)
32
+
33
+ port.connected?.should be_false
34
+ port.connect!
35
+ port.connected?.should be_true
36
+ end
37
+ end
38
+ end
39
+
40
+ describe RFlow::Component::OutputPort do
41
+ context ".connect!" do
42
+ it "shouldbe connected" do
43
+ connection_double = double('connection')
44
+ connection_double.should_receive(:connect_output!)
45
+
46
+ port_config = double('Port Config')
47
+ port_config.should_receive(:name).and_return('port')
48
+ port_config.should_receive(:uuid).and_return('1')
49
+
50
+ port = described_class.new(port_config)
51
+ port.add_connection(nil, connection_double)
52
+
53
+ port.connected?.should be_false
54
+ port.connect!
55
+ port.connected?.should be_true
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,148 @@
1
+ require 'spec_helper.rb'
2
+ require 'rflow/configuration'
3
+
4
+ describe RFlow::Configuration::RubyDSL do
5
+ before(:each) do
6
+ ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
7
+ RFlow::Configuration.migrate_database
8
+ end
9
+
10
+ it "should correctly process an empty DSL" do
11
+ described_class.configure { |c| }
12
+
13
+ config = RFlow::Configuration.new
14
+ RFlow::Configuration::Shard.count.should == 1
15
+ RFlow::Configuration::Component.count.should == 0
16
+ RFlow::Configuration::Port.count.should == 0
17
+ RFlow::Configuration::Connection.count.should == 0
18
+
19
+ puts config.to_s
20
+ end
21
+
22
+ it "should correctly process a component declaration" do
23
+ described_class.configure do |c|
24
+ c.component 'boom', 'town', 'opt1' => 'OPT1', 'opt2' => 'OPT2'
25
+ end
26
+
27
+ config = RFlow::Configuration.new
28
+ RFlow::Configuration::Shard.count.should == 1
29
+ RFlow::Configuration::Component.count.should == 1
30
+ RFlow::Configuration::Port.count.should == 0
31
+ RFlow::Configuration::Connection.count.should == 0
32
+
33
+ component = RFlow::Configuration::Component.all.first
34
+ component.name.should == 'boom'
35
+ component.specification.should == 'town'
36
+ component.options.should == {'opt1' => 'OPT1', 'opt2' => 'OPT2'}
37
+ end
38
+
39
+ it "should correctly process a connect declaration" do
40
+ described_class.configure do |c|
41
+ c.component 'first', 'First'
42
+ c.component 'second', 'Second'
43
+ c.connect 'first#out' => 'second#in'
44
+ c.connect 'first#out' => 'second#in[inkey]'
45
+ c.connect 'first#out[outkey]' => 'second#in'
46
+ c.connect 'first#out[outkey]' => 'second#in[inkey]'
47
+ end
48
+
49
+ config = RFlow::Configuration.new
50
+ RFlow::Configuration::Shard.count.should == 1
51
+ RFlow::Configuration::Component.count.should == 2
52
+ RFlow::Configuration::Port.count.should == 2
53
+ RFlow::Configuration::Connection.count.should == 4
54
+
55
+ first_component = RFlow::Configuration::Component.where(name: 'first').first
56
+ second_component = RFlow::Configuration::Component.where(name: 'second').first
57
+
58
+ first_component.specification.should == 'First'
59
+ first_component.input_ports.count.should == 0
60
+ first_component.output_ports.count.should == 1
61
+ first_component.output_ports.first.name.should == 'out'
62
+ first_connections = first_component.output_ports.first.connections.all
63
+ first_connections.count.should == 4
64
+ first_connections[0].input_port_key.should be_nil
65
+ first_connections[0].output_port_key.should be_nil
66
+ first_connections[1].input_port_key.should == 'inkey'
67
+ first_connections[1].output_port_key.should be_nil
68
+ first_connections[2].input_port_key.should be_nil
69
+ first_connections[2].output_port_key.should == 'outkey'
70
+ first_connections[3].input_port_key.should == 'inkey'
71
+ first_connections[3].output_port_key.should == 'outkey'
72
+
73
+ second_component.specification.should == 'Second'
74
+ second_component.input_ports.count.should == 1
75
+ second_component.output_ports.count.should == 0
76
+ second_component.input_ports.first.name.should == 'in'
77
+ second_connections = second_component.input_ports.first.connections.all
78
+ second_connections.count.should == 4
79
+
80
+ first_connections.should == second_connections
81
+
82
+ puts config.to_s
83
+ end
84
+
85
+ it "should correctly process shard declarations" do
86
+ described_class.configure do |c|
87
+ c.component 'first', 'First', :opt1 => 'opt1'
88
+
89
+ c.shard "s1", :process => 2 do |s|
90
+ s.component 'second', 'Second', :opt1 => 'opt1', "opt2" => "opt2"
91
+ end
92
+
93
+ c.shard "s2", :type => :process, :count => 10 do |s|
94
+ s.component 'third', 'Third'
95
+ s.component 'fourth', 'Fourth'
96
+ end
97
+
98
+ c.component 'fifth', 'Fifth'
99
+
100
+ c.connect 'first#out' => 'second#in'
101
+ c.connect 'second#out[outkey]' => 'third#in[inkey]'
102
+ c.connect 'second#out' => 'third#in2'
103
+ c.connect 'third#out' => 'fourth#in'
104
+ c.connect 'third#out' => 'fifth#in'
105
+ end
106
+
107
+ config = RFlow::Configuration.new
108
+ RFlow::Configuration::Shard.count.should == 3
109
+ RFlow::Configuration::Component.count.should == 5
110
+ RFlow::Configuration::Port.count.should == 8
111
+ RFlow::Configuration::Connection.count.should == 5
112
+
113
+ shards = RFlow::Configuration::Shard.all
114
+ shards.map(&:name).should == ['DEFAULT', 's1', 's2']
115
+ shards.first.components.all.map(&:name).should == ['first', 'fifth']
116
+ shards.second.components.all.map(&:name).should == ['second']
117
+ shards.third.components.all.map(&:name).should == ['third', 'fourth']
118
+
119
+ RFlow::Configuration::Port.all.map(&:name).should == ['out', 'in', 'out', 'in', 'in2', 'out', 'in', 'in']
120
+
121
+ RFlow::Configuration::Connection.all.map(&:name).should == ['first#out=>second#in',
122
+ 'second#out[outkey]=>third#in[inkey]',
123
+ 'second#out=>third#in2',
124
+ 'third#out=>fourth#in',
125
+ 'third#out=>fifth#in']
126
+
127
+ puts config.to_s
128
+ end
129
+
130
+ it "should not allow two components with the same name" do
131
+ expect do
132
+ described_class.configure do |c|
133
+ c.component 'first', 'First'
134
+ c.component 'first', 'First'
135
+ end
136
+ end.to raise_error
137
+ end
138
+
139
+ it "should not allow two shards with the same name" do
140
+ expect do
141
+ described_class.configure do |c|
142
+ c.shard("s1", :process => 2) { |s| }
143
+ c.shard("s1", :process => 2) { |s| }
144
+ end
145
+ end.to raise_error
146
+ end
147
+
148
+ end
@@ -9,7 +9,7 @@ describe RFlow::Configuration do
9
9
  # RFlow::Configuration.available_data_extensions.clear
10
10
  end
11
11
 
12
-
12
+
13
13
  describe '.add_available_data_type' do
14
14
  context 'if passed a data_serialization that is not avro or xml' do
15
15
  it "should throw an exception" do
@@ -27,7 +27,7 @@ describe RFlow::Configuration do
27
27
  end
28
28
 
29
29
  describe "Data Extensions" do
30
-
30
+
31
31
  describe ".add_available_data_extension" do
32
32
  context 'if passed a non-module data extension' do
33
33
  it "should throw an exception" do
@@ -36,7 +36,7 @@ describe RFlow::Configuration do
36
36
  end.to raise_error(ArgumentError)
37
37
  end
38
38
  end
39
-
39
+
40
40
  context "if passed a valid Module as a data extension" do
41
41
  it "should update the available_data_extensions" do
42
42
  num_extensions = RFlow::Configuration.available_data_extensions['data_type'].size
@@ -47,7 +47,7 @@ describe RFlow::Configuration do
47
47
  end
48
48
  end
49
49
  end
50
-
50
+
51
51
  it "should perform simple 'prefix'-based inheritance for extensions" do
52
52
  RFlow::Configuration.add_available_data_extension('A', A = Module.new)
53
53
  RFlow::Configuration.add_available_data_extension('A::B', B = Module.new)
@@ -3,14 +3,14 @@ require 'spec_helper.rb'
3
3
  require 'rflow/components/raw'
4
4
 
5
5
  describe 'RFlow::Message::Data::Raw Avro Schema' do
6
- before(:each) do
6
+ before(:each) do
7
7
  @schema_string = RFlow::Configuration.available_data_types['RFlow::Message::Data::Raw']['avro']
8
8
  end
9
9
 
10
10
  it "should load the schema" do
11
11
  @schema_string.should_not == nil
12
12
  end
13
-
13
+
14
14
  it "should encode and decode an object" do
15
15
  raw = {'raw' => 'rawdata'}
16
16
 
@@ -17,22 +17,22 @@ describe RFlow::Message::Data do
17
17
  end
18
18
 
19
19
  context "if created with an invalid schema for the serialization" do
20
- it "should throw and exception" do
20
+ it "should throw and exception" do
21
21
  expect {RFlow::Message::Data.new(@invalid_avro_schema_string)}.to raise_error(ArgumentError)
22
22
  expect {RFlow::Message::Data.new(@invalid_avro_schema_string, 'avro')}.to raise_error(ArgumentError)
23
23
  expect {RFlow::Message::Data.new(@invalid_avro_schema_string, 'avro')}.to raise_error(ArgumentError)
24
24
  end
25
25
  end
26
-
26
+
27
27
  context "if created with a valid avro schema and serialization" do
28
28
  end
29
29
 
30
- context "if created with a valid avro schema" do
31
- it "should instantiate correctly" do
30
+ context "if created with a valid avro schema" do
31
+ it "should instantiate correctly" do
32
32
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
33
33
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
34
34
  end
35
-
35
+
36
36
  context "if created with a non-avro data serialization" do
37
37
  it "should throw an exception" do
38
38
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'unknown')}.to raise_error(ArgumentError)
@@ -41,7 +41,7 @@ describe RFlow::Message::Data do
41
41
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, :xml)}.to raise_error(ArgumentError)
42
42
  end
43
43
  end
44
-
44
+
45
45
  context "if created with an avro serialization" do
46
46
  it "should instantiate correctly" do
47
47
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
@@ -50,7 +50,7 @@ describe RFlow::Message do
50
50
  @string = 'this is a string to be serialized'
51
51
  @avro_serialized_string = encode_avro(@avro_string_schema_string, @string)
52
52
  end
53
-
53
+
54
54
  it "should instantiate correctly" do
55
55
  expect {RFlow::Message.new('string_type', [], 'avro', nil, @avro_serialized_string)}.to_not raise_error
56
56
  message = RFlow::Message.new('string_type', [], 'avro', nil, @avro_serialized_string)
@@ -65,12 +65,12 @@ describe RFlow::Message do
65
65
  @invalid_processing_event_hash = {'started_at' => 'bad time string'}
66
66
  @invalid_provenance = [@invalid_processing_event_hash]
67
67
  end
68
-
68
+
69
69
  it "should throw an exception" do
70
70
  expect {RFlow::Message.new('string_type', @invalid_provenance)}.to raise_error(ArgumentError)
71
71
  end
72
72
  end
73
-
73
+
74
74
  context "if created with valid provenance" do
75
75
  before(:all) do
76
76
  @valid_xmlschema_time = '2001-01-01T01:01:01.000001Z'
@@ -87,7 +87,7 @@ describe RFlow::Message do
87
87
  {"component_instance_uuid"=>"uuid", "started_at"=>@valid_xmlschema_time, "completed_at"=>@valid_xmlschema_time, "context"=>"context"},
88
88
  ]
89
89
  end
90
-
90
+
91
91
  it "should instantiate correctly" do
92
92
  p @valid_provenance
93
93
  expect {RFlow::Message.new('string_type', @valid_provenance)}.to_not raise_error
@@ -105,7 +105,7 @@ describe RFlow::Message do
105
105
  message = RFlow::Message.new('string_type', @valid_provenance)
106
106
  message.provenance.map(&:to_hash).should == @valid_provenance_hashes
107
107
  end
108
-
108
+
109
109
  end
110
110
 
111
111
  context "if correctly created" do
@@ -119,7 +119,7 @@ describe RFlow::Message do
119
119
  message.data.data_object.should == processed_message.data.data_object
120
120
  end
121
121
  end
122
-
122
+
123
123
  context "if data extensions exist" do
124
124
  it "should extend the data element with the extension" do
125
125
  module ExtensionModule; def ext_method; end; end
@@ -138,9 +138,9 @@ describe RFlow::Message do
138
138
  it "should correctly handle large raw types" do
139
139
  message = RFlow::Message.new('RFlow::Message::Data::Raw')
140
140
  message.data.raw = Array.new(101) { rand(256) }.pack('c*')
141
-
141
+
142
142
  message_avro = message.to_avro.force_encoding('BINARY')
143
-
143
+
144
144
  processed_message = RFlow::Message.from_avro(message_avro)
145
145
  processed_message_avro = processed_message.to_avro.force_encoding('BINARY')
146
146
 
@@ -148,13 +148,13 @@ describe RFlow::Message do
148
148
 
149
149
  encode_avro(@raw_schema, message.data.data_object).should == message.data.to_avro
150
150
  decode_avro(@raw_schema, message.data.to_avro).should == message.data.data_object
151
-
151
+
152
152
  p message.data.raw
153
153
  p message_avro
154
154
  p message_avro.bytesize
155
155
  p processed_message_avro
156
156
  p processed_message_avro.bytesize
157
-
157
+
158
158
  p message_avro.encoding
159
159
  p message_avro.valid_encoding?
160
160
 
@@ -163,7 +163,7 @@ describe RFlow::Message do
163
163
 
164
164
  message_data_avro = message.data.to_avro.force_encoding('BINARY')
165
165
  processed_message_data_avro = processed_message.data.to_avro.force_encoding('BINARY')
166
-
166
+
167
167
  p message_data_avro.encoding
168
168
  p message_data_avro.valid_encoding?
169
169
  p message_data_avro
@@ -173,10 +173,10 @@ describe RFlow::Message do
173
173
 
174
174
  Digest::MD5.hexdigest(message_avro).should == Digest::MD5.hexdigest(processed_message_avro)
175
175
 
176
-
176
+
177
177
  message_data_avro.should == processed_message_data_avro
178
178
  Digest::MD5.hexdigest(message_data_avro).should == Digest::MD5.hexdigest(processed_message_data_avro)
179
179
  Digest::MD5.hexdigest(message.data.raw).should == Digest::MD5.hexdigest(processed_message.data.raw)
180
180
  end
181
-
181
+
182
182
  end
@@ -1,100 +1,323 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
+ require 'open3'
3
4
  require 'rflow'
4
5
 
5
6
  describe RFlow do
6
- before(:each) do
7
- @fixture_directory_path = File.join(File.dirname(__FILE__), 'fixtures')
8
- end
9
7
 
10
-
11
- describe 'logger' do
12
- it "should initialize correctly" do
13
- log_file_path = File.join(@temp_directory_path, 'logfile')
14
- RFlow.initialize_logger log_file_path
8
+ before(:all) do
9
+ @extensions_file_name = File.join(File.dirname(__FILE__), 'fixtures', 'extensions_ints.rb')
10
+ end
15
11
 
16
- File.exist?(log_file_path).should_not be_nil
12
+ context "when executing from the test script" do
17
13
 
18
- RFlow.logger.error "TESTTESTTEST"
19
- File.read(log_file_path).should match(/TESTTESTTEST/)
20
-
21
- RFlow.close_log_file
14
+ before(:all) do
15
+ load @extensions_file_name
22
16
  end
23
17
 
24
- it "should reopen correctly" do
25
- log_file_path = File.join(@temp_directory_path, 'logfile')
26
- moved_path = log_file_path + '.old'
27
-
28
- RFlow.initialize_logger log_file_path
29
- File.exist?(log_file_path).should be_true
30
- File.exist?(moved_path).should be_false
31
-
32
- File.rename log_file_path, moved_path
33
-
34
- RFlow.reopen_log_file
35
-
36
- RFlow.logger.error "TESTTESTTEST"
37
- File.read(log_file_path).should match(/TESTTESTTEST/)
38
- File.read(moved_path).should_not match(/TESTTESTTEST/)
39
-
40
- RFlow.close_log_file
41
- end
18
+ describe '.run' do
19
+ before(:each) do
20
+ @original_directory_path = Dir.getwd
21
+ @run_directory_path = File.join(@temp_directory_path, 'run')
22
+ @log_directory_path = File.join(@temp_directory_path, 'log')
23
+ Dir.mkdir @run_directory_path
24
+ Dir.mkdir @log_directory_path
25
+ end
26
+
27
+ after(:each) do
28
+ Dir.chdir @original_directory_path
29
+ end
30
+
31
+ def run_rflow_with_dsl(&block)
32
+ rflow_thread = Thread.new do
33
+ ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
34
+ RFlow::Configuration.migrate_database
35
+ RFlow::Configuration::RubyDSL.configure do |c|
36
+ block.call(c)
37
+ end
38
+
39
+ RFlow::Configuration.merge_defaults!
40
+
41
+ RFlow.run nil, false
42
+ end
43
+
44
+ # TODO: figure out a way to get rid of this sleep, as there
45
+ # should be a better way to figure out when RFlow is done
46
+ sleep(2)
47
+
48
+ # Shut down the reactor and the thread
49
+ EM.run { EM.stop }
50
+ rflow_thread.join
51
+ end
52
+
53
+
54
+ it "should run a non-sharded workflow" do
55
+
56
+ run_rflow_with_dsl do |c|
57
+ c.setting('rflow.log_level', 'DEBUG')
58
+ c.setting('rflow.application_directory_path', @temp_directory_path)
59
+ c.setting('rflow.application_name', 'nonsharded_test')
60
+
61
+ c.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
62
+ c.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => 'out'
63
+ c.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
64
+ c.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even'
65
+ c.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_odd'
66
+ c.component 'output_even_odd', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even_odd'
67
+ c.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
68
+ c.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even_odd2'
69
+
70
+ c.connect 'generate_ints#out' => 'output#in'
71
+ c.connect 'generate_ints#out' => 'output2#in'
72
+ c.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
73
+ c.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
74
+ c.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
75
+ c.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
76
+ end
42
77
 
43
- it "should toggle log level" do
78
+ RFlow.master.shards.count.should == 1
79
+ RFlow.master.shards.first.workers.count.should == 1
80
+
81
+ output_files = {
82
+ 'out' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
83
+ 'out2' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
84
+ 'out_even' => [20, 22, 24, 26, 28, 30],
85
+ 'out_odd' => [21, 23, 25, 27, 29],
86
+ 'out_even_odd' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
87
+ 'out_even_odd2' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
88
+ }
89
+
90
+ output_files.each do |file_name, expected_contents|
91
+ File.exist?(File.join(@temp_directory_path, file_name)).should be_true
92
+ File.readlines(file_name).map(&:to_i).should == expected_contents
93
+ end
94
+ end
95
+
96
+
97
+ it "should run a sharded workflow" do
98
+ run_rflow_with_dsl do |c|
99
+ c.setting('rflow.log_level', 'DEBUG')
100
+ c.setting('rflow.application_directory_path', @temp_directory_path)
101
+ c.setting('rflow.application_name', 'sharded_test')
102
+
103
+ # Instantiate components
104
+ c.shard 's1', :process => 3 do |s|
105
+ s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
106
+ end
107
+
108
+ c.shard 's2', :type => :process, :count => 2 do |s|
109
+ s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
110
+ end
111
+
112
+ c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
113
+
114
+ c.shard 's3', :process => 2 do |s|
115
+ s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
116
+ s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
117
+ end
118
+
119
+ c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
120
+ c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
121
+
122
+ # Hook components together
123
+ c.connect 'generate_ints1#out' => 'output1#in'
124
+ c.connect 'generate_ints2#out' => 'output2#in'
125
+ c.connect 'generate_ints3#out' => 'output3#in'
126
+ c.connect 'generate_ints1#out' => 'output_all#in'
127
+ c.connect 'generate_ints2#out' => 'output_all#in'
128
+ c.connect 'generate_ints3#out' => 'output_all#in'
129
+ end
130
+
131
+ RFlow.master.shards.count.should == 4
132
+ RFlow.master.shards.map(&:count).should == [1, 3, 2, 2]
133
+ RFlow.master.shards.map(&:workers).map(&:count).should == [1, 3, 2, 2]
134
+
135
+ output_files = {
136
+ 'out1' => [0, 3, 6, 9] * 3,
137
+ 'out2' => (20..30).to_a * 2,
138
+ 'out3' => (100..105).to_a,
139
+ 'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
140
+ }
141
+
142
+ output_files.each do |file_name, expected_contents|
143
+ File.exist?(File.join(@temp_directory_path, file_name)).should be_true
144
+ File.readlines(file_name).map(&:to_i).sort.should == expected_contents.sort
145
+ end
146
+ end
44
147
  end
45
148
  end
46
149
 
47
- describe '.run' do
150
+ context "when executing via the rflow binary" do
48
151
  before(:each) do
152
+ @original_directory_path = Dir.getwd
49
153
  @run_directory_path = File.join(@temp_directory_path, 'run')
50
154
  @log_directory_path = File.join(@temp_directory_path, 'log')
51
155
  Dir.mkdir @run_directory_path
52
156
  Dir.mkdir @log_directory_path
157
+ Dir.chdir @temp_directory_path
53
158
  end
54
159
 
55
- it "should startup and run correctly with non-trivial workflow" do
56
- config_file_path = File.join(@fixture_directory_path, 'config_ints.rb')
57
- extensions_path = File.join(@fixture_directory_path, 'extensions_ints.rb')
58
- config_database_path = File.join(@temp_directory_path, 'config.sqlite')
160
+ after(:each) do
161
+ Dir.chdir @original_directory_path
162
+ end
59
163
 
60
- # Load the new database with the fixtured config file
61
- RFlow::Configuration::initialize_database(config_database_path, config_file_path)
62
- File.exist?(config_database_path).should be_true
164
+ def execute_rflow(rflow_args)
165
+ r = {}
166
+ r[:stdout], r[:stderr], r[:status] = Open3.capture3("bundle exec rflow #{rflow_args}")
167
+ r
168
+ end
169
+
170
+ context "with a simple ruby DSL config file" do
171
+ before(:each) do
172
+ @config_file_name = 'input_config'
173
+ File.open('input_config', 'w+') do |file|
174
+ file.write <<-EOF
175
+ RFlow::Configuration::RubyDSL.configure do |c|
176
+ c.setting 'mysetting', 'myvalue'
177
+ end
178
+ EOF
179
+ end
180
+ end
181
+
182
+ it "should load a ruby dsl file into a sqlite DB" do
183
+ db_file_name = 'outdb'
184
+
185
+ r = execute_rflow("load -d #{db_file_name} -c #{@config_file_name}")
186
+
187
+ # Make sure that the process execution worked
188
+ r[:status].exitstatus.should == 0
189
+ r[:stderr].should == ''
190
+ r[:stdout].should match /Successfully initialized database.*#{db_file_name}/
191
+
192
+ # Make sure the config actually got loaded
193
+ ActiveRecord::Base.establish_connection adapter: "sqlite3", database: db_file_name
194
+ RFlow::Configuration::Setting.where(:name => 'mysetting').first.value.should == 'myvalue'
195
+ end
63
196
 
64
- # Load the fixtured extensions
65
- load extensions_path
197
+ it "should not load a database if the database file already exists" do
198
+ db_file_name = 'outdb'
199
+ File.open(db_file_name, 'w') { |file| file.write 'boom' }
66
200
 
67
- # Startup RFlow in its own thread
68
- rflow_thread = Thread.new do
69
- RFlow.run config_database_path, false
201
+ r = execute_rflow("load -d #{db_file_name} -c #{@config_file_name}")
202
+
203
+ # Make sure that the process execution worked
204
+ r[:status].exitstatus.should == 1
205
+ r[:stderr].should == ''
206
+ r[:stdout].should match /Config database.*#{db_file_name}.*exists/
70
207
  end
71
208
 
72
- # TODO: figure out a way to get rid of this sleep, as there
73
- # should be a better way
74
- sleep(5)
75
-
76
- all_file_path = File.join(@temp_directory_path, 'out')
77
- all2_file_path = File.join(@temp_directory_path, 'out2')
78
- even_file_path = File.join(@temp_directory_path, 'out_even')
79
- odd_file_path = File.join(@temp_directory_path, 'out_odd')
80
- even_odd_file_path = File.join(@temp_directory_path, 'out_even_odd')
81
- even_odd2_file_path = File.join(@temp_directory_path, 'out_even_odd2')
82
-
83
- File.exist?(all_file_path).should be_true
84
- File.exist?(all2_file_path).should be_true
85
- File.exist?(even_file_path).should be_true
86
- File.exist?(odd_file_path).should be_true
87
- File.exist?(even_odd_file_path).should be_true
88
- File.exist?(even_odd2_file_path).should be_true
89
-
90
- File.readlines(all_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
91
- File.readlines(all2_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
92
- File.readlines(even_file_path).map(&:to_i).should == [20, 22, 24, 26, 28, 30]
93
- File.readlines(odd_file_path).map(&:to_i).should == [21, 23, 25, 27, 29]
94
- File.readlines(even_odd_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
95
- File.readlines(even_odd2_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
96
209
  end
97
- end
98
-
99
210
 
211
+ context "with a complex, sharded ruby DSL config file" do
212
+ before(:each) do
213
+ @config_file_name = 'input_config'
214
+ @db_file_name = 'config_db'
215
+ @app_name = 'sharded_bin_test'
216
+ File.open(@config_file_name, 'w+') do |file|
217
+ file.write <<-EOF
218
+ RFlow::Configuration::RubyDSL.configure do |c|
219
+ c.setting('rflow.log_level', 'INFO')
220
+ c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
221
+ c.setting('rflow.application_name', '#{@app_name}')
222
+ # Instantiate components
223
+ c.shard 's1', :process => 3 do |s|
224
+ s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
225
+ end
226
+ c.shard 's2', :type => :process, :count => 2 do |s|
227
+ s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
228
+ end
229
+ c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
230
+ c.shard 's3', :process => 2 do |s|
231
+ s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
232
+ s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
233
+ end
234
+ c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
235
+ c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
236
+ # Hook components together
237
+ c.connect 'generate_ints1#out' => 'output1#in'
238
+ c.connect 'generate_ints2#out' => 'output2#in'
239
+ c.connect 'generate_ints3#out' => 'output3#in'
240
+ c.connect 'generate_ints1#out' => 'output_all#in'
241
+ c.connect 'generate_ints2#out' => 'output_all#in'
242
+ c.connect 'generate_ints3#out' => 'output_all#in'
243
+ end
244
+ EOF
245
+ end
246
+ r = execute_rflow("load -d #{@db_file_name} -c #{@config_file_name}")
247
+ r[:status].exitstatus.should == 0
248
+ r[:stderr].should == ''
249
+ r[:stdout].should match /Successfully initialized database.*#{@db_file_name}/
250
+ end
251
+
252
+ it "should not start if the components aren't loaded" do
253
+ r = execute_rflow("start -d #{@db_file_name} -f")
254
+
255
+ r[:status].exitstatus.should == 1
256
+ r[:stderr].should == ''
257
+ r[:stdout].should match /error/i
258
+ end
259
+
260
+ it "should daemonize and run in the background" do
261
+ r = execute_rflow("start -d #{@db_file_name} -e #{@extensions_file_name}")
262
+
263
+ r[:status].exitstatus.should == 0
264
+ r[:stderr].should == ''
265
+ r[:stdout].should_not match /error/i
266
+
267
+ sleep 1 # give the daemon a chance to finish
268
+
269
+ log_contents = File.read("log/#{@app_name}.log").chomp
270
+ log_lines = log_contents.split("\n")
271
+
272
+ puts '++++++++++++++++++++'
273
+ puts log_contents
274
+ puts '++++++++++++++++++++'
275
+
276
+ # Log file testing
277
+ log_lines.each { |line| line.should_not match /^ERROR/ }
278
+ log_lines.each { |line| line.should_not match /^DEBUG/ }
279
+
280
+ # Grab all the pids from the log, which seems to be the only
281
+ # reliable way to get them
282
+ log_pids = log_lines.map { |line| /\((\d+)\)/.match(line)[1].to_i }.uniq
283
+
284
+ initial_pid = r[:status].pid
285
+ master_pid = File.read("run/#{@app_name}.pid").chomp.to_i
286
+ worker_pids = log_pids - [initial_pid, master_pid]
287
+
288
+ log_pids.should include initial_pid
289
+ log_pids.should include master_pid
290
+
291
+ worker_pids.size.should == 8
292
+ worker_pids.should_not include 0
293
+
294
+ # Process checks
295
+ expect { Process.kill(0, initial_pid) }.to raise_error(Errno::ESRCH)
296
+ ([master_pid] + worker_pids).each do |pid|
297
+ Process.kill(0, pid).should == 1
298
+ end
299
+
300
+ # Output checks
301
+ output_files = {
302
+ 'out1' => [0, 3, 6, 9] * 3,
303
+ 'out2' => (20..30).to_a * 2,
304
+ 'out3' => (100..105).to_a,
305
+ 'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
306
+ }
307
+
308
+ output_files.each do |file_name, expected_contents|
309
+ File.exist?(File.join(@temp_directory_path, file_name)).should be_true
310
+ File.readlines(file_name).map(&:to_i).sort.should == expected_contents.sort
311
+ end
312
+
313
+ # Terminate the master
314
+ Process.kill("TERM", master_pid).should == 1
315
+
316
+ # Make sure everything is dead
317
+ ([master_pid] + worker_pids).each do |pid|
318
+ expect { Process.kill(0, pid) }.to raise_error(Errno::ESRCH)
319
+ end
320
+ end
321
+ end
322
+ end
100
323
  end