rflow 0.0.5 → 1.0.0a1

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/.ruby-gemset +1 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +21 -0
  5. data/.yardopts +1 -0
  6. data/Gemfile +5 -1
  7. data/Guardfile +8 -0
  8. data/LICENSE +190 -0
  9. data/NOTES +26 -13
  10. data/README.md +448 -0
  11. data/Rakefile +5 -12
  12. data/bin/rflow +23 -20
  13. data/example/basic_config.rb +2 -2
  14. data/example/basic_extensions.rb +8 -8
  15. data/example/http_config.rb +1 -1
  16. data/example/http_extensions.rb +15 -15
  17. data/lib/rflow.rb +15 -387
  18. data/lib/rflow/component.rb +105 -50
  19. data/lib/rflow/component/port.rb +25 -24
  20. data/lib/rflow/components/raw.rb +4 -4
  21. data/lib/rflow/components/raw/extensions.rb +2 -2
  22. data/lib/rflow/configuration.rb +54 -36
  23. data/lib/rflow/configuration/component.rb +2 -3
  24. data/lib/rflow/configuration/connection.rb +9 -10
  25. data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
  26. data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
  27. data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
  28. data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
  29. data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
  30. data/lib/rflow/configuration/port.rb +3 -4
  31. data/lib/rflow/configuration/ruby_dsl.rb +59 -35
  32. data/lib/rflow/configuration/setting.rb +8 -7
  33. data/lib/rflow/configuration/shard.rb +24 -0
  34. data/lib/rflow/configuration/uuid_keyed.rb +3 -3
  35. data/lib/rflow/connection.rb +21 -10
  36. data/lib/rflow/connections/zmq_connection.rb +45 -44
  37. data/lib/rflow/logger.rb +67 -0
  38. data/lib/rflow/master.rb +127 -0
  39. data/lib/rflow/message.rb +14 -14
  40. data/lib/rflow/pid_file.rb +84 -0
  41. data/lib/rflow/shard.rb +148 -0
  42. data/lib/rflow/version.rb +1 -1
  43. data/rflow.gemspec +22 -28
  44. data/schema/message.avsc +8 -8
  45. data/spec/fixtures/config_ints.rb +4 -4
  46. data/spec/fixtures/config_shards.rb +30 -0
  47. data/spec/fixtures/extensions_ints.rb +8 -8
  48. data/spec/rflow_component_port_spec.rb +58 -0
  49. data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
  50. data/spec/rflow_configuration_spec.rb +4 -4
  51. data/spec/rflow_message_data_raw.rb +2 -2
  52. data/spec/rflow_message_data_spec.rb +6 -6
  53. data/spec/rflow_message_spec.rb +13 -13
  54. data/spec/rflow_spec.rb +294 -71
  55. data/spec/schema_spec.rb +2 -2
  56. data/spec/spec_helper.rb +6 -4
  57. data/temp.rb +21 -21
  58. metadata +56 -65
  59. data/.rvmrc +0 -1
  60. data/README +0 -0
@@ -0,0 +1,58 @@
1
+ require 'spec_helper.rb'
2
+
3
+ describe RFlow::Component::Port do
4
+ it "should not be connected" do
5
+ described_class.new.connected?.should be_false
6
+ end
7
+ end
8
+
9
+ describe RFlow::Component::HashPort do
10
+ it "should not be connected" do
11
+ port_config = double('Port Config')
12
+ port_config.should_receive(:name).and_return('port')
13
+ port_config.should_receive(:uuid).and_return('1')
14
+
15
+ port = described_class.new(port_config)
16
+ port.connected?.should be_false
17
+ end
18
+ end
19
+
20
+ describe RFlow::Component::InputPort do
21
+ context ".connect!" do
22
+ it "should be connected" do
23
+ connection_double = double('connection')
24
+ connection_double.should_receive(:connect_input!)
25
+
26
+ port_config = double('Port Config')
27
+ port_config.should_receive(:name).and_return('port')
28
+ port_config.should_receive(:uuid).and_return('1')
29
+
30
+ port = described_class.new(port_config)
31
+ port.add_connection(nil, connection_double)
32
+
33
+ port.connected?.should be_false
34
+ port.connect!
35
+ port.connected?.should be_true
36
+ end
37
+ end
38
+ end
39
+
40
+ describe RFlow::Component::OutputPort do
41
+ context ".connect!" do
42
+ it "shouldbe connected" do
43
+ connection_double = double('connection')
44
+ connection_double.should_receive(:connect_output!)
45
+
46
+ port_config = double('Port Config')
47
+ port_config.should_receive(:name).and_return('port')
48
+ port_config.should_receive(:uuid).and_return('1')
49
+
50
+ port = described_class.new(port_config)
51
+ port.add_connection(nil, connection_double)
52
+
53
+ port.connected?.should be_false
54
+ port.connect!
55
+ port.connected?.should be_true
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,148 @@
1
+ require 'spec_helper.rb'
2
+ require 'rflow/configuration'
3
+
4
+ describe RFlow::Configuration::RubyDSL do
5
+ before(:each) do
6
+ ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
7
+ RFlow::Configuration.migrate_database
8
+ end
9
+
10
+ it "should correctly process an empty DSL" do
11
+ described_class.configure { |c| }
12
+
13
+ config = RFlow::Configuration.new
14
+ RFlow::Configuration::Shard.count.should == 1
15
+ RFlow::Configuration::Component.count.should == 0
16
+ RFlow::Configuration::Port.count.should == 0
17
+ RFlow::Configuration::Connection.count.should == 0
18
+
19
+ puts config.to_s
20
+ end
21
+
22
+ it "should correctly process a component declaration" do
23
+ described_class.configure do |c|
24
+ c.component 'boom', 'town', 'opt1' => 'OPT1', 'opt2' => 'OPT2'
25
+ end
26
+
27
+ config = RFlow::Configuration.new
28
+ RFlow::Configuration::Shard.count.should == 1
29
+ RFlow::Configuration::Component.count.should == 1
30
+ RFlow::Configuration::Port.count.should == 0
31
+ RFlow::Configuration::Connection.count.should == 0
32
+
33
+ component = RFlow::Configuration::Component.all.first
34
+ component.name.should == 'boom'
35
+ component.specification.should == 'town'
36
+ component.options.should == {'opt1' => 'OPT1', 'opt2' => 'OPT2'}
37
+ end
38
+
39
+ it "should correctly process a connect declaration" do
40
+ described_class.configure do |c|
41
+ c.component 'first', 'First'
42
+ c.component 'second', 'Second'
43
+ c.connect 'first#out' => 'second#in'
44
+ c.connect 'first#out' => 'second#in[inkey]'
45
+ c.connect 'first#out[outkey]' => 'second#in'
46
+ c.connect 'first#out[outkey]' => 'second#in[inkey]'
47
+ end
48
+
49
+ config = RFlow::Configuration.new
50
+ RFlow::Configuration::Shard.count.should == 1
51
+ RFlow::Configuration::Component.count.should == 2
52
+ RFlow::Configuration::Port.count.should == 2
53
+ RFlow::Configuration::Connection.count.should == 4
54
+
55
+ first_component = RFlow::Configuration::Component.where(name: 'first').first
56
+ second_component = RFlow::Configuration::Component.where(name: 'second').first
57
+
58
+ first_component.specification.should == 'First'
59
+ first_component.input_ports.count.should == 0
60
+ first_component.output_ports.count.should == 1
61
+ first_component.output_ports.first.name.should == 'out'
62
+ first_connections = first_component.output_ports.first.connections.all
63
+ first_connections.count.should == 4
64
+ first_connections[0].input_port_key.should be_nil
65
+ first_connections[0].output_port_key.should be_nil
66
+ first_connections[1].input_port_key.should == 'inkey'
67
+ first_connections[1].output_port_key.should be_nil
68
+ first_connections[2].input_port_key.should be_nil
69
+ first_connections[2].output_port_key.should == 'outkey'
70
+ first_connections[3].input_port_key.should == 'inkey'
71
+ first_connections[3].output_port_key.should == 'outkey'
72
+
73
+ second_component.specification.should == 'Second'
74
+ second_component.input_ports.count.should == 1
75
+ second_component.output_ports.count.should == 0
76
+ second_component.input_ports.first.name.should == 'in'
77
+ second_connections = second_component.input_ports.first.connections.all
78
+ second_connections.count.should == 4
79
+
80
+ first_connections.should == second_connections
81
+
82
+ puts config.to_s
83
+ end
84
+
85
+ it "should correctly process shard declarations" do
86
+ described_class.configure do |c|
87
+ c.component 'first', 'First', :opt1 => 'opt1'
88
+
89
+ c.shard "s1", :process => 2 do |s|
90
+ s.component 'second', 'Second', :opt1 => 'opt1', "opt2" => "opt2"
91
+ end
92
+
93
+ c.shard "s2", :type => :process, :count => 10 do |s|
94
+ s.component 'third', 'Third'
95
+ s.component 'fourth', 'Fourth'
96
+ end
97
+
98
+ c.component 'fifth', 'Fifth'
99
+
100
+ c.connect 'first#out' => 'second#in'
101
+ c.connect 'second#out[outkey]' => 'third#in[inkey]'
102
+ c.connect 'second#out' => 'third#in2'
103
+ c.connect 'third#out' => 'fourth#in'
104
+ c.connect 'third#out' => 'fifth#in'
105
+ end
106
+
107
+ config = RFlow::Configuration.new
108
+ RFlow::Configuration::Shard.count.should == 3
109
+ RFlow::Configuration::Component.count.should == 5
110
+ RFlow::Configuration::Port.count.should == 8
111
+ RFlow::Configuration::Connection.count.should == 5
112
+
113
+ shards = RFlow::Configuration::Shard.all
114
+ shards.map(&:name).should == ['DEFAULT', 's1', 's2']
115
+ shards.first.components.all.map(&:name).should == ['first', 'fifth']
116
+ shards.second.components.all.map(&:name).should == ['second']
117
+ shards.third.components.all.map(&:name).should == ['third', 'fourth']
118
+
119
+ RFlow::Configuration::Port.all.map(&:name).should == ['out', 'in', 'out', 'in', 'in2', 'out', 'in', 'in']
120
+
121
+ RFlow::Configuration::Connection.all.map(&:name).should == ['first#out=>second#in',
122
+ 'second#out[outkey]=>third#in[inkey]',
123
+ 'second#out=>third#in2',
124
+ 'third#out=>fourth#in',
125
+ 'third#out=>fifth#in']
126
+
127
+ puts config.to_s
128
+ end
129
+
130
+ it "should not allow two components with the same name" do
131
+ expect do
132
+ described_class.configure do |c|
133
+ c.component 'first', 'First'
134
+ c.component 'first', 'First'
135
+ end
136
+ end.to raise_error
137
+ end
138
+
139
+ it "should not allow two shards with the same name" do
140
+ expect do
141
+ described_class.configure do |c|
142
+ c.shard("s1", :process => 2) { |s| }
143
+ c.shard("s1", :process => 2) { |s| }
144
+ end
145
+ end.to raise_error
146
+ end
147
+
148
+ end
@@ -9,7 +9,7 @@ describe RFlow::Configuration do
9
9
  # RFlow::Configuration.available_data_extensions.clear
10
10
  end
11
11
 
12
-
12
+
13
13
  describe '.add_available_data_type' do
14
14
  context 'if passed a data_serialization that is not avro or xml' do
15
15
  it "should throw an exception" do
@@ -27,7 +27,7 @@ describe RFlow::Configuration do
27
27
  end
28
28
 
29
29
  describe "Data Extensions" do
30
-
30
+
31
31
  describe ".add_available_data_extension" do
32
32
  context 'if passed a non-module data extension' do
33
33
  it "should throw an exception" do
@@ -36,7 +36,7 @@ describe RFlow::Configuration do
36
36
  end.to raise_error(ArgumentError)
37
37
  end
38
38
  end
39
-
39
+
40
40
  context "if passed a valid Module as a data extension" do
41
41
  it "should update the available_data_extensions" do
42
42
  num_extensions = RFlow::Configuration.available_data_extensions['data_type'].size
@@ -47,7 +47,7 @@ describe RFlow::Configuration do
47
47
  end
48
48
  end
49
49
  end
50
-
50
+
51
51
  it "should perform simple 'prefix'-based inheritance for extensions" do
52
52
  RFlow::Configuration.add_available_data_extension('A', A = Module.new)
53
53
  RFlow::Configuration.add_available_data_extension('A::B', B = Module.new)
@@ -3,14 +3,14 @@ require 'spec_helper.rb'
3
3
  require 'rflow/components/raw'
4
4
 
5
5
  describe 'RFlow::Message::Data::Raw Avro Schema' do
6
- before(:each) do
6
+ before(:each) do
7
7
  @schema_string = RFlow::Configuration.available_data_types['RFlow::Message::Data::Raw']['avro']
8
8
  end
9
9
 
10
10
  it "should load the schema" do
11
11
  @schema_string.should_not == nil
12
12
  end
13
-
13
+
14
14
  it "should encode and decode an object" do
15
15
  raw = {'raw' => 'rawdata'}
16
16
 
@@ -17,22 +17,22 @@ describe RFlow::Message::Data do
17
17
  end
18
18
 
19
19
  context "if created with an invalid schema for the serialization" do
20
- it "should throw and exception" do
20
+ it "should throw and exception" do
21
21
  expect {RFlow::Message::Data.new(@invalid_avro_schema_string)}.to raise_error(ArgumentError)
22
22
  expect {RFlow::Message::Data.new(@invalid_avro_schema_string, 'avro')}.to raise_error(ArgumentError)
23
23
  expect {RFlow::Message::Data.new(@invalid_avro_schema_string, 'avro')}.to raise_error(ArgumentError)
24
24
  end
25
25
  end
26
-
26
+
27
27
  context "if created with a valid avro schema and serialization" do
28
28
  end
29
29
 
30
- context "if created with a valid avro schema" do
31
- it "should instantiate correctly" do
30
+ context "if created with a valid avro schema" do
31
+ it "should instantiate correctly" do
32
32
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
33
33
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
34
34
  end
35
-
35
+
36
36
  context "if created with a non-avro data serialization" do
37
37
  it "should throw an exception" do
38
38
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'unknown')}.to raise_error(ArgumentError)
@@ -41,7 +41,7 @@ describe RFlow::Message::Data do
41
41
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, :xml)}.to raise_error(ArgumentError)
42
42
  end
43
43
  end
44
-
44
+
45
45
  context "if created with an avro serialization" do
46
46
  it "should instantiate correctly" do
47
47
  expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
@@ -50,7 +50,7 @@ describe RFlow::Message do
50
50
  @string = 'this is a string to be serialized'
51
51
  @avro_serialized_string = encode_avro(@avro_string_schema_string, @string)
52
52
  end
53
-
53
+
54
54
  it "should instantiate correctly" do
55
55
  expect {RFlow::Message.new('string_type', [], 'avro', nil, @avro_serialized_string)}.to_not raise_error
56
56
  message = RFlow::Message.new('string_type', [], 'avro', nil, @avro_serialized_string)
@@ -65,12 +65,12 @@ describe RFlow::Message do
65
65
  @invalid_processing_event_hash = {'started_at' => 'bad time string'}
66
66
  @invalid_provenance = [@invalid_processing_event_hash]
67
67
  end
68
-
68
+
69
69
  it "should throw an exception" do
70
70
  expect {RFlow::Message.new('string_type', @invalid_provenance)}.to raise_error(ArgumentError)
71
71
  end
72
72
  end
73
-
73
+
74
74
  context "if created with valid provenance" do
75
75
  before(:all) do
76
76
  @valid_xmlschema_time = '2001-01-01T01:01:01.000001Z'
@@ -87,7 +87,7 @@ describe RFlow::Message do
87
87
  {"component_instance_uuid"=>"uuid", "started_at"=>@valid_xmlschema_time, "completed_at"=>@valid_xmlschema_time, "context"=>"context"},
88
88
  ]
89
89
  end
90
-
90
+
91
91
  it "should instantiate correctly" do
92
92
  p @valid_provenance
93
93
  expect {RFlow::Message.new('string_type', @valid_provenance)}.to_not raise_error
@@ -105,7 +105,7 @@ describe RFlow::Message do
105
105
  message = RFlow::Message.new('string_type', @valid_provenance)
106
106
  message.provenance.map(&:to_hash).should == @valid_provenance_hashes
107
107
  end
108
-
108
+
109
109
  end
110
110
 
111
111
  context "if correctly created" do
@@ -119,7 +119,7 @@ describe RFlow::Message do
119
119
  message.data.data_object.should == processed_message.data.data_object
120
120
  end
121
121
  end
122
-
122
+
123
123
  context "if data extensions exist" do
124
124
  it "should extend the data element with the extension" do
125
125
  module ExtensionModule; def ext_method; end; end
@@ -138,9 +138,9 @@ describe RFlow::Message do
138
138
  it "should correctly handle large raw types" do
139
139
  message = RFlow::Message.new('RFlow::Message::Data::Raw')
140
140
  message.data.raw = Array.new(101) { rand(256) }.pack('c*')
141
-
141
+
142
142
  message_avro = message.to_avro.force_encoding('BINARY')
143
-
143
+
144
144
  processed_message = RFlow::Message.from_avro(message_avro)
145
145
  processed_message_avro = processed_message.to_avro.force_encoding('BINARY')
146
146
 
@@ -148,13 +148,13 @@ describe RFlow::Message do
148
148
 
149
149
  encode_avro(@raw_schema, message.data.data_object).should == message.data.to_avro
150
150
  decode_avro(@raw_schema, message.data.to_avro).should == message.data.data_object
151
-
151
+
152
152
  p message.data.raw
153
153
  p message_avro
154
154
  p message_avro.bytesize
155
155
  p processed_message_avro
156
156
  p processed_message_avro.bytesize
157
-
157
+
158
158
  p message_avro.encoding
159
159
  p message_avro.valid_encoding?
160
160
 
@@ -163,7 +163,7 @@ describe RFlow::Message do
163
163
 
164
164
  message_data_avro = message.data.to_avro.force_encoding('BINARY')
165
165
  processed_message_data_avro = processed_message.data.to_avro.force_encoding('BINARY')
166
-
166
+
167
167
  p message_data_avro.encoding
168
168
  p message_data_avro.valid_encoding?
169
169
  p message_data_avro
@@ -173,10 +173,10 @@ describe RFlow::Message do
173
173
 
174
174
  Digest::MD5.hexdigest(message_avro).should == Digest::MD5.hexdigest(processed_message_avro)
175
175
 
176
-
176
+
177
177
  message_data_avro.should == processed_message_data_avro
178
178
  Digest::MD5.hexdigest(message_data_avro).should == Digest::MD5.hexdigest(processed_message_data_avro)
179
179
  Digest::MD5.hexdigest(message.data.raw).should == Digest::MD5.hexdigest(processed_message.data.raw)
180
180
  end
181
-
181
+
182
182
  end
@@ -1,100 +1,323 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
+ require 'open3'
3
4
  require 'rflow'
4
5
 
5
6
  describe RFlow do
6
- before(:each) do
7
- @fixture_directory_path = File.join(File.dirname(__FILE__), 'fixtures')
8
- end
9
7
 
10
-
11
- describe 'logger' do
12
- it "should initialize correctly" do
13
- log_file_path = File.join(@temp_directory_path, 'logfile')
14
- RFlow.initialize_logger log_file_path
8
+ before(:all) do
9
+ @extensions_file_name = File.join(File.dirname(__FILE__), 'fixtures', 'extensions_ints.rb')
10
+ end
15
11
 
16
- File.exist?(log_file_path).should_not be_nil
12
+ context "when executing from the test script" do
17
13
 
18
- RFlow.logger.error "TESTTESTTEST"
19
- File.read(log_file_path).should match(/TESTTESTTEST/)
20
-
21
- RFlow.close_log_file
14
+ before(:all) do
15
+ load @extensions_file_name
22
16
  end
23
17
 
24
- it "should reopen correctly" do
25
- log_file_path = File.join(@temp_directory_path, 'logfile')
26
- moved_path = log_file_path + '.old'
27
-
28
- RFlow.initialize_logger log_file_path
29
- File.exist?(log_file_path).should be_true
30
- File.exist?(moved_path).should be_false
31
-
32
- File.rename log_file_path, moved_path
33
-
34
- RFlow.reopen_log_file
35
-
36
- RFlow.logger.error "TESTTESTTEST"
37
- File.read(log_file_path).should match(/TESTTESTTEST/)
38
- File.read(moved_path).should_not match(/TESTTESTTEST/)
39
-
40
- RFlow.close_log_file
41
- end
18
+ describe '.run' do
19
+ before(:each) do
20
+ @original_directory_path = Dir.getwd
21
+ @run_directory_path = File.join(@temp_directory_path, 'run')
22
+ @log_directory_path = File.join(@temp_directory_path, 'log')
23
+ Dir.mkdir @run_directory_path
24
+ Dir.mkdir @log_directory_path
25
+ end
26
+
27
+ after(:each) do
28
+ Dir.chdir @original_directory_path
29
+ end
30
+
31
+ def run_rflow_with_dsl(&block)
32
+ rflow_thread = Thread.new do
33
+ ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
34
+ RFlow::Configuration.migrate_database
35
+ RFlow::Configuration::RubyDSL.configure do |c|
36
+ block.call(c)
37
+ end
38
+
39
+ RFlow::Configuration.merge_defaults!
40
+
41
+ RFlow.run nil, false
42
+ end
43
+
44
+ # TODO: figure out a way to get rid of this sleep, as there
45
+ # should be a better way to figure out when RFlow is done
46
+ sleep(2)
47
+
48
+ # Shut down the reactor and the thread
49
+ EM.run { EM.stop }
50
+ rflow_thread.join
51
+ end
52
+
53
+
54
+ it "should run a non-sharded workflow" do
55
+
56
+ run_rflow_with_dsl do |c|
57
+ c.setting('rflow.log_level', 'DEBUG')
58
+ c.setting('rflow.application_directory_path', @temp_directory_path)
59
+ c.setting('rflow.application_name', 'nonsharded_test')
60
+
61
+ c.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
62
+ c.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => 'out'
63
+ c.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
64
+ c.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even'
65
+ c.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_odd'
66
+ c.component 'output_even_odd', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even_odd'
67
+ c.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
68
+ c.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even_odd2'
69
+
70
+ c.connect 'generate_ints#out' => 'output#in'
71
+ c.connect 'generate_ints#out' => 'output2#in'
72
+ c.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
73
+ c.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
74
+ c.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
75
+ c.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
76
+ end
42
77
 
43
- it "should toggle log level" do
78
+ RFlow.master.shards.count.should == 1
79
+ RFlow.master.shards.first.workers.count.should == 1
80
+
81
+ output_files = {
82
+ 'out' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
83
+ 'out2' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
84
+ 'out_even' => [20, 22, 24, 26, 28, 30],
85
+ 'out_odd' => [21, 23, 25, 27, 29],
86
+ 'out_even_odd' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
87
+ 'out_even_odd2' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
88
+ }
89
+
90
+ output_files.each do |file_name, expected_contents|
91
+ File.exist?(File.join(@temp_directory_path, file_name)).should be_true
92
+ File.readlines(file_name).map(&:to_i).should == expected_contents
93
+ end
94
+ end
95
+
96
+
97
+ it "should run a sharded workflow" do
98
+ run_rflow_with_dsl do |c|
99
+ c.setting('rflow.log_level', 'DEBUG')
100
+ c.setting('rflow.application_directory_path', @temp_directory_path)
101
+ c.setting('rflow.application_name', 'sharded_test')
102
+
103
+ # Instantiate components
104
+ c.shard 's1', :process => 3 do |s|
105
+ s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
106
+ end
107
+
108
+ c.shard 's2', :type => :process, :count => 2 do |s|
109
+ s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
110
+ end
111
+
112
+ c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
113
+
114
+ c.shard 's3', :process => 2 do |s|
115
+ s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
116
+ s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
117
+ end
118
+
119
+ c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
120
+ c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
121
+
122
+ # Hook components together
123
+ c.connect 'generate_ints1#out' => 'output1#in'
124
+ c.connect 'generate_ints2#out' => 'output2#in'
125
+ c.connect 'generate_ints3#out' => 'output3#in'
126
+ c.connect 'generate_ints1#out' => 'output_all#in'
127
+ c.connect 'generate_ints2#out' => 'output_all#in'
128
+ c.connect 'generate_ints3#out' => 'output_all#in'
129
+ end
130
+
131
+ RFlow.master.shards.count.should == 4
132
+ RFlow.master.shards.map(&:count).should == [1, 3, 2, 2]
133
+ RFlow.master.shards.map(&:workers).map(&:count).should == [1, 3, 2, 2]
134
+
135
+ output_files = {
136
+ 'out1' => [0, 3, 6, 9] * 3,
137
+ 'out2' => (20..30).to_a * 2,
138
+ 'out3' => (100..105).to_a,
139
+ 'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
140
+ }
141
+
142
+ output_files.each do |file_name, expected_contents|
143
+ File.exist?(File.join(@temp_directory_path, file_name)).should be_true
144
+ File.readlines(file_name).map(&:to_i).sort.should == expected_contents.sort
145
+ end
146
+ end
44
147
  end
45
148
  end
46
149
 
47
- describe '.run' do
150
+ context "when executing via the rflow binary" do
48
151
  before(:each) do
152
+ @original_directory_path = Dir.getwd
49
153
  @run_directory_path = File.join(@temp_directory_path, 'run')
50
154
  @log_directory_path = File.join(@temp_directory_path, 'log')
51
155
  Dir.mkdir @run_directory_path
52
156
  Dir.mkdir @log_directory_path
157
+ Dir.chdir @temp_directory_path
53
158
  end
54
159
 
55
- it "should startup and run correctly with non-trivial workflow" do
56
- config_file_path = File.join(@fixture_directory_path, 'config_ints.rb')
57
- extensions_path = File.join(@fixture_directory_path, 'extensions_ints.rb')
58
- config_database_path = File.join(@temp_directory_path, 'config.sqlite')
160
+ after(:each) do
161
+ Dir.chdir @original_directory_path
162
+ end
59
163
 
60
- # Load the new database with the fixtured config file
61
- RFlow::Configuration::initialize_database(config_database_path, config_file_path)
62
- File.exist?(config_database_path).should be_true
164
+ def execute_rflow(rflow_args)
165
+ r = {}
166
+ r[:stdout], r[:stderr], r[:status] = Open3.capture3("bundle exec rflow #{rflow_args}")
167
+ r
168
+ end
169
+
170
+ context "with a simple ruby DSL config file" do
171
+ before(:each) do
172
+ @config_file_name = 'input_config'
173
+ File.open('input_config', 'w+') do |file|
174
+ file.write <<-EOF
175
+ RFlow::Configuration::RubyDSL.configure do |c|
176
+ c.setting 'mysetting', 'myvalue'
177
+ end
178
+ EOF
179
+ end
180
+ end
181
+
182
+ it "should load a ruby dsl file into a sqlite DB" do
183
+ db_file_name = 'outdb'
184
+
185
+ r = execute_rflow("load -d #{db_file_name} -c #{@config_file_name}")
186
+
187
+ # Make sure that the process execution worked
188
+ r[:status].exitstatus.should == 0
189
+ r[:stderr].should == ''
190
+ r[:stdout].should match /Successfully initialized database.*#{db_file_name}/
191
+
192
+ # Make sure the config actually got loaded
193
+ ActiveRecord::Base.establish_connection adapter: "sqlite3", database: db_file_name
194
+ RFlow::Configuration::Setting.where(:name => 'mysetting').first.value.should == 'myvalue'
195
+ end
63
196
 
64
- # Load the fixtured extensions
65
- load extensions_path
197
+ it "should not load a database if the database file already exists" do
198
+ db_file_name = 'outdb'
199
+ File.open(db_file_name, 'w') { |file| file.write 'boom' }
66
200
 
67
- # Startup RFlow in its own thread
68
- rflow_thread = Thread.new do
69
- RFlow.run config_database_path, false
201
+ r = execute_rflow("load -d #{db_file_name} -c #{@config_file_name}")
202
+
203
+ # Make sure that the process execution worked
204
+ r[:status].exitstatus.should == 1
205
+ r[:stderr].should == ''
206
+ r[:stdout].should match /Config database.*#{db_file_name}.*exists/
70
207
  end
71
208
 
72
- # TODO: figure out a way to get rid of this sleep, as there
73
- # should be a better way
74
- sleep(5)
75
-
76
- all_file_path = File.join(@temp_directory_path, 'out')
77
- all2_file_path = File.join(@temp_directory_path, 'out2')
78
- even_file_path = File.join(@temp_directory_path, 'out_even')
79
- odd_file_path = File.join(@temp_directory_path, 'out_odd')
80
- even_odd_file_path = File.join(@temp_directory_path, 'out_even_odd')
81
- even_odd2_file_path = File.join(@temp_directory_path, 'out_even_odd2')
82
-
83
- File.exist?(all_file_path).should be_true
84
- File.exist?(all2_file_path).should be_true
85
- File.exist?(even_file_path).should be_true
86
- File.exist?(odd_file_path).should be_true
87
- File.exist?(even_odd_file_path).should be_true
88
- File.exist?(even_odd2_file_path).should be_true
89
-
90
- File.readlines(all_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
91
- File.readlines(all2_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
92
- File.readlines(even_file_path).map(&:to_i).should == [20, 22, 24, 26, 28, 30]
93
- File.readlines(odd_file_path).map(&:to_i).should == [21, 23, 25, 27, 29]
94
- File.readlines(even_odd_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
95
- File.readlines(even_odd2_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
96
209
  end
97
- end
98
-
99
210
 
211
+ context "with a complex, sharded ruby DSL config file" do
212
+ before(:each) do
213
+ @config_file_name = 'input_config'
214
+ @db_file_name = 'config_db'
215
+ @app_name = 'sharded_bin_test'
216
+ File.open(@config_file_name, 'w+') do |file|
217
+ file.write <<-EOF
218
+ RFlow::Configuration::RubyDSL.configure do |c|
219
+ c.setting('rflow.log_level', 'INFO')
220
+ c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
221
+ c.setting('rflow.application_name', '#{@app_name}')
222
+ # Instantiate components
223
+ c.shard 's1', :process => 3 do |s|
224
+ s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
225
+ end
226
+ c.shard 's2', :type => :process, :count => 2 do |s|
227
+ s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
228
+ end
229
+ c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
230
+ c.shard 's3', :process => 2 do |s|
231
+ s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
232
+ s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
233
+ end
234
+ c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
235
+ c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
236
+ # Hook components together
237
+ c.connect 'generate_ints1#out' => 'output1#in'
238
+ c.connect 'generate_ints2#out' => 'output2#in'
239
+ c.connect 'generate_ints3#out' => 'output3#in'
240
+ c.connect 'generate_ints1#out' => 'output_all#in'
241
+ c.connect 'generate_ints2#out' => 'output_all#in'
242
+ c.connect 'generate_ints3#out' => 'output_all#in'
243
+ end
244
+ EOF
245
+ end
246
+ r = execute_rflow("load -d #{@db_file_name} -c #{@config_file_name}")
247
+ r[:status].exitstatus.should == 0
248
+ r[:stderr].should == ''
249
+ r[:stdout].should match /Successfully initialized database.*#{@db_file_name}/
250
+ end
251
+
252
+ it "should not start if the components aren't loaded" do
253
+ r = execute_rflow("start -d #{@db_file_name} -f")
254
+
255
+ r[:status].exitstatus.should == 1
256
+ r[:stderr].should == ''
257
+ r[:stdout].should match /error/i
258
+ end
259
+
260
+ it "should daemonize and run in the background" do
261
+ r = execute_rflow("start -d #{@db_file_name} -e #{@extensions_file_name}")
262
+
263
+ r[:status].exitstatus.should == 0
264
+ r[:stderr].should == ''
265
+ r[:stdout].should_not match /error/i
266
+
267
+ sleep 1 # give the daemon a chance to finish
268
+
269
+ log_contents = File.read("log/#{@app_name}.log").chomp
270
+ log_lines = log_contents.split("\n")
271
+
272
+ puts '++++++++++++++++++++'
273
+ puts log_contents
274
+ puts '++++++++++++++++++++'
275
+
276
+ # Log file testing
277
+ log_lines.each { |line| line.should_not match /^ERROR/ }
278
+ log_lines.each { |line| line.should_not match /^DEBUG/ }
279
+
280
+ # Grab all the pids from the log, which seems to be the only
281
+ # reliable way to get them
282
+ log_pids = log_lines.map { |line| /\((\d+)\)/.match(line)[1].to_i }.uniq
283
+
284
+ initial_pid = r[:status].pid
285
+ master_pid = File.read("run/#{@app_name}.pid").chomp.to_i
286
+ worker_pids = log_pids - [initial_pid, master_pid]
287
+
288
+ log_pids.should include initial_pid
289
+ log_pids.should include master_pid
290
+
291
+ worker_pids.size.should == 8
292
+ worker_pids.should_not include 0
293
+
294
+ # Process checks
295
+ expect { Process.kill(0, initial_pid) }.to raise_error(Errno::ESRCH)
296
+ ([master_pid] + worker_pids).each do |pid|
297
+ Process.kill(0, pid).should == 1
298
+ end
299
+
300
+ # Output checks
301
+ output_files = {
302
+ 'out1' => [0, 3, 6, 9] * 3,
303
+ 'out2' => (20..30).to_a * 2,
304
+ 'out3' => (100..105).to_a,
305
+ 'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
306
+ }
307
+
308
+ output_files.each do |file_name, expected_contents|
309
+ File.exist?(File.join(@temp_directory_path, file_name)).should be_true
310
+ File.readlines(file_name).map(&:to_i).sort.should == expected_contents.sort
311
+ end
312
+
313
+ # Terminate the master
314
+ Process.kill("TERM", master_pid).should == 1
315
+
316
+ # Make sure everything is dead
317
+ ([master_pid] + worker_pids).each do |pid|
318
+ expect { Process.kill(0, pid) }.to raise_error(Errno::ESRCH)
319
+ end
320
+ end
321
+ end
322
+ end
100
323
  end