rflow 0.0.5 → 1.0.0a1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +21 -0
- data/.yardopts +1 -0
- data/Gemfile +5 -1
- data/Guardfile +8 -0
- data/LICENSE +190 -0
- data/NOTES +26 -13
- data/README.md +448 -0
- data/Rakefile +5 -12
- data/bin/rflow +23 -20
- data/example/basic_config.rb +2 -2
- data/example/basic_extensions.rb +8 -8
- data/example/http_config.rb +1 -1
- data/example/http_extensions.rb +15 -15
- data/lib/rflow.rb +15 -387
- data/lib/rflow/component.rb +105 -50
- data/lib/rflow/component/port.rb +25 -24
- data/lib/rflow/components/raw.rb +4 -4
- data/lib/rflow/components/raw/extensions.rb +2 -2
- data/lib/rflow/configuration.rb +54 -36
- data/lib/rflow/configuration/component.rb +2 -3
- data/lib/rflow/configuration/connection.rb +9 -10
- data/lib/rflow/configuration/migrations/{20010101000001_create_settings.rb → 20010101000000_create_settings.rb} +2 -2
- data/lib/rflow/configuration/migrations/20010101000001_create_shards.rb +21 -0
- data/lib/rflow/configuration/migrations/20010101000002_create_components.rb +7 -2
- data/lib/rflow/configuration/migrations/20010101000003_create_ports.rb +3 -3
- data/lib/rflow/configuration/migrations/20010101000004_create_connections.rb +2 -2
- data/lib/rflow/configuration/port.rb +3 -4
- data/lib/rflow/configuration/ruby_dsl.rb +59 -35
- data/lib/rflow/configuration/setting.rb +8 -7
- data/lib/rflow/configuration/shard.rb +24 -0
- data/lib/rflow/configuration/uuid_keyed.rb +3 -3
- data/lib/rflow/connection.rb +21 -10
- data/lib/rflow/connections/zmq_connection.rb +45 -44
- data/lib/rflow/logger.rb +67 -0
- data/lib/rflow/master.rb +127 -0
- data/lib/rflow/message.rb +14 -14
- data/lib/rflow/pid_file.rb +84 -0
- data/lib/rflow/shard.rb +148 -0
- data/lib/rflow/version.rb +1 -1
- data/rflow.gemspec +22 -28
- data/schema/message.avsc +8 -8
- data/spec/fixtures/config_ints.rb +4 -4
- data/spec/fixtures/config_shards.rb +30 -0
- data/spec/fixtures/extensions_ints.rb +8 -8
- data/spec/rflow_component_port_spec.rb +58 -0
- data/spec/rflow_configuration_ruby_dsl_spec.rb +148 -0
- data/spec/rflow_configuration_spec.rb +4 -4
- data/spec/rflow_message_data_raw.rb +2 -2
- data/spec/rflow_message_data_spec.rb +6 -6
- data/spec/rflow_message_spec.rb +13 -13
- data/spec/rflow_spec.rb +294 -71
- data/spec/schema_spec.rb +2 -2
- data/spec/spec_helper.rb +6 -4
- data/temp.rb +21 -21
- metadata +56 -65
- data/.rvmrc +0 -1
- data/README +0 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper.rb'
|
2
|
+
|
3
|
+
describe RFlow::Component::Port do
|
4
|
+
it "should not be connected" do
|
5
|
+
described_class.new.connected?.should be_false
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
describe RFlow::Component::HashPort do
|
10
|
+
it "should not be connected" do
|
11
|
+
port_config = double('Port Config')
|
12
|
+
port_config.should_receive(:name).and_return('port')
|
13
|
+
port_config.should_receive(:uuid).and_return('1')
|
14
|
+
|
15
|
+
port = described_class.new(port_config)
|
16
|
+
port.connected?.should be_false
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe RFlow::Component::InputPort do
|
21
|
+
context ".connect!" do
|
22
|
+
it "should be connected" do
|
23
|
+
connection_double = double('connection')
|
24
|
+
connection_double.should_receive(:connect_input!)
|
25
|
+
|
26
|
+
port_config = double('Port Config')
|
27
|
+
port_config.should_receive(:name).and_return('port')
|
28
|
+
port_config.should_receive(:uuid).and_return('1')
|
29
|
+
|
30
|
+
port = described_class.new(port_config)
|
31
|
+
port.add_connection(nil, connection_double)
|
32
|
+
|
33
|
+
port.connected?.should be_false
|
34
|
+
port.connect!
|
35
|
+
port.connected?.should be_true
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
describe RFlow::Component::OutputPort do
|
41
|
+
context ".connect!" do
|
42
|
+
it "shouldbe connected" do
|
43
|
+
connection_double = double('connection')
|
44
|
+
connection_double.should_receive(:connect_output!)
|
45
|
+
|
46
|
+
port_config = double('Port Config')
|
47
|
+
port_config.should_receive(:name).and_return('port')
|
48
|
+
port_config.should_receive(:uuid).and_return('1')
|
49
|
+
|
50
|
+
port = described_class.new(port_config)
|
51
|
+
port.add_connection(nil, connection_double)
|
52
|
+
|
53
|
+
port.connected?.should be_false
|
54
|
+
port.connect!
|
55
|
+
port.connected?.should be_true
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'spec_helper.rb'
|
2
|
+
require 'rflow/configuration'
|
3
|
+
|
4
|
+
describe RFlow::Configuration::RubyDSL do
|
5
|
+
before(:each) do
|
6
|
+
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
|
7
|
+
RFlow::Configuration.migrate_database
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should correctly process an empty DSL" do
|
11
|
+
described_class.configure { |c| }
|
12
|
+
|
13
|
+
config = RFlow::Configuration.new
|
14
|
+
RFlow::Configuration::Shard.count.should == 1
|
15
|
+
RFlow::Configuration::Component.count.should == 0
|
16
|
+
RFlow::Configuration::Port.count.should == 0
|
17
|
+
RFlow::Configuration::Connection.count.should == 0
|
18
|
+
|
19
|
+
puts config.to_s
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should correctly process a component declaration" do
|
23
|
+
described_class.configure do |c|
|
24
|
+
c.component 'boom', 'town', 'opt1' => 'OPT1', 'opt2' => 'OPT2'
|
25
|
+
end
|
26
|
+
|
27
|
+
config = RFlow::Configuration.new
|
28
|
+
RFlow::Configuration::Shard.count.should == 1
|
29
|
+
RFlow::Configuration::Component.count.should == 1
|
30
|
+
RFlow::Configuration::Port.count.should == 0
|
31
|
+
RFlow::Configuration::Connection.count.should == 0
|
32
|
+
|
33
|
+
component = RFlow::Configuration::Component.all.first
|
34
|
+
component.name.should == 'boom'
|
35
|
+
component.specification.should == 'town'
|
36
|
+
component.options.should == {'opt1' => 'OPT1', 'opt2' => 'OPT2'}
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should correctly process a connect declaration" do
|
40
|
+
described_class.configure do |c|
|
41
|
+
c.component 'first', 'First'
|
42
|
+
c.component 'second', 'Second'
|
43
|
+
c.connect 'first#out' => 'second#in'
|
44
|
+
c.connect 'first#out' => 'second#in[inkey]'
|
45
|
+
c.connect 'first#out[outkey]' => 'second#in'
|
46
|
+
c.connect 'first#out[outkey]' => 'second#in[inkey]'
|
47
|
+
end
|
48
|
+
|
49
|
+
config = RFlow::Configuration.new
|
50
|
+
RFlow::Configuration::Shard.count.should == 1
|
51
|
+
RFlow::Configuration::Component.count.should == 2
|
52
|
+
RFlow::Configuration::Port.count.should == 2
|
53
|
+
RFlow::Configuration::Connection.count.should == 4
|
54
|
+
|
55
|
+
first_component = RFlow::Configuration::Component.where(name: 'first').first
|
56
|
+
second_component = RFlow::Configuration::Component.where(name: 'second').first
|
57
|
+
|
58
|
+
first_component.specification.should == 'First'
|
59
|
+
first_component.input_ports.count.should == 0
|
60
|
+
first_component.output_ports.count.should == 1
|
61
|
+
first_component.output_ports.first.name.should == 'out'
|
62
|
+
first_connections = first_component.output_ports.first.connections.all
|
63
|
+
first_connections.count.should == 4
|
64
|
+
first_connections[0].input_port_key.should be_nil
|
65
|
+
first_connections[0].output_port_key.should be_nil
|
66
|
+
first_connections[1].input_port_key.should == 'inkey'
|
67
|
+
first_connections[1].output_port_key.should be_nil
|
68
|
+
first_connections[2].input_port_key.should be_nil
|
69
|
+
first_connections[2].output_port_key.should == 'outkey'
|
70
|
+
first_connections[3].input_port_key.should == 'inkey'
|
71
|
+
first_connections[3].output_port_key.should == 'outkey'
|
72
|
+
|
73
|
+
second_component.specification.should == 'Second'
|
74
|
+
second_component.input_ports.count.should == 1
|
75
|
+
second_component.output_ports.count.should == 0
|
76
|
+
second_component.input_ports.first.name.should == 'in'
|
77
|
+
second_connections = second_component.input_ports.first.connections.all
|
78
|
+
second_connections.count.should == 4
|
79
|
+
|
80
|
+
first_connections.should == second_connections
|
81
|
+
|
82
|
+
puts config.to_s
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should correctly process shard declarations" do
|
86
|
+
described_class.configure do |c|
|
87
|
+
c.component 'first', 'First', :opt1 => 'opt1'
|
88
|
+
|
89
|
+
c.shard "s1", :process => 2 do |s|
|
90
|
+
s.component 'second', 'Second', :opt1 => 'opt1', "opt2" => "opt2"
|
91
|
+
end
|
92
|
+
|
93
|
+
c.shard "s2", :type => :process, :count => 10 do |s|
|
94
|
+
s.component 'third', 'Third'
|
95
|
+
s.component 'fourth', 'Fourth'
|
96
|
+
end
|
97
|
+
|
98
|
+
c.component 'fifth', 'Fifth'
|
99
|
+
|
100
|
+
c.connect 'first#out' => 'second#in'
|
101
|
+
c.connect 'second#out[outkey]' => 'third#in[inkey]'
|
102
|
+
c.connect 'second#out' => 'third#in2'
|
103
|
+
c.connect 'third#out' => 'fourth#in'
|
104
|
+
c.connect 'third#out' => 'fifth#in'
|
105
|
+
end
|
106
|
+
|
107
|
+
config = RFlow::Configuration.new
|
108
|
+
RFlow::Configuration::Shard.count.should == 3
|
109
|
+
RFlow::Configuration::Component.count.should == 5
|
110
|
+
RFlow::Configuration::Port.count.should == 8
|
111
|
+
RFlow::Configuration::Connection.count.should == 5
|
112
|
+
|
113
|
+
shards = RFlow::Configuration::Shard.all
|
114
|
+
shards.map(&:name).should == ['DEFAULT', 's1', 's2']
|
115
|
+
shards.first.components.all.map(&:name).should == ['first', 'fifth']
|
116
|
+
shards.second.components.all.map(&:name).should == ['second']
|
117
|
+
shards.third.components.all.map(&:name).should == ['third', 'fourth']
|
118
|
+
|
119
|
+
RFlow::Configuration::Port.all.map(&:name).should == ['out', 'in', 'out', 'in', 'in2', 'out', 'in', 'in']
|
120
|
+
|
121
|
+
RFlow::Configuration::Connection.all.map(&:name).should == ['first#out=>second#in',
|
122
|
+
'second#out[outkey]=>third#in[inkey]',
|
123
|
+
'second#out=>third#in2',
|
124
|
+
'third#out=>fourth#in',
|
125
|
+
'third#out=>fifth#in']
|
126
|
+
|
127
|
+
puts config.to_s
|
128
|
+
end
|
129
|
+
|
130
|
+
it "should not allow two components with the same name" do
|
131
|
+
expect do
|
132
|
+
described_class.configure do |c|
|
133
|
+
c.component 'first', 'First'
|
134
|
+
c.component 'first', 'First'
|
135
|
+
end
|
136
|
+
end.to raise_error
|
137
|
+
end
|
138
|
+
|
139
|
+
it "should not allow two shards with the same name" do
|
140
|
+
expect do
|
141
|
+
described_class.configure do |c|
|
142
|
+
c.shard("s1", :process => 2) { |s| }
|
143
|
+
c.shard("s1", :process => 2) { |s| }
|
144
|
+
end
|
145
|
+
end.to raise_error
|
146
|
+
end
|
147
|
+
|
148
|
+
end
|
@@ -9,7 +9,7 @@ describe RFlow::Configuration do
|
|
9
9
|
# RFlow::Configuration.available_data_extensions.clear
|
10
10
|
end
|
11
11
|
|
12
|
-
|
12
|
+
|
13
13
|
describe '.add_available_data_type' do
|
14
14
|
context 'if passed a data_serialization that is not avro or xml' do
|
15
15
|
it "should throw an exception" do
|
@@ -27,7 +27,7 @@ describe RFlow::Configuration do
|
|
27
27
|
end
|
28
28
|
|
29
29
|
describe "Data Extensions" do
|
30
|
-
|
30
|
+
|
31
31
|
describe ".add_available_data_extension" do
|
32
32
|
context 'if passed a non-module data extension' do
|
33
33
|
it "should throw an exception" do
|
@@ -36,7 +36,7 @@ describe RFlow::Configuration do
|
|
36
36
|
end.to raise_error(ArgumentError)
|
37
37
|
end
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
context "if passed a valid Module as a data extension" do
|
41
41
|
it "should update the available_data_extensions" do
|
42
42
|
num_extensions = RFlow::Configuration.available_data_extensions['data_type'].size
|
@@ -47,7 +47,7 @@ describe RFlow::Configuration do
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
50
|
+
|
51
51
|
it "should perform simple 'prefix'-based inheritance for extensions" do
|
52
52
|
RFlow::Configuration.add_available_data_extension('A', A = Module.new)
|
53
53
|
RFlow::Configuration.add_available_data_extension('A::B', B = Module.new)
|
@@ -3,14 +3,14 @@ require 'spec_helper.rb'
|
|
3
3
|
require 'rflow/components/raw'
|
4
4
|
|
5
5
|
describe 'RFlow::Message::Data::Raw Avro Schema' do
|
6
|
-
before(:each) do
|
6
|
+
before(:each) do
|
7
7
|
@schema_string = RFlow::Configuration.available_data_types['RFlow::Message::Data::Raw']['avro']
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should load the schema" do
|
11
11
|
@schema_string.should_not == nil
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
it "should encode and decode an object" do
|
15
15
|
raw = {'raw' => 'rawdata'}
|
16
16
|
|
@@ -17,22 +17,22 @@ describe RFlow::Message::Data do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
context "if created with an invalid schema for the serialization" do
|
20
|
-
it "should throw and exception" do
|
20
|
+
it "should throw and exception" do
|
21
21
|
expect {RFlow::Message::Data.new(@invalid_avro_schema_string)}.to raise_error(ArgumentError)
|
22
22
|
expect {RFlow::Message::Data.new(@invalid_avro_schema_string, 'avro')}.to raise_error(ArgumentError)
|
23
23
|
expect {RFlow::Message::Data.new(@invalid_avro_schema_string, 'avro')}.to raise_error(ArgumentError)
|
24
24
|
end
|
25
25
|
end
|
26
|
-
|
26
|
+
|
27
27
|
context "if created with a valid avro schema and serialization" do
|
28
28
|
end
|
29
29
|
|
30
|
-
context "if created with a valid avro schema" do
|
31
|
-
it "should instantiate correctly" do
|
30
|
+
context "if created with a valid avro schema" do
|
31
|
+
it "should instantiate correctly" do
|
32
32
|
expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
|
33
33
|
expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
context "if created with a non-avro data serialization" do
|
37
37
|
it "should throw an exception" do
|
38
38
|
expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'unknown')}.to raise_error(ArgumentError)
|
@@ -41,7 +41,7 @@ describe RFlow::Message::Data do
|
|
41
41
|
expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, :xml)}.to raise_error(ArgumentError)
|
42
42
|
end
|
43
43
|
end
|
44
|
-
|
44
|
+
|
45
45
|
context "if created with an avro serialization" do
|
46
46
|
it "should instantiate correctly" do
|
47
47
|
expect {RFlow::Message::Data.new(@valid_avro_string_schema_string, 'avro')}.to_not raise_error
|
data/spec/rflow_message_spec.rb
CHANGED
@@ -50,7 +50,7 @@ describe RFlow::Message do
|
|
50
50
|
@string = 'this is a string to be serialized'
|
51
51
|
@avro_serialized_string = encode_avro(@avro_string_schema_string, @string)
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
it "should instantiate correctly" do
|
55
55
|
expect {RFlow::Message.new('string_type', [], 'avro', nil, @avro_serialized_string)}.to_not raise_error
|
56
56
|
message = RFlow::Message.new('string_type', [], 'avro', nil, @avro_serialized_string)
|
@@ -65,12 +65,12 @@ describe RFlow::Message do
|
|
65
65
|
@invalid_processing_event_hash = {'started_at' => 'bad time string'}
|
66
66
|
@invalid_provenance = [@invalid_processing_event_hash]
|
67
67
|
end
|
68
|
-
|
68
|
+
|
69
69
|
it "should throw an exception" do
|
70
70
|
expect {RFlow::Message.new('string_type', @invalid_provenance)}.to raise_error(ArgumentError)
|
71
71
|
end
|
72
72
|
end
|
73
|
-
|
73
|
+
|
74
74
|
context "if created with valid provenance" do
|
75
75
|
before(:all) do
|
76
76
|
@valid_xmlschema_time = '2001-01-01T01:01:01.000001Z'
|
@@ -87,7 +87,7 @@ describe RFlow::Message do
|
|
87
87
|
{"component_instance_uuid"=>"uuid", "started_at"=>@valid_xmlschema_time, "completed_at"=>@valid_xmlschema_time, "context"=>"context"},
|
88
88
|
]
|
89
89
|
end
|
90
|
-
|
90
|
+
|
91
91
|
it "should instantiate correctly" do
|
92
92
|
p @valid_provenance
|
93
93
|
expect {RFlow::Message.new('string_type', @valid_provenance)}.to_not raise_error
|
@@ -105,7 +105,7 @@ describe RFlow::Message do
|
|
105
105
|
message = RFlow::Message.new('string_type', @valid_provenance)
|
106
106
|
message.provenance.map(&:to_hash).should == @valid_provenance_hashes
|
107
107
|
end
|
108
|
-
|
108
|
+
|
109
109
|
end
|
110
110
|
|
111
111
|
context "if correctly created" do
|
@@ -119,7 +119,7 @@ describe RFlow::Message do
|
|
119
119
|
message.data.data_object.should == processed_message.data.data_object
|
120
120
|
end
|
121
121
|
end
|
122
|
-
|
122
|
+
|
123
123
|
context "if data extensions exist" do
|
124
124
|
it "should extend the data element with the extension" do
|
125
125
|
module ExtensionModule; def ext_method; end; end
|
@@ -138,9 +138,9 @@ describe RFlow::Message do
|
|
138
138
|
it "should correctly handle large raw types" do
|
139
139
|
message = RFlow::Message.new('RFlow::Message::Data::Raw')
|
140
140
|
message.data.raw = Array.new(101) { rand(256) }.pack('c*')
|
141
|
-
|
141
|
+
|
142
142
|
message_avro = message.to_avro.force_encoding('BINARY')
|
143
|
-
|
143
|
+
|
144
144
|
processed_message = RFlow::Message.from_avro(message_avro)
|
145
145
|
processed_message_avro = processed_message.to_avro.force_encoding('BINARY')
|
146
146
|
|
@@ -148,13 +148,13 @@ describe RFlow::Message do
|
|
148
148
|
|
149
149
|
encode_avro(@raw_schema, message.data.data_object).should == message.data.to_avro
|
150
150
|
decode_avro(@raw_schema, message.data.to_avro).should == message.data.data_object
|
151
|
-
|
151
|
+
|
152
152
|
p message.data.raw
|
153
153
|
p message_avro
|
154
154
|
p message_avro.bytesize
|
155
155
|
p processed_message_avro
|
156
156
|
p processed_message_avro.bytesize
|
157
|
-
|
157
|
+
|
158
158
|
p message_avro.encoding
|
159
159
|
p message_avro.valid_encoding?
|
160
160
|
|
@@ -163,7 +163,7 @@ describe RFlow::Message do
|
|
163
163
|
|
164
164
|
message_data_avro = message.data.to_avro.force_encoding('BINARY')
|
165
165
|
processed_message_data_avro = processed_message.data.to_avro.force_encoding('BINARY')
|
166
|
-
|
166
|
+
|
167
167
|
p message_data_avro.encoding
|
168
168
|
p message_data_avro.valid_encoding?
|
169
169
|
p message_data_avro
|
@@ -173,10 +173,10 @@ describe RFlow::Message do
|
|
173
173
|
|
174
174
|
Digest::MD5.hexdigest(message_avro).should == Digest::MD5.hexdigest(processed_message_avro)
|
175
175
|
|
176
|
-
|
176
|
+
|
177
177
|
message_data_avro.should == processed_message_data_avro
|
178
178
|
Digest::MD5.hexdigest(message_data_avro).should == Digest::MD5.hexdigest(processed_message_data_avro)
|
179
179
|
Digest::MD5.hexdigest(message.data.raw).should == Digest::MD5.hexdigest(processed_message.data.raw)
|
180
180
|
end
|
181
|
-
|
181
|
+
|
182
182
|
end
|
data/spec/rflow_spec.rb
CHANGED
@@ -1,100 +1,323 @@
|
|
1
1
|
require 'spec_helper.rb'
|
2
2
|
|
3
|
+
require 'open3'
|
3
4
|
require 'rflow'
|
4
5
|
|
5
6
|
describe RFlow do
|
6
|
-
before(:each) do
|
7
|
-
@fixture_directory_path = File.join(File.dirname(__FILE__), 'fixtures')
|
8
|
-
end
|
9
7
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
log_file_path = File.join(@temp_directory_path, 'logfile')
|
14
|
-
RFlow.initialize_logger log_file_path
|
8
|
+
before(:all) do
|
9
|
+
@extensions_file_name = File.join(File.dirname(__FILE__), 'fixtures', 'extensions_ints.rb')
|
10
|
+
end
|
15
11
|
|
16
|
-
|
12
|
+
context "when executing from the test script" do
|
17
13
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
RFlow.close_log_file
|
14
|
+
before(:all) do
|
15
|
+
load @extensions_file_name
|
22
16
|
end
|
23
17
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
18
|
+
describe '.run' do
|
19
|
+
before(:each) do
|
20
|
+
@original_directory_path = Dir.getwd
|
21
|
+
@run_directory_path = File.join(@temp_directory_path, 'run')
|
22
|
+
@log_directory_path = File.join(@temp_directory_path, 'log')
|
23
|
+
Dir.mkdir @run_directory_path
|
24
|
+
Dir.mkdir @log_directory_path
|
25
|
+
end
|
26
|
+
|
27
|
+
after(:each) do
|
28
|
+
Dir.chdir @original_directory_path
|
29
|
+
end
|
30
|
+
|
31
|
+
def run_rflow_with_dsl(&block)
|
32
|
+
rflow_thread = Thread.new do
|
33
|
+
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: ":memory:"
|
34
|
+
RFlow::Configuration.migrate_database
|
35
|
+
RFlow::Configuration::RubyDSL.configure do |c|
|
36
|
+
block.call(c)
|
37
|
+
end
|
38
|
+
|
39
|
+
RFlow::Configuration.merge_defaults!
|
40
|
+
|
41
|
+
RFlow.run nil, false
|
42
|
+
end
|
43
|
+
|
44
|
+
# TODO: figure out a way to get rid of this sleep, as there
|
45
|
+
# should be a better way to figure out when RFlow is done
|
46
|
+
sleep(2)
|
47
|
+
|
48
|
+
# Shut down the reactor and the thread
|
49
|
+
EM.run { EM.stop }
|
50
|
+
rflow_thread.join
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
it "should run a non-sharded workflow" do
|
55
|
+
|
56
|
+
run_rflow_with_dsl do |c|
|
57
|
+
c.setting('rflow.log_level', 'DEBUG')
|
58
|
+
c.setting('rflow.application_directory_path', @temp_directory_path)
|
59
|
+
c.setting('rflow.application_name', 'nonsharded_test')
|
60
|
+
|
61
|
+
c.component 'generate_ints', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
|
62
|
+
c.component 'output', 'RFlow::Components::FileOutput', 'output_file_path' => 'out'
|
63
|
+
c.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
|
64
|
+
c.component 'output_even', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even'
|
65
|
+
c.component 'output_odd', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_odd'
|
66
|
+
c.component 'output_even_odd', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even_odd'
|
67
|
+
c.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
|
68
|
+
c.component 'output_even_odd2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_even_odd2'
|
69
|
+
|
70
|
+
c.connect 'generate_ints#out' => 'output#in'
|
71
|
+
c.connect 'generate_ints#out' => 'output2#in'
|
72
|
+
c.connect 'generate_ints#even_odd_out[even]' => 'output_even#in'
|
73
|
+
c.connect 'generate_ints#even_odd_out[odd]' => 'output_odd#in'
|
74
|
+
c.connect 'generate_ints#even_odd_out' => 'output_even_odd#in'
|
75
|
+
c.connect 'generate_ints2#even_odd_out' => 'output_even_odd2#in'
|
76
|
+
end
|
42
77
|
|
43
|
-
|
78
|
+
RFlow.master.shards.count.should == 1
|
79
|
+
RFlow.master.shards.first.workers.count.should == 1
|
80
|
+
|
81
|
+
output_files = {
|
82
|
+
'out' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
|
83
|
+
'out2' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
|
84
|
+
'out_even' => [20, 22, 24, 26, 28, 30],
|
85
|
+
'out_odd' => [21, 23, 25, 27, 29],
|
86
|
+
'out_even_odd' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
|
87
|
+
'out_even_odd2' => [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30],
|
88
|
+
}
|
89
|
+
|
90
|
+
output_files.each do |file_name, expected_contents|
|
91
|
+
File.exist?(File.join(@temp_directory_path, file_name)).should be_true
|
92
|
+
File.readlines(file_name).map(&:to_i).should == expected_contents
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
|
97
|
+
it "should run a sharded workflow" do
|
98
|
+
run_rflow_with_dsl do |c|
|
99
|
+
c.setting('rflow.log_level', 'DEBUG')
|
100
|
+
c.setting('rflow.application_directory_path', @temp_directory_path)
|
101
|
+
c.setting('rflow.application_name', 'sharded_test')
|
102
|
+
|
103
|
+
# Instantiate components
|
104
|
+
c.shard 's1', :process => 3 do |s|
|
105
|
+
s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
|
106
|
+
end
|
107
|
+
|
108
|
+
c.shard 's2', :type => :process, :count => 2 do |s|
|
109
|
+
s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
|
110
|
+
end
|
111
|
+
|
112
|
+
c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
|
113
|
+
|
114
|
+
c.shard 's3', :process => 2 do |s|
|
115
|
+
s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
|
116
|
+
s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
|
117
|
+
end
|
118
|
+
|
119
|
+
c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
|
120
|
+
c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
|
121
|
+
|
122
|
+
# Hook components together
|
123
|
+
c.connect 'generate_ints1#out' => 'output1#in'
|
124
|
+
c.connect 'generate_ints2#out' => 'output2#in'
|
125
|
+
c.connect 'generate_ints3#out' => 'output3#in'
|
126
|
+
c.connect 'generate_ints1#out' => 'output_all#in'
|
127
|
+
c.connect 'generate_ints2#out' => 'output_all#in'
|
128
|
+
c.connect 'generate_ints3#out' => 'output_all#in'
|
129
|
+
end
|
130
|
+
|
131
|
+
RFlow.master.shards.count.should == 4
|
132
|
+
RFlow.master.shards.map(&:count).should == [1, 3, 2, 2]
|
133
|
+
RFlow.master.shards.map(&:workers).map(&:count).should == [1, 3, 2, 2]
|
134
|
+
|
135
|
+
output_files = {
|
136
|
+
'out1' => [0, 3, 6, 9] * 3,
|
137
|
+
'out2' => (20..30).to_a * 2,
|
138
|
+
'out3' => (100..105).to_a,
|
139
|
+
'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
|
140
|
+
}
|
141
|
+
|
142
|
+
output_files.each do |file_name, expected_contents|
|
143
|
+
File.exist?(File.join(@temp_directory_path, file_name)).should be_true
|
144
|
+
File.readlines(file_name).map(&:to_i).sort.should == expected_contents.sort
|
145
|
+
end
|
146
|
+
end
|
44
147
|
end
|
45
148
|
end
|
46
149
|
|
47
|
-
|
150
|
+
context "when executing via the rflow binary" do
|
48
151
|
before(:each) do
|
152
|
+
@original_directory_path = Dir.getwd
|
49
153
|
@run_directory_path = File.join(@temp_directory_path, 'run')
|
50
154
|
@log_directory_path = File.join(@temp_directory_path, 'log')
|
51
155
|
Dir.mkdir @run_directory_path
|
52
156
|
Dir.mkdir @log_directory_path
|
157
|
+
Dir.chdir @temp_directory_path
|
53
158
|
end
|
54
159
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
config_database_path = File.join(@temp_directory_path, 'config.sqlite')
|
160
|
+
after(:each) do
|
161
|
+
Dir.chdir @original_directory_path
|
162
|
+
end
|
59
163
|
|
60
|
-
|
61
|
-
|
62
|
-
|
164
|
+
def execute_rflow(rflow_args)
|
165
|
+
r = {}
|
166
|
+
r[:stdout], r[:stderr], r[:status] = Open3.capture3("bundle exec rflow #{rflow_args}")
|
167
|
+
r
|
168
|
+
end
|
169
|
+
|
170
|
+
context "with a simple ruby DSL config file" do
|
171
|
+
before(:each) do
|
172
|
+
@config_file_name = 'input_config'
|
173
|
+
File.open('input_config', 'w+') do |file|
|
174
|
+
file.write <<-EOF
|
175
|
+
RFlow::Configuration::RubyDSL.configure do |c|
|
176
|
+
c.setting 'mysetting', 'myvalue'
|
177
|
+
end
|
178
|
+
EOF
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
it "should load a ruby dsl file into a sqlite DB" do
|
183
|
+
db_file_name = 'outdb'
|
184
|
+
|
185
|
+
r = execute_rflow("load -d #{db_file_name} -c #{@config_file_name}")
|
186
|
+
|
187
|
+
# Make sure that the process execution worked
|
188
|
+
r[:status].exitstatus.should == 0
|
189
|
+
r[:stderr].should == ''
|
190
|
+
r[:stdout].should match /Successfully initialized database.*#{db_file_name}/
|
191
|
+
|
192
|
+
# Make sure the config actually got loaded
|
193
|
+
ActiveRecord::Base.establish_connection adapter: "sqlite3", database: db_file_name
|
194
|
+
RFlow::Configuration::Setting.where(:name => 'mysetting').first.value.should == 'myvalue'
|
195
|
+
end
|
63
196
|
|
64
|
-
|
65
|
-
|
197
|
+
it "should not load a database if the database file already exists" do
|
198
|
+
db_file_name = 'outdb'
|
199
|
+
File.open(db_file_name, 'w') { |file| file.write 'boom' }
|
66
200
|
|
67
|
-
|
68
|
-
|
69
|
-
|
201
|
+
r = execute_rflow("load -d #{db_file_name} -c #{@config_file_name}")
|
202
|
+
|
203
|
+
# Make sure that the process execution worked
|
204
|
+
r[:status].exitstatus.should == 1
|
205
|
+
r[:stderr].should == ''
|
206
|
+
r[:stdout].should match /Config database.*#{db_file_name}.*exists/
|
70
207
|
end
|
71
208
|
|
72
|
-
# TODO: figure out a way to get rid of this sleep, as there
|
73
|
-
# should be a better way
|
74
|
-
sleep(5)
|
75
|
-
|
76
|
-
all_file_path = File.join(@temp_directory_path, 'out')
|
77
|
-
all2_file_path = File.join(@temp_directory_path, 'out2')
|
78
|
-
even_file_path = File.join(@temp_directory_path, 'out_even')
|
79
|
-
odd_file_path = File.join(@temp_directory_path, 'out_odd')
|
80
|
-
even_odd_file_path = File.join(@temp_directory_path, 'out_even_odd')
|
81
|
-
even_odd2_file_path = File.join(@temp_directory_path, 'out_even_odd2')
|
82
|
-
|
83
|
-
File.exist?(all_file_path).should be_true
|
84
|
-
File.exist?(all2_file_path).should be_true
|
85
|
-
File.exist?(even_file_path).should be_true
|
86
|
-
File.exist?(odd_file_path).should be_true
|
87
|
-
File.exist?(even_odd_file_path).should be_true
|
88
|
-
File.exist?(even_odd2_file_path).should be_true
|
89
|
-
|
90
|
-
File.readlines(all_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
|
91
|
-
File.readlines(all2_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
|
92
|
-
File.readlines(even_file_path).map(&:to_i).should == [20, 22, 24, 26, 28, 30]
|
93
|
-
File.readlines(odd_file_path).map(&:to_i).should == [21, 23, 25, 27, 29]
|
94
|
-
File.readlines(even_odd_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
|
95
|
-
File.readlines(even_odd2_file_path).map(&:to_i).should == [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
|
96
209
|
end
|
97
|
-
end
|
98
|
-
|
99
210
|
|
211
|
+
context "with a complex, sharded ruby DSL config file" do
|
212
|
+
before(:each) do
|
213
|
+
@config_file_name = 'input_config'
|
214
|
+
@db_file_name = 'config_db'
|
215
|
+
@app_name = 'sharded_bin_test'
|
216
|
+
File.open(@config_file_name, 'w+') do |file|
|
217
|
+
file.write <<-EOF
|
218
|
+
RFlow::Configuration::RubyDSL.configure do |c|
|
219
|
+
c.setting('rflow.log_level', 'INFO')
|
220
|
+
c.setting('rflow.application_directory_path', '#{@temp_directory_path}')
|
221
|
+
c.setting('rflow.application_name', '#{@app_name}')
|
222
|
+
# Instantiate components
|
223
|
+
c.shard 's1', :process => 3 do |s|
|
224
|
+
s.component 'generate_ints1', 'RFlow::Components::GenerateIntegerSequence', 'start' => 0, 'finish' => 10, 'step' => 3
|
225
|
+
end
|
226
|
+
c.shard 's2', :type => :process, :count => 2 do |s|
|
227
|
+
s.component 'generate_ints2', 'RFlow::Components::GenerateIntegerSequence', 'start' => 20, 'finish' => 30
|
228
|
+
end
|
229
|
+
c.component 'generate_ints3', 'RFlow::Components::GenerateIntegerSequence', 'start' => 100, 'finish' => 105
|
230
|
+
c.shard 's3', :process => 2 do |s|
|
231
|
+
s.component 'output1', 'RFlow::Components::FileOutput', 'output_file_path' => 'out1'
|
232
|
+
s.component 'output2', 'RFlow::Components::FileOutput', 'output_file_path' => 'out2'
|
233
|
+
end
|
234
|
+
c.component 'output3', 'RFlow::Components::FileOutput', 'output_file_path' => 'out3'
|
235
|
+
c.component 'output_all', 'RFlow::Components::FileOutput', 'output_file_path' => 'out_all'
|
236
|
+
# Hook components together
|
237
|
+
c.connect 'generate_ints1#out' => 'output1#in'
|
238
|
+
c.connect 'generate_ints2#out' => 'output2#in'
|
239
|
+
c.connect 'generate_ints3#out' => 'output3#in'
|
240
|
+
c.connect 'generate_ints1#out' => 'output_all#in'
|
241
|
+
c.connect 'generate_ints2#out' => 'output_all#in'
|
242
|
+
c.connect 'generate_ints3#out' => 'output_all#in'
|
243
|
+
end
|
244
|
+
EOF
|
245
|
+
end
|
246
|
+
r = execute_rflow("load -d #{@db_file_name} -c #{@config_file_name}")
|
247
|
+
r[:status].exitstatus.should == 0
|
248
|
+
r[:stderr].should == ''
|
249
|
+
r[:stdout].should match /Successfully initialized database.*#{@db_file_name}/
|
250
|
+
end
|
251
|
+
|
252
|
+
it "should not start if the components aren't loaded" do
|
253
|
+
r = execute_rflow("start -d #{@db_file_name} -f")
|
254
|
+
|
255
|
+
r[:status].exitstatus.should == 1
|
256
|
+
r[:stderr].should == ''
|
257
|
+
r[:stdout].should match /error/i
|
258
|
+
end
|
259
|
+
|
260
|
+
it "should daemonize and run in the background" do
|
261
|
+
r = execute_rflow("start -d #{@db_file_name} -e #{@extensions_file_name}")
|
262
|
+
|
263
|
+
r[:status].exitstatus.should == 0
|
264
|
+
r[:stderr].should == ''
|
265
|
+
r[:stdout].should_not match /error/i
|
266
|
+
|
267
|
+
sleep 1 # give the daemon a chance to finish
|
268
|
+
|
269
|
+
log_contents = File.read("log/#{@app_name}.log").chomp
|
270
|
+
log_lines = log_contents.split("\n")
|
271
|
+
|
272
|
+
puts '++++++++++++++++++++'
|
273
|
+
puts log_contents
|
274
|
+
puts '++++++++++++++++++++'
|
275
|
+
|
276
|
+
# Log file testing
|
277
|
+
log_lines.each { |line| line.should_not match /^ERROR/ }
|
278
|
+
log_lines.each { |line| line.should_not match /^DEBUG/ }
|
279
|
+
|
280
|
+
# Grab all the pids from the log, which seems to be the only
|
281
|
+
# reliable way to get them
|
282
|
+
log_pids = log_lines.map { |line| /\((\d+)\)/.match(line)[1].to_i }.uniq
|
283
|
+
|
284
|
+
initial_pid = r[:status].pid
|
285
|
+
master_pid = File.read("run/#{@app_name}.pid").chomp.to_i
|
286
|
+
worker_pids = log_pids - [initial_pid, master_pid]
|
287
|
+
|
288
|
+
log_pids.should include initial_pid
|
289
|
+
log_pids.should include master_pid
|
290
|
+
|
291
|
+
worker_pids.size.should == 8
|
292
|
+
worker_pids.should_not include 0
|
293
|
+
|
294
|
+
# Process checks
|
295
|
+
expect { Process.kill(0, initial_pid) }.to raise_error(Errno::ESRCH)
|
296
|
+
([master_pid] + worker_pids).each do |pid|
|
297
|
+
Process.kill(0, pid).should == 1
|
298
|
+
end
|
299
|
+
|
300
|
+
# Output checks
|
301
|
+
output_files = {
|
302
|
+
'out1' => [0, 3, 6, 9] * 3,
|
303
|
+
'out2' => (20..30).to_a * 2,
|
304
|
+
'out3' => (100..105).to_a,
|
305
|
+
'out_all' => [0, 3, 6, 9] * 3 + (20..30).to_a * 2 + (100..105).to_a
|
306
|
+
}
|
307
|
+
|
308
|
+
output_files.each do |file_name, expected_contents|
|
309
|
+
File.exist?(File.join(@temp_directory_path, file_name)).should be_true
|
310
|
+
File.readlines(file_name).map(&:to_i).sort.should == expected_contents.sort
|
311
|
+
end
|
312
|
+
|
313
|
+
# Terminate the master
|
314
|
+
Process.kill("TERM", master_pid).should == 1
|
315
|
+
|
316
|
+
# Make sure everything is dead
|
317
|
+
([master_pid] + worker_pids).each do |pid|
|
318
|
+
expect { Process.kill(0, pid) }.to raise_error(Errno::ESRCH)
|
319
|
+
end
|
320
|
+
end
|
321
|
+
end
|
322
|
+
end
|
100
323
|
end
|