turbot-runner-morph 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/bin/rspec +16 -0
- data/lib/turbot_runner.rb +28 -0
- data/lib/turbot_runner/base_handler.rb +15 -0
- data/lib/turbot_runner/exceptions.rb +4 -0
- data/lib/turbot_runner/prerun.rb +3 -0
- data/lib/turbot_runner/processor.rb +53 -0
- data/lib/turbot_runner/runner.rb +179 -0
- data/lib/turbot_runner/script_runner.rb +98 -0
- data/lib/turbot_runner/utils.rb +47 -0
- data/lib/turbot_runner/validator.rb +28 -0
- data/lib/turbot_runner/version.rb +3 -0
- data/schema/schemas/company-schema.json +243 -0
- data/schema/schemas/financial-payment-schema.json +32 -0
- data/schema/schemas/includes/address.json +53 -0
- data/schema/schemas/includes/alternative_name.json +36 -0
- data/schema/schemas/includes/company-for-nesting.json +245 -0
- data/schema/schemas/includes/company.json +25 -0
- data/schema/schemas/includes/entity.json +58 -0
- data/schema/schemas/includes/filing.json +52 -0
- data/schema/schemas/includes/financial-payment-data-object.json +112 -0
- data/schema/schemas/includes/identifier.json +20 -0
- data/schema/schemas/includes/industry_code.json +29 -0
- data/schema/schemas/includes/licence-data-object.json +63 -0
- data/schema/schemas/includes/officer.json +70 -0
- data/schema/schemas/includes/organisation.json +58 -0
- data/schema/schemas/includes/permission.json +46 -0
- data/schema/schemas/includes/person.json +62 -0
- data/schema/schemas/includes/person_name.json +71 -0
- data/schema/schemas/includes/previous_name.json +24 -0
- data/schema/schemas/includes/share-parcel-data.json +82 -0
- data/schema/schemas/includes/share-parcel.json +78 -0
- data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
- data/schema/schemas/includes/total-shares.json +17 -0
- data/schema/schemas/includes/unknown_entity_type.json +58 -0
- data/schema/schemas/licence-schema.json +105 -0
- data/schema/schemas/primary-data-schema.json +20 -0
- data/schema/schemas/share-parcel-schema.json +22 -0
- data/schema/schemas/simple-financial-payment-schema.json +122 -0
- data/schema/schemas/simple-licence-schema.json +82 -0
- data/schema/schemas/simple-subsidiary-schema.json +85 -0
- data/schema/schemas/subsidiary-relationship-schema.json +46 -0
- data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
- data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
- data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
- data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
- data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
- data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
- data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
- data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
- data/spec/bots/bot-that-expects-file/manifest.json +8 -0
- data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
- data/spec/bots/bot-that-expects-file/something.txt +1 -0
- data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
- data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
- data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
- data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
- data/spec/bots/bot-with-pause/manifest.json +8 -0
- data/spec/bots/bot-with-pause/scraper.rb +16 -0
- data/spec/bots/bot-with-transformer/manifest.json +15 -0
- data/spec/bots/bot-with-transformer/scraper.rb +10 -0
- data/spec/bots/bot-with-transformer/transformer.rb +15 -0
- data/spec/bots/bot-with-transformers/manifest.json +20 -0
- data/spec/bots/bot-with-transformers/scraper.rb +10 -0
- data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
- data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
- data/spec/bots/invalid-json-bot/manifest.json +8 -0
- data/spec/bots/invalid-json-bot/scraper.rb +11 -0
- data/spec/bots/invalid-record-bot/manifest.json +8 -0
- data/spec/bots/invalid-record-bot/scraper.rb +11 -0
- data/spec/bots/logging-bot/manifest.json +8 -0
- data/spec/bots/logging-bot/scraper.rb +14 -0
- data/spec/bots/python-bot/manifest.json +8 -0
- data/spec/bots/python-bot/scraper.py +11 -0
- data/spec/bots/ruby-bot/manifest.json +8 -0
- data/spec/bots/ruby-bot/scraper.rb +10 -0
- data/spec/bots/slow-bot/manifest.json +8 -0
- data/spec/bots/slow-bot/scraper.rb +11 -0
- data/spec/lib/processor_spec.rb +181 -0
- data/spec/lib/runner_spec.rb +330 -0
- data/spec/lib/utils_spec.rb +23 -0
- data/spec/lib/validator_spec.rb +89 -0
- data/spec/manual_spec.rb +57 -0
- data/spec/outputs/full-scraper.out +10 -0
- data/spec/outputs/full-transformer.out +10 -0
- data/spec/outputs/truncated-scraper.out +5 -0
- data/spec/spec_helper.rb +20 -0
- metadata +148 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'turbot_runner'
|
|
3
|
+
|
|
4
|
+
describe TurbotRunner::Processor do
|
|
5
|
+
describe '#process' do
|
|
6
|
+
before do
|
|
7
|
+
@handler = TurbotRunner::BaseHandler.new
|
|
8
|
+
@data_type = 'primary data'
|
|
9
|
+
@script_config = {
|
|
10
|
+
:data_type => @data_type,
|
|
11
|
+
:identifying_fields => ['number']
|
|
12
|
+
}
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
context 'with a nil runner passed in' do
|
|
16
|
+
before do
|
|
17
|
+
@processor = TurbotRunner::Processor.new(nil, @script_config, @handler)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
context 'with valid record' do
|
|
21
|
+
it 'calls Handler#handle_valid_record' do
|
|
22
|
+
record = {
|
|
23
|
+
'sample_date' => '2014-06-01',
|
|
24
|
+
'source_url' => 'http://example.com/123',
|
|
25
|
+
'number' => 123
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
|
|
29
|
+
@processor.process(record.to_json)
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
context 'with invalid record' do
|
|
34
|
+
it 'calls Handler#handle_invalid_record' do
|
|
35
|
+
record = {
|
|
36
|
+
'sample_date' => '2014-06-01',
|
|
37
|
+
'number' => 123
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
expected_error = 'Missing required property: source_url'
|
|
41
|
+
expect(@handler).to receive(:handle_invalid_record).
|
|
42
|
+
with(record, @data_type, expected_error)
|
|
43
|
+
@processor.process(record.to_json)
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
context 'with invalid JSON' do
|
|
48
|
+
it 'calls Handler#handle_invalid_json' do
|
|
49
|
+
line = 'this is not JSON'
|
|
50
|
+
expect(@handler).to receive(:handle_invalid_json).with(line)
|
|
51
|
+
@processor.process(line)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
context 'with a runner passed in' do
|
|
57
|
+
before do
|
|
58
|
+
@script_runner = instance_double('ScriptRunner')
|
|
59
|
+
allow(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
|
60
|
+
@processor = TurbotRunner::Processor.new(@script_runner, @script_config, @handler)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
context 'with valid record' do
|
|
64
|
+
it 'calls Handler#handle_valid_record' do
|
|
65
|
+
record = {
|
|
66
|
+
'sample_date' => '2014-06-01',
|
|
67
|
+
'source_url' => 'http://example.com/123',
|
|
68
|
+
'number' => 123
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
|
|
72
|
+
@processor.process(record.to_json)
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
context 'with invalid record' do
|
|
77
|
+
before do
|
|
78
|
+
@record = {
|
|
79
|
+
'sample_date' => '2014-06-01',
|
|
80
|
+
'number' => 123
|
|
81
|
+
}
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
it 'calls Handler#handle_invalid_record' do
|
|
85
|
+
expected_error = 'Missing required property: source_url'
|
|
86
|
+
expect(@handler).to receive(:handle_invalid_record).
|
|
87
|
+
with(@record, @data_type, expected_error)
|
|
88
|
+
@processor.process(@record.to_json)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
it 'interrupts runner' do
|
|
92
|
+
expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
|
93
|
+
@processor.process(@record.to_json)
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
context 'with invalid JSON' do
|
|
98
|
+
before do
|
|
99
|
+
@line = 'this is not JSON'
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
it 'calls Handler#handle_invalid_json' do
|
|
103
|
+
expect(@handler).to receive(:handle_invalid_json).with(@line)
|
|
104
|
+
@processor.process(@line)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
it 'interrupts runner' do
|
|
108
|
+
expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
|
109
|
+
@processor.process(@line)
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
it 'converts date format' do
|
|
114
|
+
record = {
|
|
115
|
+
'sample_date' => '2014-06-01 12:34:56 +0000',
|
|
116
|
+
'source_url' => 'http://example.com/123',
|
|
117
|
+
'number' => 123
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
converted_record = {
|
|
121
|
+
'sample_date' => '2014-06-01',
|
|
122
|
+
'source_url' => 'http://example.com/123',
|
|
123
|
+
'number' => 123
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
expect(@handler).to receive(:handle_valid_record).with(converted_record, @data_type)
|
|
127
|
+
@processor.process(record.to_json)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it 'does not pass retrieved_at to validator' do
|
|
131
|
+
record = {
|
|
132
|
+
'sample_date' => '2014-06-01',
|
|
133
|
+
'retrieved_at' => '2014-06-01 12:34:56 +0000',
|
|
134
|
+
'source_url' => 'http://example.com/123',
|
|
135
|
+
'number' => 123
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
expected_record_to_validate = {
|
|
139
|
+
'sample_date' => '2014-06-01',
|
|
140
|
+
'source_url' => 'http://example.com/123',
|
|
141
|
+
'number' => 123
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
expect(TurbotRunner::Validator).to receive(:validate).
|
|
145
|
+
with('primary data', expected_record_to_validate, ['number'])
|
|
146
|
+
@processor.process(record.to_json)
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
it 'can handle schemas with $refs' do
|
|
151
|
+
handler = TurbotRunner::BaseHandler.new
|
|
152
|
+
script_config = {
|
|
153
|
+
:data_type => 'licence',
|
|
154
|
+
:identifying_fields => ['licence_number']
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
script_runner = instance_double('ScriptRunner')
|
|
158
|
+
allow(script_runner).to receive(:interrupt_and_mark_as_failed)
|
|
159
|
+
processor = TurbotRunner::Processor.new(script_runner, script_config, handler)
|
|
160
|
+
|
|
161
|
+
record = {
|
|
162
|
+
:licence_holder => {
|
|
163
|
+
:entity_type => 'company',
|
|
164
|
+
:entity_properties => {
|
|
165
|
+
:name => 'Hairy Goat Breeding Ltd',
|
|
166
|
+
:jurisdiction_code => 'gb',
|
|
167
|
+
}
|
|
168
|
+
},
|
|
169
|
+
:licence_number => '1234',
|
|
170
|
+
:permissions => ['Goat breeding'],
|
|
171
|
+
:licence_issuer => 'Sheep and Goat Board of Bermuda',
|
|
172
|
+
:jurisdiction_of_licence => 'bm',
|
|
173
|
+
:source_url => 'http://example.com',
|
|
174
|
+
:sample_date => '2015-01-01'
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
expect(handler).to receive(:handle_valid_record)
|
|
178
|
+
processor.process(record.to_json)
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
end
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'turbot_runner'
|
|
3
|
+
|
|
4
|
+
describe TurbotRunner::Runner do
|
|
5
|
+
before(:each) do
|
|
6
|
+
Dir.glob('spec/bots/**/output/*').each {|f| File.delete(f)}
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
after(:all) do
|
|
10
|
+
puts
|
|
11
|
+
puts 'If all specs passed, you should now run `ruby spec/manual_spec.rb`'
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
describe '#run' do
|
|
15
|
+
context 'with a bot written in ruby' do
|
|
16
|
+
before do
|
|
17
|
+
@runner = test_runner('ruby-bot')
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
it 'produces expected output' do
|
|
21
|
+
@runner.run
|
|
22
|
+
expect([@runner, 'scraper']).to have_output('full-scraper.out')
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it 'returns true' do
|
|
26
|
+
expect(@runner.run).to be(true)
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
context 'with a bot written in python' do
|
|
31
|
+
before do
|
|
32
|
+
@runner = test_runner('python-bot')
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
it 'produces expected output' do
|
|
36
|
+
@runner.run
|
|
37
|
+
expect([@runner, 'scraper']).to have_output('full-scraper.out')
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
context 'with a bot with a transformer' do
|
|
42
|
+
before do
|
|
43
|
+
@runner = test_runner('bot-with-transformer')
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it 'produces expected outputs' do
|
|
47
|
+
@runner.run
|
|
48
|
+
expect([@runner, 'scraper']).to have_output('full-scraper.out')
|
|
49
|
+
expect([@runner, 'transformer']).to have_output('full-transformer.out')
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it 'returns true' do
|
|
53
|
+
expect(@runner.run).to be(true)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
context 'with a bot with multiple transformers' do
|
|
58
|
+
before do
|
|
59
|
+
@runner = test_runner('bot-with-transformers')
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
it 'produces expected outputs' do
|
|
63
|
+
@runner.run
|
|
64
|
+
expect([@runner, 'scraper']).to have_output('full-scraper.out')
|
|
65
|
+
expect([@runner, 'transformer1']).to have_output('full-transformer.out')
|
|
66
|
+
expect([@runner, 'transformer2']).to have_output('full-transformer.out')
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
it 'returns true' do
|
|
70
|
+
expect(@runner.run).to be(true)
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
context 'with a bot that logs' do
|
|
75
|
+
context 'when logging to file enabled' do
|
|
76
|
+
it 'logs to file' do
|
|
77
|
+
expected_log = "doing...\ndone\n"
|
|
78
|
+
runner = test_runner('logging-bot', :log_to_file => true)
|
|
79
|
+
runner.run
|
|
80
|
+
expect([runner, 'scraper']).to have_error_output_matching(expected_log)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
context 'when logging to file not enabled' do
|
|
85
|
+
xit 'logs to stderr' do
|
|
86
|
+
# This is tested in manual_spec.rb
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
context 'with a bot that outputs RUN ENDED' do
|
|
92
|
+
before do
|
|
93
|
+
@runner = test_runner('bot-that-emits-run-ended', :log_to_file => true)
|
|
94
|
+
end
|
|
95
|
+
it 'calls handle_run_ended on the handler' do
|
|
96
|
+
expect_any_instance_of(TurbotRunner::BaseHandler).to receive(:handle_run_ended)
|
|
97
|
+
@runner.run
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
it 'interrupts the run' do
|
|
101
|
+
expect_any_instance_of(TurbotRunner::ScriptRunner).to receive(:interrupt)
|
|
102
|
+
@runner.run
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
context 'with a bot that crashes in scraper' do
|
|
108
|
+
before do
|
|
109
|
+
@runner = test_runner('bot-that-crashes-in-scraper', :log_to_file => true)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
it 'returns false' do
|
|
113
|
+
expect(@runner.run).to be(false)
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
it 'writes error to stderr' do
|
|
117
|
+
@runner.run
|
|
118
|
+
expect([@runner, 'scraper']).to have_error_output_matching(/Oh no/)
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
it 'still runs the transformers' do
|
|
122
|
+
expect(@runner).to receive(:run_script).once.with(
|
|
123
|
+
hash_including(:file=>"scraper.rb"))
|
|
124
|
+
expect(@runner).to receive(:run_script).once.with(
|
|
125
|
+
hash_including(:file=>"transformer1.rb"), anything)
|
|
126
|
+
@runner.run
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
context 'with a bot that expects a file to be present in the working directory' do
|
|
131
|
+
before do
|
|
132
|
+
@runner = test_runner('bot-that-expects-file')
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
it 'returns true' do
|
|
136
|
+
expect(@runner.run).to be(true)
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
context 'with a bot that crashes in transformer' do
|
|
141
|
+
before do
|
|
142
|
+
@runner = test_runner('bot-that-crashes-in-transformer', :log_to_file => true)
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
it 'returns false' do
|
|
146
|
+
expect(@runner.run).to be(false)
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
it 'writes error to stderr' do
|
|
150
|
+
@runner.run
|
|
151
|
+
expect([@runner, 'transformer2']).to have_error_output_matching(/Oh no/)
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
context 'with a bot that is interrupted in scraper' do
|
|
156
|
+
xit 'produces truncated output' do
|
|
157
|
+
# This is tested in manual_spec.rb
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
context 'with a handler that interrupts the runner' do
|
|
162
|
+
before do
|
|
163
|
+
# Record handler used to exercise interruption of a run: it keeps a running
# count of the valid records it is given and raises
# TurbotRunner::InterruptRun from the fifth one onward.
class Handler < TurbotRunner::BaseHandler
  # Accepts (and forwards to the superclass) whatever arguments it is given.
  def initialize(*)
    @count = 0
    super
  end

  # Called once per valid record; the record contents and data type are not
  # inspected, only counted.
  def handle_valid_record(record, data_type)
    @count += 1
    raise TurbotRunner::InterruptRun unless @count < 5
  end
end
|
|
174
|
+
|
|
175
|
+
@runner = test_runner('slow-bot',
|
|
176
|
+
:record_handler => Handler.new,
|
|
177
|
+
:log_to_file => true
|
|
178
|
+
)
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
it 'produces expected output' do
|
|
182
|
+
@runner.run
|
|
183
|
+
expect([@runner, 'scraper']).to have_output('truncated-scraper.out')
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
it 'returns true' do
|
|
187
|
+
expect(@runner.run).to be(true)
|
|
188
|
+
end
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
context 'with a scraper that produces an invalid record' do
|
|
192
|
+
it 'returns false' do
|
|
193
|
+
@runner = test_runner('invalid-record-bot')
|
|
194
|
+
expect(@runner.run).to be(false)
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
context 'with a scraper that produces invalid JSON' do
|
|
199
|
+
it 'returns false' do
|
|
200
|
+
@runner = test_runner('invalid-json-bot')
|
|
201
|
+
expect(@runner.run).to be(false)
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
context 'with a scraper that hangs' do
|
|
206
|
+
# XXX This spec fails because the loop in ScriptRunner#run that
|
|
207
|
+
# reads lines from the output file doesn't start until the
|
|
208
|
+
# output file is created; however, the way we're redirecting
|
|
209
|
+
# stdout using the shell means the file doesn't get created
|
|
210
|
+
# until [the original comment ends here, mid-sentence]
|
|
211
|
+
it 'returns false' do
|
|
212
|
+
@runner = test_runner('bot-with-pause',
|
|
213
|
+
:timeout => 1,
|
|
214
|
+
:log_to_file => true
|
|
215
|
+
)
|
|
216
|
+
expect(@runner.run).to be(false)
|
|
217
|
+
end
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
context 'with a bot that emits an invalid sample date' do
|
|
221
|
+
before do
|
|
222
|
+
@runner = test_runner('bot-with-invalid-sample-date')
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
it 'returns false' do
|
|
226
|
+
expect(@runner.run).to be(false)
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
|
|
230
|
+
context 'with a bot with an invalid data type' do
|
|
231
|
+
before do
|
|
232
|
+
@runner = test_runner('bot-with-invalid-data-type')
|
|
233
|
+
end
|
|
234
|
+
|
|
235
|
+
it 'raises InvalidDataType' do
|
|
236
|
+
expect{@runner.run}.to raise_error(TurbotRunner::InvalidDataType)
|
|
237
|
+
end
|
|
238
|
+
end
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
describe '#process_output' do
|
|
242
|
+
before do
|
|
243
|
+
# Record handler that tallies how many valid records it has been given for
# each data type. The tally is exposed through #records_seen.
class Handler < TurbotRunner::BaseHandler
  attr_reader :records_seen

  # Accepts (and forwards to the superclass) whatever arguments it is given.
  def initialize(*)
    # Unknown data types start at zero; the default is stored on first access.
    @records_seen = Hash.new { |tally, type| tally[type] = 0 }
    super
  end

  # Bumps the tally for +data_type+; the record itself is not inspected.
  def handle_valid_record(record, data_type)
    @records_seen[data_type] = @records_seen[data_type] + 1
  end
end
|
|
255
|
+
|
|
256
|
+
@handler = Handler.new
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
it 'calls handler once for each line of output' do
|
|
260
|
+
test_runner('bot-with-transformer').run
|
|
261
|
+
|
|
262
|
+
runner = test_runner('bot-with-transformer',
|
|
263
|
+
:record_handler => @handler
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
runner.process_output
|
|
267
|
+
expect(@handler.records_seen['primary data']).to eq(10)
|
|
268
|
+
expect(@handler.records_seen['simple-licence']).to eq(10)
|
|
269
|
+
end
|
|
270
|
+
|
|
271
|
+
it 'can cope when scraper has failed immediately' do
|
|
272
|
+
test_runner('bot-that-crashes-immediately').run
|
|
273
|
+
|
|
274
|
+
runner = test_runner('bot-with-transformer',
|
|
275
|
+
:record_handler => @handler
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
runner.process_output
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
|
|
282
|
+
describe '#set_up_output_directory' do
|
|
283
|
+
before do
|
|
284
|
+
@runner = test_runner('bot-with-transformer')
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
it 'clears existing output' do
|
|
288
|
+
path = File.join(@runner.base_directory, 'output', 'scraper.out')
|
|
289
|
+
FileUtils.touch(path)
|
|
290
|
+
@runner.set_up_output_directory
|
|
291
|
+
expect(File.exist?(path)).to be(false)
|
|
292
|
+
end
|
|
293
|
+
|
|
294
|
+
it 'does not clear existing files that are not output files' do
|
|
295
|
+
path = File.join(@runner.base_directory, 'output', 'stdout')
|
|
296
|
+
FileUtils.touch(path)
|
|
297
|
+
@runner.set_up_output_directory
|
|
298
|
+
expect(File.exist?(path)).to be(true)
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
# Custom matcher: passes when the output file written for a script holds the
# same records as a stored fixture.
#
# Usage:
#   expect([runner, 'scraper']).to have_output('full-scraper.out')
#
# actual   - array of [runner, script name]; the output is read from
#            <runner.base_directory>/output/<script>.out
# expected - name of the fixture file under spec/outputs
#
# Both files are read line by line and each line is parsed as JSON, so the
# comparison is made on parsed values rather than on raw text.
RSpec::Matchers.define :have_output do |expected|
  expected_path = File.join('spec', 'outputs', expected)

  parse_json_lines = lambda do |path|
    File.readlines(path).map { |line| JSON.parse(line) }
  end

  actual_path_for = lambda do |actual|
    runner, script = actual
    File.join(runner.base_directory, 'output', "#{script}.out")
  end

  match do |actual|
    # Return a plain boolean rather than nesting an `expect` inside the match
    # block, so the result does not depend on RSpec swallowing a nested
    # expectation failure.
    expected_records = parse_json_lines.call(expected_path)
    actual_records = parse_json_lines.call(actual_path_for.call(actual))
    actual_records == expected_records
  end

  failure_message do |actual|
    "expected records in #{actual_path_for.call(actual)} " \
      "to equal records in #{expected_path}"
  end
end
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
# Custom matcher: checks the error output captured for a script against
# +expected+.
#
# Usage:
#   expect([runner, 'scraper']).to have_error_output_matching(/Oh no/)
#
# actual   - array of [runner, script name]; the text is read from
#            <runner.base_directory>/output/<script>.err
# expected - pattern handed unchanged to RSpec's `match` matcher
RSpec::Matchers.define :have_error_output_matching do |expected|
  match do |actual|
    runner, script = actual
    err_file = File.join(runner.base_directory, 'output', "#{script}.err")

    expect(File.read(err_file)).to match(expected)
  end
end
|
|
326
|
+
|
|
327
|
+
# Builds a TurbotRunner::Runner for one of the fixture bots.
#
# name - directory name of the bot, resolved under ../bots relative to the
#        directory containing this file
# opts - options hash passed through unchanged to TurbotRunner::Runner.new
#
# Returns the new runner.
def test_runner(name, opts = {})
  bots_root = File.join(File.dirname(__FILE__), '../bots')
  TurbotRunner::Runner.new(File.join(bots_root, name), opts)
end
|