turbot-runner-morph 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/bin/rspec +16 -0
- data/lib/turbot_runner.rb +28 -0
- data/lib/turbot_runner/base_handler.rb +15 -0
- data/lib/turbot_runner/exceptions.rb +4 -0
- data/lib/turbot_runner/prerun.rb +3 -0
- data/lib/turbot_runner/processor.rb +53 -0
- data/lib/turbot_runner/runner.rb +179 -0
- data/lib/turbot_runner/script_runner.rb +98 -0
- data/lib/turbot_runner/utils.rb +47 -0
- data/lib/turbot_runner/validator.rb +28 -0
- data/lib/turbot_runner/version.rb +3 -0
- data/schema/schemas/company-schema.json +243 -0
- data/schema/schemas/financial-payment-schema.json +32 -0
- data/schema/schemas/includes/address.json +53 -0
- data/schema/schemas/includes/alternative_name.json +36 -0
- data/schema/schemas/includes/company-for-nesting.json +245 -0
- data/schema/schemas/includes/company.json +25 -0
- data/schema/schemas/includes/entity.json +58 -0
- data/schema/schemas/includes/filing.json +52 -0
- data/schema/schemas/includes/financial-payment-data-object.json +112 -0
- data/schema/schemas/includes/identifier.json +20 -0
- data/schema/schemas/includes/industry_code.json +29 -0
- data/schema/schemas/includes/licence-data-object.json +63 -0
- data/schema/schemas/includes/officer.json +70 -0
- data/schema/schemas/includes/organisation.json +58 -0
- data/schema/schemas/includes/permission.json +46 -0
- data/schema/schemas/includes/person.json +62 -0
- data/schema/schemas/includes/person_name.json +71 -0
- data/schema/schemas/includes/previous_name.json +24 -0
- data/schema/schemas/includes/share-parcel-data.json +82 -0
- data/schema/schemas/includes/share-parcel.json +78 -0
- data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
- data/schema/schemas/includes/total-shares.json +17 -0
- data/schema/schemas/includes/unknown_entity_type.json +58 -0
- data/schema/schemas/licence-schema.json +105 -0
- data/schema/schemas/primary-data-schema.json +20 -0
- data/schema/schemas/share-parcel-schema.json +22 -0
- data/schema/schemas/simple-financial-payment-schema.json +122 -0
- data/schema/schemas/simple-licence-schema.json +82 -0
- data/schema/schemas/simple-subsidiary-schema.json +85 -0
- data/schema/schemas/subsidiary-relationship-schema.json +46 -0
- data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
- data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
- data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
- data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
- data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
- data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
- data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
- data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
- data/spec/bots/bot-that-expects-file/manifest.json +8 -0
- data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
- data/spec/bots/bot-that-expects-file/something.txt +1 -0
- data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
- data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
- data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
- data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
- data/spec/bots/bot-with-pause/manifest.json +8 -0
- data/spec/bots/bot-with-pause/scraper.rb +16 -0
- data/spec/bots/bot-with-transformer/manifest.json +15 -0
- data/spec/bots/bot-with-transformer/scraper.rb +10 -0
- data/spec/bots/bot-with-transformer/transformer.rb +15 -0
- data/spec/bots/bot-with-transformers/manifest.json +20 -0
- data/spec/bots/bot-with-transformers/scraper.rb +10 -0
- data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
- data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
- data/spec/bots/invalid-json-bot/manifest.json +8 -0
- data/spec/bots/invalid-json-bot/scraper.rb +11 -0
- data/spec/bots/invalid-record-bot/manifest.json +8 -0
- data/spec/bots/invalid-record-bot/scraper.rb +11 -0
- data/spec/bots/logging-bot/manifest.json +8 -0
- data/spec/bots/logging-bot/scraper.rb +14 -0
- data/spec/bots/python-bot/manifest.json +8 -0
- data/spec/bots/python-bot/scraper.py +11 -0
- data/spec/bots/ruby-bot/manifest.json +8 -0
- data/spec/bots/ruby-bot/scraper.rb +10 -0
- data/spec/bots/slow-bot/manifest.json +8 -0
- data/spec/bots/slow-bot/scraper.rb +11 -0
- data/spec/lib/processor_spec.rb +181 -0
- data/spec/lib/runner_spec.rb +330 -0
- data/spec/lib/utils_spec.rb +23 -0
- data/spec/lib/validator_spec.rb +89 -0
- data/spec/manual_spec.rb +57 -0
- data/spec/outputs/full-scraper.out +10 -0
- data/spec/outputs/full-transformer.out +10 -0
- data/spec/outputs/truncated-scraper.out +5 -0
- data/spec/spec_helper.rb +20 -0
- metadata +148 -0
@@ -0,0 +1,181 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'turbot_runner'
|
3
|
+
|
4
|
+
describe TurbotRunner::Processor do
|
5
|
+
describe '#process' do
|
6
|
+
before do
|
7
|
+
@handler = TurbotRunner::BaseHandler.new
|
8
|
+
@data_type = 'primary data'
|
9
|
+
@script_config = {
|
10
|
+
:data_type => @data_type,
|
11
|
+
:identifying_fields => ['number']
|
12
|
+
}
|
13
|
+
end
|
14
|
+
|
15
|
+
context 'with a nil runner passed in' do
|
16
|
+
before do
|
17
|
+
@processor = TurbotRunner::Processor.new(nil, @script_config, @handler)
|
18
|
+
end
|
19
|
+
|
20
|
+
context 'with valid record' do
|
21
|
+
it 'calls Handler#handle_valid_record' do
|
22
|
+
record = {
|
23
|
+
'sample_date' => '2014-06-01',
|
24
|
+
'source_url' => 'http://example.com/123',
|
25
|
+
'number' => 123
|
26
|
+
}
|
27
|
+
|
28
|
+
expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
|
29
|
+
@processor.process(record.to_json)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
context 'with invalid record' do
|
34
|
+
it 'calls Handler#handle_invalid_record' do
|
35
|
+
record = {
|
36
|
+
'sample_date' => '2014-06-01',
|
37
|
+
'number' => 123
|
38
|
+
}
|
39
|
+
|
40
|
+
expected_error = 'Missing required property: source_url'
|
41
|
+
expect(@handler).to receive(:handle_invalid_record).
|
42
|
+
with(record, @data_type, expected_error)
|
43
|
+
@processor.process(record.to_json)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
context 'with invalid JSON' do
|
48
|
+
it 'calls Handler#handle_invalid_json' do
|
49
|
+
line = 'this is not JSON'
|
50
|
+
expect(@handler).to receive(:handle_invalid_json).with(line)
|
51
|
+
@processor.process(line)
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context 'with a runner passed in' do
|
57
|
+
before do
|
58
|
+
@script_runner = instance_double('ScriptRunner')
|
59
|
+
allow(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
60
|
+
@processor = TurbotRunner::Processor.new(@script_runner, @script_config, @handler)
|
61
|
+
end
|
62
|
+
|
63
|
+
context 'with valid record' do
|
64
|
+
it 'calls Handler#handle_valid_record' do
|
65
|
+
record = {
|
66
|
+
'sample_date' => '2014-06-01',
|
67
|
+
'source_url' => 'http://example.com/123',
|
68
|
+
'number' => 123
|
69
|
+
}
|
70
|
+
|
71
|
+
expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
|
72
|
+
@processor.process(record.to_json)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
context 'with invalid record' do
|
77
|
+
before do
|
78
|
+
@record = {
|
79
|
+
'sample_date' => '2014-06-01',
|
80
|
+
'number' => 123
|
81
|
+
}
|
82
|
+
end
|
83
|
+
|
84
|
+
it 'calls Handler#handle_invalid_record' do
|
85
|
+
expected_error = 'Missing required property: source_url'
|
86
|
+
expect(@handler).to receive(:handle_invalid_record).
|
87
|
+
with(@record, @data_type, expected_error)
|
88
|
+
@processor.process(@record.to_json)
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'interrupts runner' do
|
92
|
+
expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
93
|
+
@processor.process(@record.to_json)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
context 'with invalid JSON' do
|
98
|
+
before do
|
99
|
+
@line = 'this is not JSON'
|
100
|
+
end
|
101
|
+
|
102
|
+
it 'calls Handler#handle_invalid_json' do
|
103
|
+
expect(@handler).to receive(:handle_invalid_json).with(@line)
|
104
|
+
@processor.process(@line)
|
105
|
+
end
|
106
|
+
|
107
|
+
it 'interrupts runner' do
|
108
|
+
expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
|
109
|
+
@processor.process(@line)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'converts date format' do
|
114
|
+
record = {
|
115
|
+
'sample_date' => '2014-06-01 12:34:56 +0000',
|
116
|
+
'source_url' => 'http://example.com/123',
|
117
|
+
'number' => 123
|
118
|
+
}
|
119
|
+
|
120
|
+
converted_record = {
|
121
|
+
'sample_date' => '2014-06-01',
|
122
|
+
'source_url' => 'http://example.com/123',
|
123
|
+
'number' => 123
|
124
|
+
}
|
125
|
+
|
126
|
+
expect(@handler).to receive(:handle_valid_record).with(converted_record, @data_type)
|
127
|
+
@processor.process(record.to_json)
|
128
|
+
end
|
129
|
+
|
130
|
+
it 'does not pass retrieved_at to validator' do
|
131
|
+
record = {
|
132
|
+
'sample_date' => '2014-06-01',
|
133
|
+
'retrieved_at' => '2014-06-01 12:34:56 +0000',
|
134
|
+
'source_url' => 'http://example.com/123',
|
135
|
+
'number' => 123
|
136
|
+
}
|
137
|
+
|
138
|
+
expected_record_to_validate = {
|
139
|
+
'sample_date' => '2014-06-01',
|
140
|
+
'source_url' => 'http://example.com/123',
|
141
|
+
'number' => 123
|
142
|
+
}
|
143
|
+
|
144
|
+
expect(TurbotRunner::Validator).to receive(:validate).
|
145
|
+
with('primary data', expected_record_to_validate, ['number'])
|
146
|
+
@processor.process(record.to_json)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
it 'can handle schemas with $refs' do
|
151
|
+
handler = TurbotRunner::BaseHandler.new
|
152
|
+
script_config = {
|
153
|
+
:data_type => 'licence',
|
154
|
+
:identifying_fields => ['licence_number']
|
155
|
+
}
|
156
|
+
|
157
|
+
script_runner = instance_double('ScriptRunner')
|
158
|
+
allow(script_runner).to receive(:interrupt_and_mark_as_failed)
|
159
|
+
processor = TurbotRunner::Processor.new(script_runner, script_config, handler)
|
160
|
+
|
161
|
+
record = {
|
162
|
+
:licence_holder => {
|
163
|
+
:entity_type => 'company',
|
164
|
+
:entity_properties => {
|
165
|
+
:name => 'Hairy Goat Breeding Ltd',
|
166
|
+
:jurisdiction_code => 'gb',
|
167
|
+
}
|
168
|
+
},
|
169
|
+
:licence_number => '1234',
|
170
|
+
:permissions => ['Goat breeding'],
|
171
|
+
:licence_issuer => 'Sheep and Goat Board of Bermuda',
|
172
|
+
:jurisdiction_of_licence => 'bm',
|
173
|
+
:source_url => 'http://example.com',
|
174
|
+
:sample_date => '2015-01-01'
|
175
|
+
}
|
176
|
+
|
177
|
+
expect(handler).to receive(:handle_valid_record)
|
178
|
+
processor.process(record.to_json)
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
@@ -0,0 +1,330 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'turbot_runner'
|
3
|
+
|
4
|
+
describe TurbotRunner::Runner do
|
5
|
+
before(:each) do
|
6
|
+
Dir.glob('spec/bots/**/output/*').each {|f| File.delete(f)}
|
7
|
+
end
|
8
|
+
|
9
|
+
after(:all) do
|
10
|
+
puts
|
11
|
+
puts 'If all specs passed, you should now run `ruby spec/manual_spec.rb`'
|
12
|
+
end
|
13
|
+
|
14
|
+
describe '#run' do
|
15
|
+
context 'with a bot written in ruby' do
|
16
|
+
before do
|
17
|
+
@runner = test_runner('ruby-bot')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'produces expected output' do
|
21
|
+
@runner.run
|
22
|
+
expect([@runner, 'scraper']).to have_output('full-scraper.out')
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'returns true' do
|
26
|
+
expect(@runner.run).to be(true)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
context 'with a bot written in python' do
|
31
|
+
before do
|
32
|
+
@runner = test_runner('python-bot')
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'produces expected output' do
|
36
|
+
@runner.run
|
37
|
+
expect([@runner, 'scraper']).to have_output('full-scraper.out')
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context 'with a bot with a transformer' do
|
42
|
+
before do
|
43
|
+
@runner = test_runner('bot-with-transformer')
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'produces expected outputs' do
|
47
|
+
@runner.run
|
48
|
+
expect([@runner, 'scraper']).to have_output('full-scraper.out')
|
49
|
+
expect([@runner, 'transformer']).to have_output('full-transformer.out')
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'returns true' do
|
53
|
+
expect(@runner.run).to be(true)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
context 'with a bot with multiple transformers' do
|
58
|
+
before do
|
59
|
+
@runner = test_runner('bot-with-transformers')
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'produces expected outputs' do
|
63
|
+
@runner.run
|
64
|
+
expect([@runner, 'scraper']).to have_output('full-scraper.out')
|
65
|
+
expect([@runner, 'transformer1']).to have_output('full-transformer.out')
|
66
|
+
expect([@runner, 'transformer2']).to have_output('full-transformer.out')
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'returns true' do
|
70
|
+
expect(@runner.run).to be(true)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
context 'with a bot that logs' do
|
75
|
+
context 'when logging to file enabled' do
|
76
|
+
it 'logs to file' do
|
77
|
+
expected_log = "doing...\ndone\n"
|
78
|
+
runner = test_runner('logging-bot', :log_to_file => true)
|
79
|
+
runner.run
|
80
|
+
expect([runner, 'scraper']).to have_error_output_matching(expected_log)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
context 'when logging to file not enabled' do
|
85
|
+
xit 'logs to stderr' do
|
86
|
+
# This is tested in manual_spec.rb
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
context 'with a bot that outputs RUN ENDED' do
|
92
|
+
before do
|
93
|
+
@runner = test_runner('bot-that-emits-run-ended', :log_to_file => true)
|
94
|
+
end
|
95
|
+
it 'calls handle_run_ended on the handler' do
|
96
|
+
expect_any_instance_of(TurbotRunner::BaseHandler).to receive(:handle_run_ended)
|
97
|
+
@runner.run
|
98
|
+
end
|
99
|
+
|
100
|
+
it 'interrupts the run' do
|
101
|
+
expect_any_instance_of(TurbotRunner::ScriptRunner).to receive(:interrupt)
|
102
|
+
@runner.run
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
|
107
|
+
context 'with a bot that crashes in scraper' do
|
108
|
+
before do
|
109
|
+
@runner = test_runner('bot-that-crashes-in-scraper', :log_to_file => true)
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'returns false' do
|
113
|
+
expect(@runner.run).to be(false)
|
114
|
+
end
|
115
|
+
|
116
|
+
it 'writes error to stderr' do
|
117
|
+
@runner.run
|
118
|
+
expect([@runner, 'scraper']).to have_error_output_matching(/Oh no/)
|
119
|
+
end
|
120
|
+
|
121
|
+
it 'still runs the transformers' do
|
122
|
+
expect(@runner).to receive(:run_script).once.with(
|
123
|
+
hash_including(:file=>"scraper.rb"))
|
124
|
+
expect(@runner).to receive(:run_script).once.with(
|
125
|
+
hash_including(:file=>"transformer1.rb"), anything)
|
126
|
+
@runner.run
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
context 'with a bot that expects a file to be present in the working directory' do
|
131
|
+
before do
|
132
|
+
@runner = test_runner('bot-that-expects-file')
|
133
|
+
end
|
134
|
+
|
135
|
+
it 'returns true' do
|
136
|
+
expect(@runner.run).to be(true)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
context 'with a bot that crashes in transformer' do
|
141
|
+
before do
|
142
|
+
@runner = test_runner('bot-that-crashes-in-transformer', :log_to_file => true)
|
143
|
+
end
|
144
|
+
|
145
|
+
it 'returns false' do
|
146
|
+
expect(@runner.run).to be(false)
|
147
|
+
end
|
148
|
+
|
149
|
+
it 'writes error to stderr' do
|
150
|
+
@runner.run
|
151
|
+
expect([@runner, 'transformer2']).to have_error_output_matching(/Oh no/)
|
152
|
+
end
|
153
|
+
end
|
154
|
+
|
155
|
+
context 'with a bot that is interrupted in scraper' do
|
156
|
+
xit 'produces truncated output' do
|
157
|
+
# This is tested in manual_spec.rb
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
161
|
+
context 'with a handler that interrupts the runner' do
|
162
|
+
before do
  # Anonymous BaseHandler subclass that counts the valid records it is given
  # and raises TurbotRunner::InterruptRun once the fifth one arrives.
  #
  # It is built with Class.new and kept in a local variable, instead of
  # `class Handler < ...`, so that running this hook does not define (or
  # reopen) a top-level Handler constant shared with the rest of the suite.
  interrupting_handler_class = Class.new(TurbotRunner::BaseHandler) do
    # Accepts and forwards whatever arguments the superclass constructor takes.
    def initialize(*)
      @count = 0
      super
    end

    # Called once per valid record; the arguments themselves are not used.
    def handle_valid_record(record, data_type)
      @count += 1
      raise TurbotRunner::InterruptRun if @count >= 5
    end
  end

  @runner = test_runner('slow-bot',
    :record_handler => interrupting_handler_class.new,
    :log_to_file => true
  )
end
|
180
|
+
|
181
|
+
it 'produces expected output' do
|
182
|
+
@runner.run
|
183
|
+
expect([@runner, 'scraper']).to have_output('truncated-scraper.out')
|
184
|
+
end
|
185
|
+
|
186
|
+
it 'returns true' do
|
187
|
+
expect(@runner.run).to be(true)
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
context 'with a scraper that produces an invalid record' do
|
192
|
+
it 'returns false' do
|
193
|
+
@runner = test_runner('invalid-record-bot')
|
194
|
+
expect(@runner.run).to be(false)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
context 'with a scraper that produces invalid JSON' do
|
199
|
+
it 'returns false' do
|
200
|
+
@runner = test_runner('invalid-json-bot')
|
201
|
+
expect(@runner.run).to be(false)
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
context 'with a scraper that hangs' do
|
206
|
+
# XXX This spec fails because the loop in ScriptRunner#run that
|
207
|
+
# reads lines from the output file doesn't start until the
|
208
|
+
# output file is created; however, the way we're redirecting
|
209
|
+
# stdout using the shell means the file doesn't get created
|
210
|
+
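# until ... (FIXME: this sentence was left unfinished; the condition under which the file gets created is not stated)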
|
211
|
+
it 'returns false' do
|
212
|
+
@runner = test_runner('bot-with-pause',
|
213
|
+
:timeout => 1,
|
214
|
+
:log_to_file => true
|
215
|
+
)
|
216
|
+
expect(@runner.run).to be(false)
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
context 'with a bot that emits an invalid sample date' do
|
221
|
+
before do
|
222
|
+
@runner = test_runner('bot-with-invalid-sample-date')
|
223
|
+
end
|
224
|
+
|
225
|
+
it 'returns false' do
|
226
|
+
expect(@runner.run).to be(false)
|
227
|
+
end
|
228
|
+
end
|
229
|
+
|
230
|
+
context 'with a bot with an invalid data type' do
|
231
|
+
before do
|
232
|
+
@runner = test_runner('bot-with-invalid-data-type')
|
233
|
+
end
|
234
|
+
|
235
|
+
it 'raises InvalidDataType' do
|
236
|
+
expect{@runner.run}.to raise_error(TurbotRunner::InvalidDataType)
|
237
|
+
end
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
describe '#process_output' do
|
242
|
+
before do
  # Anonymous BaseHandler subclass that tallies how many valid records were
  # handled for each data type; the tally is exposed through #records_seen.
  #
  # It is built with Class.new and kept in a local variable, instead of
  # `class Handler < ...`, so that running this hook does not define (or
  # reopen) a top-level Handler constant shared with the rest of the suite.
  counting_handler_class = Class.new(TurbotRunner::BaseHandler) do
    attr_reader :records_seen

    # Accepts and forwards whatever arguments the superclass constructor takes.
    def initialize(*)
      # Unknown data types start at zero so they can be incremented directly.
      @records_seen = Hash.new {|h, k| h[k] = 0}
      super
    end

    # Called once per valid record; only the data type is recorded.
    def handle_valid_record(record, data_type)
      @records_seen[data_type] += 1
    end
  end

  @handler = counting_handler_class.new
end
|
258
|
+
|
259
|
+
it 'calls handler once for each line of output' do
|
260
|
+
test_runner('bot-with-transformer').run
|
261
|
+
|
262
|
+
runner = test_runner('bot-with-transformer',
|
263
|
+
:record_handler => @handler
|
264
|
+
)
|
265
|
+
|
266
|
+
runner.process_output
|
267
|
+
expect(@handler.records_seen['primary data']).to eq(10)
|
268
|
+
expect(@handler.records_seen['simple-licence']).to eq(10)
|
269
|
+
end
|
270
|
+
|
271
|
+
it 'can cope when scraper has failed immediately' do
|
272
|
+
test_runner('bot-that-crashes-immediately').run
|
273
|
+
|
274
|
+
runner = test_runner('bot-with-transformer',
|
275
|
+
:record_handler => @handler
|
276
|
+
)
|
277
|
+
|
278
|
+
runner.process_output
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
describe '#set_up_output_directory' do
|
283
|
+
before do
|
284
|
+
@runner = test_runner('bot-with-transformer')
|
285
|
+
end
|
286
|
+
|
287
|
+
it 'clears existing output' do
|
288
|
+
path = File.join(@runner.base_directory, 'output', 'scraper.out')
|
289
|
+
FileUtils.touch(path)
|
290
|
+
@runner.set_up_output_directory
|
291
|
+
expect(File.exist?(path)).to be(false)
|
292
|
+
end
|
293
|
+
|
294
|
+
it 'does not clear existing files that are not output files' do
|
295
|
+
path = File.join(@runner.base_directory, 'output', 'stdout')
|
296
|
+
FileUtils.touch(path)
|
297
|
+
@runner.set_up_output_directory
|
298
|
+
expect(File.exist?(path)).to be(true)
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
303
|
+
|
304
|
+
# Custom matcher: `expect([runner, script]).to have_output(fixture_name)`.
#
# Compares the JSON records in spec/outputs/<fixture_name> with those in
# <runner.base_directory>/output/<script>.out. Both files are read line by
# line and every line is parsed as JSON, so the comparison is on parsed
# values rather than on raw text.
RSpec::Matchers.define :have_output do |expected|
  match do |actual|
    runner, script = actual

    # Reads a file and parses each of its lines as a JSON document.
    parse_json_lines = lambda do |path|
      File.readlines(path).map {|line| JSON.parse(line)}
    end

    # The fixture is read first, then the file written by the run.
    fixture_records = parse_json_lines.call(File.join('spec', 'outputs', expected))
    produced_records = parse_json_lines.call(
      File.join(runner.base_directory, 'output', "#{script}.out")
    )

    expect(fixture_records).to eq(produced_records)
  end
end
|
315
|
+
|
316
|
+
|
317
|
+
# Custom matcher:
# `expect([runner, script]).to have_error_output_matching(pattern)`.
#
# Reads <runner.base_directory>/output/<script>.err in full and checks its
# contents against `pattern` with RSpec's `match` matcher.
RSpec::Matchers.define :have_error_output_matching do |expected|
  match do |actual|
    runner, script = actual
    err_path = File.join(runner.base_directory, 'output', "#{script}.err")

    expect(File.read(err_path)).to match(expected)
  end
end
|
326
|
+
|
327
|
+
# Builds a TurbotRunner::Runner for one of the fixture bots.
#
# name - directory name of the bot, resolved against '../bots' relative to
#        the directory containing this file.
# opts - options hash handed to TurbotRunner::Runner.new unchanged
#        (defaults to an empty hash).
#
# Returns the new TurbotRunner::Runner.
def test_runner(name, opts={})
  bot_directory = File.join(File.dirname(__FILE__), '../bots', name)

  TurbotRunner::Runner.new(bot_directory, opts)
end
|