turbot-runner-morph 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92)
  1. checksums.yaml +15 -0
  2. data/bin/rspec +16 -0
  3. data/lib/turbot_runner.rb +28 -0
  4. data/lib/turbot_runner/base_handler.rb +15 -0
  5. data/lib/turbot_runner/exceptions.rb +4 -0
  6. data/lib/turbot_runner/prerun.rb +3 -0
  7. data/lib/turbot_runner/processor.rb +53 -0
  8. data/lib/turbot_runner/runner.rb +179 -0
  9. data/lib/turbot_runner/script_runner.rb +98 -0
  10. data/lib/turbot_runner/utils.rb +47 -0
  11. data/lib/turbot_runner/validator.rb +28 -0
  12. data/lib/turbot_runner/version.rb +3 -0
  13. data/schema/schemas/company-schema.json +243 -0
  14. data/schema/schemas/financial-payment-schema.json +32 -0
  15. data/schema/schemas/includes/address.json +53 -0
  16. data/schema/schemas/includes/alternative_name.json +36 -0
  17. data/schema/schemas/includes/company-for-nesting.json +245 -0
  18. data/schema/schemas/includes/company.json +25 -0
  19. data/schema/schemas/includes/entity.json +58 -0
  20. data/schema/schemas/includes/filing.json +52 -0
  21. data/schema/schemas/includes/financial-payment-data-object.json +112 -0
  22. data/schema/schemas/includes/identifier.json +20 -0
  23. data/schema/schemas/includes/industry_code.json +29 -0
  24. data/schema/schemas/includes/licence-data-object.json +63 -0
  25. data/schema/schemas/includes/officer.json +70 -0
  26. data/schema/schemas/includes/organisation.json +58 -0
  27. data/schema/schemas/includes/permission.json +46 -0
  28. data/schema/schemas/includes/person.json +62 -0
  29. data/schema/schemas/includes/person_name.json +71 -0
  30. data/schema/schemas/includes/previous_name.json +24 -0
  31. data/schema/schemas/includes/share-parcel-data.json +82 -0
  32. data/schema/schemas/includes/share-parcel.json +78 -0
  33. data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
  34. data/schema/schemas/includes/total-shares.json +17 -0
  35. data/schema/schemas/includes/unknown_entity_type.json +58 -0
  36. data/schema/schemas/licence-schema.json +105 -0
  37. data/schema/schemas/primary-data-schema.json +20 -0
  38. data/schema/schemas/share-parcel-schema.json +22 -0
  39. data/schema/schemas/simple-financial-payment-schema.json +122 -0
  40. data/schema/schemas/simple-licence-schema.json +82 -0
  41. data/schema/schemas/simple-subsidiary-schema.json +85 -0
  42. data/schema/schemas/subsidiary-relationship-schema.json +46 -0
  43. data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
  44. data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
  45. data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
  46. data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
  47. data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
  48. data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
  49. data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
  50. data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
  51. data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
  52. data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
  53. data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
  54. data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
  55. data/spec/bots/bot-that-expects-file/manifest.json +8 -0
  56. data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
  57. data/spec/bots/bot-that-expects-file/something.txt +1 -0
  58. data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
  59. data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
  60. data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
  61. data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
  62. data/spec/bots/bot-with-pause/manifest.json +8 -0
  63. data/spec/bots/bot-with-pause/scraper.rb +16 -0
  64. data/spec/bots/bot-with-transformer/manifest.json +15 -0
  65. data/spec/bots/bot-with-transformer/scraper.rb +10 -0
  66. data/spec/bots/bot-with-transformer/transformer.rb +15 -0
  67. data/spec/bots/bot-with-transformers/manifest.json +20 -0
  68. data/spec/bots/bot-with-transformers/scraper.rb +10 -0
  69. data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
  70. data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
  71. data/spec/bots/invalid-json-bot/manifest.json +8 -0
  72. data/spec/bots/invalid-json-bot/scraper.rb +11 -0
  73. data/spec/bots/invalid-record-bot/manifest.json +8 -0
  74. data/spec/bots/invalid-record-bot/scraper.rb +11 -0
  75. data/spec/bots/logging-bot/manifest.json +8 -0
  76. data/spec/bots/logging-bot/scraper.rb +14 -0
  77. data/spec/bots/python-bot/manifest.json +8 -0
  78. data/spec/bots/python-bot/scraper.py +11 -0
  79. data/spec/bots/ruby-bot/manifest.json +8 -0
  80. data/spec/bots/ruby-bot/scraper.rb +10 -0
  81. data/spec/bots/slow-bot/manifest.json +8 -0
  82. data/spec/bots/slow-bot/scraper.rb +11 -0
  83. data/spec/lib/processor_spec.rb +181 -0
  84. data/spec/lib/runner_spec.rb +330 -0
  85. data/spec/lib/utils_spec.rb +23 -0
  86. data/spec/lib/validator_spec.rb +89 -0
  87. data/spec/manual_spec.rb +57 -0
  88. data/spec/outputs/full-scraper.out +10 -0
  89. data/spec/outputs/full-transformer.out +10 -0
  90. data/spec/outputs/truncated-scraper.out +5 -0
  91. data/spec/spec_helper.rb +20 -0
  92. metadata +148 -0
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "invalid-record-bot",
3
+ "description": "This is a bot that produces an invalid record",
4
+ "language": "ruby",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.rb"]
8
+ }
@@ -0,0 +1,11 @@
1
+ require 'json'
2
+
3
+ 0.upto(9) do |n|
4
+ record = {
5
+ :licence_number => "XYZ#{n}",
6
+ :source_url => 'http://example.com',
7
+ :sample_date => '2014-06-01'
8
+ }
9
+ record.delete(:source_url) if n == 5
10
+ puts(record.to_json)
11
+ end
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "logging-bot",
3
+ "description": "This is a bot that logs",
4
+ "language": "ruby",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.rb"]
8
+ }
@@ -0,0 +1,14 @@
1
+ require 'json'
2
+
3
+ $stderr.puts('doing...')
4
+
5
+ 0.upto(9) do |n|
6
+ record = {
7
+ :licence_number => "XYZ#{n}",
8
+ :source_url => 'http://example.com',
9
+ :sample_date => '2014-06-01'
10
+ }
11
+ puts(record.to_json)
12
+ end
13
+
14
+ $stderr.puts('done')
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "python-bot",
3
+ "description": "This is a python bot",
4
+ "language": "python",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.py"]
8
+ }
@@ -0,0 +1,11 @@
1
+ from __future__ import print_function
2
+
3
+ import json
4
+
5
+ for n in range(10):
6
+ record = {
7
+ 'licence_number': 'XYZ{}'.format(n),
8
+ 'source_url': 'http://example.com',
9
+ 'sample_date': '2014-06-01'
10
+ }
11
+ print(json.dumps(record))
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "ruby-bot",
3
+ "description": "This is a ruby bot",
4
+ "language": "ruby",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.rb"]
8
+ }
@@ -0,0 +1,10 @@
1
+ require 'json'
2
+
3
+ 0.upto(9) do |n|
4
+ record = {
5
+ :licence_number => "XYZ#{n}",
6
+ :source_url => 'http://example.com',
7
+ :sample_date => '2014-06-01'
8
+ }
9
+ puts(record.to_json)
10
+ end
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "slow-bot",
3
+ "description": "This bot sleeps between each record it produces",
4
+ "language": "ruby",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.rb"]
8
+ }
@@ -0,0 +1,11 @@
1
+ require 'json'
2
+
3
+ 0.upto(99) do |n|
4
+ record = {
5
+ :licence_number => "XYZ#{n}",
6
+ :source_url => 'http://example.com',
7
+ :sample_date => '2014-06-01'
8
+ }
9
+ puts(record.to_json)
10
+ sleep 0.1
11
+ end
@@ -0,0 +1,181 @@
1
+ require 'json'
2
+ require 'turbot_runner'
3
+
4
+ describe TurbotRunner::Processor do
5
+ describe '#process' do
6
+ before do
7
+ @handler = TurbotRunner::BaseHandler.new
8
+ @data_type = 'primary data'
9
+ @script_config = {
10
+ :data_type => @data_type,
11
+ :identifying_fields => ['number']
12
+ }
13
+ end
14
+
15
+ context 'with a nil runner passed in' do
16
+ before do
17
+ @processor = TurbotRunner::Processor.new(nil, @script_config, @handler)
18
+ end
19
+
20
+ context 'with valid record' do
21
+ it 'calls Handler#handle_valid_record' do
22
+ record = {
23
+ 'sample_date' => '2014-06-01',
24
+ 'source_url' => 'http://example.com/123',
25
+ 'number' => 123
26
+ }
27
+
28
+ expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
29
+ @processor.process(record.to_json)
30
+ end
31
+ end
32
+
33
+ context 'with invalid record' do
34
+ it 'calls Handler#handle_invalid_record' do
35
+ record = {
36
+ 'sample_date' => '2014-06-01',
37
+ 'number' => 123
38
+ }
39
+
40
+ expected_error = 'Missing required property: source_url'
41
+ expect(@handler).to receive(:handle_invalid_record).
42
+ with(record, @data_type, expected_error)
43
+ @processor.process(record.to_json)
44
+ end
45
+ end
46
+
47
+ context 'with invalid JSON' do
48
+ it 'calls Handler#handle_invalid_json' do
49
+ line = 'this is not JSON'
50
+ expect(@handler).to receive(:handle_invalid_json).with(line)
51
+ @processor.process(line)
52
+ end
53
+ end
54
+ end
55
+
56
+ context 'with a runner passed in' do
57
+ before do
58
+ @script_runner = instance_double('ScriptRunner')
59
+ allow(@script_runner).to receive(:interrupt_and_mark_as_failed)
60
+ @processor = TurbotRunner::Processor.new(@script_runner, @script_config, @handler)
61
+ end
62
+
63
+ context 'with valid record' do
64
+ it 'calls Handler#handle_valid_record' do
65
+ record = {
66
+ 'sample_date' => '2014-06-01',
67
+ 'source_url' => 'http://example.com/123',
68
+ 'number' => 123
69
+ }
70
+
71
+ expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
72
+ @processor.process(record.to_json)
73
+ end
74
+ end
75
+
76
+ context 'with invalid record' do
77
+ before do
78
+ @record = {
79
+ 'sample_date' => '2014-06-01',
80
+ 'number' => 123
81
+ }
82
+ end
83
+
84
+ it 'calls Handler#handle_invalid_record' do
85
+ expected_error = 'Missing required property: source_url'
86
+ expect(@handler).to receive(:handle_invalid_record).
87
+ with(@record, @data_type, expected_error)
88
+ @processor.process(@record.to_json)
89
+ end
90
+
91
+ it 'interrupts runner' do
92
+ expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
93
+ @processor.process(@record.to_json)
94
+ end
95
+ end
96
+
97
+ context 'with invalid JSON' do
98
+ before do
99
+ @line = 'this is not JSON'
100
+ end
101
+
102
+ it 'calls Handler#handle_invalid_json' do
103
+ expect(@handler).to receive(:handle_invalid_json).with(@line)
104
+ @processor.process(@line)
105
+ end
106
+
107
+ it 'interrupts runner' do
108
+ expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
109
+ @processor.process(@line)
110
+ end
111
+ end
112
+
113
+ it 'converts date format' do
114
+ record = {
115
+ 'sample_date' => '2014-06-01 12:34:56 +0000',
116
+ 'source_url' => 'http://example.com/123',
117
+ 'number' => 123
118
+ }
119
+
120
+ converted_record = {
121
+ 'sample_date' => '2014-06-01',
122
+ 'source_url' => 'http://example.com/123',
123
+ 'number' => 123
124
+ }
125
+
126
+ expect(@handler).to receive(:handle_valid_record).with(converted_record, @data_type)
127
+ @processor.process(record.to_json)
128
+ end
129
+
130
+ it 'does not pass retrieved_at to validator' do
131
+ record = {
132
+ 'sample_date' => '2014-06-01',
133
+ 'retrieved_at' => '2014-06-01 12:34:56 +0000',
134
+ 'source_url' => 'http://example.com/123',
135
+ 'number' => 123
136
+ }
137
+
138
+ expected_record_to_validate = {
139
+ 'sample_date' => '2014-06-01',
140
+ 'source_url' => 'http://example.com/123',
141
+ 'number' => 123
142
+ }
143
+
144
+ expect(TurbotRunner::Validator).to receive(:validate).
145
+ with('primary data', expected_record_to_validate, ['number'])
146
+ @processor.process(record.to_json)
147
+ end
148
+ end
149
+
150
+ it 'can handle schemas with $refs' do
151
+ handler = TurbotRunner::BaseHandler.new
152
+ script_config = {
153
+ :data_type => 'licence',
154
+ :identifying_fields => ['licence_number']
155
+ }
156
+
157
+ script_runner = instance_double('ScriptRunner')
158
+ allow(script_runner).to receive(:interrupt_and_mark_as_failed)
159
+ processor = TurbotRunner::Processor.new(script_runner, script_config, handler)
160
+
161
+ record = {
162
+ :licence_holder => {
163
+ :entity_type => 'company',
164
+ :entity_properties => {
165
+ :name => 'Hairy Goat Breeding Ltd',
166
+ :jurisdiction_code => 'gb',
167
+ }
168
+ },
169
+ :licence_number => '1234',
170
+ :permissions => ['Goat breeding'],
171
+ :licence_issuer => 'Sheep and Goat Board of Bermuda',
172
+ :jurisdiction_of_licence => 'bm',
173
+ :source_url => 'http://example.com',
174
+ :sample_date => '2015-01-01'
175
+ }
176
+
177
+ expect(handler).to receive(:handle_valid_record)
178
+ processor.process(record.to_json)
179
+ end
180
+ end
181
+ end
@@ -0,0 +1,330 @@
1
+ require 'json'
2
+ require 'turbot_runner'
3
+
4
+ describe TurbotRunner::Runner do
5
+ before(:each) do
6
+ Dir.glob('spec/bots/**/output/*').each {|f| File.delete(f)}
7
+ end
8
+
9
+ after(:all) do
10
+ puts
11
+ puts 'If all specs passed, you should now run `ruby spec/manual_spec.rb`'
12
+ end
13
+
14
+ describe '#run' do
15
+ context 'with a bot written in ruby' do
16
+ before do
17
+ @runner = test_runner('ruby-bot')
18
+ end
19
+
20
+ it 'produces expected output' do
21
+ @runner.run
22
+ expect([@runner, 'scraper']).to have_output('full-scraper.out')
23
+ end
24
+
25
+ it 'returns true' do
26
+ expect(@runner.run).to be(true)
27
+ end
28
+ end
29
+
30
+ context 'with a bot written in python' do
31
+ before do
32
+ @runner = test_runner('python-bot')
33
+ end
34
+
35
+ it 'produces expected output' do
36
+ @runner.run
37
+ expect([@runner, 'scraper']).to have_output('full-scraper.out')
38
+ end
39
+ end
40
+
41
+ context 'with a bot with a transformer' do
42
+ before do
43
+ @runner = test_runner('bot-with-transformer')
44
+ end
45
+
46
+ it 'produces expected outputs' do
47
+ @runner.run
48
+ expect([@runner, 'scraper']).to have_output('full-scraper.out')
49
+ expect([@runner, 'transformer']).to have_output('full-transformer.out')
50
+ end
51
+
52
+ it 'returns true' do
53
+ expect(@runner.run).to be(true)
54
+ end
55
+ end
56
+
57
+ context 'with a bot with multiple transformers' do
58
+ before do
59
+ @runner = test_runner('bot-with-transformers')
60
+ end
61
+
62
+ it 'produces expected outputs' do
63
+ @runner.run
64
+ expect([@runner, 'scraper']).to have_output('full-scraper.out')
65
+ expect([@runner, 'transformer1']).to have_output('full-transformer.out')
66
+ expect([@runner, 'transformer2']).to have_output('full-transformer.out')
67
+ end
68
+
69
+ it 'returns true' do
70
+ expect(@runner.run).to be(true)
71
+ end
72
+ end
73
+
74
+ context 'with a bot that logs' do
75
+ context 'when logging to file enabled' do
76
+ it 'logs to file' do
77
+ expected_log = "doing...\ndone\n"
78
+ runner = test_runner('logging-bot', :log_to_file => true)
79
+ runner.run
80
+ expect([runner, 'scraper']).to have_error_output_matching(expected_log)
81
+ end
82
+ end
83
+
84
+ context 'when logging to file not enabled' do
85
+ xit 'logs to stderr' do
86
+ # This is tested in manual_spec.rb
87
+ end
88
+ end
89
+ end
90
+
91
+ context 'with a bot that outputs RUN ENDED' do
92
+ before do
93
+ @runner = test_runner('bot-that-emits-run-ended', :log_to_file => true)
94
+ end
95
+ it 'calls handle_run_ended on the handler' do
96
+ expect_any_instance_of(TurbotRunner::BaseHandler).to receive(:handle_run_ended)
97
+ @runner.run
98
+ end
99
+
100
+ it 'interrupts the run' do
101
+ expect_any_instance_of(TurbotRunner::ScriptRunner).to receive(:interrupt)
102
+ @runner.run
103
+ end
104
+ end
105
+
106
+
107
+ context 'with a bot that crashes in scraper' do
108
+ before do
109
+ @runner = test_runner('bot-that-crashes-in-scraper', :log_to_file => true)
110
+ end
111
+
112
+ it 'returns false' do
113
+ expect(@runner.run).to be(false)
114
+ end
115
+
116
+ it 'writes error to stderr' do
117
+ @runner.run
118
+ expect([@runner, 'scraper']).to have_error_output_matching(/Oh no/)
119
+ end
120
+
121
+ it 'still runs the transformers' do
122
+ expect(@runner).to receive(:run_script).once.with(
123
+ hash_including(:file=>"scraper.rb"))
124
+ expect(@runner).to receive(:run_script).once.with(
125
+ hash_including(:file=>"transformer1.rb"), anything)
126
+ @runner.run
127
+ end
128
+ end
129
+
130
+ context 'with a bot that expects a file to be present in the working directory' do
131
+ before do
132
+ @runner = test_runner('bot-that-expects-file')
133
+ end
134
+
135
+ it 'returns true' do
136
+ expect(@runner.run).to be(true)
137
+ end
138
+ end
139
+
140
+ context 'with a bot that crashes in transformer' do
141
+ before do
142
+ @runner = test_runner('bot-that-crashes-in-transformer', :log_to_file => true)
143
+ end
144
+
145
+ it 'returns false' do
146
+ expect(@runner.run).to be(false)
147
+ end
148
+
149
+ it 'writes error to stderr' do
150
+ @runner.run
151
+ expect([@runner, 'transformer2']).to have_error_output_matching(/Oh no/)
152
+ end
153
+ end
154
+
155
+ context 'with a bot that is interrupted in scraper' do
156
+ xit 'produces truncated output' do
157
+ # This is tested in manual_spec.rb
158
+ end
159
+ end
160
+
161
+ context 'with a handler that interrupts the runner' do
162
+ before do
163
+ class Handler < TurbotRunner::BaseHandler
164
+ def initialize(*)
165
+ @count = 0
166
+ super
167
+ end
168
+
169
+ def handle_valid_record(record, data_type)
170
+ @count += 1
171
+ raise TurbotRunner::InterruptRun if @count >= 5
172
+ end
173
+ end
174
+
175
+ @runner = test_runner('slow-bot',
176
+ :record_handler => Handler.new,
177
+ :log_to_file => true
178
+ )
179
+ end
180
+
181
+ it 'produces expected output' do
182
+ @runner.run
183
+ expect([@runner, 'scraper']).to have_output('truncated-scraper.out')
184
+ end
185
+
186
+ it 'returns true' do
187
+ expect(@runner.run).to be(true)
188
+ end
189
+ end
190
+
191
+ context 'with a scraper that produces an invalid record' do
192
+ it 'returns false' do
193
+ @runner = test_runner('invalid-record-bot')
194
+ expect(@runner.run).to be(false)
195
+ end
196
+ end
197
+
198
+ context 'with a scraper that produces invalid JSON' do
199
+ it 'returns false' do
200
+ @runner = test_runner('invalid-json-bot')
201
+ expect(@runner.run).to be(false)
202
+ end
203
+ end
204
+
205
+ context 'with a scraper that hangs' do
206
+ # XXX This spec fails because the loop in ScriptRunner#run that
207
+ # reads lines from the output file doesn't start until the
208
+ # output file is created; however, the way we're redirecting
209
+ # stdout using the shell means the file doesn't get created
210
+ # until [comment is cut off here in the original source]
211
+ it 'returns false' do
212
+ @runner = test_runner('bot-with-pause',
213
+ :timeout => 1,
214
+ :log_to_file => true
215
+ )
216
+ expect(@runner.run).to be(false)
217
+ end
218
+ end
219
+
220
+ context 'with a bot that emits an invalid sample date' do
221
+ before do
222
+ @runner = test_runner('bot-with-invalid-sample-date')
223
+ end
224
+
225
+ it 'returns false' do
226
+ expect(@runner.run).to be(false)
227
+ end
228
+ end
229
+
230
+ context 'with a bot with an invalid data type' do
231
+ before do
232
+ @runner = test_runner('bot-with-invalid-data-type')
233
+ end
234
+
235
+ it 'raises InvalidDataType' do
236
+ expect{@runner.run}.to raise_error(TurbotRunner::InvalidDataType)
237
+ end
238
+ end
239
+ end
240
+
241
+ describe '#process_output' do
242
+ before do
243
+ class Handler < TurbotRunner::BaseHandler
244
+ attr_reader :records_seen
245
+
246
+ def initialize(*)
247
+ @records_seen = Hash.new {|h, k| h[k] = 0}
248
+ super
249
+ end
250
+
251
+ def handle_valid_record(record, data_type)
252
+ @records_seen[data_type] += 1
253
+ end
254
+ end
255
+
256
+ @handler = Handler.new
257
+ end
258
+
259
+ it 'calls handler once for each line of output' do
260
+ test_runner('bot-with-transformer').run
261
+
262
+ runner = test_runner('bot-with-transformer',
263
+ :record_handler => @handler
264
+ )
265
+
266
+ runner.process_output
267
+ expect(@handler.records_seen['primary data']).to eq(10)
268
+ expect(@handler.records_seen['simple-licence']).to eq(10)
269
+ end
270
+
271
+ it 'can cope when scraper has failed immediately' do
272
+ test_runner('bot-that-crashes-immediately').run
273
+
274
+ runner = test_runner('bot-with-transformer',
275
+ :record_handler => @handler
276
+ )
277
+
278
+ runner.process_output
279
+ end
280
+ end
281
+
282
+ describe '#set_up_output_directory' do
283
+ before do
284
+ @runner = test_runner('bot-with-transformer')
285
+ end
286
+
287
+ it 'clears existing output' do
288
+ path = File.join(@runner.base_directory, 'output', 'scraper.out')
289
+ FileUtils.touch(path)
290
+ @runner.set_up_output_directory
291
+ expect(File.exist?(path)).to be(false)
292
+ end
293
+
294
+ it 'does not clear existing files that are not output files' do
295
+ path = File.join(@runner.base_directory, 'output', 'stdout')
296
+ FileUtils.touch(path)
297
+ @runner.set_up_output_directory
298
+ expect(File.exist?(path)).to be(true)
299
+ end
300
+ end
301
+ end
302
+
303
+
304
+ RSpec::Matchers.define :have_output do |expected|
305
+ match do |actual|
306
+ runner, script = actual
307
+
308
+ expected_path = File.join('spec', 'outputs', expected)
309
+ expected_output = File.readlines(expected_path).map {|line| JSON.parse(line)}
310
+ actual_path = File.join(runner.base_directory, 'output', "#{script}.out")
311
+ actual_output = File.readlines(actual_path).map {|line| JSON.parse(line)}
312
+ expect(expected_output).to eq(actual_output)
313
+ end
314
+ end
315
+
316
+
317
+ RSpec::Matchers.define :have_error_output_matching do |expected|
318
+ match do |actual|
319
+ runner, script = actual
320
+
321
+ actual_path = File.join(runner.base_directory, 'output', "#{script}.err")
322
+ actual_output = File.read(actual_path)
323
+ expect(actual_output).to match(expected)
324
+ end
325
+ end
326
+
327
+ def test_runner(name, opts={})
328
+ test_bot_location = File.join(File.dirname(__FILE__), '../bots', name)
329
+ TurbotRunner::Runner.new(test_bot_location, opts)
330
+ end