turbot-runner-morph 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +15 -0
  2. data/bin/rspec +16 -0
  3. data/lib/turbot_runner.rb +28 -0
  4. data/lib/turbot_runner/base_handler.rb +15 -0
  5. data/lib/turbot_runner/exceptions.rb +4 -0
  6. data/lib/turbot_runner/prerun.rb +3 -0
  7. data/lib/turbot_runner/processor.rb +53 -0
  8. data/lib/turbot_runner/runner.rb +179 -0
  9. data/lib/turbot_runner/script_runner.rb +98 -0
  10. data/lib/turbot_runner/utils.rb +47 -0
  11. data/lib/turbot_runner/validator.rb +28 -0
  12. data/lib/turbot_runner/version.rb +3 -0
  13. data/schema/schemas/company-schema.json +243 -0
  14. data/schema/schemas/financial-payment-schema.json +32 -0
  15. data/schema/schemas/includes/address.json +53 -0
  16. data/schema/schemas/includes/alternative_name.json +36 -0
  17. data/schema/schemas/includes/company-for-nesting.json +245 -0
  18. data/schema/schemas/includes/company.json +25 -0
  19. data/schema/schemas/includes/entity.json +58 -0
  20. data/schema/schemas/includes/filing.json +52 -0
  21. data/schema/schemas/includes/financial-payment-data-object.json +112 -0
  22. data/schema/schemas/includes/identifier.json +20 -0
  23. data/schema/schemas/includes/industry_code.json +29 -0
  24. data/schema/schemas/includes/licence-data-object.json +63 -0
  25. data/schema/schemas/includes/officer.json +70 -0
  26. data/schema/schemas/includes/organisation.json +58 -0
  27. data/schema/schemas/includes/permission.json +46 -0
  28. data/schema/schemas/includes/person.json +62 -0
  29. data/schema/schemas/includes/person_name.json +71 -0
  30. data/schema/schemas/includes/previous_name.json +24 -0
  31. data/schema/schemas/includes/share-parcel-data.json +82 -0
  32. data/schema/schemas/includes/share-parcel.json +78 -0
  33. data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
  34. data/schema/schemas/includes/total-shares.json +17 -0
  35. data/schema/schemas/includes/unknown_entity_type.json +58 -0
  36. data/schema/schemas/licence-schema.json +105 -0
  37. data/schema/schemas/primary-data-schema.json +20 -0
  38. data/schema/schemas/share-parcel-schema.json +22 -0
  39. data/schema/schemas/simple-financial-payment-schema.json +122 -0
  40. data/schema/schemas/simple-licence-schema.json +82 -0
  41. data/schema/schemas/simple-subsidiary-schema.json +85 -0
  42. data/schema/schemas/subsidiary-relationship-schema.json +46 -0
  43. data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
  44. data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
  45. data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
  46. data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
  47. data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
  48. data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
  49. data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
  50. data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
  51. data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
  52. data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
  53. data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
  54. data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
  55. data/spec/bots/bot-that-expects-file/manifest.json +8 -0
  56. data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
  57. data/spec/bots/bot-that-expects-file/something.txt +1 -0
  58. data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
  59. data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
  60. data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
  61. data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
  62. data/spec/bots/bot-with-pause/manifest.json +8 -0
  63. data/spec/bots/bot-with-pause/scraper.rb +16 -0
  64. data/spec/bots/bot-with-transformer/manifest.json +15 -0
  65. data/spec/bots/bot-with-transformer/scraper.rb +10 -0
  66. data/spec/bots/bot-with-transformer/transformer.rb +15 -0
  67. data/spec/bots/bot-with-transformers/manifest.json +20 -0
  68. data/spec/bots/bot-with-transformers/scraper.rb +10 -0
  69. data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
  70. data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
  71. data/spec/bots/invalid-json-bot/manifest.json +8 -0
  72. data/spec/bots/invalid-json-bot/scraper.rb +11 -0
  73. data/spec/bots/invalid-record-bot/manifest.json +8 -0
  74. data/spec/bots/invalid-record-bot/scraper.rb +11 -0
  75. data/spec/bots/logging-bot/manifest.json +8 -0
  76. data/spec/bots/logging-bot/scraper.rb +14 -0
  77. data/spec/bots/python-bot/manifest.json +8 -0
  78. data/spec/bots/python-bot/scraper.py +11 -0
  79. data/spec/bots/ruby-bot/manifest.json +8 -0
  80. data/spec/bots/ruby-bot/scraper.rb +10 -0
  81. data/spec/bots/slow-bot/manifest.json +8 -0
  82. data/spec/bots/slow-bot/scraper.rb +11 -0
  83. data/spec/lib/processor_spec.rb +181 -0
  84. data/spec/lib/runner_spec.rb +330 -0
  85. data/spec/lib/utils_spec.rb +23 -0
  86. data/spec/lib/validator_spec.rb +89 -0
  87. data/spec/manual_spec.rb +57 -0
  88. data/spec/outputs/full-scraper.out +10 -0
  89. data/spec/outputs/full-transformer.out +10 -0
  90. data/spec/outputs/truncated-scraper.out +5 -0
  91. data/spec/spec_helper.rb +20 -0
  92. metadata +148 -0
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "invalid-record-bot",
3
+ "description": "This is a bot that produces an invalid record",
4
+ "language": "ruby",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.rb"]
8
+ }
@@ -0,0 +1,11 @@
1
require 'json'

# Writes ten licence records to stdout, one JSON document per line.
# The record with index 5 has its :source_url removed before it is printed.
(0..9).each do |index|
  fields = {
    :licence_number => "XYZ#{index}",
    :source_url => 'http://example.com',
    :sample_date => '2014-06-01'
  }
  fields.delete(:source_url) if index == 5
  puts(fields.to_json)
end
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "logging-bot",
3
+ "description": "This is a bot that logs",
4
+ "language": "ruby",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.rb"]
8
+ }
@@ -0,0 +1,14 @@
1
require 'json'

# Writes a message to stderr, then ten licence records to stdout (one JSON
# document per line), then a closing message to stderr.
$stderr.puts('doing...')

(0..9).each do |index|
  fields = {
    :licence_number => "XYZ#{index}",
    :source_url => 'http://example.com',
    :sample_date => '2014-06-01'
  }
  puts(fields.to_json)
end

$stderr.puts('done')
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "python-bot",
3
+ "description": "This is a python bot",
4
+ "language": "python",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.py"]
8
+ }
@@ -0,0 +1,11 @@
1
from __future__ import print_function

import json


def build_record(index):
    """Return the licence record for the given index as a plain dict."""
    return {
        'licence_number': 'XYZ{}'.format(index),
        'source_url': 'http://example.com',
        'sample_date': '2014-06-01'
    }


# Print ten licence records to stdout, one JSON document per line.
for index in range(10):
    print(json.dumps(build_record(index)))
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "ruby-bot",
3
+ "description": "This is a ruby bot",
4
+ "language": "ruby",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.rb"]
8
+ }
@@ -0,0 +1,10 @@
1
require 'json'

# Writes ten licence records to stdout, one JSON document per line.
(0..9).each do |index|
  fields = {
    :licence_number => "XYZ#{index}",
    :source_url => 'http://example.com',
    :sample_date => '2014-06-01'
  }
  puts(fields.to_json)
end
@@ -0,0 +1,8 @@
1
+ {
2
+ "bot_id": "slow-bot",
3
+ "description": "This bot sleeps between each record it produces",
4
+ "language": "ruby",
5
+ "data_type": "primary data",
6
+ "identifying_fields": ["licence_number"],
7
+ "files": ["scraper.rb"]
8
+ }
@@ -0,0 +1,11 @@
1
require 'json'

# Writes 100 licence records to stdout, one JSON document per line, sleeping
# for 0.1 seconds after each record.
(0..99).each do |index|
  fields = {
    :licence_number => "XYZ#{index}",
    :source_url => 'http://example.com',
    :sample_date => '2014-06-01'
  }
  puts(fields.to_json)
  sleep 0.1
end
@@ -0,0 +1,181 @@
1
+ require 'json'
2
+ require 'turbot_runner'
3
+
4
+ describe TurbotRunner::Processor do
5
+ describe '#process' do
6
+ before do
7
+ @handler = TurbotRunner::BaseHandler.new
8
+ @data_type = 'primary data'
9
+ @script_config = {
10
+ :data_type => @data_type,
11
+ :identifying_fields => ['number']
12
+ }
13
+ end
14
+
15
+ context 'with a nil runner passed in' do
16
+ before do
17
+ @processor = TurbotRunner::Processor.new(nil, @script_config, @handler)
18
+ end
19
+
20
+ context 'with valid record' do
21
+ it 'calls Handler#handle_valid_record' do
22
+ record = {
23
+ 'sample_date' => '2014-06-01',
24
+ 'source_url' => 'http://example.com/123',
25
+ 'number' => 123
26
+ }
27
+
28
+ expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
29
+ @processor.process(record.to_json)
30
+ end
31
+ end
32
+
33
+ context 'with invalid record' do
34
+ it 'calls Handler#handle_invalid_record' do
35
+ record = {
36
+ 'sample_date' => '2014-06-01',
37
+ 'number' => 123
38
+ }
39
+
40
+ expected_error = 'Missing required property: source_url'
41
+ expect(@handler).to receive(:handle_invalid_record).
42
+ with(record, @data_type, expected_error)
43
+ @processor.process(record.to_json)
44
+ end
45
+ end
46
+
47
+ context 'with invalid JSON' do
48
+ it 'calls Handler#handle_invalid_json' do
49
+ line = 'this is not JSON'
50
+ expect(@handler).to receive(:handle_invalid_json).with(line)
51
+ @processor.process(line)
52
+ end
53
+ end
54
+ end
55
+
56
+ context 'with a runner passed in' do
57
+ before do
58
+ @script_runner = instance_double('ScriptRunner')
59
+ allow(@script_runner).to receive(:interrupt_and_mark_as_failed)
60
+ @processor = TurbotRunner::Processor.new(@script_runner, @script_config, @handler)
61
+ end
62
+
63
+ context 'with valid record' do
64
+ it 'calls Handler#handle_valid_record' do
65
+ record = {
66
+ 'sample_date' => '2014-06-01',
67
+ 'source_url' => 'http://example.com/123',
68
+ 'number' => 123
69
+ }
70
+
71
+ expect(@handler).to receive(:handle_valid_record).with(record, @data_type)
72
+ @processor.process(record.to_json)
73
+ end
74
+ end
75
+
76
+ context 'with invalid record' do
77
+ before do
78
+ @record = {
79
+ 'sample_date' => '2014-06-01',
80
+ 'number' => 123
81
+ }
82
+ end
83
+
84
+ it 'calls Handler#handle_invalid_record' do
85
+ expected_error = 'Missing required property: source_url'
86
+ expect(@handler).to receive(:handle_invalid_record).
87
+ with(@record, @data_type, expected_error)
88
+ @processor.process(@record.to_json)
89
+ end
90
+
91
+ it 'interrupts runner' do
92
+ expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
93
+ @processor.process(@record.to_json)
94
+ end
95
+ end
96
+
97
+ context 'with invalid JSON' do
98
+ before do
99
+ @line = 'this is not JSON'
100
+ end
101
+
102
+ it 'calls Handler#handle_invalid_json' do
103
+ expect(@handler).to receive(:handle_invalid_json).with(@line)
104
+ @processor.process(@line)
105
+ end
106
+
107
+ it 'interrupts runner' do
108
+ expect(@script_runner).to receive(:interrupt_and_mark_as_failed)
109
+ @processor.process(@line)
110
+ end
111
+ end
112
+
113
+ it 'converts date format' do
114
+ record = {
115
+ 'sample_date' => '2014-06-01 12:34:56 +0000',
116
+ 'source_url' => 'http://example.com/123',
117
+ 'number' => 123
118
+ }
119
+
120
+ converted_record = {
121
+ 'sample_date' => '2014-06-01',
122
+ 'source_url' => 'http://example.com/123',
123
+ 'number' => 123
124
+ }
125
+
126
+ expect(@handler).to receive(:handle_valid_record).with(converted_record, @data_type)
127
+ @processor.process(record.to_json)
128
+ end
129
+
130
+ it 'does not pass retrieved_at to validator' do
131
+ record = {
132
+ 'sample_date' => '2014-06-01',
133
+ 'retrieved_at' => '2014-06-01 12:34:56 +0000',
134
+ 'source_url' => 'http://example.com/123',
135
+ 'number' => 123
136
+ }
137
+
138
+ expected_record_to_validate = {
139
+ 'sample_date' => '2014-06-01',
140
+ 'source_url' => 'http://example.com/123',
141
+ 'number' => 123
142
+ }
143
+
144
+ expect(TurbotRunner::Validator).to receive(:validate).
145
+ with('primary data', expected_record_to_validate, ['number'])
146
+ @processor.process(record.to_json)
147
+ end
148
+ end
149
+
150
+ it 'can handle schemas with $refs' do
151
+ handler = TurbotRunner::BaseHandler.new
152
+ script_config = {
153
+ :data_type => 'licence',
154
+ :identifying_fields => ['licence_number']
155
+ }
156
+
157
+ script_runner = instance_double('ScriptRunner')
158
+ allow(script_runner).to receive(:interrupt_and_mark_as_failed)
159
+ processor = TurbotRunner::Processor.new(script_runner, script_config, handler)
160
+
161
+ record = {
162
+ :licence_holder => {
163
+ :entity_type => 'company',
164
+ :entity_properties => {
165
+ :name => 'Hairy Goat Breeding Ltd',
166
+ :jurisdiction_code => 'gb',
167
+ }
168
+ },
169
+ :licence_number => '1234',
170
+ :permissions => ['Goat breeding'],
171
+ :licence_issuer => 'Sheep and Goat Board of Bermuda',
172
+ :jurisdiction_of_licence => 'bm',
173
+ :source_url => 'http://example.com',
174
+ :sample_date => '2015-01-01'
175
+ }
176
+
177
+ expect(handler).to receive(:handle_valid_record)
178
+ processor.process(record.to_json)
179
+ end
180
+ end
181
+ end
@@ -0,0 +1,330 @@
1
+ require 'json'
2
+ require 'turbot_runner'
3
+
4
+ describe TurbotRunner::Runner do
5
# Delete every file found in any bot's output directory before each example,
# so no example sees output written by an earlier one.
#
# The pattern is anchored to this spec file (as test_runner does for the bot
# directories) instead of the current working directory; otherwise the glob
# silently matches nothing when rspec is started from outside the gem root.
before(:each) do
  stale_outputs = File.join(File.dirname(__FILE__), '../bots/**/output/*')
  Dir.glob(stale_outputs).each {|f| File.delete(f)}
end
8
+
9
+ after(:all) do
10
+ puts
11
+ puts 'If all specs passed, you should now run `ruby spec/manual_spec.rb`'
12
+ end
13
+
14
+ describe '#run' do
15
+ context 'with a bot written in ruby' do
16
+ before do
17
+ @runner = test_runner('ruby-bot')
18
+ end
19
+
20
+ it 'produces expected output' do
21
+ @runner.run
22
+ expect([@runner, 'scraper']).to have_output('full-scraper.out')
23
+ end
24
+
25
+ it 'returns true' do
26
+ expect(@runner.run).to be(true)
27
+ end
28
+ end
29
+
30
+ context 'with a bot written in python' do
31
+ before do
32
+ @runner = test_runner('python-bot')
33
+ end
34
+
35
+ it 'produces expected output' do
36
+ @runner.run
37
+ expect([@runner, 'scraper']).to have_output('full-scraper.out')
38
+ end
39
+ end
40
+
41
+ context 'with a bot with a transformer' do
42
+ before do
43
+ @runner = test_runner('bot-with-transformer')
44
+ end
45
+
46
+ it 'produces expected outputs' do
47
+ @runner.run
48
+ expect([@runner, 'scraper']).to have_output('full-scraper.out')
49
+ expect([@runner, 'transformer']).to have_output('full-transformer.out')
50
+ end
51
+
52
+ it 'returns true' do
53
+ expect(@runner.run).to be(true)
54
+ end
55
+ end
56
+
57
+ context 'with a bot with multiple transformers' do
58
+ before do
59
+ @runner = test_runner('bot-with-transformers')
60
+ end
61
+
62
+ it 'produces expected outputs' do
63
+ @runner.run
64
+ expect([@runner, 'scraper']).to have_output('full-scraper.out')
65
+ expect([@runner, 'transformer1']).to have_output('full-transformer.out')
66
+ expect([@runner, 'transformer2']).to have_output('full-transformer.out')
67
+ end
68
+
69
+ it 'returns true' do
70
+ expect(@runner.run).to be(true)
71
+ end
72
+ end
73
+
74
+ context 'with a bot that logs' do
75
+ context 'when logging to file enabled' do
76
+ it 'logs to file' do
77
+ expected_log = "doing...\ndone\n"
78
+ runner = test_runner('logging-bot', :log_to_file => true)
79
+ runner.run
80
+ expect([runner, 'scraper']).to have_error_output_matching(expected_log)
81
+ end
82
+ end
83
+
84
+ context 'when logging to file not enabled' do
85
+ xit 'logs to stderr' do
86
+ # This is tested in manual_spec.rb
87
+ end
88
+ end
89
+ end
90
+
91
+ context 'with a bot that outputs RUN ENDED' do
92
+ before do
93
+ @runner = test_runner('bot-that-emits-run-ended', :log_to_file => true)
94
+ end
95
+ it 'calls handle_run_ended on the handler' do
96
+ expect_any_instance_of(TurbotRunner::BaseHandler).to receive(:handle_run_ended)
97
+ @runner.run
98
+ end
99
+
100
+ it 'interrupts the run' do
101
+ expect_any_instance_of(TurbotRunner::ScriptRunner).to receive(:interrupt)
102
+ @runner.run
103
+ end
104
+ end
105
+
106
+
107
+ context 'with a bot that crashes in scraper' do
108
+ before do
109
+ @runner = test_runner('bot-that-crashes-in-scraper', :log_to_file => true)
110
+ end
111
+
112
+ it 'returns false' do
113
+ expect(@runner.run).to be(false)
114
+ end
115
+
116
+ it 'writes error to stderr' do
117
+ @runner.run
118
+ expect([@runner, 'scraper']).to have_error_output_matching(/Oh no/)
119
+ end
120
+
121
+ it 'still runs the transformers' do
122
+ expect(@runner).to receive(:run_script).once.with(
123
+ hash_including(:file=>"scraper.rb"))
124
+ expect(@runner).to receive(:run_script).once.with(
125
+ hash_including(:file=>"transformer1.rb"), anything)
126
+ @runner.run
127
+ end
128
+ end
129
+
130
+ context 'with a bot that expects a file to be present in the working directory' do
131
+ before do
132
+ @runner = test_runner('bot-that-expects-file')
133
+ end
134
+
135
+ it 'returns true' do
136
+ expect(@runner.run).to be(true)
137
+ end
138
+ end
139
+
140
+ context 'with a bot that crashes in transformer' do
141
+ before do
142
+ @runner = test_runner('bot-that-crashes-in-transformer', :log_to_file => true)
143
+ end
144
+
145
+ it 'returns false' do
146
+ expect(@runner.run).to be(false)
147
+ end
148
+
149
+ it 'writes error to stderr' do
150
+ @runner.run
151
+ expect([@runner, 'transformer2']).to have_error_output_matching(/Oh no/)
152
+ end
153
+ end
154
+
155
+ context 'with a bot that is interrupted in scraper' do
156
+ xit 'produces truncated output' do
157
+ # This is tested in manual_spec.rb
158
+ end
159
+ end
160
+
161
context 'with a handler that interrupts the runner' do
  before do
    # Anonymous subclass: a `class Handler` statement inside a hook would
    # define (or reopen) a global `Handler` constant shared by the whole
    # spec run.
    interrupting_handler = Class.new(TurbotRunner::BaseHandler) do
      def initialize(*)
        @count = 0
        super
      end

      # Counts valid records and raises InterruptRun from the fifth one on.
      def handle_valid_record(record, data_type)
        @count += 1
        raise TurbotRunner::InterruptRun if @count >= 5
      end
    end

    @runner = test_runner('slow-bot',
      :record_handler => interrupting_handler.new,
      :log_to_file => true
    )
  end

  it 'produces expected output' do
    @runner.run
    expect([@runner, 'scraper']).to have_output('truncated-scraper.out')
  end

  it 'returns true' do
    expect(@runner.run).to be(true)
  end
end
190
+
191
+ context 'with a scraper that produces an invalid record' do
192
+ it 'returns false' do
193
+ @runner = test_runner('invalid-record-bot')
194
+ expect(@runner.run).to be(false)
195
+ end
196
+ end
197
+
198
+ context 'with a scraper that produces invalid JSON' do
199
+ it 'returns false' do
200
+ @runner = test_runner('invalid-json-bot')
201
+ expect(@runner.run).to be(false)
202
+ end
203
+ end
204
+
205
+ context 'with a scraper that hangs' do
206
+ # XXX This spec fails because the loop in ScriptRunner#run that
207
+ # reads lines from the output file doesn't start until the
208
+ # output file is created; however, the way we're redirecting
209
+ # stdout using the shell means the file doesn't get created
210
+ # until ... (TODO: this note breaks off here; finish the explanation)
211
+ it 'returns false' do
212
+ @runner = test_runner('bot-with-pause',
213
+ :timeout => 1,
214
+ :log_to_file => true
215
+ )
216
+ expect(@runner.run).to be(false)
217
+ end
218
+ end
219
+
220
+ context 'with a bot that emits an invalid sample date' do
221
+ before do
222
+ @runner = test_runner('bot-with-invalid-sample-date')
223
+ end
224
+
225
+ it 'returns false' do
226
+ expect(@runner.run).to be(false)
227
+ end
228
+ end
229
+
230
+ context 'with a bot with an invalid data type' do
231
+ before do
232
+ @runner = test_runner('bot-with-invalid-data-type')
233
+ end
234
+
235
+ it 'raises InvalidDataType' do
236
+ expect{@runner.run}.to raise_error(TurbotRunner::InvalidDataType)
237
+ end
238
+ end
239
+ end
240
+
241
describe '#process_output' do
  before do
    # Anonymous subclass: a `class Handler` statement inside a hook would
    # define (or reopen) a global `Handler` constant shared by the whole
    # spec run. This handler counts the valid records seen per data type.
    counting_handler = Class.new(TurbotRunner::BaseHandler) do
      attr_reader :records_seen

      def initialize(*)
        @records_seen = Hash.new {|h, k| h[k] = 0}
        super
      end

      def handle_valid_record(record, data_type)
        @records_seen[data_type] += 1
      end
    end

    @handler = counting_handler.new
  end

  it 'calls handler once for each line of output' do
    # Run the bot first, then process its output with a second runner that
    # uses the counting handler.
    test_runner('bot-with-transformer').run

    runner = test_runner('bot-with-transformer',
      :record_handler => @handler
    )

    runner.process_output
    expect(@handler.records_seen['primary data']).to eq(10)
    expect(@handler.records_seen['simple-licence']).to eq(10)
  end

  it 'can cope when scraper has failed immediately' do
    test_runner('bot-that-crashes-immediately').run

    # NOTE(review): the runner below is built for 'bot-with-transformer',
    # not for the bot that was just run -- confirm this is intended.
    runner = test_runner('bot-with-transformer',
      :record_handler => @handler
    )

    # State the expectation explicitly: processing must not raise.
    expect { runner.process_output }.not_to raise_error
  end
end
281
+
282
+ describe '#set_up_output_directory' do
283
+ before do
284
+ @runner = test_runner('bot-with-transformer')
285
+ end
286
+
287
+ it 'clears existing output' do
288
+ path = File.join(@runner.base_directory, 'output', 'scraper.out')
289
+ FileUtils.touch(path)
290
+ @runner.set_up_output_directory
291
+ expect(File.exist?(path)).to be(false)
292
+ end
293
+
294
+ it 'does not clear existing files that are not output files' do
295
+ path = File.join(@runner.base_directory, 'output', 'stdout')
296
+ FileUtils.touch(path)
297
+ @runner.set_up_output_directory
298
+ expect(File.exist?(path)).to be(true)
299
+ end
300
+ end
301
+ end
302
+
303
+
304
# Matcher used as `expect([runner, script_name]).to have_output(fixture_name)`.
#
# Passes when <runner.base_directory>/output/<script_name>.out contains the
# same sequence of JSON documents (one per line, compared after parsing) as
# the fixture file <fixture_name> in the spec outputs directory.
RSpec::Matchers.define :have_output do |expected|
  match do |actual|
    runner, script = actual

    # Resolve the fixture relative to this spec file, as test_runner does for
    # the bot directories, so the matcher does not depend on the current
    # working directory.
    expected_path = File.join(File.dirname(__FILE__), '../outputs', expected)
    expected_output = File.readlines(expected_path).map {|line| JSON.parse(line)}

    actual_path = File.join(runner.base_directory, 'output', "#{script}.out")
    actual_output = File.readlines(actual_path).map {|line| JSON.parse(line)}

    # Return the comparison itself rather than nesting another expectation
    # (which had expected and actual swapped) inside the match block.
    actual_output == expected_output
  end
end
315
+
316
+
317
# Matcher used as
# `expect([runner, script_name]).to have_error_output_matching(pattern)`.
#
# Reads <runner.base_directory>/output/<script_name>.err and matches its whole
# content against the given pattern.
RSpec::Matchers.define :have_error_output_matching do |expected|
  match do |actual|
    runner, script = actual

    err_file = File.join(runner.base_directory, 'output', "#{script}.err")
    expect(File.read(err_file)).to match(expected)
  end
end
326
+
327
# Builds a TurbotRunner::Runner for the fixture bot called +name+, looked up
# in the bots directory next to this spec's directory. +opts+ is passed to
# the runner unchanged.
def test_runner(name, opts={})
  bots_root = File.join(File.dirname(__FILE__), '../bots')
  TurbotRunner::Runner.new(File.join(bots_root, name), opts)
end