turbot-runner 0.2.21 → 0.2.22
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/turbot_runner/processor.rb +11 -8
- data/lib/turbot_runner/runner.rb +5 -5
- data/lib/turbot_runner/version.rb +1 -1
- data/spec/lib/processor_spec.rb +14 -4
- data/spec/lib/runner_spec.rb +12 -0
- metadata +3 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf3018be33a490d1220c5f1afaeda1d7c5fdd2f1
|
4
|
+
data.tar.gz: a53d4a8b5a69d1da8a596d4028b3210fa4a87294
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1701ffbe66db6b127c620dc902db0c748acd872d315968bf3da04492ed3817534e1653d43c5deffb4ed083bf5844d0de25b7f8fb69b57fab7b0f18e11c9a2cc0
|
7
|
+
data.tar.gz: 7e50e47e1f2097daf2f83a42ca4d3bab58053ab8acc9e366deec6d81a23ed123411cb17b3a1256dd6357d3c4189fab60e5e4737abb0618728e3173048bc02daa
|
@@ -10,20 +10,23 @@ module TurbotRunner
|
|
10
10
|
@seen_uids = script_config[:duplicates_allowed] ? nil : Set.new
|
11
11
|
end
|
12
12
|
|
13
|
-
def process(line)
|
13
|
+
def process(line, opts={})
|
14
|
+
validate = opts[:validate].nil? ? true : opts[:validate]
|
14
15
|
begin
|
15
16
|
if line.strip == "SNAPSHOT ENDED" || line.strip == "RUN ENDED" # latter is legacy
|
16
17
|
@record_handler.handle_snapshot_ended(@data_type)
|
17
18
|
@runner.interrupt if @runner
|
18
19
|
else
|
19
20
|
record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
21
|
+
error_message = nil
|
22
|
+
if validate
|
23
|
+
error_message = Validator.validate(
|
24
|
+
@data_type,
|
25
|
+
record,
|
26
|
+
@identifying_fields,
|
27
|
+
@seen_uids
|
28
|
+
)
|
29
|
+
end
|
27
30
|
|
28
31
|
if error_message.nil?
|
29
32
|
begin
|
data/lib/turbot_runner/runner.rb
CHANGED
@@ -68,11 +68,11 @@ module TurbotRunner
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
-
def process_output
|
72
|
-
process_script_output(scraper_config)
|
71
|
+
def process_output(opts={})
|
72
|
+
process_script_output(scraper_config, opts)
|
73
73
|
|
74
74
|
transformers.each do |transformer_config|
|
75
|
-
process_script_output(transformer_config.merge(:base_directory => @base_directory))
|
75
|
+
process_script_output(transformer_config.merge(:base_directory => @base_directory), opts)
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
@@ -115,7 +115,7 @@ module TurbotRunner
|
|
115
115
|
script_runner.run # returns boolean indicating success
|
116
116
|
end
|
117
117
|
|
118
|
-
def process_script_output(script_config)
|
118
|
+
def process_script_output(script_config, opts)
|
119
119
|
# The first argument to the Processor constructor is a nil
|
120
120
|
# Runner. This is because no running behaviour
|
121
121
|
# (e.g. interruptions etc) is required; we just want to do
|
@@ -124,7 +124,7 @@ module TurbotRunner
|
|
124
124
|
file = output_file(script_config[:file])
|
125
125
|
File.open(file) do |f|
|
126
126
|
f.each_line do |line|
|
127
|
-
processor.process(line)
|
127
|
+
processor.process(line, opts)
|
128
128
|
end
|
129
129
|
end
|
130
130
|
rescue Errno::ENOENT => e
|
data/spec/lib/processor_spec.rb
CHANGED
@@ -31,16 +31,26 @@ describe TurbotRunner::Processor do
|
|
31
31
|
end
|
32
32
|
|
33
33
|
context 'with invalid record' do
|
34
|
-
|
35
|
-
record = {
|
34
|
+
before do
|
35
|
+
@record = {
|
36
36
|
'sample_date' => '2014-06-01',
|
37
37
|
'number' => 123
|
38
38
|
}
|
39
|
+
end
|
39
40
|
|
41
|
+
it 'calls Handler#handle_invalid_record' do
|
40
42
|
expected_error = 'Missing required property: source_url'
|
41
43
|
expect(@handler).to receive(:handle_invalid_record).
|
42
|
-
with(record, @data_type, expected_error)
|
43
|
-
@processor.process(record.to_json)
|
44
|
+
with(@record, @data_type, expected_error)
|
45
|
+
@processor.process(@record.to_json)
|
46
|
+
end
|
47
|
+
|
48
|
+
context 'requesting non-validation' do
|
49
|
+
it 'calls Handler#handle_valid_record' do
|
50
|
+
expect(@handler).to receive(:handle_valid_record).
|
51
|
+
with(@record, @data_type)
|
52
|
+
@processor.process(@record.to_json, validate: false)
|
53
|
+
end
|
44
54
|
end
|
45
55
|
end
|
46
56
|
|
data/spec/lib/runner_spec.rb
CHANGED
@@ -327,6 +327,18 @@ describe TurbotRunner::Runner do
|
|
327
327
|
expect(@handler.records_seen['simple-licence']).to eq(10)
|
328
328
|
end
|
329
329
|
|
330
|
+
it 'passes opts to processor.process' do
|
331
|
+
test_runner('bot-with-transformer').run
|
332
|
+
runner = test_runner('bot-with-transformer',
|
333
|
+
:record_handler => @handler
|
334
|
+
)
|
335
|
+
opts = {frob: 5}
|
336
|
+
processor = double('processor')
|
337
|
+
allow(TurbotRunner::Processor).to receive(:new).and_return(processor)
|
338
|
+
expect(processor).to receive(:process).with(anything, opts).at_least(:once)
|
339
|
+
runner.process_output(opts)
|
340
|
+
end
|
341
|
+
|
330
342
|
it 'can cope when scraper has failed immediately' do
|
331
343
|
test_runner('bot-that-crashes-immediately').run
|
332
344
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.21
|
4
|
+
version: 0.2.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OpenCorporates
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -228,61 +228,4 @@ rubygems_version: 2.4.5
|
|
228
228
|
signing_key:
|
229
229
|
specification_version: 4
|
230
230
|
summary: Utilities for running bots with Turbot
|
231
|
-
test_files:
|
232
|
-
- spec/bots/bot-that-crashes-immediately/manifest.json
|
233
|
-
- spec/bots/bot-that-crashes-immediately/scraper.rb
|
234
|
-
- spec/bots/bot-that-crashes-immediately/transformer1.rb
|
235
|
-
- spec/bots/bot-that-crashes-in-scraper/manifest.json
|
236
|
-
- spec/bots/bot-that-crashes-in-scraper/scraper.rb
|
237
|
-
- spec/bots/bot-that-crashes-in-scraper/transformer1.rb
|
238
|
-
- spec/bots/bot-that-crashes-in-transformer/manifest.json
|
239
|
-
- spec/bots/bot-that-crashes-in-transformer/scraper.rb
|
240
|
-
- spec/bots/bot-that-crashes-in-transformer/transformer1.rb
|
241
|
-
- spec/bots/bot-that-crashes-in-transformer/transformer2.rb
|
242
|
-
- spec/bots/bot-that-emits-run-ended/manifest.json
|
243
|
-
- spec/bots/bot-that-emits-run-ended/scraper.rb
|
244
|
-
- spec/bots/bot-that-emits-snapshot-ended/manifest.json
|
245
|
-
- spec/bots/bot-that-emits-snapshot-ended/scraper.rb
|
246
|
-
- spec/bots/bot-that-expects-file/manifest.json
|
247
|
-
- spec/bots/bot-that-expects-file/scraper.rb
|
248
|
-
- spec/bots/bot-that-expects-file/something.txt
|
249
|
-
- spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
|
250
|
-
- spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
|
251
|
-
- spec/bots/bot-that-produces-duplicates/manifest.json
|
252
|
-
- spec/bots/bot-that-produces-duplicates/scraper.rb
|
253
|
-
- spec/bots/bot-with-invalid-data-type/manifest.json
|
254
|
-
- spec/bots/bot-with-invalid-data-type/scraper.rb
|
255
|
-
- spec/bots/bot-with-invalid-sample-date/manifest.json
|
256
|
-
- spec/bots/bot-with-invalid-sample-date/scraper.rb
|
257
|
-
- spec/bots/bot-with-pause/manifest.json
|
258
|
-
- spec/bots/bot-with-pause/scraper.rb
|
259
|
-
- spec/bots/bot-with-transformer/manifest.json
|
260
|
-
- spec/bots/bot-with-transformer/scraper.rb
|
261
|
-
- spec/bots/bot-with-transformer/transformer.rb
|
262
|
-
- spec/bots/bot-with-transformers/manifest.json
|
263
|
-
- spec/bots/bot-with-transformers/scraper.rb
|
264
|
-
- spec/bots/bot-with-transformers/transformer1.rb
|
265
|
-
- spec/bots/bot-with-transformers/transformer2.rb
|
266
|
-
- spec/bots/invalid-json-bot/manifest.json
|
267
|
-
- spec/bots/invalid-json-bot/scraper.rb
|
268
|
-
- spec/bots/invalid-record-bot/manifest.json
|
269
|
-
- spec/bots/invalid-record-bot/scraper.rb
|
270
|
-
- spec/bots/logging-bot/manifest.json
|
271
|
-
- spec/bots/logging-bot/scraper.rb
|
272
|
-
- spec/bots/python-bot/manifest.json
|
273
|
-
- spec/bots/python-bot/scraper.py
|
274
|
-
- spec/bots/ruby-bot/manifest.json
|
275
|
-
- spec/bots/ruby-bot/scraper.rb
|
276
|
-
- spec/bots/slow-bot/manifest.json
|
277
|
-
- spec/bots/slow-bot/scraper.rb
|
278
|
-
- spec/lib/processor_spec.rb
|
279
|
-
- spec/lib/runner_spec.rb
|
280
|
-
- spec/lib/utils_spec.rb
|
281
|
-
- spec/lib/validator_spec.rb
|
282
|
-
- spec/manual.rb
|
283
|
-
- spec/outputs/full-scraper.out
|
284
|
-
- spec/outputs/full-transformer.out
|
285
|
-
- spec/outputs/truncated-scraper.out
|
286
|
-
- spec/spec_helper.rb
|
287
|
-
- spec/support/custom_matchers.rb
|
288
|
-
- spec/support/helpers.rb
|
231
|
+
test_files: []
|