turbot-runner 0.2.21 → 0.2.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/turbot_runner/processor.rb +11 -8
- data/lib/turbot_runner/runner.rb +5 -5
- data/lib/turbot_runner/version.rb +1 -1
- data/spec/lib/processor_spec.rb +14 -4
- data/spec/lib/runner_spec.rb +12 -0
- metadata +3 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf3018be33a490d1220c5f1afaeda1d7c5fdd2f1
|
4
|
+
data.tar.gz: a53d4a8b5a69d1da8a596d4028b3210fa4a87294
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1701ffbe66db6b127c620dc902db0c748acd872d315968bf3da04492ed3817534e1653d43c5deffb4ed083bf5844d0de25b7f8fb69b57fab7b0f18e11c9a2cc0
|
7
|
+
data.tar.gz: 7e50e47e1f2097daf2f83a42ca4d3bab58053ab8acc9e366deec6d81a23ed123411cb17b3a1256dd6357d3c4189fab60e5e4737abb0618728e3173048bc02daa
|
@@ -10,20 +10,23 @@ module TurbotRunner
|
|
10
10
|
@seen_uids = script_config[:duplicates_allowed] ? nil : Set.new
|
11
11
|
end
|
12
12
|
|
13
|
-
def process(line)
|
13
|
+
def process(line, opts={})
|
14
|
+
validate = opts[:validate].nil? ? true : opts[:validate]
|
14
15
|
begin
|
15
16
|
if line.strip == "SNAPSHOT ENDED" || line.strip == "RUN ENDED" # latter is legacy
|
16
17
|
@record_handler.handle_snapshot_ended(@data_type)
|
17
18
|
@runner.interrupt if @runner
|
18
19
|
else
|
19
20
|
record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
21
|
+
error_message = nil
|
22
|
+
if validate
|
23
|
+
error_message = Validator.validate(
|
24
|
+
@data_type,
|
25
|
+
record,
|
26
|
+
@identifying_fields,
|
27
|
+
@seen_uids
|
28
|
+
)
|
29
|
+
end
|
27
30
|
|
28
31
|
if error_message.nil?
|
29
32
|
begin
|
data/lib/turbot_runner/runner.rb
CHANGED
@@ -68,11 +68,11 @@ module TurbotRunner
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
-
def process_output
|
72
|
-
process_script_output(scraper_config)
|
71
|
+
def process_output(opts={})
|
72
|
+
process_script_output(scraper_config, opts)
|
73
73
|
|
74
74
|
transformers.each do |transformer_config|
|
75
|
-
process_script_output(transformer_config.merge(:base_directory => @base_directory))
|
75
|
+
process_script_output(transformer_config.merge(:base_directory => @base_directory), opts)
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
@@ -115,7 +115,7 @@ module TurbotRunner
|
|
115
115
|
script_runner.run # returns boolean indicating success
|
116
116
|
end
|
117
117
|
|
118
|
-
def process_script_output(script_config)
|
118
|
+
def process_script_output(script_config, opts)
|
119
119
|
# The first argument to the Processor constructor is a nil
|
120
120
|
# Runner. This is because no running behaviour
|
121
121
|
# (e.g. interruptions etc) is required; we just want to do
|
@@ -124,7 +124,7 @@ module TurbotRunner
|
|
124
124
|
file = output_file(script_config[:file])
|
125
125
|
File.open(file) do |f|
|
126
126
|
f.each_line do |line|
|
127
|
-
processor.process(line)
|
127
|
+
processor.process(line, opts)
|
128
128
|
end
|
129
129
|
end
|
130
130
|
rescue Errno::ENOENT => e
|
data/spec/lib/processor_spec.rb
CHANGED
@@ -31,16 +31,26 @@ describe TurbotRunner::Processor do
|
|
31
31
|
end
|
32
32
|
|
33
33
|
context 'with invalid record' do
|
34
|
-
|
35
|
-
record = {
|
34
|
+
before do
|
35
|
+
@record = {
|
36
36
|
'sample_date' => '2014-06-01',
|
37
37
|
'number' => 123
|
38
38
|
}
|
39
|
+
end
|
39
40
|
|
41
|
+
it 'calls Handler#handle_invalid_record' do
|
40
42
|
expected_error = 'Missing required property: source_url'
|
41
43
|
expect(@handler).to receive(:handle_invalid_record).
|
42
|
-
with(record, @data_type, expected_error)
|
43
|
-
@processor.process(record.to_json)
|
44
|
+
with(@record, @data_type, expected_error)
|
45
|
+
@processor.process(@record.to_json)
|
46
|
+
end
|
47
|
+
|
48
|
+
context 'requesting non-validation' do
|
49
|
+
it 'calls Handler#handle_valid_record' do
|
50
|
+
expect(@handler).to receive(:handle_valid_record).
|
51
|
+
with(@record, @data_type)
|
52
|
+
@processor.process(@record.to_json, validate: false)
|
53
|
+
end
|
44
54
|
end
|
45
55
|
end
|
46
56
|
|
data/spec/lib/runner_spec.rb
CHANGED
@@ -327,6 +327,18 @@ describe TurbotRunner::Runner do
|
|
327
327
|
expect(@handler.records_seen['simple-licence']).to eq(10)
|
328
328
|
end
|
329
329
|
|
330
|
+
it 'passes opts to processor.process' do
|
331
|
+
test_runner('bot-with-transformer').run
|
332
|
+
runner = test_runner('bot-with-transformer',
|
333
|
+
:record_handler => @handler
|
334
|
+
)
|
335
|
+
opts = {frob: 5}
|
336
|
+
processor = double('processor')
|
337
|
+
allow(TurbotRunner::Processor).to receive(:new).and_return(processor)
|
338
|
+
expect(processor).to receive(:process).with(anything, opts).at_least(:once)
|
339
|
+
runner.process_output(opts)
|
340
|
+
end
|
341
|
+
|
330
342
|
it 'can cope when scraper has failed immediately' do
|
331
343
|
test_runner('bot-that-crashes-immediately').run
|
332
344
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.21
|
4
|
+
version: 0.2.22
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OpenCorporates
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -228,61 +228,4 @@ rubygems_version: 2.4.5
|
|
228
228
|
signing_key:
|
229
229
|
specification_version: 4
|
230
230
|
summary: Utilities for running bots with Turbot
|
231
|
-
test_files:
|
232
|
-
- spec/bots/bot-that-crashes-immediately/manifest.json
|
233
|
-
- spec/bots/bot-that-crashes-immediately/scraper.rb
|
234
|
-
- spec/bots/bot-that-crashes-immediately/transformer1.rb
|
235
|
-
- spec/bots/bot-that-crashes-in-scraper/manifest.json
|
236
|
-
- spec/bots/bot-that-crashes-in-scraper/scraper.rb
|
237
|
-
- spec/bots/bot-that-crashes-in-scraper/transformer1.rb
|
238
|
-
- spec/bots/bot-that-crashes-in-transformer/manifest.json
|
239
|
-
- spec/bots/bot-that-crashes-in-transformer/scraper.rb
|
240
|
-
- spec/bots/bot-that-crashes-in-transformer/transformer1.rb
|
241
|
-
- spec/bots/bot-that-crashes-in-transformer/transformer2.rb
|
242
|
-
- spec/bots/bot-that-emits-run-ended/manifest.json
|
243
|
-
- spec/bots/bot-that-emits-run-ended/scraper.rb
|
244
|
-
- spec/bots/bot-that-emits-snapshot-ended/manifest.json
|
245
|
-
- spec/bots/bot-that-emits-snapshot-ended/scraper.rb
|
246
|
-
- spec/bots/bot-that-expects-file/manifest.json
|
247
|
-
- spec/bots/bot-that-expects-file/scraper.rb
|
248
|
-
- spec/bots/bot-that-expects-file/something.txt
|
249
|
-
- spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
|
250
|
-
- spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
|
251
|
-
- spec/bots/bot-that-produces-duplicates/manifest.json
|
252
|
-
- spec/bots/bot-that-produces-duplicates/scraper.rb
|
253
|
-
- spec/bots/bot-with-invalid-data-type/manifest.json
|
254
|
-
- spec/bots/bot-with-invalid-data-type/scraper.rb
|
255
|
-
- spec/bots/bot-with-invalid-sample-date/manifest.json
|
256
|
-
- spec/bots/bot-with-invalid-sample-date/scraper.rb
|
257
|
-
- spec/bots/bot-with-pause/manifest.json
|
258
|
-
- spec/bots/bot-with-pause/scraper.rb
|
259
|
-
- spec/bots/bot-with-transformer/manifest.json
|
260
|
-
- spec/bots/bot-with-transformer/scraper.rb
|
261
|
-
- spec/bots/bot-with-transformer/transformer.rb
|
262
|
-
- spec/bots/bot-with-transformers/manifest.json
|
263
|
-
- spec/bots/bot-with-transformers/scraper.rb
|
264
|
-
- spec/bots/bot-with-transformers/transformer1.rb
|
265
|
-
- spec/bots/bot-with-transformers/transformer2.rb
|
266
|
-
- spec/bots/invalid-json-bot/manifest.json
|
267
|
-
- spec/bots/invalid-json-bot/scraper.rb
|
268
|
-
- spec/bots/invalid-record-bot/manifest.json
|
269
|
-
- spec/bots/invalid-record-bot/scraper.rb
|
270
|
-
- spec/bots/logging-bot/manifest.json
|
271
|
-
- spec/bots/logging-bot/scraper.rb
|
272
|
-
- spec/bots/python-bot/manifest.json
|
273
|
-
- spec/bots/python-bot/scraper.py
|
274
|
-
- spec/bots/ruby-bot/manifest.json
|
275
|
-
- spec/bots/ruby-bot/scraper.rb
|
276
|
-
- spec/bots/slow-bot/manifest.json
|
277
|
-
- spec/bots/slow-bot/scraper.rb
|
278
|
-
- spec/lib/processor_spec.rb
|
279
|
-
- spec/lib/runner_spec.rb
|
280
|
-
- spec/lib/utils_spec.rb
|
281
|
-
- spec/lib/validator_spec.rb
|
282
|
-
- spec/manual.rb
|
283
|
-
- spec/outputs/full-scraper.out
|
284
|
-
- spec/outputs/full-transformer.out
|
285
|
-
- spec/outputs/truncated-scraper.out
|
286
|
-
- spec/spec_helper.rb
|
287
|
-
- spec/support/custom_matchers.rb
|
288
|
-
- spec/support/helpers.rb
|
231
|
+
test_files: []
|