turbot-runner 0.2.30 → 0.2.34
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -3
- data/bin/bundle +105 -0
- data/bin/rspec +29 -0
- data/lib/turbot_runner.rb +1 -1
- data/lib/turbot_runner/runner.rb +5 -7
- data/lib/turbot_runner/version.rb +1 -1
- data/schema/schemas/accounts-statement-schema.json +48 -29
- data/schema/schemas/alternate-registration-schema.json +88 -0
- data/schema/schemas/alternative-name-schema.json +69 -0
- data/schema/schemas/company-schema.json +22 -6
- data/schema/schemas/control-statement-schema.json +46 -8
- data/schema/schemas/filing-schema.json +3 -4
- data/schema/schemas/financial-payment-schema.json +1 -1
- data/schema/schemas/gazette-notice-schema.json +337 -312
- data/schema/schemas/includes/{accounts_element.json → accounts-element.json} +1 -1
- data/schema/schemas/includes/address.json +2 -3
- data/schema/schemas/includes/{alternative_name.json → alternative-name.json} +1 -1
- data/schema/schemas/includes/base-statement.json +5 -2
- data/schema/schemas/includes/company-for-nesting.json +3 -3
- data/schema/schemas/includes/company.json +1 -1
- data/schema/schemas/includes/date.json +17 -4
- data/schema/schemas/includes/entity.json +2 -2
- data/schema/schemas/includes/{filing_document.json → filing-document.json} +12 -3
- data/schema/schemas/includes/filing.json +1 -1
- data/schema/schemas/includes/financial-payment-data-object.json +1 -1
- data/schema/schemas/includes/identifier.json +1 -1
- data/schema/schemas/includes/{industry_code.json → industry-code.json} +1 -1
- data/schema/schemas/includes/legislation.json +1 -1
- data/schema/schemas/includes/licence-data-object.json +1 -1
- data/schema/schemas/includes/officer.json +1 -1
- data/schema/schemas/includes/organisation.json +8 -5
- data/schema/schemas/includes/permission.json +1 -1
- data/schema/schemas/includes/{person_name.json → person-name.json} +1 -1
- data/schema/schemas/includes/person.json +9 -7
- data/schema/schemas/includes/{previous_name.json → previous-name.json} +1 -1
- data/schema/schemas/includes/range.json +1 -1
- data/schema/schemas/includes/sanction.json +5 -2
- data/schema/schemas/includes/share-parcel-data.json +1 -1
- data/schema/schemas/includes/share-parcel.json +17 -5
- data/schema/schemas/includes/subsidiary-relationship-data.json +1 -1
- data/schema/schemas/includes/total-shares.json +1 -1
- data/schema/schemas/includes/{unknown_entity_type.json → unknown-entity-type.json} +8 -5
- data/schema/schemas/licence-schema.json +1 -1
- data/schema/schemas/primary-data-schema.json +1 -1
- data/schema/schemas/register-entry-schema.json +4 -3
- data/schema/schemas/sanctioned-entity-schema.json +4 -5
- data/schema/schemas/share-parcel-schema.json +1 -1
- data/schema/schemas/simple-financial-payment-schema.json +1 -1
- data/schema/schemas/simple-licence-schema.json +1 -1
- data/schema/schemas/simple-subsidiary-schema.json +1 -1
- data/schema/schemas/subsequent-registration-schema.json +89 -0
- data/schema/schemas/subsidiary-relationship-schema.json +1 -1
- data/schema/schemas/supplier-relationship-schema.json +82 -0
- data/schema/schemas/trademark-registration-schema.json +3 -1
- data/spec/lib/runner_spec.rb +71 -39
- metadata +19 -11
@@ -5,7 +5,9 @@
|
|
5
5
|
"subject_entity": {
|
6
6
|
"$ref": "includes/entity.json"
|
7
7
|
},
|
8
|
-
"register": {
|
8
|
+
"register": {
|
9
|
+
"$ref": "#/definitions/register"
|
10
|
+
},
|
9
11
|
"identifier": {
|
10
12
|
"description": "A unique identifier used by the register to identify the register entry. In some cases (e.g. the Charity Register for England & Wales) these identifiers (in this case known as the 'charity number') are used outside of the context of the register",
|
11
13
|
"type": "string"
|
@@ -110,6 +112,5 @@
|
|
110
112
|
"title"
|
111
113
|
]
|
112
114
|
}
|
113
|
-
|
114
115
|
}
|
115
|
-
}
|
116
|
+
}
|
@@ -13,8 +13,9 @@
|
|
13
13
|
"DebarredSupplier"
|
14
14
|
]
|
15
15
|
},
|
16
|
-
"sanctions_list": {
|
17
|
-
|
16
|
+
"sanctions_list": {
|
17
|
+
"$ref": "#/definitions/sanctions_list"
|
18
|
+
},
|
18
19
|
"sanctions": {
|
19
20
|
"type": "array",
|
20
21
|
"items": {
|
@@ -109,7 +110,5 @@
|
|
109
110
|
"title"
|
110
111
|
]
|
111
112
|
}
|
112
|
-
|
113
113
|
}
|
114
|
-
|
115
|
-
}
|
114
|
+
}
|
@@ -0,0 +1,89 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"description": "A representation of entity registration changing over time",
|
4
|
+
"type": "object",
|
5
|
+
"properties": {
|
6
|
+
"data_type": {
|
7
|
+
"enum": [
|
8
|
+
"subsequent-registration"
|
9
|
+
]
|
10
|
+
},
|
11
|
+
"previous_entity": {
|
12
|
+
"description": "the previous entity",
|
13
|
+
"$ref": "includes/entity.json"
|
14
|
+
},
|
15
|
+
"subsequent_entity": {
|
16
|
+
"description": "the subsequent entity",
|
17
|
+
"$ref": "includes/entity.json"
|
18
|
+
},
|
19
|
+
"subsequent_registration_start_date": {
|
20
|
+
"description": "date when subsequent registration started",
|
21
|
+
"type": "string",
|
22
|
+
"format": "date"
|
23
|
+
},
|
24
|
+
"previous_registration_end_date": {
|
25
|
+
"description": "date when previous registration ended",
|
26
|
+
"type": "string",
|
27
|
+
"format": "date"
|
28
|
+
},
|
29
|
+
"publication_date": {
|
30
|
+
"description": "the publication date of the filing/notice that gives details of the subsequent registration",
|
31
|
+
"type": "string",
|
32
|
+
"format": "date"
|
33
|
+
},
|
34
|
+
"start_date": {
|
35
|
+
"description": "date when subsequent registration was valid from",
|
36
|
+
"type": "string",
|
37
|
+
"format": "date"
|
38
|
+
},
|
39
|
+
"start_date_type": {
|
40
|
+
"enum": [
|
41
|
+
"at",
|
42
|
+
"before",
|
43
|
+
"after"
|
44
|
+
]
|
45
|
+
},
|
46
|
+
"sample_date": {
|
47
|
+
"description": "date when subsequent registration was sampled",
|
48
|
+
"type": "string",
|
49
|
+
"format": "date"
|
50
|
+
},
|
51
|
+
"retrieved_at": {
|
52
|
+
"description": "date when subsequent registration was retrieved",
|
53
|
+
"type": "string",
|
54
|
+
"format": "date"
|
55
|
+
},
|
56
|
+
"source_url": {
|
57
|
+
"description": "URL of the source of the data (e.g. download URL), or, if there is no persistent URL, the page from which it can be found (e.g. search page)",
|
58
|
+
"type": "string"
|
59
|
+
},
|
60
|
+
"confidence": {
|
61
|
+
"description": "Confidence in accuracy of data",
|
62
|
+
"enum": [
|
63
|
+
"HIGH",
|
64
|
+
"MEDIUM",
|
65
|
+
"LOW"
|
66
|
+
]
|
67
|
+
}
|
68
|
+
},
|
69
|
+
"additionalProperties": false,
|
70
|
+
"required": [
|
71
|
+
"data_type",
|
72
|
+
"previous_entity",
|
73
|
+
"subsequent_entity",
|
74
|
+
"retrieved_at",
|
75
|
+
"confidence"
|
76
|
+
],
|
77
|
+
"anyOf": [
|
78
|
+
{
|
79
|
+
"required": [
|
80
|
+
"start_date"
|
81
|
+
]
|
82
|
+
},
|
83
|
+
{
|
84
|
+
"required": [
|
85
|
+
"sample_date"
|
86
|
+
]
|
87
|
+
}
|
88
|
+
]
|
89
|
+
}
|
@@ -0,0 +1,82 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"type": "object",
|
4
|
+
"properties": {
|
5
|
+
"data_type": {
|
6
|
+
"enum": [
|
7
|
+
"supplier-relationship"
|
8
|
+
]
|
9
|
+
},
|
10
|
+
"customer": {
|
11
|
+
"$ref": "includes/entity.json"
|
12
|
+
},
|
13
|
+
"supplier": {
|
14
|
+
"$ref": "includes/entity.json"
|
15
|
+
},
|
16
|
+
"start_date": {
|
17
|
+
"type": "string",
|
18
|
+
"format": "date"
|
19
|
+
},
|
20
|
+
"start_date_type": {
|
21
|
+
"enum": [
|
22
|
+
"at",
|
23
|
+
"before",
|
24
|
+
"after"
|
25
|
+
]
|
26
|
+
},
|
27
|
+
"end_date": {
|
28
|
+
"type": "string",
|
29
|
+
"format": "date"
|
30
|
+
},
|
31
|
+
"end_date_type": {
|
32
|
+
"enum": [
|
33
|
+
"at",
|
34
|
+
"before",
|
35
|
+
"after"
|
36
|
+
]
|
37
|
+
},
|
38
|
+
"sample_date": {
|
39
|
+
"type": "string",
|
40
|
+
"format": "date"
|
41
|
+
},
|
42
|
+
"retrieved_at": {
|
43
|
+
"type": "string",
|
44
|
+
"format": "date"
|
45
|
+
},
|
46
|
+
"source_url": {
|
47
|
+
"type": "string"
|
48
|
+
},
|
49
|
+
"confidence": {
|
50
|
+
"enum": [
|
51
|
+
"HIGH",
|
52
|
+
"MEDIUM",
|
53
|
+
"LOW"
|
54
|
+
]
|
55
|
+
}
|
56
|
+
},
|
57
|
+
"additionalProperties": false,
|
58
|
+
"required": [
|
59
|
+
"data_type",
|
60
|
+
"supplier",
|
61
|
+
"customer",
|
62
|
+
"retrieved_at",
|
63
|
+
"confidence"
|
64
|
+
],
|
65
|
+
"anyOf": [
|
66
|
+
{
|
67
|
+
"required": [
|
68
|
+
"start_date"
|
69
|
+
]
|
70
|
+
},
|
71
|
+
{
|
72
|
+
"required": [
|
73
|
+
"sample_date"
|
74
|
+
]
|
75
|
+
},
|
76
|
+
{
|
77
|
+
"required": [
|
78
|
+
"end_date"
|
79
|
+
]
|
80
|
+
}
|
81
|
+
]
|
82
|
+
}
|
data/spec/lib/runner_spec.rb
CHANGED
@@ -7,6 +7,10 @@ describe TurbotRunner::Runner do
|
|
7
7
|
puts 'If all specs passed, you should now run `ruby spec/manual.rb`'
|
8
8
|
end
|
9
9
|
|
10
|
+
after do
|
11
|
+
FileUtils.rm_rf(File.join(@runner.base_directory, "output")) if @runner
|
12
|
+
end
|
13
|
+
|
10
14
|
describe '#run' do
|
11
15
|
context 'with a bot written in ruby' do
|
12
16
|
before do
|
@@ -69,11 +73,14 @@ describe TurbotRunner::Runner do
|
|
69
73
|
|
70
74
|
context 'with a bot that logs' do
|
71
75
|
context 'when logging to file enabled' do
|
76
|
+
before do
|
77
|
+
@runner = test_runner('logging-bot', :log_to_file => true)
|
78
|
+
end
|
79
|
+
|
72
80
|
it 'logs to file' do
|
73
81
|
expected_log = "doing...\ndone\n"
|
74
|
-
runner
|
75
|
-
runner.
|
76
|
-
expect(runner).to have_error_output_matching('scraper', expected_log)
|
82
|
+
@runner.run
|
83
|
+
expect(@runner).to have_error_output_matching('scraper', expected_log)
|
77
84
|
end
|
78
85
|
end
|
79
86
|
|
@@ -201,15 +208,21 @@ describe TurbotRunner::Runner do
|
|
201
208
|
end
|
202
209
|
|
203
210
|
context 'with a scraper that produces an invalid record' do
|
204
|
-
|
211
|
+
before do
|
205
212
|
@runner = test_runner('invalid-record-bot')
|
213
|
+
end
|
214
|
+
|
215
|
+
it 'returns false' do
|
206
216
|
expect(@runner).to fail_in_scraper
|
207
217
|
end
|
208
218
|
end
|
209
219
|
|
210
220
|
context 'with a scraper that produces invalid JSON' do
|
211
|
-
|
221
|
+
before do
|
212
222
|
@runner = test_runner('invalid-json-bot')
|
223
|
+
end
|
224
|
+
|
225
|
+
it 'returns false' do
|
213
226
|
expect(@runner).to fail_in_scraper
|
214
227
|
end
|
215
228
|
end
|
@@ -220,11 +233,14 @@ describe TurbotRunner::Runner do
|
|
220
233
|
# output file is created; however, the way we're redirecting
|
221
234
|
# stdout using the shell means the file doesn't get created
|
222
235
|
# until
|
223
|
-
|
236
|
+
before do
|
224
237
|
@runner = test_runner('bot-with-pause',
|
225
238
|
:timeout => 1,
|
226
239
|
:log_to_file => true
|
227
240
|
)
|
241
|
+
end
|
242
|
+
|
243
|
+
it 'returns false' do
|
228
244
|
expect(@runner).to fail_in_scraper
|
229
245
|
end
|
230
246
|
end
|
@@ -315,51 +331,67 @@ describe TurbotRunner::Runner do
|
|
315
331
|
@handler = Handler.new
|
316
332
|
end
|
317
333
|
|
318
|
-
|
319
|
-
|
334
|
+
context 'with a bot that runs correctly' do
|
335
|
+
before do
|
336
|
+
@runner = test_runner('bot-with-transformer')
|
337
|
+
@runner.run
|
338
|
+
end
|
320
339
|
|
321
|
-
|
322
|
-
|
323
|
-
|
340
|
+
it 'calls handler once for each line of output' do
|
341
|
+
runner = test_runner('bot-with-transformer',
|
342
|
+
:record_handler => @handler
|
343
|
+
)
|
324
344
|
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
345
|
+
runner.process_output
|
346
|
+
expect(@handler.records_seen['primary data']).to eq(10)
|
347
|
+
expect(@handler.records_seen['simple-licence']).to eq(10)
|
348
|
+
end
|
329
349
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
350
|
+
it 'passes opts to processor.process' do
|
351
|
+
runner = test_runner('bot-with-transformer',
|
352
|
+
:record_handler => @handler
|
353
|
+
)
|
354
|
+
opts = {frob: 5}
|
355
|
+
processor = double('processor')
|
356
|
+
allow(TurbotRunner::Processor).to receive(:new).and_return(processor)
|
357
|
+
expect(processor).to receive(:process).with(anything, opts).at_least(:once)
|
358
|
+
runner.process_output(opts)
|
359
|
+
end
|
360
|
+
|
361
|
+
context 'when skip_data_types is set' do
|
362
|
+
it 'skips the data type' do
|
363
|
+
runner = test_runner('bot-with-transformer',
|
364
|
+
:record_handler => @handler
|
365
|
+
)
|
366
|
+
|
367
|
+
runner.process_output(skip_data_types: ['primary data'])
|
368
|
+
expect(@handler.records_seen['primary data']).to eq(0)
|
369
|
+
expect(@handler.records_seen['simple-licence']).to eq(10)
|
370
|
+
end
|
371
|
+
end
|
340
372
|
end
|
341
373
|
|
342
|
-
|
343
|
-
|
374
|
+
context 'with a bot that crashes immediately' do
|
375
|
+
before do
|
376
|
+
@runner = test_runner('bot-that-crashes-immediately')
|
377
|
+
@runner.run
|
378
|
+
end
|
344
379
|
|
345
|
-
|
346
|
-
|
347
|
-
|
380
|
+
it 'can cope with the empty files' do
|
381
|
+
runner = test_runner('bot-that-crashes-immediately',
|
382
|
+
:record_handler => @handler
|
383
|
+
)
|
348
384
|
|
349
|
-
|
385
|
+
runner.process_output
|
386
|
+
end
|
350
387
|
end
|
351
388
|
|
352
|
-
context 'when
|
353
|
-
it '
|
354
|
-
test_runner('bot-with-transformer').run
|
355
|
-
|
389
|
+
context 'when no bot has run' do
|
390
|
+
it 'proceeds without errors' do
|
356
391
|
runner = test_runner('bot-with-transformer',
|
357
392
|
:record_handler => @handler
|
358
393
|
)
|
359
|
-
|
360
|
-
runner.process_output(skip_data_types: ['primary data'])
|
361
|
-
expect(@handler.records_seen['primary data']).to eq(0)
|
362
|
-
expect(@handler.records_seen['simple-licence']).to eq(10)
|
394
|
+
runner.process_output
|
363
395
|
end
|
364
396
|
end
|
365
397
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.30
|
4
|
+
version: 0.2.34
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OpenCorporates
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -68,7 +68,9 @@ dependencies:
|
|
68
68
|
version: 3.4.0
|
69
69
|
description:
|
70
70
|
email: bots@opencorporates.com
|
71
|
-
executables:
|
71
|
+
executables:
|
72
|
+
- bundle
|
73
|
+
- rspec
|
72
74
|
extensions: []
|
73
75
|
extra_rdoc_files: []
|
74
76
|
files:
|
@@ -81,6 +83,8 @@ files:
|
|
81
83
|
- README.md
|
82
84
|
- Rakefile
|
83
85
|
- appveyor.yml
|
86
|
+
- bin/bundle
|
87
|
+
- bin/rspec
|
84
88
|
- lib/turbot_runner.rb
|
85
89
|
- lib/turbot_runner/base_handler.rb
|
86
90
|
- lib/turbot_runner/exceptions.rb
|
@@ -92,41 +96,43 @@ files:
|
|
92
96
|
- lib/turbot_runner/validator.rb
|
93
97
|
- lib/turbot_runner/version.rb
|
94
98
|
- schema/schemas/accounts-statement-schema.json
|
99
|
+
- schema/schemas/alternate-registration-schema.json
|
100
|
+
- schema/schemas/alternative-name-schema.json
|
95
101
|
- schema/schemas/company-schema.json
|
96
102
|
- schema/schemas/control-statement-schema.json
|
97
103
|
- schema/schemas/filing-schema.json
|
98
104
|
- schema/schemas/financial-payment-schema.json
|
99
105
|
- schema/schemas/gazette-notice-schema.json
|
100
|
-
- schema/schemas/includes/
|
106
|
+
- schema/schemas/includes/accounts-element.json
|
101
107
|
- schema/schemas/includes/address-with-type.json
|
102
108
|
- schema/schemas/includes/address.json
|
103
|
-
- schema/schemas/includes/
|
109
|
+
- schema/schemas/includes/alternative-name.json
|
104
110
|
- schema/schemas/includes/base-statement.json
|
105
111
|
- schema/schemas/includes/classification.json
|
106
112
|
- schema/schemas/includes/company-for-nesting.json
|
107
113
|
- schema/schemas/includes/company.json
|
108
114
|
- schema/schemas/includes/date.json
|
109
115
|
- schema/schemas/includes/entity.json
|
116
|
+
- schema/schemas/includes/filing-document.json
|
110
117
|
- schema/schemas/includes/filing.json
|
111
|
-
- schema/schemas/includes/filing_document.json
|
112
118
|
- schema/schemas/includes/financial-payment-data-object.json
|
113
119
|
- schema/schemas/includes/identifier.json
|
114
|
-
- schema/schemas/includes/
|
120
|
+
- schema/schemas/includes/industry-code.json
|
115
121
|
- schema/schemas/includes/legislation.json
|
116
122
|
- schema/schemas/includes/licence-data-object.json
|
117
123
|
- schema/schemas/includes/officer.json
|
118
124
|
- schema/schemas/includes/organisation.json
|
119
125
|
- schema/schemas/includes/permission.json
|
126
|
+
- schema/schemas/includes/person-name.json
|
120
127
|
- schema/schemas/includes/person.json
|
121
|
-
- schema/schemas/includes/
|
122
|
-
- schema/schemas/includes/previous_name.json
|
128
|
+
- schema/schemas/includes/previous-name.json
|
123
129
|
- schema/schemas/includes/range.json
|
124
130
|
- schema/schemas/includes/sanction.json
|
125
131
|
- schema/schemas/includes/share-parcel-data.json
|
126
132
|
- schema/schemas/includes/share-parcel.json
|
127
133
|
- schema/schemas/includes/subsidiary-relationship-data.json
|
128
134
|
- schema/schemas/includes/total-shares.json
|
129
|
-
- schema/schemas/includes/
|
135
|
+
- schema/schemas/includes/unknown-entity-type.json
|
130
136
|
- schema/schemas/licence-schema.json
|
131
137
|
- schema/schemas/primary-data-schema.json
|
132
138
|
- schema/schemas/register-entry-schema.json
|
@@ -135,7 +141,9 @@ files:
|
|
135
141
|
- schema/schemas/simple-financial-payment-schema.json
|
136
142
|
- schema/schemas/simple-licence-schema.json
|
137
143
|
- schema/schemas/simple-subsidiary-schema.json
|
144
|
+
- schema/schemas/subsequent-registration-schema.json
|
138
145
|
- schema/schemas/subsidiary-relationship-schema.json
|
146
|
+
- schema/schemas/supplier-relationship-schema.json
|
139
147
|
- schema/schemas/trademark-registration-schema.json
|
140
148
|
- spec/bots/bot-that-crashes-immediately/manifest.json
|
141
149
|
- spec/bots/bot-that-crashes-immediately/scraper.rb
|
@@ -215,7 +223,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
215
223
|
version: '0'
|
216
224
|
requirements: []
|
217
225
|
rubyforge_project:
|
218
|
-
rubygems_version: 2.5.
|
226
|
+
rubygems_version: 2.5.2.3
|
219
227
|
signing_key:
|
220
228
|
specification_version: 4
|
221
229
|
summary: Utilities for running bots with Turbot
|