turbot-runner 0.2.31 → 0.2.35

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +3 -3
  3. data/bin/bundle +105 -0
  4. data/bin/rspec +29 -0
  5. data/lib/turbot_runner/runner.rb +5 -7
  6. data/lib/turbot_runner/version.rb +1 -1
  7. data/schema/schemas/accounts-statement-schema.json +48 -29
  8. data/schema/schemas/alternate-registration-schema.json +88 -0
  9. data/schema/schemas/alternative-name-schema.json +83 -0
  10. data/schema/schemas/company-schema.json +22 -6
  11. data/schema/schemas/control-statement-schema.json +46 -8
  12. data/schema/schemas/filing-schema.json +3 -4
  13. data/schema/schemas/financial-payment-schema.json +1 -1
  14. data/schema/schemas/gazette-notice-schema.json +337 -312
  15. data/schema/schemas/includes/{accounts_element.json → accounts-element.json} +1 -1
  16. data/schema/schemas/includes/address.json +2 -3
  17. data/schema/schemas/includes/{alternative_name.json → alternative-name.json} +1 -1
  18. data/schema/schemas/includes/base-statement.json +5 -2
  19. data/schema/schemas/includes/company-for-nesting.json +3 -3
  20. data/schema/schemas/includes/company.json +1 -1
  21. data/schema/schemas/includes/date.json +17 -4
  22. data/schema/schemas/includes/entity.json +2 -2
  23. data/schema/schemas/includes/{filing_document.json → filing-document.json} +12 -3
  24. data/schema/schemas/includes/filing.json +1 -1
  25. data/schema/schemas/includes/financial-payment-data-object.json +1 -1
  26. data/schema/schemas/includes/identifier.json +1 -1
  27. data/schema/schemas/includes/{industry_code.json → industry-code.json} +1 -1
  28. data/schema/schemas/includes/legislation.json +1 -1
  29. data/schema/schemas/includes/licence-data-object.json +1 -1
  30. data/schema/schemas/includes/officer.json +1 -1
  31. data/schema/schemas/includes/organisation.json +8 -5
  32. data/schema/schemas/includes/permission.json +1 -1
  33. data/schema/schemas/includes/{person_name.json → person-name.json} +1 -1
  34. data/schema/schemas/includes/person.json +9 -7
  35. data/schema/schemas/includes/{previous_name.json → previous-name.json} +1 -1
  36. data/schema/schemas/includes/range.json +1 -1
  37. data/schema/schemas/includes/sanction.json +5 -2
  38. data/schema/schemas/includes/share-parcel-data.json +1 -1
  39. data/schema/schemas/includes/share-parcel.json +17 -5
  40. data/schema/schemas/includes/subsidiary-relationship-data.json +1 -1
  41. data/schema/schemas/includes/total-shares.json +1 -1
  42. data/schema/schemas/includes/{unknown_entity_type.json → unknown-entity-type.json} +8 -5
  43. data/schema/schemas/licence-schema.json +1 -1
  44. data/schema/schemas/primary-data-schema.json +1 -1
  45. data/schema/schemas/register-entry-schema.json +4 -3
  46. data/schema/schemas/sanctioned-entity-schema.json +4 -5
  47. data/schema/schemas/share-parcel-schema.json +1 -1
  48. data/schema/schemas/simple-financial-payment-schema.json +1 -1
  49. data/schema/schemas/simple-licence-schema.json +1 -1
  50. data/schema/schemas/simple-subsidiary-schema.json +1 -1
  51. data/schema/schemas/subsequent-registration-schema.json +89 -0
  52. data/schema/schemas/subsidiary-relationship-schema.json +1 -1
  53. data/schema/schemas/supplier-relationship-schema.json +82 -0
  54. data/schema/schemas/trademark-registration-schema.json +3 -1
  55. data/spec/lib/runner_spec.rb +71 -39
  56. metadata +19 -11
@@ -107,4 +107,4 @@
107
107
  "licence_issuer",
108
108
  "jurisdiction_of_licence"
109
109
  ]
110
- }
110
+ }
@@ -17,4 +17,4 @@
17
17
  "sample_date"
18
18
  ],
19
19
  "additionalProperties": true
20
- }
20
+ }
@@ -5,7 +5,9 @@
5
5
  "subject_entity": {
6
6
  "$ref": "includes/entity.json"
7
7
  },
8
- "register": {"$ref": "#/definitions/register"},
8
+ "register": {
9
+ "$ref": "#/definitions/register"
10
+ },
9
11
  "identifier": {
10
12
  "description": "A unique identifier used by the register to identify the register entry. In some cases – e.g. the Charity Register for England & Wales – these identifiers (in this case known as the 'charity number') are used outside of the context of the register",
11
13
  "type": "string"
@@ -110,6 +112,5 @@
110
112
  "title"
111
113
  ]
112
114
  }
113
-
114
115
  }
115
- }
116
+ }
@@ -13,8 +13,9 @@
13
13
  "DebarredSupplier"
14
14
  ]
15
15
  },
16
- "sanctions_list": {"$ref": "#/definitions/sanctions_list"},
17
-
16
+ "sanctions_list": {
17
+ "$ref": "#/definitions/sanctions_list"
18
+ },
18
19
  "sanctions": {
19
20
  "type": "array",
20
21
  "items": {
@@ -109,7 +110,5 @@
109
110
  "title"
110
111
  ]
111
112
  }
112
-
113
113
  }
114
-
115
- }
114
+ }
@@ -19,4 +19,4 @@
19
19
  }
20
20
  }
21
21
  ]
22
- }
22
+ }
@@ -124,4 +124,4 @@
124
124
  "date",
125
125
  "currency"
126
126
  ]
127
- }
127
+ }
@@ -83,4 +83,4 @@
83
83
  "company_name",
84
84
  "company_jurisdiction"
85
85
  ]
86
- }
86
+ }
@@ -86,4 +86,4 @@
86
86
  "subsidiary_name",
87
87
  "subsidiary_jurisdiction"
88
88
  ]
89
- }
89
+ }
@@ -0,0 +1,89 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "A representation of entity registration changing over time",
4
+ "type": "object",
5
+ "properties": {
6
+ "data_type": {
7
+ "enum": [
8
+ "subsequent-registration"
9
+ ]
10
+ },
11
+ "previous_entity": {
12
+ "description": "the previous entity",
13
+ "$ref": "includes/entity.json"
14
+ },
15
+ "subsequent_entity": {
16
+ "description": "the subsequent entity",
17
+ "$ref": "includes/entity.json"
18
+ },
19
+ "subsequent_registration_start_date": {
20
+ "description": "date when subsequent registration started",
21
+ "type": "string",
22
+ "format": "date"
23
+ },
24
+ "previous_registration_end_date": {
25
+ "description": "date when previous registration ended",
26
+ "type": "string",
27
+ "format": "date"
28
+ },
29
+ "publication_date": {
30
+ "description": "the publication date of the filing/notice that gives details of the subsequent registration",
31
+ "type": "string",
32
+ "format": "date"
33
+ },
34
+ "start_date": {
35
+ "description": "date when subsequent registration was valid from",
36
+ "type": "string",
37
+ "format": "date"
38
+ },
39
+ "start_date_type": {
40
+ "enum": [
41
+ "at",
42
+ "before",
43
+ "after"
44
+ ]
45
+ },
46
+ "sample_date": {
47
+ "description": "date when subsequent registration was sampled",
48
+ "type": "string",
49
+ "format": "date"
50
+ },
51
+ "retrieved_at": {
52
+ "description": "date when subsequent registration was retrieved",
53
+ "type": "string",
54
+ "format": "date"
55
+ },
56
+ "source_url": {
57
+ "description": "URL of the source of the data (e.g. download URL), or, if there is no persistent URL, the page from which it can be found (e.g. search page)",
58
+ "type": "string"
59
+ },
60
+ "confidence": {
61
+ "description": "Confidence in accuracy of data",
62
+ "enum": [
63
+ "HIGH",
64
+ "MEDIUM",
65
+ "LOW"
66
+ ]
67
+ }
68
+ },
69
+ "additionalProperties": false,
70
+ "required": [
71
+ "data_type",
72
+ "previous_entity",
73
+ "subsequent_entity",
74
+ "retrieved_at",
75
+ "confidence"
76
+ ],
77
+ "anyOf": [
78
+ {
79
+ "required": [
80
+ "start_date"
81
+ ]
82
+ },
83
+ {
84
+ "required": [
85
+ "sample_date"
86
+ ]
87
+ }
88
+ ]
89
+ }
@@ -47,4 +47,4 @@
47
47
  "data",
48
48
  "sample_date"
49
49
  ]
50
- }
50
+ }
@@ -0,0 +1,82 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "type": "object",
4
+ "properties": {
5
+ "data_type": {
6
+ "enum": [
7
+ "supplier-relationship"
8
+ ]
9
+ },
10
+ "customer": {
11
+ "$ref": "includes/entity.json"
12
+ },
13
+ "supplier": {
14
+ "$ref": "includes/entity.json"
15
+ },
16
+ "start_date": {
17
+ "type": "string",
18
+ "format": "date"
19
+ },
20
+ "start_date_type": {
21
+ "enum": [
22
+ "at",
23
+ "before",
24
+ "after"
25
+ ]
26
+ },
27
+ "end_date": {
28
+ "type": "string",
29
+ "format": "date"
30
+ },
31
+ "end_date_type": {
32
+ "enum": [
33
+ "at",
34
+ "before",
35
+ "after"
36
+ ]
37
+ },
38
+ "sample_date": {
39
+ "type": "string",
40
+ "format": "date"
41
+ },
42
+ "retrieved_at": {
43
+ "type": "string",
44
+ "format": "date"
45
+ },
46
+ "source_url": {
47
+ "type": "string"
48
+ },
49
+ "confidence": {
50
+ "enum": [
51
+ "HIGH",
52
+ "MEDIUM",
53
+ "LOW"
54
+ ]
55
+ }
56
+ },
57
+ "additionalProperties": false,
58
+ "required": [
59
+ "data_type",
60
+ "supplier",
61
+ "customer",
62
+ "retrieved_at",
63
+ "confidence"
64
+ ],
65
+ "anyOf": [
66
+ {
67
+ "required": [
68
+ "start_date"
69
+ ]
70
+ },
71
+ {
72
+ "required": [
73
+ "sample_date"
74
+ ]
75
+ },
76
+ {
77
+ "required": [
78
+ "end_date"
79
+ ]
80
+ }
81
+ ]
82
+ }
@@ -154,7 +154,9 @@
154
154
  "properties": {
155
155
  "code_scheme_id": {
156
156
  "description": "The scheme of the classification",
157
- "enum": ["wipo_nice"]
157
+ "enum": [
158
+ "wipo_nice"
159
+ ]
158
160
  },
159
161
  "code": {
160
162
  "description": "The classification code",
@@ -7,6 +7,10 @@ describe TurbotRunner::Runner do
7
7
  puts 'If all specs passed, you should now run `ruby spec/manual.rb`'
8
8
  end
9
9
 
10
+ after do
11
+ FileUtils.rm_rf(File.join(@runner.base_directory, "output")) if @runner
12
+ end
13
+
10
14
  describe '#run' do
11
15
  context 'with a bot written in ruby' do
12
16
  before do
@@ -69,11 +73,14 @@ describe TurbotRunner::Runner do
69
73
 
70
74
  context 'with a bot that logs' do
71
75
  context 'when logging to file enabled' do
76
+ before do
77
+ @runner = test_runner('logging-bot', :log_to_file => true)
78
+ end
79
+
72
80
  it 'logs to file' do
73
81
  expected_log = "doing...\ndone\n"
74
- runner = test_runner('logging-bot', :log_to_file => true)
75
- runner.run
76
- expect(runner).to have_error_output_matching('scraper', expected_log)
82
+ @runner.run
83
+ expect(@runner).to have_error_output_matching('scraper', expected_log)
77
84
  end
78
85
  end
79
86
 
@@ -201,15 +208,21 @@ describe TurbotRunner::Runner do
201
208
  end
202
209
 
203
210
  context 'with a scraper that produces an invalid record' do
204
- it 'returns false' do
211
+ before do
205
212
  @runner = test_runner('invalid-record-bot')
213
+ end
214
+
215
+ it 'returns false' do
206
216
  expect(@runner).to fail_in_scraper
207
217
  end
208
218
  end
209
219
 
210
220
  context 'with a scraper that produces invalid JSON' do
211
- it 'returns false' do
221
+ before do
212
222
  @runner = test_runner('invalid-json-bot')
223
+ end
224
+
225
+ it 'returns false' do
213
226
  expect(@runner).to fail_in_scraper
214
227
  end
215
228
  end
@@ -220,11 +233,14 @@ describe TurbotRunner::Runner do
220
233
  # output file is created; however, the way we're redirecting
221
234
  # stdout using the shell means the file doesn't get created
222
235
  # until
223
- it 'returns false' do
236
+ before do
224
237
  @runner = test_runner('bot-with-pause',
225
238
  :timeout => 1,
226
239
  :log_to_file => true
227
240
  )
241
+ end
242
+
243
+ it 'returns false' do
228
244
  expect(@runner).to fail_in_scraper
229
245
  end
230
246
  end
@@ -315,51 +331,67 @@ describe TurbotRunner::Runner do
315
331
  @handler = Handler.new
316
332
  end
317
333
 
318
- it 'calls handler once for each line of output' do
319
- test_runner('bot-with-transformer').run
334
+ context 'with a bot that runs correctly' do
335
+ before do
336
+ @runner = test_runner('bot-with-transformer')
337
+ @runner.run
338
+ end
320
339
 
321
- runner = test_runner('bot-with-transformer',
322
- :record_handler => @handler
323
- )
340
+ it 'calls handler once for each line of output' do
341
+ runner = test_runner('bot-with-transformer',
342
+ :record_handler => @handler
343
+ )
324
344
 
325
- runner.process_output
326
- expect(@handler.records_seen['primary data']).to eq(10)
327
- expect(@handler.records_seen['simple-licence']).to eq(10)
328
- end
345
+ runner.process_output
346
+ expect(@handler.records_seen['primary data']).to eq(10)
347
+ expect(@handler.records_seen['simple-licence']).to eq(10)
348
+ end
329
349
 
330
- it 'passes opts to processor.process' do
331
- test_runner('bot-with-transformer').run
332
- runner = test_runner('bot-with-transformer',
333
- :record_handler => @handler
334
- )
335
- opts = {frob: 5}
336
- processor = double('processor')
337
- allow(TurbotRunner::Processor).to receive(:new).and_return(processor)
338
- expect(processor).to receive(:process).with(anything, opts).at_least(:once)
339
- runner.process_output(opts)
350
+ it 'passes opts to processor.process' do
351
+ runner = test_runner('bot-with-transformer',
352
+ :record_handler => @handler
353
+ )
354
+ opts = {frob: 5}
355
+ processor = double('processor')
356
+ allow(TurbotRunner::Processor).to receive(:new).and_return(processor)
357
+ expect(processor).to receive(:process).with(anything, opts).at_least(:once)
358
+ runner.process_output(opts)
359
+ end
360
+
361
+ context 'when skip_data_types is set' do
362
+ it 'skips the data type' do
363
+ runner = test_runner('bot-with-transformer',
364
+ :record_handler => @handler
365
+ )
366
+
367
+ runner.process_output(skip_data_types: ['primary data'])
368
+ expect(@handler.records_seen['primary data']).to eq(0)
369
+ expect(@handler.records_seen['simple-licence']).to eq(10)
370
+ end
371
+ end
340
372
  end
341
373
 
342
- it 'can cope when scraper has failed immediately' do
343
- test_runner('bot-that-crashes-immediately').run
374
+ context 'with a bot that crashes immediately' do
375
+ before do
376
+ @runner = test_runner('bot-that-crashes-immediately')
377
+ @runner.run
378
+ end
344
379
 
345
- runner = test_runner('bot-that-crashes-immediately',
346
- :record_handler => @handler
347
- )
380
+ it 'can cope with the empty files' do
381
+ runner = test_runner('bot-that-crashes-immediately',
382
+ :record_handler => @handler
383
+ )
348
384
 
349
- runner.process_output
385
+ runner.process_output
386
+ end
350
387
  end
351
388
 
352
- context 'when skip_data_types is set' do
353
- it 'skips the data type' do
354
- test_runner('bot-with-transformer').run
355
-
389
+ context 'when no bot has run' do
390
+ it 'proceeds without errors' do
356
391
  runner = test_runner('bot-with-transformer',
357
392
  :record_handler => @handler
358
393
  )
359
-
360
- runner.process_output(skip_data_types: ['primary data'])
361
- expect(@handler.records_seen['primary data']).to eq(0)
362
- expect(@handler.records_seen['simple-licence']).to eq(10)
394
+ runner.process_output
363
395
  end
364
396
  end
365
397
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.31
4
+ version: 0.2.35
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-15 00:00:00.000000000 Z
11
+ date: 2021-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -68,7 +68,9 @@ dependencies:
68
68
  version: 3.4.0
69
69
  description:
70
70
  email: bots@opencorporates.com
71
- executables: []
71
+ executables:
72
+ - bundle
73
+ - rspec
72
74
  extensions: []
73
75
  extra_rdoc_files: []
74
76
  files:
@@ -81,6 +83,8 @@ files:
81
83
  - README.md
82
84
  - Rakefile
83
85
  - appveyor.yml
86
+ - bin/bundle
87
+ - bin/rspec
84
88
  - lib/turbot_runner.rb
85
89
  - lib/turbot_runner/base_handler.rb
86
90
  - lib/turbot_runner/exceptions.rb
@@ -92,41 +96,43 @@ files:
92
96
  - lib/turbot_runner/validator.rb
93
97
  - lib/turbot_runner/version.rb
94
98
  - schema/schemas/accounts-statement-schema.json
99
+ - schema/schemas/alternate-registration-schema.json
100
+ - schema/schemas/alternative-name-schema.json
95
101
  - schema/schemas/company-schema.json
96
102
  - schema/schemas/control-statement-schema.json
97
103
  - schema/schemas/filing-schema.json
98
104
  - schema/schemas/financial-payment-schema.json
99
105
  - schema/schemas/gazette-notice-schema.json
100
- - schema/schemas/includes/accounts_element.json
106
+ - schema/schemas/includes/accounts-element.json
101
107
  - schema/schemas/includes/address-with-type.json
102
108
  - schema/schemas/includes/address.json
103
- - schema/schemas/includes/alternative_name.json
109
+ - schema/schemas/includes/alternative-name.json
104
110
  - schema/schemas/includes/base-statement.json
105
111
  - schema/schemas/includes/classification.json
106
112
  - schema/schemas/includes/company-for-nesting.json
107
113
  - schema/schemas/includes/company.json
108
114
  - schema/schemas/includes/date.json
109
115
  - schema/schemas/includes/entity.json
116
+ - schema/schemas/includes/filing-document.json
110
117
  - schema/schemas/includes/filing.json
111
- - schema/schemas/includes/filing_document.json
112
118
  - schema/schemas/includes/financial-payment-data-object.json
113
119
  - schema/schemas/includes/identifier.json
114
- - schema/schemas/includes/industry_code.json
120
+ - schema/schemas/includes/industry-code.json
115
121
  - schema/schemas/includes/legislation.json
116
122
  - schema/schemas/includes/licence-data-object.json
117
123
  - schema/schemas/includes/officer.json
118
124
  - schema/schemas/includes/organisation.json
119
125
  - schema/schemas/includes/permission.json
126
+ - schema/schemas/includes/person-name.json
120
127
  - schema/schemas/includes/person.json
121
- - schema/schemas/includes/person_name.json
122
- - schema/schemas/includes/previous_name.json
128
+ - schema/schemas/includes/previous-name.json
123
129
  - schema/schemas/includes/range.json
124
130
  - schema/schemas/includes/sanction.json
125
131
  - schema/schemas/includes/share-parcel-data.json
126
132
  - schema/schemas/includes/share-parcel.json
127
133
  - schema/schemas/includes/subsidiary-relationship-data.json
128
134
  - schema/schemas/includes/total-shares.json
129
- - schema/schemas/includes/unknown_entity_type.json
135
+ - schema/schemas/includes/unknown-entity-type.json
130
136
  - schema/schemas/licence-schema.json
131
137
  - schema/schemas/primary-data-schema.json
132
138
  - schema/schemas/register-entry-schema.json
@@ -135,7 +141,9 @@ files:
135
141
  - schema/schemas/simple-financial-payment-schema.json
136
142
  - schema/schemas/simple-licence-schema.json
137
143
  - schema/schemas/simple-subsidiary-schema.json
144
+ - schema/schemas/subsequent-registration-schema.json
138
145
  - schema/schemas/subsidiary-relationship-schema.json
146
+ - schema/schemas/supplier-relationship-schema.json
139
147
  - schema/schemas/trademark-registration-schema.json
140
148
  - spec/bots/bot-that-crashes-immediately/manifest.json
141
149
  - spec/bots/bot-that-crashes-immediately/scraper.rb
@@ -215,7 +223,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
215
223
  version: '0'
216
224
  requirements: []
217
225
  rubyforge_project:
218
- rubygems_version: 2.5.1
226
+ rubygems_version: 2.5.2.3
219
227
  signing_key:
220
228
  specification_version: 4
221
229
  summary: Utilities for running bots with Turbot