turbot-runner 0.2.22 → 0.2.23

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cf3018be33a490d1220c5f1afaeda1d7c5fdd2f1
4
- data.tar.gz: a53d4a8b5a69d1da8a596d4028b3210fa4a87294
3
+ metadata.gz: bef69bef4eaac4dc2e7c0a30ed1a6f7e2220fc6c
4
+ data.tar.gz: 5722c6aa25a5de90983c949eab088a3b7f80a0bb
5
5
  SHA512:
6
- metadata.gz: 1701ffbe66db6b127c620dc902db0c748acd872d315968bf3da04492ed3817534e1653d43c5deffb4ed083bf5844d0de25b7f8fb69b57fab7b0f18e11c9a2cc0
7
- data.tar.gz: 7e50e47e1f2097daf2f83a42ca4d3bab58053ab8acc9e366deec6d81a23ed123411cb17b3a1256dd6357d3c4189fab60e5e4737abb0618728e3173048bc02daa
6
+ metadata.gz: 3aa3226e3bf4511325cc66363dc23c0ebc579d726d85e84ee50861929e5b41336339c21516fda4e55df825bdd96cf47e79bd4c49b5cd9573257bde20f3ffc0ae
7
+ data.tar.gz: 9085d0e78c4602cfcd2a79dd0a798a27ca038ef445ca644dce1103876225fbeddd41158d058d3931ccceb374445b8085ae99a7f5d0c642639a32724b9b5fb901
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.2.22'
2
+ VERSION = '0.2.23'
3
3
  end
@@ -0,0 +1,314 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "A Control Statement is a statement by a company or entity about the control of a company. In its most straightforward form, Company A may say that Entity B controls it. Each statement should be about a single control relationship, i.e. if Company A has two controlling entities, B and C, and those control relationships are distinct (e.g. they started at a different time) then they should each be stored as an individual Control Statement. They should only be stored in the same Control Statement if they are logically part of the same control mechanism, e.g. if company A is controlled by person C and person D, who control the shares *jointly*. Control Statements can be used to describe that an entity has no controlling entity, or that the controlling entity is not disclosed. Beneficial Ownership is a specific example of control relationships, and the ultimate_beneficial_owner flag can be used to signify that a person is the Ultimate Beneficial Owner of a company",
4
+ "properties": {
5
+ "subject_entity": {
6
+ "$ref": "includes/entity.json"
7
+ },
8
+ "control_level": {
9
+ "description": "The level of control, i.e. whether the control is direct (i.e. there are no intermediaries), indirect (there are intermediaries) or unknown. It can be null in the case where a company is saying that there are no controlling entities",
10
+ "enum": [
11
+ "direct",
12
+ "indirect",
13
+ "unknown",
14
+ null
15
+ ]
16
+ },
17
+ "ultimate_beneficial_owner": {
18
+ "description": "Whether the controlling entities are the ultimate beneficial owners (i.e. individuals who ultimately control the company)",
19
+ "type": "boolean"
20
+ },
21
+ "controlling_entities": {
22
+ "type": "array",
23
+ "items": {
24
+ "$ref": "includes/entity.json"
25
+ }
26
+ },
27
+ "control_mechanisms": {
28
+ "description": "The mechanism(s) by which the controlling party/parties control the company",
29
+ "type": "array",
30
+ "items": {
31
+ "oneOf": [
32
+ {
33
+ "type": [
34
+ "string",
35
+ "null"
36
+ ],
37
+ "minLength": 2
38
+ },
39
+ {
40
+ "$ref": "#/definitions/control_mechanism"
41
+ }
42
+ ]
43
+ }
44
+ },
45
+ "no_controlling_entities_reasons": {
46
+ "description": "The reason why no controlling entities are declared, for example because the entity has been unable to discover them, or there are no controlling entities (e.g. 100 people owned 1% each), or the company is exempt from reporting (as some listed companies are in some jurisdictions)",
47
+ "type": "array",
48
+ "items": {
49
+ "type": "string"
50
+ }
51
+ },
52
+ "uid": {
53
+ "description": "Unique id/reference for the control relationship entity record if the external source has given it one",
54
+ "type": "string"
55
+ },
56
+ "url": {
57
+ "description": "Persistent URL on the external source for the control relationship entity record, if there is one",
58
+ "type": "string"
59
+ },
60
+ "source_url": {
61
+ "description": "Source url of the web page for the filing or, if there is no persistent URL, the page from which it can be found (e.g. search page). This field is required.",
62
+ "type": "string",
63
+ "minLength": 11
64
+ },
65
+ "confidence": {
66
+ "description": "Confidence in accuracy of data",
67
+ "type": "string",
68
+ "enum": [
69
+ "HIGH",
70
+ "MEDIUM",
71
+ "LOW"
72
+ ]
73
+ },
74
+ "start_date": {
75
+ "description": "Date on which control started",
76
+ "type": "string",
77
+ "format": "date"
78
+ },
79
+ "end_date": {
80
+ "description": "Date on which control ended",
81
+ "type": "string",
82
+ "format": "date"
83
+ },
84
+ "retrieved_at": {
85
+ "description": "Date-time this was retrieved from the source",
86
+ "type": "string",
87
+ "format": "date-time"
88
+ },
89
+ "sample_date": {
90
+ "description": "Date on which we know this to be true (for example the date the control statement was published)",
91
+ "type": "string",
92
+ "format": "date"
93
+ },
94
+ "other_attributes": {
95
+ "description": "Use for other attributes for which we don't yet have curated schema attributes",
96
+ "type": "object"
97
+ }
98
+ },
99
+ "additionalProperties": false,
100
+ "required": [
101
+ "control_level",
102
+ "subject_entity",
103
+ "retrieved_at",
104
+ "source_url",
105
+ "sample_date",
106
+ "confidence"
107
+ ],
108
+ "definitions": {
109
+ "control_mechanism": {
110
+ "title": "Control Mechanism",
111
+ "description": "The specific mechanism(s) by which control of a company is exercised",
112
+ "oneOf": [
113
+ {
114
+ "properties": {
115
+ "mechanism_type": {
116
+ "enum": [
117
+ "share_ownership"
118
+ ]
119
+ },
120
+ "mechanism_properties": {
121
+ "properties": {
122
+ "number_of_shares": {
123
+ "description": "Number of shares",
124
+ "type": "integer"
125
+ },
126
+ "percentage_of_shares": {
127
+ "description": "Percentage of shares",
128
+ "oneOf": [
129
+ {
130
+ "type": "number",
131
+ "maximum": 100,
132
+ "minimum": 0
133
+ },
134
+ {
135
+ "$ref": "includes/range.json"
136
+ }
137
+ ]
138
+
139
+ },
140
+ "share_class": {
141
+ "description": "Share class or classes",
142
+ "type": "string"
143
+ },
144
+ "exercised_via" : {
145
+ "enum": [
146
+ "trust",
147
+ "firm",
148
+ null
149
+ ]
150
+ }
151
+ }
152
+ },
153
+ "source_description": {
154
+ "description": "Description of the control relationship as given by the source",
155
+ "type": "string"
156
+ }
157
+ },
158
+ "additionalProperties": false
159
+ },
160
+ {
161
+ "properties": {
162
+ "mechanism_type": {
163
+ "enum": [
164
+ "voting_rights"
165
+ ]
166
+ },
167
+ "mechanism_properties": {
168
+ "properties": {
169
+ "voting_percentage": {
170
+ "description": "Percentage of votes",
171
+ "oneOf": [
172
+ {
173
+ "type": "number",
174
+ "maximum": 100,
175
+ "minimum": 0
176
+ },
177
+ {
178
+ "$ref": "includes/range.json"
179
+ }
180
+ ]
181
+ },
182
+ "exercised_via" : {
183
+ "enum": [
184
+ "trust",
185
+ "firm",
186
+ null
187
+ ]
188
+ }
189
+ }
190
+ },
191
+ "source_description": {
192
+ "description": "Description of the control relationship as given by the source",
193
+ "type": "string"
194
+ }
195
+ },
196
+ "additionalProperties": false
197
+ },
198
+ {
199
+ "properties": {
200
+ "mechanism_type": {
201
+ "enum": [
202
+ "right_to_appoint_and_remove_directors"
203
+ ]
204
+ },
205
+ "mechanism_properties": {
206
+ "properties": {
207
+ "exercised_via" : {
208
+ "enum": [
209
+ "trust",
210
+ "firm",
211
+ null
212
+ ]
213
+ }
214
+ }
215
+ },
216
+ "source_description": {
217
+ "description": "Description of the control relationship as given by the source",
218
+ "type": "string"
219
+ }
220
+ },
221
+ "additionalProperties": false
222
+ },
223
+ {
224
+ "properties": {
225
+ "mechanism_type": {
226
+ "enum": [
227
+ "right_to_appoint_and_remove_members"
228
+ ]
229
+ },
230
+ "mechanism_properties": {
231
+ "properties": {
232
+ "exercised_via" : {
233
+ "enum": [
234
+ "trust",
235
+ "firm",
236
+ null
237
+ ]
238
+ }
239
+ }
240
+ },
241
+ "source_description": {
242
+ "description": "Description of the control relationship as given by the source",
243
+ "type": "string"
244
+ }
245
+ },
246
+ "additionalProperties": false
247
+ },
248
+ {
249
+ "properties": {
250
+ "mechanism_type": {
251
+ "enum": [
252
+ "right_to_share_surplus_assets"
253
+ ]
254
+ },
255
+ "mechanism_properties": {
256
+ "properties": {
257
+ "exercised_via" : {
258
+ "enum": [
259
+ "trust",
260
+ "firm",
261
+ null
262
+ ]
263
+ },
264
+ "percentage_rights": {
265
+ "description": "Percentage of rights to assets in the event of a winding up of the entity",
266
+ "oneOf": [
267
+ {
268
+ "type": "number",
269
+ "maximum": 100,
270
+ "minimum": 0
271
+ },
272
+ {
273
+ "$ref": "includes/range.json"
274
+ }
275
+ ]
276
+ }
277
+ }
278
+ },
279
+ "source_description": {
280
+ "description": "Description of the control relationship as given by the source",
281
+ "type": "string"
282
+ }
283
+ },
284
+ "additionalProperties": false
285
+ },
286
+ {
287
+ "properties": {
288
+ "mechanism_type": {
289
+ "enum": [
290
+ "significant_influence_or_control"
291
+ ]
292
+ },
293
+ "mechanism_properties": {
294
+ "properties": {
295
+ "exercised_via" : {
296
+ "enum": [
297
+ "trust",
298
+ "firm",
299
+ null
300
+ ]
301
+ }
302
+ }
303
+ },
304
+ "source_description": {
305
+ "description": "Description of the control relationship as given by the source",
306
+ "type": "string"
307
+ }
308
+ },
309
+ "additionalProperties": false
310
+ }
311
+ ]
312
+ }
313
+ }
314
+ }
@@ -0,0 +1,24 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "A parsed date object. This allows us to represent not just normal dates, but partial dates too",
4
+ "type": "object",
5
+ "properties": {
6
+ "year": {
7
+ "type": "number",
8
+ "description": "year part as YYYY"
9
+ },
10
+ "month": {
11
+ "type": "number",
12
+ "description": "month part of date in numbers e.g. 11 for November, 4 for April"
13
+ },
14
+ "day": {
15
+ "type": ["number","null"],
16
+ "description": "day part of date in numbers"
17
+ }
18
+ },
19
+ "additionalProperties": false,
20
+ "anyOf": [
21
+ {"required": ["year", "month"]},
22
+ {"required": ["month", "day"]}
23
+ ]
24
+ }
@@ -1,7 +1,8 @@
1
1
  {
2
2
  "$schema": "http://json-schema.org/draft-04/schema#",
3
- "description": "An entity which is a distinct organisation, but is not a company nor an individual. Examples include governments and governmental entities (e.g. Multilateral Devoelpment Banks, Government Departments, municipalities, etc), and also membership organisations",
3
+ "description": "An entity which is a distinct organisation, but is not a company nor an individual. Examples include governments and governmental entities (e.g. Multilateral Development Banks, Government Departments, municipalities, etc), and also membership organisations",
4
4
  "type": "object",
5
+ "additionalProperties": false,
5
6
  "properties": {
6
7
  "name": {
7
8
  "type": "string",
@@ -12,6 +13,10 @@
12
13
  "type": "string",
13
14
  "description": "Name of the jurisdiction in which the entity is incorporated/domiciled (use global for global entities, e.g. UN)"
14
15
  },
16
+ "legal_form": {
17
+ "type": "string",
18
+ "description": "legal form of organisation, e.g. municipality, government department, trust, etc"
19
+ },
15
20
  "website": {
16
21
  "oneOf": [
17
22
  {
@@ -64,6 +69,10 @@
64
69
  "items": {
65
70
  "$ref": "alternative_name.json"
66
71
  }
72
+ },
73
+ "other_attributes": {
74
+ "description": "Use for other attributes for which we don't yet have curated schema attributes",
75
+ "type": "object"
67
76
  }
68
77
  },
69
78
  "required": [
@@ -2,6 +2,7 @@
2
2
  "$schema": "http://json-schema.org/draft-04/schema#",
3
3
  "description": "A person, for example, referenced in some other context, e.g. director of a company, shareholder, licence-holder, lobbyist. This should be used only if the person is the subject of the datum",
4
4
  "type": "object",
5
+ "additionalProperties": false,
5
6
  "properties": {
6
7
  "name": {
7
8
  "$ref": "person_name.json"
@@ -41,7 +42,7 @@
41
42
  ]
42
43
  },
43
44
  "uid": {
44
- "description": "Unique id of the person. Not the unique id is not expected to be globally unique, but only unique with reference to the source from which it is derived. No check is made for uniqueness",
45
+ "description": "Unique id of the person. Note: the unique id is not expected to be globally unique, but only unique with reference to the source from which it is derived. No check is made for uniqueness",
45
46
  "type": "string",
46
47
  "minLength": 1
47
48
  },
@@ -54,7 +55,7 @@
54
55
  "minLength": 4
55
56
  },
56
57
  "registered_address": {
57
- "description":"official address",
58
+ "description":"address for service (or address given in the context of legal requirement)",
58
59
  "$ref": "address.json"
59
60
  },
60
61
  "headquarters_address": {
@@ -76,13 +77,30 @@
76
77
  }
77
78
  },
78
79
  "date_of_birth": {
79
- "type": "string",
80
- "format": "date"
80
+ "anyOf": [
81
+ {
82
+ "type": "string",
83
+ "format": "date"
84
+ },
85
+ {
86
+ "$ref": "date.json"
87
+ }
88
+
89
+ ]
81
90
  },
82
91
  "nationality": {
83
92
  "description": "The person's nationality",
84
93
  "type": "string",
85
94
  "minLength": 2
95
+ },
96
+ "country_of_residence": {
97
+ "description": "The normal country of residence for the person",
98
+ "type": "string",
99
+ "minLength": 2
100
+ },
101
+ "other_attributes": {
102
+ "description": "Use for other attributes for which we don't yet have curated schema attributes",
103
+ "type": "object"
86
104
  }
87
105
  },
88
106
  "required": [
@@ -0,0 +1,24 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "This represents a range of numeric values, particularly useful where the exact numeric value is not known, only that it is between two bounds, e.g. a shareholding percentage that is greater than 50 but less than or equal to 75",
4
+ "type": "object",
5
+ "properties": {
6
+ "minimum": {
7
+ "type": "number",
8
+ "description": "The minimum value"
9
+ },
10
+ "maximum": {
11
+ "type": "number",
12
+ "description": "The maximum value"
13
+ },
14
+ "exclusive_minimum": {
15
+ "type": "boolean",
16
+ "description": "True if the minimum value is not included in the range, i.e. x > minimum (not x >= minimum)"
17
+ },
18
+ "exclusive_maximum": {
19
+ "type": "boolean",
20
+ "description": "True if the maximum value is not included in the range, i.e. x < maximum (not x <= maximum)"
21
+ }
22
+ },
23
+ "additionalProperties": false
24
+ }
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "$schema": "http://json-schema.org/draft-04/schema#",
3
3
  "description": "An entity of unknown type - potentially a company, a person, government entity, or some unincorporated organisation",
4
+ "additionalProperties": false,
4
5
  "type": "object",
5
6
  "properties": {
6
7
  "name": {
@@ -12,6 +13,10 @@
12
13
  "type": "string",
13
14
  "description": "Name of the jurisdiction in which the entity is incorporated/domiciled"
14
15
  },
16
+ "uid": {
17
+ "description": "Unique id for the entity if it has one",
18
+ "type": "string"
19
+ },
15
20
  "website": {
16
21
  "oneOf": [
17
22
  {
@@ -64,6 +69,10 @@
64
69
  "items": {
65
70
  "$ref": "alternative_name.json"
66
71
  }
72
+ },
73
+ "other_attributes": {
74
+ "description": "Use for other attributes for which we don't yet have curated schema attributes",
75
+ "type": "object"
67
76
  }
68
77
  },
69
78
  "required": [
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.22
4
+ version: 0.2.23
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-14 00:00:00.000000000 Z
11
+ date: 2016-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -107,6 +107,7 @@ files:
107
107
  - lib/turbot_runner/version.rb
108
108
  - schema/schemas/accounts-statement-schema.json
109
109
  - schema/schemas/company-schema.json
110
+ - schema/schemas/control-statement-schema.json
110
111
  - schema/schemas/filing-schema.json
111
112
  - schema/schemas/financial-payment-schema.json
112
113
  - schema/schemas/gazette-notice-schema.json
@@ -117,6 +118,7 @@ files:
117
118
  - schema/schemas/includes/classification.json
118
119
  - schema/schemas/includes/company-for-nesting.json
119
120
  - schema/schemas/includes/company.json
121
+ - schema/schemas/includes/date.json
120
122
  - schema/schemas/includes/entity.json
121
123
  - schema/schemas/includes/filing.json
122
124
  - schema/schemas/includes/filing_document.json
@@ -131,6 +133,7 @@ files:
131
133
  - schema/schemas/includes/person.json
132
134
  - schema/schemas/includes/person_name.json
133
135
  - schema/schemas/includes/previous_name.json
136
+ - schema/schemas/includes/range.json
134
137
  - schema/schemas/includes/sanction.json
135
138
  - schema/schemas/includes/share-parcel-data.json
136
139
  - schema/schemas/includes/share-parcel.json
@@ -224,8 +227,65 @@ required_rubygems_version: !ruby/object:Gem::Requirement
224
227
  version: '0'
225
228
  requirements: []
226
229
  rubyforge_project:
227
- rubygems_version: 2.4.5
230
+ rubygems_version: 2.5.1
228
231
  signing_key:
229
232
  specification_version: 4
230
233
  summary: Utilities for running bots with Turbot
231
- test_files: []
234
+ test_files:
235
+ - spec/bots/bot-that-crashes-immediately/manifest.json
236
+ - spec/bots/bot-that-crashes-immediately/scraper.rb
237
+ - spec/bots/bot-that-crashes-immediately/transformer1.rb
238
+ - spec/bots/bot-that-crashes-in-scraper/manifest.json
239
+ - spec/bots/bot-that-crashes-in-scraper/scraper.rb
240
+ - spec/bots/bot-that-crashes-in-scraper/transformer1.rb
241
+ - spec/bots/bot-that-crashes-in-transformer/manifest.json
242
+ - spec/bots/bot-that-crashes-in-transformer/scraper.rb
243
+ - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
244
+ - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
245
+ - spec/bots/bot-that-emits-run-ended/manifest.json
246
+ - spec/bots/bot-that-emits-run-ended/scraper.rb
247
+ - spec/bots/bot-that-emits-snapshot-ended/manifest.json
248
+ - spec/bots/bot-that-emits-snapshot-ended/scraper.rb
249
+ - spec/bots/bot-that-expects-file/manifest.json
250
+ - spec/bots/bot-that-expects-file/scraper.rb
251
+ - spec/bots/bot-that-expects-file/something.txt
252
+ - spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
253
+ - spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
254
+ - spec/bots/bot-that-produces-duplicates/manifest.json
255
+ - spec/bots/bot-that-produces-duplicates/scraper.rb
256
+ - spec/bots/bot-with-invalid-data-type/manifest.json
257
+ - spec/bots/bot-with-invalid-data-type/scraper.rb
258
+ - spec/bots/bot-with-invalid-sample-date/manifest.json
259
+ - spec/bots/bot-with-invalid-sample-date/scraper.rb
260
+ - spec/bots/bot-with-pause/manifest.json
261
+ - spec/bots/bot-with-pause/scraper.rb
262
+ - spec/bots/bot-with-transformer/manifest.json
263
+ - spec/bots/bot-with-transformer/scraper.rb
264
+ - spec/bots/bot-with-transformer/transformer.rb
265
+ - spec/bots/bot-with-transformers/manifest.json
266
+ - spec/bots/bot-with-transformers/scraper.rb
267
+ - spec/bots/bot-with-transformers/transformer1.rb
268
+ - spec/bots/bot-with-transformers/transformer2.rb
269
+ - spec/bots/invalid-json-bot/manifest.json
270
+ - spec/bots/invalid-json-bot/scraper.rb
271
+ - spec/bots/invalid-record-bot/manifest.json
272
+ - spec/bots/invalid-record-bot/scraper.rb
273
+ - spec/bots/logging-bot/manifest.json
274
+ - spec/bots/logging-bot/scraper.rb
275
+ - spec/bots/python-bot/manifest.json
276
+ - spec/bots/python-bot/scraper.py
277
+ - spec/bots/ruby-bot/manifest.json
278
+ - spec/bots/ruby-bot/scraper.rb
279
+ - spec/bots/slow-bot/manifest.json
280
+ - spec/bots/slow-bot/scraper.rb
281
+ - spec/lib/processor_spec.rb
282
+ - spec/lib/runner_spec.rb
283
+ - spec/lib/utils_spec.rb
284
+ - spec/lib/validator_spec.rb
285
+ - spec/manual.rb
286
+ - spec/outputs/full-scraper.out
287
+ - spec/outputs/full-transformer.out
288
+ - spec/outputs/truncated-scraper.out
289
+ - spec/spec_helper.rb
290
+ - spec/support/custom_matchers.rb
291
+ - spec/support/helpers.rb