turbot-runner 0.1.23 → 0.1.24
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/turbot_runner.rb +0 -2
- data/lib/turbot_runner/processor.rb +7 -70
- data/lib/turbot_runner/version.rb +1 -1
- data/schema/schemas/company-schema.json +69 -22
- data/schema/schemas/financial-payment-schema.json +12 -5
- data/schema/schemas/includes/address.json +36 -13
- data/schema/schemas/includes/alternative_name.json +32 -9
- data/schema/schemas/includes/company-for-nesting.json +70 -23
- data/schema/schemas/includes/company.json +15 -6
- data/schema/schemas/includes/filing.json +48 -16
- data/schema/schemas/includes/financial-payment-data-object.json +73 -21
- data/schema/schemas/includes/identifier.json +14 -8
- data/schema/schemas/includes/industry_code.json +25 -12
- data/schema/schemas/includes/licence-data-object.json +40 -13
- data/schema/schemas/includes/officer.json +66 -46
- data/schema/schemas/includes/organisation.json +6 -5
- data/schema/schemas/includes/permission.json +36 -17
- data/schema/schemas/includes/person.json +14 -13
- data/schema/schemas/includes/person_name.json +36 -12
- data/schema/schemas/includes/previous_name.json +19 -9
- data/schema/schemas/includes/share-parcel-data.json +55 -40
- data/schema/schemas/includes/share-parcel.json +71 -56
- data/schema/schemas/includes/subsidiary-relationship-data.json +19 -8
- data/schema/schemas/includes/total-shares.json +13 -6
- data/schema/schemas/includes/unknown_entity_type.json +6 -5
- data/schema/schemas/licence-schema.json +157 -17
- data/schema/schemas/primary-data-schema.json +18 -16
- data/schema/schemas/share-parcel-schema.json +8 -7
- data/schema/schemas/simple-financial-payment-schema.json +47 -11
- data/schema/schemas/simple-licence-schema.json +21 -6
- data/schema/schemas/simple-subsidiary-schema.json +15 -4
- data/schema/schemas/subsidiary-relationship-schema.json +26 -7
- data/spec/lib/processor_spec.rb +3 -75
- metadata +5 -10
- data/lib/turbot_runner/validator.rb +0 -77
- data/lib/turbot_runner/validators.rb +0 -12
- data/schema/schemas/rich-licence-schema.json +0 -103
- data/spec/lib/validator_spec.rb +0 -235
- data/spec/lib/validators_spec.rb +0 -48
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzkxMDdjODdjYjg3NDFkZTVjZjJmMzI4M2RhZmQ1NWI5OTBmNzFiOA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ZmU3ODUzNmMxODg5NjZlMDhjZDMzYzk0MTY4YzA4ODVlZjEzZTVlOA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
Mzc3NTg0ZTY2ZWM5YmU4MWM4ODM0NmRkZjdhNDk2NWE4YjYxMDllMjJmOTlm
|
10
|
+
MmQwMzEwMGJkMThiMDFlYzFlYjg0ODQyMjkzMDBlMzgwMWM4YmM3MTI1NjA4
|
11
|
+
OGMxMjExNjIxNDI2N2FmZjYyODU2Y2IwMDEzMTkwZWJjNzVlODM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MDk3NzFhODNlZDA3NGUyMmZlMTBmYzEzM2UyNjFmMDc5NzJjYTJkMjQ0NGRh
|
14
|
+
M2U3MDlmOGYwNGI4Y2U3YzNkZDEwODk4NzA4ZDY1Y2NhNGJlYWE1MzkyNDYw
|
15
|
+
M2MyYTNkNjI0YzEwMTMyOTdiMzMwZmU1NTQ2Njk0YTQ5NjZkMGM=
|
data/lib/turbot_runner.rb
CHANGED
@@ -4,8 +4,6 @@ require 'turbot_runner/processor'
|
|
4
4
|
require 'turbot_runner/runner'
|
5
5
|
require 'turbot_runner/script_runner'
|
6
6
|
require 'turbot_runner/utils'
|
7
|
-
require 'turbot_runner/validator'
|
8
|
-
require 'turbot_runner/validators'
|
9
7
|
require 'turbot_runner/version'
|
10
8
|
|
11
9
|
module TurbotRunner
|
@@ -1,5 +1,4 @@
|
|
1
|
-
require '
|
2
|
-
require 'json-schema'
|
1
|
+
require 'openc/json_schema'
|
3
2
|
|
4
3
|
module TurbotRunner
|
5
4
|
class Processor
|
@@ -16,18 +15,13 @@ module TurbotRunner
|
|
16
15
|
@record_handler.handle_run_ended
|
17
16
|
@runner.interrupt if @runner
|
18
17
|
else
|
19
|
-
record = JSON.parse(line)
|
18
|
+
record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
|
20
19
|
|
21
|
-
|
22
|
-
converted_record = convert_record(record)
|
23
|
-
error_message = validate(converted_record)
|
24
|
-
rescue ConversionError => e
|
25
|
-
error_message = e.message
|
26
|
-
end
|
20
|
+
error_message = validate(record)
|
27
21
|
|
28
22
|
if error_message.nil?
|
29
23
|
begin
|
30
|
-
@record_handler.handle_valid_record(
|
24
|
+
@record_handler.handle_valid_record(record, @data_type)
|
31
25
|
rescue InterruptRun
|
32
26
|
@runner.interrupt if @runner
|
33
27
|
end
|
@@ -46,60 +40,8 @@ module TurbotRunner
|
|
46
40
|
@runner.interrupt
|
47
41
|
end
|
48
42
|
|
49
|
-
def convert_record(record)
|
50
|
-
converted_record = Utils.deep_copy(record)
|
51
|
-
|
52
|
-
date_paths.each do |path|
|
53
|
-
begin
|
54
|
-
tmp = converted_record
|
55
|
-
|
56
|
-
path[0...-1].each do |path_item|
|
57
|
-
tmp = tmp[path_item]
|
58
|
-
end
|
59
|
-
|
60
|
-
value = tmp[path[-1]]
|
61
|
-
rescue NoMethodError
|
62
|
-
next
|
63
|
-
end
|
64
|
-
|
65
|
-
next unless value.is_a?(String)
|
66
|
-
|
67
|
-
if value == ''
|
68
|
-
tmp.delete(path[-1])
|
69
|
-
else
|
70
|
-
begin
|
71
|
-
tmp[path[-1]] = Date.strptime(value, '%Y-%m-%d').strftime('%Y-%m-%d')
|
72
|
-
rescue ArgumentError
|
73
|
-
raise ConversionError.new("Property not a valid date: #{path.join('.')}")
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
converted_record
|
79
|
-
end
|
80
|
-
|
81
|
-
def date_paths
|
82
|
-
@date_paths ||= get_date_paths(schema['properties'])
|
83
|
-
end
|
84
|
-
|
85
|
-
def get_date_paths(properties)
|
86
|
-
date_paths = []
|
87
|
-
|
88
|
-
properties.each do |name, attrs|
|
89
|
-
if attrs['format'] == 'date'
|
90
|
-
date_paths << [name]
|
91
|
-
elsif attrs['type'] == 'object'
|
92
|
-
get_date_paths(attrs['properties']).each do |path|
|
93
|
-
date_paths << [name] + path
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
date_paths
|
99
|
-
end
|
100
|
-
|
101
43
|
def validate(record)
|
102
|
-
error =
|
44
|
+
error = Openc::JsonSchema.validate(schema_path, record)
|
103
45
|
|
104
46
|
message = nil
|
105
47
|
|
@@ -137,14 +79,9 @@ module TurbotRunner
|
|
137
79
|
message
|
138
80
|
end
|
139
81
|
|
140
|
-
def
|
141
|
-
@schema ||= load_schema
|
142
|
-
end
|
143
|
-
|
144
|
-
def load_schema
|
82
|
+
def schema_path
|
145
83
|
hyphenated_name = @data_type.to_s.gsub("_", "-").gsub(" ", "-")
|
146
|
-
|
147
|
-
JSON.load(File.read(path))
|
84
|
+
File.join(SCHEMAS_PATH, "#{hyphenated_name}-schema.json")
|
148
85
|
end
|
149
86
|
|
150
87
|
class ConversionError < StandardError; end
|
@@ -1,9 +1,7 @@
|
|
1
|
-
|
2
1
|
{
|
3
2
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
4
|
-
"title": "Company Schema",
|
5
|
-
"type": "object",
|
6
3
|
"description": "A company in OpenCorporates",
|
4
|
+
"type": "object",
|
7
5
|
"properties": {
|
8
6
|
"company_number": {
|
9
7
|
"type": "string",
|
@@ -22,13 +20,16 @@
|
|
22
20
|
"maxLength": 5
|
23
21
|
},
|
24
22
|
"incorporation_date": {
|
25
|
-
"type": "
|
23
|
+
"type": "string",
|
24
|
+
"format": "date"
|
26
25
|
},
|
27
26
|
"dissolution_date": {
|
28
|
-
"type": "
|
27
|
+
"type": "string",
|
28
|
+
"format": "date"
|
29
29
|
},
|
30
30
|
"retrieved_at": {
|
31
|
-
"type": "
|
31
|
+
"type": "string",
|
32
|
+
"format": "date-time"
|
32
33
|
},
|
33
34
|
"current_status": {
|
34
35
|
"type": "string"
|
@@ -100,43 +101,71 @@
|
|
100
101
|
}
|
101
102
|
},
|
102
103
|
"branch": {
|
103
|
-
"type": [
|
104
|
+
"type": [
|
105
|
+
"string",
|
106
|
+
"null"
|
107
|
+
],
|
104
108
|
"description": "A flag to denote whether a company is a branch entity. This should only be set if the company is a type of branch (otherwise should be null). In general the only option here is 'F' for a 'Foreign' branch, i.e. an out-of-jurisdiction entity that has registered as having a presence in the jurisdiction. In the US this is sometimes called a Foreign Corporation",
|
105
|
-
"enum": [
|
109
|
+
"enum": [
|
110
|
+
"F",
|
111
|
+
"L",
|
112
|
+
null
|
113
|
+
]
|
106
114
|
},
|
107
115
|
"all_attributes": {
|
108
116
|
"type": "object",
|
109
117
|
"description": "Other arbitrary attributes for a given company",
|
110
118
|
"properties": {
|
111
119
|
"jurisdiction_of_origin": {
|
112
|
-
"type": [
|
120
|
+
"type": [
|
121
|
+
"string",
|
122
|
+
"null"
|
123
|
+
],
|
113
124
|
"description": "The jurisdiction of the 'home' company if this is a branch",
|
114
125
|
"minLength": 1
|
115
126
|
},
|
116
127
|
"home_company_number": {
|
117
|
-
"type": [
|
128
|
+
"type": [
|
129
|
+
"string",
|
130
|
+
"null"
|
131
|
+
],
|
118
132
|
"description": "If the entity is a 'branch', this is the company_number of the 'home' company in the home company's jurisdiction",
|
119
133
|
"minLength": 1
|
120
134
|
},
|
121
135
|
"home_legal_name": {
|
122
|
-
"type": [
|
136
|
+
"type": [
|
137
|
+
"string",
|
138
|
+
"null"
|
139
|
+
],
|
123
140
|
"description": "The legal name of the 'home' company in its jurisdiction if this is a branch, and the name is different from the legal name of the branch",
|
124
141
|
"minLength": 1
|
125
142
|
},
|
126
143
|
"registered_agent_address": {
|
127
|
-
"type": [
|
144
|
+
"type": [
|
145
|
+
"string",
|
146
|
+
"null"
|
147
|
+
],
|
128
148
|
"description": "The address of the 'Agent', a public address to which legal papers can be served",
|
129
149
|
"minLength": 1
|
130
150
|
},
|
131
151
|
"registered_agent_name": {
|
132
|
-
"type": [
|
133
|
-
|
152
|
+
"type": [
|
153
|
+
"string",
|
154
|
+
"null"
|
155
|
+
],
|
156
|
+
"description": "The 'Agent' of the company – a person or entity that is empowered to accept service for the company",
|
134
157
|
"minLength": 1
|
135
158
|
},
|
136
159
|
"number_of_employees": {
|
137
160
|
"anyOf": [
|
138
|
-
{
|
139
|
-
|
161
|
+
{
|
162
|
+
"type": "string",
|
163
|
+
"minLength": 1
|
164
|
+
},
|
165
|
+
{
|
166
|
+
"type": "number",
|
167
|
+
"minimum": 0
|
168
|
+
}
|
140
169
|
],
|
141
170
|
"description": "The number of employees"
|
142
171
|
},
|
@@ -145,7 +174,9 @@
|
|
145
174
|
"description": "Information on the merger of this company into a 'survivor' company",
|
146
175
|
"properties": {
|
147
176
|
"surviving_company": {
|
148
|
-
"type": [
|
177
|
+
"type": [
|
178
|
+
"object"
|
179
|
+
],
|
149
180
|
"description": "the surviving_company that this has been merged into",
|
150
181
|
"properties": {
|
151
182
|
"name": {
|
@@ -164,8 +195,18 @@
|
|
164
195
|
"minLength": 2
|
165
196
|
}
|
166
197
|
},
|
167
|
-
"anyOf": [
|
168
|
-
|
198
|
+
"anyOf": [
|
199
|
+
{
|
200
|
+
"required": [
|
201
|
+
"name"
|
202
|
+
]
|
203
|
+
},
|
204
|
+
{
|
205
|
+
"required": [
|
206
|
+
"company_number"
|
207
|
+
]
|
208
|
+
}
|
209
|
+
]
|
169
210
|
},
|
170
211
|
"effective_date": {
|
171
212
|
"type": "string",
|
@@ -174,11 +215,17 @@
|
|
174
215
|
}
|
175
216
|
},
|
176
217
|
"additionalProperties": false,
|
177
|
-
"required": [
|
218
|
+
"required": [
|
219
|
+
"surviving_company"
|
220
|
+
]
|
178
221
|
}
|
179
222
|
}
|
180
223
|
}
|
181
224
|
},
|
182
225
|
"additionalProperties": false,
|
183
|
-
"required": [
|
184
|
-
|
226
|
+
"required": [
|
227
|
+
"company_number",
|
228
|
+
"name",
|
229
|
+
"jurisdiction_code"
|
230
|
+
]
|
231
|
+
}
|
@@ -1,10 +1,12 @@
|
|
1
1
|
{
|
2
|
-
"title": "Financial-Payment",
|
3
|
-
"description": "A Financial-Payment is a payment from government to a recipient",
|
4
2
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"description": "A Financial-Payment is a payment from government to a recipient",
|
5
4
|
"type": "object",
|
6
5
|
"properties": {
|
7
|
-
"sample_date": {
|
6
|
+
"sample_date": {
|
7
|
+
"type": "string",
|
8
|
+
"format": "date"
|
9
|
+
},
|
8
10
|
"source_jurisdiction": {
|
9
11
|
"description": "Jurisdiction of the source of the data",
|
10
12
|
"type": "string"
|
@@ -21,5 +23,10 @@
|
|
21
23
|
"additionalItems": false
|
22
24
|
}
|
23
25
|
},
|
24
|
-
"
|
25
|
-
|
26
|
+
"additionalProperties": false,
|
27
|
+
"required": [
|
28
|
+
"company",
|
29
|
+
"data",
|
30
|
+
"sample_date"
|
31
|
+
]
|
32
|
+
}
|
@@ -1,23 +1,46 @@
|
|
1
|
-
{
|
2
|
-
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"oneOf": [
|
3
4
|
{
|
4
|
-
"type": [
|
5
|
+
"type": [
|
6
|
+
"string",
|
7
|
+
"null"
|
8
|
+
],
|
5
9
|
"minLength": 2
|
6
10
|
},
|
7
11
|
{
|
8
12
|
"name": "Address",
|
9
13
|
"description": "An address object",
|
10
14
|
"type": "object",
|
11
|
-
"properties": {
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
15
|
+
"properties": {
|
16
|
+
"street_address": {
|
17
|
+
"type": "string"
|
18
|
+
},
|
19
|
+
"locality": {
|
20
|
+
"type": "string"
|
21
|
+
},
|
22
|
+
"region": {
|
23
|
+
"type": "string"
|
24
|
+
},
|
25
|
+
"postal_code": {
|
26
|
+
"type": "string"
|
27
|
+
},
|
28
|
+
"country": {
|
29
|
+
"type": "string"
|
30
|
+
}
|
16
31
|
},
|
17
|
-
"anyOf": [
|
18
|
-
|
19
|
-
|
32
|
+
"anyOf": [
|
33
|
+
{
|
34
|
+
"required": [
|
35
|
+
"street_address"
|
36
|
+
]
|
37
|
+
},
|
38
|
+
{
|
39
|
+
"required": [
|
40
|
+
"postal_code"
|
41
|
+
]
|
42
|
+
}
|
43
|
+
]
|
20
44
|
}
|
21
45
|
]
|
22
|
-
}
|
23
|
-
|
46
|
+
}
|
@@ -1,13 +1,36 @@
|
|
1
1
|
{
|
2
|
-
"
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
3
|
"description": "An alternative name of a company, e.g. abbreviation, trading (including dba or doing business as), legal. As well as trading names etc, it can be used for storing alternative language representations of the legal name, in which case the language should be represented as two-letter ISO-639 code",
|
4
4
|
"type": "object",
|
5
|
-
"properties": {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
"properties": {
|
6
|
+
"company_name": {
|
7
|
+
"type": "string",
|
8
|
+
"minLength": 1
|
9
|
+
},
|
10
|
+
"start_date": {
|
11
|
+
"type": "string",
|
12
|
+
"format": "date"
|
13
|
+
},
|
14
|
+
"end_date": {
|
15
|
+
"type": "string",
|
16
|
+
"format": "date"
|
17
|
+
},
|
18
|
+
"language": {
|
19
|
+
"type": "string",
|
20
|
+
"minLength": 2,
|
21
|
+
"maxLength": 2
|
22
|
+
},
|
23
|
+
"type": {
|
24
|
+
"type": "string",
|
25
|
+
"enum": [
|
26
|
+
"trading",
|
27
|
+
"abbreviation",
|
28
|
+
"legal"
|
29
|
+
]
|
30
|
+
}
|
10
31
|
},
|
11
|
-
"required": [
|
12
|
-
|
13
|
-
|
32
|
+
"required": [
|
33
|
+
"company_name",
|
34
|
+
"type"
|
35
|
+
]
|
36
|
+
}
|