openc_bot 0.0.18 → 0.0.26
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/openc_bot/exceptions.rb +3 -0
- data/lib/openc_bot/helpers/incremental_search.rb +1 -1
- data/lib/openc_bot/helpers/register_methods.rb +28 -9
- data/lib/openc_bot/version.rb +1 -1
- data/publish.sh +2 -0
- data/schemas/Gemfile +1 -1
- data/schemas/schemas/company-schema.json +62 -3
- data/schemas/schemas/includes/address.json +1 -1
- data/schemas/schemas/includes/alternative_name.json +2 -2
- data/schemas/schemas/includes/filing.json +1 -1
- data/schemas/schemas/includes/industry_code.json +6 -3
- data/schemas/schemas/includes/officer.json +40 -4
- data/schemas/schemas/includes/previous_name.json +5 -2
- data/schemas/schemas/includes/share-parcel.json +3 -0
- data/schemas/schemas/simple-licence-schema.json +10 -5
- data/schemas/schemas/simple-subsidiary-schema.json +12 -6
- data/schemas/spec/sample-data/invalid/company-19.json +1 -1
- data/schemas/spec/sample-data/invalid/company-56.json +7 -0
- data/schemas/spec/sample-data/invalid/company-57.json +7 -0
- data/schemas/spec/sample-data/invalid/company-58.json +7 -0
- data/schemas/spec/sample-data/invalid/company-59.json +13 -0
- data/schemas/spec/sample-data/invalid/company-60.json +7 -0
- data/schemas/spec/sample-data/invalid/company-61.json +7 -0
- data/schemas/spec/sample-data/invalid/company-62.json +9 -0
- data/schemas/spec/sample-data/invalid/company-63.json +12 -0
- data/schemas/spec/sample-data/invalid/company-64.json +14 -0
- data/schemas/spec/sample-data/invalid/company-65.json +14 -0
- data/schemas/spec/sample-data/invalid/company-66.json +13 -0
- data/schemas/spec/sample-data/invalid/company-67.json +14 -0
- data/schemas/spec/sample-data/invalid/company-68.json +12 -0
- data/schemas/spec/sample-data/invalid/company-69.json +12 -0
- data/schemas/spec/sample-data/invalid/company-70.json +14 -0
- data/schemas/spec/sample-data/invalid/simple-licence-04.json +10 -0
- data/schemas/spec/sample-data/invalid/simple-licence-05.json +10 -0
- data/schemas/spec/sample-data/invalid/simple-licence-06.json +10 -0
- data/schemas/spec/sample-data/invalid/simple-subsidiary-02.json +13 -0
- data/schemas/spec/sample-data/valid/company-19.json +13 -2
- data/schemas/spec/sample-data/valid/company-33.json +5 -0
- data/schemas/spec/sample-data/valid/company-43.json +7 -0
- data/schemas/spec/sample-data/valid/company-44.json +7 -0
- data/schemas/spec/sample-data/valid/company-45.json +23 -0
- data/schemas/spec/sample-data/valid/company-46.json +7 -0
- data/schemas/spec/sample-data/valid/company-47.json +12 -0
- data/schemas/spec/sample-data/valid/company-48.json +7 -0
- data/schemas/spec/sample-data/valid/company-49.json +14 -0
- data/schemas/spec/sample-data/valid/company-50.json +13 -0
- data/schemas/spec/sample-data/valid/company-51.json +14 -0
- data/schemas/spec/sample-data/valid/company-52.json +12 -0
- data/schemas/spec/sample-data/valid/company-53.json +9 -0
- data/schemas/spec/validation_spec.rb +1 -1
- data/spec/lib/exceptions_spec.rb +25 -2
- data/spec/lib/helpers/incremental_search_spec.rb +13 -1
- data/spec/lib/helpers/register_methods_spec.rb +31 -2
- data/spec/schemas/company-schema_spec.rb +9 -9
- metadata +51 -44
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ZGQyZmEzNWYyZmNiMWVmN2JkYmQwMzM3ZmU3YTQxMTRmZjBiNGU4MQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NWQ5ZDdmOTJjYzM1MDQxYzBhODZjMjNjN2YwY2U3MDViYmE1MzcxNw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YzgwMjM1ZDFmN2UzMjUwZGVlNzVkYjNhODUxMDgzNjVjOTI2MDdmZmU3MTcx
|
10
|
+
YzUyOWQ1YWE3N2IxMmE4YjI4ZmI1MzA3OGIyOGVlNTBmOWQwZTllOWFlYWIx
|
11
|
+
MDE2ODgzYzk0YTQ3ZDBiMzA4OTkxOTQwNWZmN2ZkNmVhZjQyMzA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZTBjNTAxZDdkMzA3ZTJmNmYzOTU2ZmVhNGUyYmU2MjgxMDgxNTQ2NzE1ODlk
|
14
|
+
YTQ2MGU4NDU0NzkyZTBmM2I2NzlmNjM0OWQ4ZGU3YzJhZGY1YTNlNTNmMDdh
|
15
|
+
YTE5YTdhN2I1ODg3MDlkNGQzZjNiNmZiODliNGIyNmFlZWYxOGM=
|
data/lib/openc_bot/exceptions.rb
CHANGED
@@ -21,7 +21,7 @@ module OpencBot
|
|
21
21
|
def highest_entry_uids(force_get = false)
|
22
22
|
bad_results = []
|
23
23
|
results = get_var('highest_entry_uids')
|
24
|
-
if results.nil? || results.empty? || (results.is_a?(Array) && results.any?{ |r| r.nil? || r.empty? })
|
24
|
+
if force_get || results.nil? || results.empty? || (results.is_a?(Array) && results.any?{ |r| r.nil? || r.empty? })
|
25
25
|
results = entity_uid_prefixes.collect do |prefix|
|
26
26
|
hcn = highest_entry_uid_result(:prefix => prefix)
|
27
27
|
bad_results << prefix if (hcn.nil? || hcn.empty?)
|
@@ -6,6 +6,7 @@ require 'active_support/core_ext'
|
|
6
6
|
module OpencBot
|
7
7
|
module Helpers
|
8
8
|
module RegisterMethods
|
9
|
+
MAX_BUSY_RETRIES = 3
|
9
10
|
|
10
11
|
def use_alpha_search
|
11
12
|
self.const_defined?('USE_ALPHA_SEARCH') && self.const_get('USE_ALPHA_SEARCH')
|
@@ -39,7 +40,21 @@ module OpencBot
|
|
39
40
|
|
40
41
|
def prepare_and_save_data(all_data,options={})
|
41
42
|
data_to_be_saved = prepare_for_saving(all_data)
|
42
|
-
|
43
|
+
fail_count, retry_interval = 0, 5
|
44
|
+
begin
|
45
|
+
insert_or_update([primary_key_name], data_to_be_saved)
|
46
|
+
rescue SQLite3::BusyException => e
|
47
|
+
fail_count += 1
|
48
|
+
if fail_count <= MAX_BUSY_RETRIES
|
49
|
+
puts "#{e.inspect} raised #{fail_count} times saving:\n#{all_data}\n\nNow retrying in #{retry_interval} seconds" if verbose?
|
50
|
+
sleep retry_interval
|
51
|
+
retry_interval = retry_interval * 2
|
52
|
+
retry
|
53
|
+
else
|
54
|
+
raise e
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
43
58
|
end
|
44
59
|
|
45
60
|
def primary_key_name
|
@@ -117,7 +132,8 @@ module OpencBot
|
|
117
132
|
def update_datum(uid, output_as_json=false,replace_existing_data=false)
|
118
133
|
return unless raw_data = fetch_datum(uid)
|
119
134
|
default_options = {primary_key_name => uid, :retrieved_at => Time.now}
|
120
|
-
|
135
|
+
return unless base_processed_data = process_datum(raw_data)
|
136
|
+
processed_data = default_options.merge(base_processed_data)
|
121
137
|
# prepare the data for saving (converting Arrays, Hashes to json) and
|
122
138
|
# save the original data too, as we may not extracting everything from it yet
|
123
139
|
save_entity(processed_data.merge(:data => raw_data))
|
@@ -130,8 +146,9 @@ module OpencBot
|
|
130
146
|
if output_as_json
|
131
147
|
output_json_error_message(e)
|
132
148
|
else
|
133
|
-
|
134
|
-
|
149
|
+
rich_message = "#{e.message} updating entry with uid: #{uid}"
|
150
|
+
puts rich_message if verbose?
|
151
|
+
raise $!, rich_message, $!.backtrace
|
135
152
|
end
|
136
153
|
end
|
137
154
|
|
@@ -143,11 +160,13 @@ module OpencBot
|
|
143
160
|
end
|
144
161
|
|
145
162
|
def validate_datum(record)
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
163
|
+
Dir.chdir('schemas/schemas') do
|
164
|
+
JSON::Validator.fully_validate(
|
165
|
+
"#{schema_name}.json",
|
166
|
+
record.to_json,
|
167
|
+
{:errors_as_objects => true}
|
168
|
+
)
|
169
|
+
end
|
151
170
|
end
|
152
171
|
|
153
172
|
def post_process(row_hash, skip_nulls=false)
|
data/lib/openc_bot/version.rb
CHANGED
data/publish.sh
CHANGED
data/schemas/Gemfile
CHANGED
@@ -6,14 +6,17 @@
|
|
6
6
|
"properties": {
|
7
7
|
"company_number": {
|
8
8
|
"type": "string",
|
9
|
+
"description": "unique identifier given by the corporate register with which it is incorporated",
|
9
10
|
"minLength": 1
|
10
11
|
},
|
11
12
|
"name": {
|
12
13
|
"type": "string",
|
14
|
+
"description": "Legal name of the company",
|
13
15
|
"minLength": 1
|
14
16
|
},
|
15
17
|
"jurisdiction_code": {
|
16
18
|
"type": "string",
|
19
|
+
"description": "Code representing the jurisdiction/company register which is the canonical record of the company’s existence. Uses underscored ISO 3166-2 to represent it, e.g. es for Spain, us_de for Delaware",
|
17
20
|
"minLength": 2,
|
18
21
|
"maxLength": 5
|
19
22
|
},
|
@@ -35,9 +38,27 @@
|
|
35
38
|
"registry_url": {
|
36
39
|
"type": "string"
|
37
40
|
},
|
41
|
+
"website": {
|
42
|
+
"type": "string",
|
43
|
+
"minLength": 5
|
44
|
+
},
|
45
|
+
"telephone_number": {
|
46
|
+
"type": "string",
|
47
|
+
"minLength": 4
|
48
|
+
},
|
49
|
+
"fax_number": {
|
50
|
+
"type": "string",
|
51
|
+
"minLength": 4
|
52
|
+
},
|
38
53
|
"registered_address": {
|
39
54
|
"$ref": "includes/address.json"
|
40
55
|
},
|
56
|
+
"headquarters_address": {
|
57
|
+
"$ref": "includes/address.json"
|
58
|
+
},
|
59
|
+
"mailing_address": {
|
60
|
+
"$ref": "includes/address.json"
|
61
|
+
},
|
41
62
|
"officers": {
|
42
63
|
"type": "array",
|
43
64
|
"items": {
|
@@ -112,9 +133,47 @@
|
|
112
133
|
"minLength": 1
|
113
134
|
},
|
114
135
|
"number_of_employees": {
|
115
|
-
"
|
116
|
-
|
117
|
-
|
136
|
+
"anyOf": [
|
137
|
+
{"type":"string","minLength": 1},
|
138
|
+
{"type":"number","minimum": 0}
|
139
|
+
],
|
140
|
+
"description": "The number of employees"
|
141
|
+
},
|
142
|
+
"merged_into": {
|
143
|
+
"type": "object",
|
144
|
+
"description": "Information on the merger of this company into a 'survivor' company",
|
145
|
+
"properties": {
|
146
|
+
"surviving_company": {
|
147
|
+
"type": ["object"],
|
148
|
+
"description": "the surviving_company that this has been merged into",
|
149
|
+
"properties": {
|
150
|
+
"name": {
|
151
|
+
"type": "string",
|
152
|
+
"description": "The legal name of the surviving company",
|
153
|
+
"minLength": 1
|
154
|
+
},
|
155
|
+
"company_number": {
|
156
|
+
"type": "string",
|
157
|
+
"description": "The company_number of the surviving company, if known",
|
158
|
+
"minLength": 1
|
159
|
+
},
|
160
|
+
"jurisdiction": {
|
161
|
+
"type": "string",
|
162
|
+
"description": "The jurisdiction of incorporation of the surviving company",
|
163
|
+
"minLength": 2
|
164
|
+
}
|
165
|
+
},
|
166
|
+
"anyOf": [{"required": ["name"]},
|
167
|
+
{"required": ["company_number"]}]
|
168
|
+
},
|
169
|
+
"effective_date": {
|
170
|
+
"type": "string",
|
171
|
+
"format": "date",
|
172
|
+
"description": "The date of the merger"
|
173
|
+
}
|
174
|
+
},
|
175
|
+
"additionalProperties": false,
|
176
|
+
"required": ["surviving_company"]
|
118
177
|
}
|
119
178
|
}
|
120
179
|
}
|
@@ -9,7 +9,7 @@
|
|
9
9
|
"description": "An address object",
|
10
10
|
"type": "object",
|
11
11
|
"properties": { "street_address": { "type": "string" },
|
12
|
-
"locality": {
|
12
|
+
"locality": {"type":"string"},
|
13
13
|
"region": { "type": "string" },
|
14
14
|
"postal_code": { "type": "string" },
|
15
15
|
"country": { "type": "string" }
|
@@ -3,8 +3,8 @@
|
|
3
3
|
"description": "An alternative name of a company, e.g. abbreviation, trading (including dba or doing business as), legal. As well as trading names etc, it can be used for storing alternative language representations of the legal name, in which case the language should be represented as two-letter ISO-639 code",
|
4
4
|
"type": "object",
|
5
5
|
"properties": { "company_name": { "type": "string", "minLength": 1 },
|
6
|
-
"start_date": {
|
7
|
-
"end_date": {
|
6
|
+
"start_date": {"type":"string","format": "date"},
|
7
|
+
"end_date": {"type":"string","format": "date"},
|
8
8
|
"language": { "type": "string", "minLength": 2, "maxLength": 2 },
|
9
9
|
"type": { "type": "string", "enum": ["trading", "abbreviation", "legal"] }
|
10
10
|
},
|
@@ -3,7 +3,7 @@
|
|
3
3
|
"description": "A statutory filing",
|
4
4
|
"type": "object",
|
5
5
|
"properties": { "title": { "type": "string" },
|
6
|
-
"date": {
|
6
|
+
"date": {"type":"string","format": "date"},
|
7
7
|
"description": { "type": "string" },
|
8
8
|
"uid": { "type": "string" },
|
9
9
|
"url": { "type": "string" },
|
@@ -4,9 +4,12 @@
|
|
4
4
|
"type": "object",
|
5
5
|
"properties": { "name": { "type": "string" },
|
6
6
|
"code": { "type": "string" },
|
7
|
-
"code_scheme_id": {
|
8
|
-
|
9
|
-
|
7
|
+
"code_scheme_id": {
|
8
|
+
"type": "string",
|
9
|
+
"description": "An identifier representing industry code scheme. At the moment these are eu_nace_2, uk_sic_2003, uk_sic_2007, us_naics_2002, us_naics_2007, be_nace_2008, dk_db_2007, nz_bic_2006, no_sic_2007, anz_sic_2006, nz_bic_2006, in_nic_2004_mca, ca_qc_cae, lu_nace_2. For other code schemes, or details of these, contact info@opencorporates.com"
|
10
|
+
},
|
11
|
+
"start_date": {"type":"string","format": "date"},
|
12
|
+
"end_date": {"type":"string","format": "date"}
|
10
13
|
},
|
11
14
|
"required": [ "code", "code_scheme_id" ]
|
12
15
|
}
|
@@ -3,11 +3,47 @@
|
|
3
3
|
"description": "An officer (director, senior executive) of a company",
|
4
4
|
"type": "object",
|
5
5
|
"properties": { "name": { "type": "string", "minLength": 1 },
|
6
|
-
"start_date": {
|
7
|
-
|
6
|
+
"start_date": {
|
7
|
+
"anyOf": [
|
8
|
+
{"type":"string","format": "date"},
|
9
|
+
{"type":"null"}
|
10
|
+
]
|
11
|
+
},
|
12
|
+
"end_date": {
|
13
|
+
"anyOf": [
|
14
|
+
{"type":"string","format": "date"},
|
15
|
+
{"type":"null"}
|
16
|
+
]
|
17
|
+
},
|
8
18
|
"position": { "type": "string" },
|
9
|
-
"other_attributes": {
|
10
|
-
|
19
|
+
"other_attributes": {
|
20
|
+
"type": "object",
|
21
|
+
"properties": {
|
22
|
+
"date_of_birth": {
|
23
|
+
"type": "string",
|
24
|
+
"format": "date"
|
25
|
+
},
|
26
|
+
"nationality": {
|
27
|
+
"type": "string"
|
28
|
+
},
|
29
|
+
"person_uid": {
|
30
|
+
"type": "string",
|
31
|
+
"description": "a unique identifier given to the individual (as opposed to the officership)"
|
32
|
+
},
|
33
|
+
"address": {
|
34
|
+
"type": "string"
|
35
|
+
},
|
36
|
+
"type": {
|
37
|
+
"type": "string",
|
38
|
+
"enum": ["Person", "Company"],
|
39
|
+
"description": "The type of entity that is the officer (either 'Person' or 'Company')"
|
40
|
+
}
|
41
|
+
}
|
42
|
+
},
|
43
|
+
"uid": {
|
44
|
+
"type": "string",
|
45
|
+
"description": "a unique identifier given to the officership"
|
46
|
+
}
|
11
47
|
},
|
12
48
|
"required": [ "name" ]
|
13
49
|
}
|
@@ -3,8 +3,11 @@
|
|
3
3
|
"description": "A previous name of a company",
|
4
4
|
"type": "object",
|
5
5
|
"properties": { "company_name": { "type": "string", "minLength": 1 },
|
6
|
-
"con_date": {
|
7
|
-
|
6
|
+
"con_date": {
|
7
|
+
"type": "date",
|
8
|
+
"description": "The end (conversion) date of the name"
|
9
|
+
},
|
10
|
+
"start_date": {"type":"string","format": "date"}
|
8
11
|
},
|
9
12
|
"required": [ "company_name" ]
|
10
13
|
}
|
@@ -13,6 +13,9 @@
|
|
13
13
|
"maximum": 100,
|
14
14
|
"minimum": 0
|
15
15
|
},
|
16
|
+
"start_date": {"type":"string","format": "date"},
|
17
|
+
"end_date": {"type":"string","format": "date"},
|
18
|
+
"sample_date": {"type":"string","format": "date"},
|
16
19
|
"shareholders": {
|
17
20
|
"description": "Legal persons who own this share parcel",
|
18
21
|
"type": "array",
|
@@ -5,11 +5,13 @@
|
|
5
5
|
"properties": {
|
6
6
|
"source_url": {
|
7
7
|
"description": "Place where this fact can be verified",
|
8
|
-
"type": "string"
|
8
|
+
"type": "string",
|
9
|
+
"minLength": 1
|
9
10
|
},
|
10
11
|
"sample_date": {
|
11
12
|
"description": "Date on which this fact was known to be true",
|
12
|
-
"type": "
|
13
|
+
"type": "date",
|
14
|
+
"minLength": 1
|
13
15
|
},
|
14
16
|
"confidence": {
|
15
17
|
"description": "Confidence in accuracy of data",
|
@@ -18,11 +20,13 @@
|
|
18
20
|
},
|
19
21
|
"company_name": {
|
20
22
|
"description": "Name of the company holding the licence",
|
21
|
-
"type": "string"
|
23
|
+
"type": "string",
|
24
|
+
"minLength": 1
|
22
25
|
},
|
23
26
|
"company_jurisdiction": {
|
24
27
|
"description": "Jurisdiction where company is based",
|
25
|
-
"type": "string"
|
28
|
+
"type": "string",
|
29
|
+
"minLength": 1
|
26
30
|
},
|
27
31
|
"licence_number": {
|
28
32
|
"description": "Licence number or code",
|
@@ -38,7 +42,8 @@
|
|
38
42
|
},
|
39
43
|
"licence_jurisdiction": {
|
40
44
|
"description": "Jurisdiction for which licence is issued",
|
41
|
-
"type": "string"
|
45
|
+
"type": "string",
|
46
|
+
"minLength": 1
|
42
47
|
},
|
43
48
|
"status": {
|
44
49
|
"description": "Status of the licence",
|
@@ -5,11 +5,13 @@
|
|
5
5
|
"properties": {
|
6
6
|
"source_url": {
|
7
7
|
"description": "Place where this fact can be verified",
|
8
|
-
"type": "string"
|
8
|
+
"type": "string",
|
9
|
+
"minLength": 1
|
9
10
|
},
|
10
11
|
"sample_date": {
|
11
12
|
"description": "Date on which this fact was known to be true",
|
12
|
-
"type": "string"
|
13
|
+
"type": "string",
|
14
|
+
"minLength": 1
|
13
15
|
},
|
14
16
|
"start_date": {
|
15
17
|
"description": "Earliest known date this was known to be a subsidiary",
|
@@ -26,11 +28,13 @@
|
|
26
28
|
},
|
27
29
|
"parent_name": {
|
28
30
|
"description": "Name of the controlling company",
|
29
|
-
"type": "string"
|
31
|
+
"type": "string",
|
32
|
+
"minLength": 1
|
30
33
|
},
|
31
34
|
"parent_jurisdiction": {
|
32
35
|
"description": "Jurisdiction where controlling company is based",
|
33
|
-
"type": "string"
|
36
|
+
"type": "string",
|
37
|
+
"minLength": 1
|
34
38
|
},
|
35
39
|
"parent_identifier": {
|
36
40
|
"description": "Any unique ID for the controlling company",
|
@@ -47,11 +51,13 @@
|
|
47
51
|
},
|
48
52
|
"subsidiary_name": {
|
49
53
|
"description": "The name of the subsidiary",
|
50
|
-
"type": "string"
|
54
|
+
"type": "string",
|
55
|
+
"minLength": 1
|
51
56
|
},
|
52
57
|
"subsidiary_jurisdiction": {
|
53
58
|
"description": "Jurisdiction of the subsidiary",
|
54
|
-
"type": "string"
|
59
|
+
"type": "string",
|
60
|
+
"minLength": 1
|
55
61
|
},
|
56
62
|
"subsidiary_identifier": {
|
57
63
|
"description": "Any unique id for the subsidiary that might be provided in the source",
|