openc_bot 0.0.18 → 0.0.26

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +15 -0
  2. data/lib/openc_bot/exceptions.rb +3 -0
  3. data/lib/openc_bot/helpers/incremental_search.rb +1 -1
  4. data/lib/openc_bot/helpers/register_methods.rb +28 -9
  5. data/lib/openc_bot/version.rb +1 -1
  6. data/publish.sh +2 -0
  7. data/schemas/Gemfile +1 -1
  8. data/schemas/schemas/company-schema.json +62 -3
  9. data/schemas/schemas/includes/address.json +1 -1
  10. data/schemas/schemas/includes/alternative_name.json +2 -2
  11. data/schemas/schemas/includes/filing.json +1 -1
  12. data/schemas/schemas/includes/industry_code.json +6 -3
  13. data/schemas/schemas/includes/officer.json +40 -4
  14. data/schemas/schemas/includes/previous_name.json +5 -2
  15. data/schemas/schemas/includes/share-parcel.json +3 -0
  16. data/schemas/schemas/simple-licence-schema.json +10 -5
  17. data/schemas/schemas/simple-subsidiary-schema.json +12 -6
  18. data/schemas/spec/sample-data/invalid/company-19.json +1 -1
  19. data/schemas/spec/sample-data/invalid/company-56.json +7 -0
  20. data/schemas/spec/sample-data/invalid/company-57.json +7 -0
  21. data/schemas/spec/sample-data/invalid/company-58.json +7 -0
  22. data/schemas/spec/sample-data/invalid/company-59.json +13 -0
  23. data/schemas/spec/sample-data/invalid/company-60.json +7 -0
  24. data/schemas/spec/sample-data/invalid/company-61.json +7 -0
  25. data/schemas/spec/sample-data/invalid/company-62.json +9 -0
  26. data/schemas/spec/sample-data/invalid/company-63.json +12 -0
  27. data/schemas/spec/sample-data/invalid/company-64.json +14 -0
  28. data/schemas/spec/sample-data/invalid/company-65.json +14 -0
  29. data/schemas/spec/sample-data/invalid/company-66.json +13 -0
  30. data/schemas/spec/sample-data/invalid/company-67.json +14 -0
  31. data/schemas/spec/sample-data/invalid/company-68.json +12 -0
  32. data/schemas/spec/sample-data/invalid/company-69.json +12 -0
  33. data/schemas/spec/sample-data/invalid/company-70.json +14 -0
  34. data/schemas/spec/sample-data/invalid/simple-licence-04.json +10 -0
  35. data/schemas/spec/sample-data/invalid/simple-licence-05.json +10 -0
  36. data/schemas/spec/sample-data/invalid/simple-licence-06.json +10 -0
  37. data/schemas/spec/sample-data/invalid/simple-subsidiary-02.json +13 -0
  38. data/schemas/spec/sample-data/valid/company-19.json +13 -2
  39. data/schemas/spec/sample-data/valid/company-33.json +5 -0
  40. data/schemas/spec/sample-data/valid/company-43.json +7 -0
  41. data/schemas/spec/sample-data/valid/company-44.json +7 -0
  42. data/schemas/spec/sample-data/valid/company-45.json +23 -0
  43. data/schemas/spec/sample-data/valid/company-46.json +7 -0
  44. data/schemas/spec/sample-data/valid/company-47.json +12 -0
  45. data/schemas/spec/sample-data/valid/company-48.json +7 -0
  46. data/schemas/spec/sample-data/valid/company-49.json +14 -0
  47. data/schemas/spec/sample-data/valid/company-50.json +13 -0
  48. data/schemas/spec/sample-data/valid/company-51.json +14 -0
  49. data/schemas/spec/sample-data/valid/company-52.json +12 -0
  50. data/schemas/spec/sample-data/valid/company-53.json +9 -0
  51. data/schemas/spec/validation_spec.rb +1 -1
  52. data/spec/lib/exceptions_spec.rb +25 -2
  53. data/spec/lib/helpers/incremental_search_spec.rb +13 -1
  54. data/spec/lib/helpers/register_methods_spec.rb +31 -2
  55. data/spec/schemas/company-schema_spec.rb +9 -9
  56. metadata +51 -44
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ZGQyZmEzNWYyZmNiMWVmN2JkYmQwMzM3ZmU3YTQxMTRmZjBiNGU4MQ==
5
+ data.tar.gz: !binary |-
6
+ NWQ5ZDdmOTJjYzM1MDQxYzBhODZjMjNjN2YwY2U3MDViYmE1MzcxNw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YzgwMjM1ZDFmN2UzMjUwZGVlNzVkYjNhODUxMDgzNjVjOTI2MDdmZmU3MTcx
10
+ YzUyOWQ1YWE3N2IxMmE4YjI4ZmI1MzA3OGIyOGVlNTBmOWQwZTllOWFlYWIx
11
+ MDE2ODgzYzk0YTQ3ZDBiMzA4OTkxOTQwNWZmN2ZkNmVhZjQyMzA=
12
+ data.tar.gz: !binary |-
13
+ ZTBjNTAxZDdkMzA3ZTJmNmYzOTU2ZmVhNGUyYmU2MjgxMDgxNTQ2NzE1ODlk
14
+ YTQ2MGU4NDU0NzkyZTBmM2I2NzlmNjM0OWQ4ZGU3YzJhZGY1YTNlNTNmMDdh
15
+ YTE5YTdhN2I1ODg3MDlkNGQzZjNiNmZiODliNGIyNmFlZWYxOGM=
@@ -11,7 +11,10 @@ module OpencBot
11
11
 
12
12
  def initialize(validation_errors)
13
13
  @validation_errors = validation_errors
14
+ message = "Validation failed:" + validation_errors.collect{ |e| e[:message] }.join("\n")
15
+ super(message)
14
16
  end
17
+
15
18
  end
16
19
 
17
20
  end
@@ -21,7 +21,7 @@ module OpencBot
21
21
  def highest_entry_uids(force_get = false)
22
22
  bad_results = []
23
23
  results = get_var('highest_entry_uids')
24
- if results.nil? || results.empty? || (results.is_a?(Array) && results.any?{ |r| r.nil? || r.empty? })
24
+ if force_get || results.nil? || results.empty? || (results.is_a?(Array) && results.any?{ |r| r.nil? || r.empty? })
25
25
  results = entity_uid_prefixes.collect do |prefix|
26
26
  hcn = highest_entry_uid_result(:prefix => prefix)
27
27
  bad_results << prefix if (hcn.nil? || hcn.empty?)
@@ -6,6 +6,7 @@ require 'active_support/core_ext'
6
6
  module OpencBot
7
7
  module Helpers
8
8
  module RegisterMethods
9
+ MAX_BUSY_RETRIES = 3
9
10
 
10
11
  def use_alpha_search
11
12
  self.const_defined?('USE_ALPHA_SEARCH') && self.const_get('USE_ALPHA_SEARCH')
@@ -39,7 +40,21 @@ module OpencBot
39
40
 
40
41
  def prepare_and_save_data(all_data,options={})
41
42
  data_to_be_saved = prepare_for_saving(all_data)
42
- insert_or_update([primary_key_name], data_to_be_saved)
43
+ fail_count, retry_interval = 0, 5
44
+ begin
45
+ insert_or_update([primary_key_name], data_to_be_saved)
46
+ rescue SQLite3::BusyException => e
47
+ fail_count += 1
48
+ if fail_count <= MAX_BUSY_RETRIES
49
+ puts "#{e.inspect} raised #{fail_count} times saving:\n#{all_data}\n\nNow retrying in #{retry_interval} seconds" if verbose?
50
+ sleep retry_interval
51
+ retry_interval = retry_interval * 2
52
+ retry
53
+ else
54
+ raise e
55
+ end
56
+ end
57
+
43
58
  end
44
59
 
45
60
  def primary_key_name
@@ -117,7 +132,8 @@ module OpencBot
117
132
  def update_datum(uid, output_as_json=false,replace_existing_data=false)
118
133
  return unless raw_data = fetch_datum(uid)
119
134
  default_options = {primary_key_name => uid, :retrieved_at => Time.now}
120
- processed_data = default_options.merge(process_datum(raw_data))
135
+ return unless base_processed_data = process_datum(raw_data)
136
+ processed_data = default_options.merge(base_processed_data)
121
137
  # prepare the data for saving (converting Arrays, Hashes to json) and
122
138
  # save the original data too, as we may not be extracting everything from it yet
123
139
  save_entity(processed_data.merge(:data => raw_data))
@@ -130,8 +146,9 @@ module OpencBot
130
146
  if output_as_json
131
147
  output_json_error_message(e)
132
148
  else
133
- puts e.inspect if verbose?
134
- raise e
149
+ rich_message = "#{e.message} updating entry with uid: #{uid}"
150
+ puts rich_message if verbose?
151
+ raise $!, rich_message, $!.backtrace
135
152
  end
136
153
  end
137
154
 
@@ -143,11 +160,13 @@ module OpencBot
143
160
  end
144
161
 
145
162
  def validate_datum(record)
146
- schema = File.expand_path("../../../../schemas/schemas/#{schema_name}.json", __FILE__)
147
- errors = JSON::Validator.fully_validate(
148
- schema,
149
- record.to_json,
150
- {:errors_as_objects => true})
163
+ Dir.chdir('schemas/schemas') do
164
+ JSON::Validator.fully_validate(
165
+ "#{schema_name}.json",
166
+ record.to_json,
167
+ {:errors_as_objects => true}
168
+ )
169
+ end
151
170
  end
152
171
 
153
172
  def post_process(row_hash, skip_nulls=false)
@@ -1,3 +1,3 @@
1
1
  module OpencBot
2
- VERSION = "0.0.18"
2
+ VERSION = "0.0.26"
3
3
  end
data/publish.sh CHANGED
@@ -4,6 +4,8 @@ UNPUSHED=$(git log origin/master..HEAD)
4
4
  if [ "x$UNPUSHED" == "x" ]; then
5
5
  gem build openc_bot.gemspec
6
6
  gem push $(ls *gem|tail -1)
7
+ else
8
+ echo "Unpushed commits!"
7
9
  fi
8
10
 
9
11
  function clean {
data/schemas/Gemfile CHANGED
@@ -4,6 +4,6 @@ source "https://rubygems.org"
4
4
  group :test do
5
5
  gem "rspec"
6
6
  gem 'json-schema'
7
- gem 'debugger'
7
+ # gem 'debugger'
8
8
  gem 'rake'
9
9
  end
@@ -6,14 +6,17 @@
6
6
  "properties": {
7
7
  "company_number": {
8
8
  "type": "string",
9
+ "description": "unique identifier given by the corporate register with which it is incorporated",
9
10
  "minLength": 1
10
11
  },
11
12
  "name": {
12
13
  "type": "string",
14
+ "description": "Legal name of the company",
13
15
  "minLength": 1
14
16
  },
15
17
  "jurisdiction_code": {
16
18
  "type": "string",
19
+ "description": "Code representing the jurisdiction/company register which is the canonical record of the company’s existence. Uses underscored ISO 3166-2 to represent it, e.g. es for Spain, us_de for Delaware",
17
20
  "minLength": 2,
18
21
  "maxLength": 5
19
22
  },
@@ -35,9 +38,27 @@
35
38
  "registry_url": {
36
39
  "type": "string"
37
40
  },
41
+ "website": {
42
+ "type": "string",
43
+ "minLength": 5
44
+ },
45
+ "telephone_number": {
46
+ "type": "string",
47
+ "minLength": 4
48
+ },
49
+ "fax_number": {
50
+ "type": "string",
51
+ "minLength": 4
52
+ },
38
53
  "registered_address": {
39
54
  "$ref": "includes/address.json"
40
55
  },
56
+ "headquarters_address": {
57
+ "$ref": "includes/address.json"
58
+ },
59
+ "mailing_address": {
60
+ "$ref": "includes/address.json"
61
+ },
41
62
  "officers": {
42
63
  "type": "array",
43
64
  "items": {
@@ -112,9 +133,47 @@
112
133
  "minLength": 1
113
134
  },
114
135
  "number_of_employees": {
115
- "type": "number",
116
- "description": "The number of employees",
117
- "minimum": 0
136
+ "anyOf": [
137
+ {"type":"string","minLength": 1},
138
+ {"type":"number","minimum": 0}
139
+ ],
140
+ "description": "The number of employees"
141
+ },
142
+ "merged_into": {
143
+ "type": "object",
144
+ "description": "Information on the merger of this company into a 'survivor' company",
145
+ "properties": {
146
+ "surviving_company": {
147
+ "type": ["object"],
148
+ "description": "the surviving_company that this has been merged into",
149
+ "properties": {
150
+ "name": {
151
+ "type": "string",
152
+ "description": "The legal name of the surviving company",
153
+ "minLength": 1
154
+ },
155
+ "company_number": {
156
+ "type": "string",
157
+ "description": "The company_number of the surviving company, if known",
158
+ "minLength": 1
159
+ },
160
+ "jurisdiction": {
161
+ "type": "string",
162
+ "description": "The jurisdiction of incorporation of the surviving company",
163
+ "minLength": 2
164
+ }
165
+ },
166
+ "anyOf": [{"required": ["name"]},
167
+ {"required": ["company_number"]}]
168
+ },
169
+ "effective_date": {
170
+ "type": "string",
171
+ "format": "date",
172
+ "description": "The date of the merger"
173
+ }
174
+ },
175
+ "additionalProperties": false,
176
+ "required": ["surviving_company"]
118
177
  }
119
178
  }
120
179
  }
@@ -9,7 +9,7 @@
9
9
  "description": "An address object",
10
10
  "type": "object",
11
11
  "properties": { "street_address": { "type": "string" },
12
- "locality": { "type": "date" },
12
+ "locality": {"type":"string"},
13
13
  "region": { "type": "string" },
14
14
  "postal_code": { "type": "string" },
15
15
  "country": { "type": "string" }
@@ -3,8 +3,8 @@
3
3
  "description": "An alternative name of a company, e.g. abbreviation, trading (including dba or doing business as), legal. As well as trading names etc, it can be used for storing alternative language representations of the legal name, in which case the language should be represented as two-letter ISO-639 code",
4
4
  "type": "object",
5
5
  "properties": { "company_name": { "type": "string", "minLength": 1 },
6
- "start_date": { "type": "date" },
7
- "end_date": { "type": "date" },
6
+ "start_date": {"type":"string","format": "date"},
7
+ "end_date": {"type":"string","format": "date"},
8
8
  "language": { "type": "string", "minLength": 2, "maxLength": 2 },
9
9
  "type": { "type": "string", "enum": ["trading", "abbreviation", "legal"] }
10
10
  },
@@ -3,7 +3,7 @@
3
3
  "description": "A statutory filing",
4
4
  "type": "object",
5
5
  "properties": { "title": { "type": "string" },
6
- "date": { "type": "date" },
6
+ "date": {"type":"string","format": "date"},
7
7
  "description": { "type": "string" },
8
8
  "uid": { "type": "string" },
9
9
  "url": { "type": "string" },
@@ -4,9 +4,12 @@
4
4
  "type": "object",
5
5
  "properties": { "name": { "type": "string" },
6
6
  "code": { "type": "string" },
7
- "code_scheme_id": { "type": "string", "enum": ["eu_nace_2", "uk_sic_2003", "uk_sic_2007", "us_naics_2002", "us_naics_2007", "be_nace_2008"] },
8
- "start_date": { "type": "date" },
9
- "end_date": { "type": "date" }
7
+ "code_scheme_id": {
8
+ "type": "string",
9
+ "description": "An identifier representing the industry code scheme. At the moment these are eu_nace_2, uk_sic_2003, uk_sic_2007, us_naics_2002, us_naics_2007, be_nace_2008, dk_db_2007, nz_bic_2006, no_sic_2007, anz_sic_2006, in_nic_2004_mca, ca_qc_cae, lu_nace_2. For other code schemes, or details of these, contact info@opencorporates.com"
10
+ },
11
+ "start_date": {"type":"string","format": "date"},
12
+ "end_date": {"type":"string","format": "date"}
10
13
  },
11
14
  "required": [ "code", "code_scheme_id" ]
12
15
  }
@@ -3,11 +3,47 @@
3
3
  "description": "An officer (director, senior executive) of a company",
4
4
  "type": "object",
5
5
  "properties": { "name": { "type": "string", "minLength": 1 },
6
- "start_date": { "type": "date" },
7
- "end_date": { "type": "date" },
6
+ "start_date": {
7
+ "anyOf": [
8
+ {"type":"string","format": "date"},
9
+ {"type":"null"}
10
+ ]
11
+ },
12
+ "end_date": {
13
+ "anyOf": [
14
+ {"type":"string","format": "date"},
15
+ {"type":"null"}
16
+ ]
17
+ },
8
18
  "position": { "type": "string" },
9
- "other_attributes": { "type": "object" },
10
- "uid": { "type": "string" }
19
+ "other_attributes": {
20
+ "type": "object",
21
+ "properties": {
22
+ "date_of_birth": {
23
+ "type": "string",
24
+ "format": "date"
25
+ },
26
+ "nationality": {
27
+ "type": "string"
28
+ },
29
+ "person_uid": {
30
+ "type": "string",
31
+ "description": "a unique identifier given to the individual (as opposed to the officership)"
32
+ },
33
+ "address": {
34
+ "type": "string"
35
+ },
36
+ "type": {
37
+ "type": "string",
38
+ "enum": ["Person", "Company"],
39
+ "description": "The type of entity that is the officer (either 'Person' or 'Company')"
40
+ }
41
+ }
42
+ },
43
+ "uid": {
44
+ "type": "string",
45
+ "description": "a unique identifier given to the officership"
46
+ }
11
47
  },
12
48
  "required": [ "name" ]
13
49
  }
@@ -3,8 +3,11 @@
3
3
  "description": "A previous name of a company",
4
4
  "type": "object",
5
5
  "properties": { "company_name": { "type": "string", "minLength": 1 },
6
- "con_date": { "type": "date" },
7
- "start_date": { "type": "date" }
6
+ "con_date": {
7
+ "type": "date",
8
+ "description": "The end (conversion) date of the name"
9
+ },
10
+ "start_date": {"type":"string","format": "date"}
8
11
  },
9
12
  "required": [ "company_name" ]
10
13
  }
@@ -13,6 +13,9 @@
13
13
  "maximum": 100,
14
14
  "minimum": 0
15
15
  },
16
+ "start_date": {"type":"string","format": "date"},
17
+ "end_date": {"type":"string","format": "date"},
18
+ "sample_date": {"type":"string","format": "date"},
16
19
  "shareholders": {
17
20
  "description": "Legal persons who own this share parcel",
18
21
  "type": "array",
@@ -5,11 +5,13 @@
5
5
  "properties": {
6
6
  "source_url": {
7
7
  "description": "Place where this fact can be verified",
8
- "type": "string"
8
+ "type": "string",
9
+ "minLength": 1
9
10
  },
10
11
  "sample_date": {
11
12
  "description": "Date on which this fact was known to be true",
12
- "type": "string"
13
+ "type": "date",
14
+ "minLength": 1
13
15
  },
14
16
  "confidence": {
15
17
  "description": "Confidence in accuracy of data",
@@ -18,11 +20,13 @@
18
20
  },
19
21
  "company_name": {
20
22
  "description": "Name of the company holding the licence",
21
- "type": "string"
23
+ "type": "string",
24
+ "minLength": 1
22
25
  },
23
26
  "company_jurisdiction": {
24
27
  "description": "Jurisdiction where company is based",
25
- "type": "string"
28
+ "type": "string",
29
+ "minLength": 1
26
30
  },
27
31
  "licence_number": {
28
32
  "description": "Licence number or code",
@@ -38,7 +42,8 @@
38
42
  },
39
43
  "licence_jurisdiction": {
40
44
  "description": "Jurisdiction for which licence is issued",
41
- "type": "string"
45
+ "type": "string",
46
+ "minLength": 1
42
47
  },
43
48
  "status": {
44
49
  "description": "Status of the licence",
@@ -5,11 +5,13 @@
5
5
  "properties": {
6
6
  "source_url": {
7
7
  "description": "Place where this fact can be verified",
8
- "type": "string"
8
+ "type": "string",
9
+ "minLength": 1
9
10
  },
10
11
  "sample_date": {
11
12
  "description": "Date on which this fact was known to be true",
12
- "type": "string"
13
+ "type": "string",
14
+ "minLength": 1
13
15
  },
14
16
  "start_date": {
15
17
  "description": "Earliest known date this was known to be a subsidiary",
@@ -26,11 +28,13 @@
26
28
  },
27
29
  "parent_name": {
28
30
  "description": "Name of the controlling company",
29
- "type": "string"
31
+ "type": "string",
32
+ "minLength": 1
30
33
  },
31
34
  "parent_jurisdiction": {
32
35
  "description": "Jurisdiction where controlling company is based",
33
- "type": "string"
36
+ "type": "string",
37
+ "minLength": 1
34
38
  },
35
39
  "parent_identifier": {
36
40
  "description": "Any unique ID for the controlling company",
@@ -47,11 +51,13 @@
47
51
  },
48
52
  "subsidiary_name": {
49
53
  "description": "The name of the subsidiary",
50
- "type": "string"
54
+ "type": "string",
55
+ "minLength": 1
51
56
  },
52
57
  "subsidiary_jurisdiction": {
53
58
  "description": "Jurisdiction of the subsidiary",
54
- "type": "string"
59
+ "type": "string",
60
+ "minLength": 1
55
61
  },
56
62
  "subsidiary_identifier": {
57
63
  "description": "Any unique id for the subsidiary that might be provided in the source",