atlas_engine 0.7.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +70 -10
- data/app/countries/atlas_engine/gb/address_validation/es/query_builder.rb +1 -1
- data/app/countries/atlas_engine/gg/address_importer/corrections/open_address/city_corrector.rb +38 -0
- data/app/countries/atlas_engine/gg/country_profile.yml +4 -0
- data/app/countries/atlas_engine/gg/synonyms.yml +3 -0
- data/app/countries/atlas_engine/gg/validation_transcriber/address_parser.rb +15 -1
- data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +1 -1
- data/app/countries/atlas_engine/us/address_importer/open_address/filter.rb +28 -0
- data/app/countries/atlas_engine/us/address_importer/open_address/mapper.rb +66 -0
- data/app/countries/atlas_engine/us/country_profile.yml +8 -4
- data/app/countries/atlas_engine/us/jobs/address_importer/combined_import_job.rb +120 -0
- data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_job.rb +3 -2
- data/app/models/atlas_engine/address_importer/open_address/default_mapper.rb +4 -3
- data/app/models/atlas_engine/address_importer/open_address/transformer.rb +3 -2
- data/app/models/atlas_engine/address_validation/validator.rb +2 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unmatched_field_concern_builder.rb +7 -14
- data/app/models/atlas_engine/address_validation/validators/predicates/city/present.rb +0 -1
- data/app/models/atlas_engine/address_validation/validators/predicates/no_emojis.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/no_html_tags.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/no_url.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_length.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/phone/valid.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/province/exists.rb +0 -1
- data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb +0 -1
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +0 -1
- data/app/models/atlas_engine/address_validation/validators/predicates/street/present.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/zip_base.rb +0 -1
- data/app/tasks/maintenance/atlas_engine/geo_json_import_task.rb +0 -3
- data/app/tasks/maintenance/atlas_engine/us_geo_json_directory_import_task.rb +20 -0
- data/lib/atlas_engine/version.rb +1 -1
- metadata +23 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2236db9cf72e66e7bec983fec0ce919b31e1a795e6509ac65e1f914e8cda1c2b
|
4
|
+
data.tar.gz: c4bb6b2c1c8c927abbdf22d66d7a198052ab772f9243ffbd4c02bccced29b4ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1ccff91cd2b8026fd568ba9aced9688741bf1fd107fd180a83447e9d62ce20750d1ba8d14efe690ab22db6d239dfc1af51edda770c00de31cf07bfc210dc8cf
|
7
|
+
data.tar.gz: 9ee3408a1926463cf05cef4ba3b835e71fb8f490b61033154ac47dc586e2eb8b20fa971c9ca8c23859cec7fd4f7f8e81370f506fdba16e081b6299595a517f05
|
data/README.md
CHANGED
@@ -167,17 +167,23 @@ The validation scope excludes zip because the zip was not successfully validated
|
|
167
167
|
## Rails App Installation
|
168
168
|
|
169
169
|
### Initial setup
|
170
|
-
Add the engine to your gemfile
|
170
|
+
* Add the engine to your gemfile
|
171
171
|
```
|
172
172
|
gem "atlas_engine"
|
173
173
|
```
|
174
174
|
|
175
|
-
Run the following commands to install the engine in your rails app
|
176
|
-
|
175
|
+
* Run the following commands to install the engine in your rails app
|
177
176
|
```
|
178
177
|
bundle lock
|
179
|
-
|
178
|
+
rails atlas_engine:install:migrations
|
179
|
+
rails db:migrate
|
180
180
|
```
|
181
|
+
* In `config/routes` mount AtlasEngine
|
182
|
+
* Adding the line `mount AtlasEngine::Engine => "/atlas_engine"`
|
183
|
+
|
184
|
+
* In `app/assets/config/manifest.js`
|
185
|
+
* Adding the line `//= link atlas_engine/application.css`
|
186
|
+
* Install [maintenance_tasks](https://github.com/Shopify/maintenance_tasks?tab=readme-ov-file#installation) - a dependency for Atlas Engine that is used to ingest country data.
|
181
187
|
|
182
188
|
### Updating to a newer version of the engine
|
183
189
|
|
@@ -290,12 +296,47 @@ level address validation, your app must have a populated elasticsearch index per
|
|
290
296
|
to query.
|
291
297
|
|
292
298
|
The data we use to power atlas engine validation is free open source data from the [open addresses](https://openaddresses.io/)
|
293
|
-
project.
|
299
|
+
project.
|
300
|
+
|
301
|
+
### Supported countries
|
302
|
+
At the moment, `atlas_engine` supports advanced address validation for the following countries.
|
303
|
+
|
304
|
+
| Country/territory | Two-letter code | Locales | Street | City | Postal Code | Province/State |
|
305
|
+
|-------------------|-----------------|----------|--------|------|-------------|----------------|
|
306
|
+
| Australia | AU | | | x | x | x |
|
307
|
+
| Austria | AT | | | x | x | x |
|
308
|
+
| Belgium | BE | fr,nl,de | | x | x | |
|
309
|
+
| Bermuda | BM | | | x | x | x |
|
310
|
+
| Czechia | CZ | | | x | x | |
|
311
|
+
| Denmark | DK | | | x | x | |
|
312
|
+
| Faroe Islands | FO | | | x | x | |
|
313
|
+
| France | FR | | | x | x | |
|
314
|
+
| Italy | IT | | | | x | |
|
315
|
+
| Liechtenstein | LI | | | x | x | x |
|
316
|
+
| Luxembourg | LU | fr,lb | | x | x | |
|
317
|
+
| Netherlands | NL | nl | | x | x | x |
|
318
|
+
| Poland | PL | | | x | x | x |
|
319
|
+
| Portugal | PT | | | x | x | x |
|
320
|
+
| Slovenia | SI | | | x | x | x |
|
321
|
+
| South Korea | KR | | | x | x | x |
|
322
|
+
| Switzerland | CH | de,fr,it | | x | x | |
|
323
|
+
| United States | US | en | x | x | x | x |
|
324
|
+
|
325
|
+
### Downloading and indexing instructions
|
326
|
+
|
327
|
+
|
328
|
+
The following guide demonstrates how to ingest data with the dummy app, but the process is the same with
|
294
329
|
the engine mounted into your own rails app.
|
295
330
|
|
296
331
|
1. Go to the [open addresses](https://openaddresses.io/) download center, create an account, support the project, and
|
297
|
-
download a GeoJSON+LD file for the country or region you wish to validate.
|
298
|
-
|
332
|
+
download a GeoJSON+LD file for the country or region you wish to validate.
|
333
|
+
|
334
|
+
Restrictions on the file:
|
335
|
+
- Must be an `addresses` file, as opposed to a `buildings` or `parcels` file.
|
336
|
+
- Must be gzipped (.gz format)
|
337
|
+
- Datasets listed under the _Individual Sources_ section work fine. Those under _Data Collections_ must first be unzipped. The `addresses` geojson files within may then be gzipped and imported.
|
338
|
+
|
339
|
+
For this example, we will be using the `au/countrywide` --> `addresses - country` data for Australia, in the GeoJSON+LD format.
|
299
340
|
|
300
341
|
2. Once the file is downloaded, start your app with `rails s` and navigate to `http://localhost:3000/maintenance_tasks`
|
301
342
|
(see [the github repo](https://github.com/Shopify/maintenance_tasks) for more information about maintenance_tasks).
|
@@ -309,7 +350,7 @@ records in our mysql database and has the following parameters:
|
|
309
350
|
- **country_code: (required)** The ISO country code of the data we are ingesting.
|
310
351
|
In this example, the country code of Australia is `AU`.
|
311
352
|
|
312
|
-
- **geojson_file_path: (required)** The fully qualified path of the previously downloaded geojson data from
|
353
|
+
- **geojson_file_path: (required)** The fully qualified path of the previously downloaded geojson data from OpenAddresses. A comma-delimited list of fully-qualified paths is also accepted.
|
313
354
|
|
314
355
|
- **locale: (optional)** The language of the data in the open addresses file.
|
315
356
|
|
@@ -319,8 +360,7 @@ In this example, the country code of Australia is `AU`.
|
|
319
360
|
link for a more detailed view. Once the import status has updated from `in_progress` to `complete` we will have all of
|
320
361
|
the raw open address data imported into our mysql database's `atlas_engine_post_addresses` table.
|
321
362
|
|
322
|
-
6. Navigate back to `http://localhost:3000/maintenance_tasks` and click on the `Maintenance::AtlasEngine::ElasticsearchIndexCreateTask`. This task will ingest the data we have staged in mysql
|
323
|
-
and use it to create documents in a new elasticsearch index which Atlas Engine will ultimately use for validation.
|
363
|
+
6. Navigate back to `http://localhost:3000/maintenance_tasks` and click on the `Maintenance::AtlasEngine::ElasticsearchIndexCreateTask`. This task will ingest the data we have staged in mysql and use it to create documents in a new elasticsearch index which Atlas Engine will ultimately use for validation.
|
324
364
|
|
325
365
|
7. The `ElasticsearchIndexCreateTask` includes the following parameters:
|
326
366
|
|
@@ -346,6 +386,26 @@ If unchecked, the created index will need to be activated manually.
|
|
346
386
|
We may now use the `es` and `es_street` matching strategies with `AU` addresses. See [below](#elasticsearch-matching-strategy)
|
347
387
|
for an example of its usage.
|
348
388
|
|
389
|
+
#### Instructions for US import
|
390
|
+
|
391
|
+
1. Go to the [open addresses](https://openaddresses.io/) download center and download the collection-us-{region}.zip
|
392
|
+
files for each of the four regions (west, midwest, northeast, south).
|
393
|
+
|
394
|
+
2. Run the US create state geojson script to create a statewide geojson.gz file for each state
|
395
|
+
```
|
396
|
+
bin/us_create_state_geojson execute /path/to/us_collection_zips /path/to/output_dir
|
397
|
+
```
|
398
|
+
|
399
|
+
3. Start your app with `rails s` and navigate to `http://localhost:3000/maintenance_tasks`. There is a task only used
|
400
|
+
for the US import called `Maintenance::AtlasEngine::UsGeoJsonDirectoryImportTask`
|
401
|
+
|
402
|
+
4. Parameterize the `UsGeoJsonDirectoryImportTask` with the output directory that contains all of the `{state}-statewide.geojson.gz` files created in step 2.
|
403
|
+
|
404
|
+
5. Once properly parameterized, click run. The process will initialize a `country_import` and should succeed immediately.
|
405
|
+
|
406
|
+
6. Navigate to `http://localhost:3000/country_imports` to track the progress of the country import. Once the import is complete
|
407
|
+
and the US data is in mysql, the rest of the process (creating the elasticsearch index and verifying it) is the same as above.
|
408
|
+
|
349
409
|
## Elasticsearch Matching Strategy
|
350
410
|
|
351
411
|
Once we have successfully created and activated an elasticsearch index using open address data, we may now use
|
data/app/countries/atlas_engine/gg/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Gg
|
6
|
+
module AddressImporter
|
7
|
+
module Corrections
|
8
|
+
module OpenAddress
|
9
|
+
class CityCorrector
|
10
|
+
class << self
|
11
|
+
extend T::Sig
|
12
|
+
|
13
|
+
CITY_ALIASES_MAPPING = {
|
14
|
+
"St. Sampson" => ["Saint Samsaon"],
|
15
|
+
"St. Saviour" => ["Saint-Sauveur", "Saint Sauveux"],
|
16
|
+
"St. Peter Port" => ["Saint-Pierre Port"],
|
17
|
+
"St. Andrew" => ["Saint Andri", "Saint-André-de-la-Pommeraye"],
|
18
|
+
"St. Pierre Du Bois" => ["St. Peter's", "St. Pierre"],
|
19
|
+
"Castel" => ["Lé Casté", "Sainte-Marie-du-Câtel"],
|
20
|
+
"Forest" => ["Le Fôret", "La Fouarêt"],
|
21
|
+
"Torteval" => ["Tortévas"],
|
22
|
+
"Vale" => ["Lé Vale", "Le Valle"],
|
23
|
+
}
|
24
|
+
|
25
|
+
sig { params(address: Hash).void }
|
26
|
+
def apply(address)
|
27
|
+
city = address[:city].first
|
28
|
+
if CITY_ALIASES_MAPPING.include?(city)
|
29
|
+
address[:city] += CITY_ALIASES_MAPPING[city]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -6,6 +6,10 @@ validation:
|
|
6
6
|
address_parser: AtlasEngine::Gg::ValidationTranscriber::AddressParser
|
7
7
|
restrictions:
|
8
8
|
- class: AtlasEngine::Gg::AddressValidation::Validators::FullAddress::Restrictions::UnsupportedCity
|
9
|
+
ingestion:
|
10
|
+
correctors:
|
11
|
+
open_address:
|
12
|
+
- AtlasEngine::Gg::AddressImporter::Corrections::OpenAddress::CityCorrector
|
9
13
|
exclusions:
|
10
14
|
city:
|
11
15
|
- AtlasEngine::Gg::AddressValidation::Validators::FullAddress::Exclusions::City
|
@@ -10,16 +10,30 @@ module AtlasEngine
|
|
10
10
|
CITY = %r{
|
11
11
|
(?<city>
|
12
12
|
st\.?\s?saviour[']?[s]?|
|
13
|
+
saint[-|\s]sauveur|
|
14
|
+
saint\s?sauveux|
|
13
15
|
st\.?\s?sampson[']?[s]?|
|
16
|
+
saint\s?samsaon|
|
14
17
|
st\.?\s?andrew[']?[s]?|
|
18
|
+
saint\s?andri|
|
19
|
+
saint[-|\s]andr[é|e][-|\s]de[-|\s]la[-|\s]pommeraye|
|
15
20
|
st\.?\s?martin[']?[s]?|
|
16
21
|
st\.?\s?peter[']?[s]?\s?port|
|
22
|
+
saint[-|\s]pierre\s?port|
|
17
23
|
st\.?\s?peter[']?[s]?|
|
18
24
|
st\.?\s?pierre\s?du\s?bois|
|
25
|
+
st\.?\s?pierre|
|
19
26
|
vale|
|
27
|
+
l[é|e]\s?vale|
|
28
|
+
le\s?valle|
|
20
29
|
torteval|
|
30
|
+
tort[é|e]vas|
|
21
31
|
castel|
|
22
|
-
|
32
|
+
l[é|e]\s?cast[é|e]|
|
33
|
+
sainte[-|\s]marie[-|\s]du[-|\s]c[â|a]tel|
|
34
|
+
forest|
|
35
|
+
le\s?f[ô|o]ret|
|
36
|
+
la\s?fouar[ê|e]t
|
23
37
|
)
|
24
38
|
}ix
|
25
39
|
|
@@ -10,7 +10,7 @@ module AtlasEngine
|
|
10
10
|
params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
|
11
11
|
end
|
12
12
|
def map(feature)
|
13
|
-
super
|
13
|
+
super.merge(region4: feature["properties"]["district"])
|
14
14
|
end
|
15
15
|
end
|
16
16
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Us
|
6
|
+
module AddressImporter
|
7
|
+
module OpenAddress
|
8
|
+
class Filter
|
9
|
+
extend T::Sig
|
10
|
+
include AtlasEngine::AddressImporter::OpenAddress::Filter
|
11
|
+
|
12
|
+
def initialize(country_import:); end
|
13
|
+
|
14
|
+
sig { override.params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Boolean) }
|
15
|
+
def filter(feature)
|
16
|
+
# Only consider features with lat lon geometry
|
17
|
+
geometry = feature["geometry"]
|
18
|
+
if geometry.present? && geometry["type"] == "Point"
|
19
|
+
return false if geometry["coordinates"].size > 2
|
20
|
+
end
|
21
|
+
|
22
|
+
true
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Us
|
6
|
+
module AddressImporter
|
7
|
+
module OpenAddress
|
8
|
+
class Mapper < AtlasEngine::AddressImporter::OpenAddress::DefaultMapper
|
9
|
+
ORDINAL_REGEX = /\b(\d+)(?:\s+)(st|nd|rd|th)\b/i
|
10
|
+
|
11
|
+
sig do
|
12
|
+
params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
|
13
|
+
end
|
14
|
+
def map(feature)
|
15
|
+
city, street, number, unit, postcode = feature["properties"].values_at(
|
16
|
+
"city",
|
17
|
+
"street",
|
18
|
+
"number",
|
19
|
+
"unit",
|
20
|
+
"postcode",
|
21
|
+
)
|
22
|
+
{
|
23
|
+
source_id: openaddress_source_id(feature),
|
24
|
+
locale: @locale,
|
25
|
+
country_code: @country_code,
|
26
|
+
province_code: @province_code, # region is inconsistently set, override with passed in province_code
|
27
|
+
# Omitted: region1..4
|
28
|
+
city: sanitize_city(city),
|
29
|
+
suburb: nil,
|
30
|
+
zip: postcode.first(5), # truncate zip+4 data
|
31
|
+
street: sanitize_street(street),
|
32
|
+
building_and_unit_ranges: housenumber_and_unit(number, unit),
|
33
|
+
latitude: geometry(feature)&.at(1),
|
34
|
+
longitude: geometry(feature)&.at(0),
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
sig { params(street: T.nilable(String)).returns(T.nilable(String)) }
|
41
|
+
def sanitize_street(street)
|
42
|
+
combine_ordinal_string(strip_extra_spaces(street&.downcase))&.titleize
|
43
|
+
end
|
44
|
+
|
45
|
+
sig { params(city: T.nilable(String)).returns(T::Array[String]) }
|
46
|
+
def sanitize_city(city)
|
47
|
+
return [] if city.nil?
|
48
|
+
|
49
|
+
city.split("/").map { |c| c.titleize.strip }
|
50
|
+
end
|
51
|
+
|
52
|
+
sig { params(text: T.nilable(String)).returns(T.nilable(String)) }
|
53
|
+
def combine_ordinal_string(text)
|
54
|
+
text&.gsub!(ORDINAL_REGEX) { "#{::Regexp.last_match(1)}#{::Regexp.last_match(2)}" }
|
55
|
+
text
|
56
|
+
end
|
57
|
+
|
58
|
+
sig { params(text: T.nilable(String)).returns(T.nilable(String)) }
|
59
|
+
def strip_extra_spaces(text)
|
60
|
+
text&.strip&.gsub(/\s+/, " ")
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -1,10 +1,14 @@
|
|
1
1
|
id: US
|
2
|
-
validation:
|
3
|
-
enabled: true
|
4
|
-
default_matching_strategy: es_street
|
5
|
-
address_parser: AtlasEngine::ValidationTranscriber::AddressParserNorthAmerica
|
6
2
|
ingestion:
|
7
3
|
settings:
|
8
4
|
number_of_shards: "7"
|
9
5
|
min_zip_edge_ngram: "1"
|
10
6
|
max_zip_edge_ngram: "10"
|
7
|
+
post_address_mapper:
|
8
|
+
open_address: AtlasEngine::Us::AddressImporter::OpenAddress::Mapper
|
9
|
+
validation:
|
10
|
+
enabled: true
|
11
|
+
default_matching_strategy: es_street
|
12
|
+
address_parser: AtlasEngine::ValidationTranscriber::AddressParserNorthAmerica
|
13
|
+
open_address:
|
14
|
+
filter: AtlasEngine::Us::AddressImporter::OpenAddress::Filter
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# typed: false
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "zip"
|
5
|
+
|
6
|
+
module AtlasEngine
|
7
|
+
module Us
|
8
|
+
module Jobs
|
9
|
+
module AddressImporter
|
10
|
+
class CombinedImportJob < ApplicationJob
|
11
|
+
include ::AtlasEngine::AddressImporter::ImportLogHelper
|
12
|
+
discard_on Exception
|
13
|
+
rescue_from(ArgumentError) { raise }
|
14
|
+
|
15
|
+
COUNTRY_CODE = "US"
|
16
|
+
LOCALE = "en"
|
17
|
+
US_STATES = [
|
18
|
+
"AK",
|
19
|
+
"AL",
|
20
|
+
"AR",
|
21
|
+
"AZ",
|
22
|
+
"CA",
|
23
|
+
"CO",
|
24
|
+
"CT",
|
25
|
+
"DC",
|
26
|
+
"DE",
|
27
|
+
"FL",
|
28
|
+
"GA",
|
29
|
+
"HI",
|
30
|
+
"IA",
|
31
|
+
"ID",
|
32
|
+
"IL",
|
33
|
+
"IN",
|
34
|
+
"KS",
|
35
|
+
"KY",
|
36
|
+
"LA",
|
37
|
+
"MA",
|
38
|
+
"MD",
|
39
|
+
"ME",
|
40
|
+
"MI",
|
41
|
+
"MN",
|
42
|
+
"MO",
|
43
|
+
"MS",
|
44
|
+
"MT",
|
45
|
+
"NC",
|
46
|
+
"ND",
|
47
|
+
"NE",
|
48
|
+
"NH",
|
49
|
+
"NJ",
|
50
|
+
"NM",
|
51
|
+
"NV",
|
52
|
+
"NY",
|
53
|
+
"OH",
|
54
|
+
"OK",
|
55
|
+
"OR",
|
56
|
+
"PA",
|
57
|
+
"RI",
|
58
|
+
"SC",
|
59
|
+
"SD",
|
60
|
+
"TN",
|
61
|
+
"TX",
|
62
|
+
"UT",
|
63
|
+
"VA",
|
64
|
+
"VT",
|
65
|
+
"WA",
|
66
|
+
"WI",
|
67
|
+
"WV",
|
68
|
+
"WY",
|
69
|
+
]
|
70
|
+
|
71
|
+
def perform(geojson_directory:)
|
72
|
+
country_import = AtlasEngine::CountryImport.create!(country_code: COUNTRY_CODE)
|
73
|
+
country_import.start!
|
74
|
+
|
75
|
+
import_log_info(
|
76
|
+
country_import: country_import,
|
77
|
+
message: "Starting import for #{COUNTRY_CODE} from #{geojson_directory}",
|
78
|
+
notify: true,
|
79
|
+
)
|
80
|
+
|
81
|
+
jobs_to_run = job_list(geojson_directory, country_import)
|
82
|
+
first_job = jobs_to_run.shift
|
83
|
+
first_job[:job_name].perform_later(**first_job[:job_args].merge(followed_by: jobs_to_run))
|
84
|
+
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
def job_list(geojson_directory, country_import)
|
89
|
+
job_list = [
|
90
|
+
clear_records_job(country_import),
|
91
|
+
]
|
92
|
+
|
93
|
+
US_STATES.each do |us_state|
|
94
|
+
job_list <<
|
95
|
+
{
|
96
|
+
job_name: ::AtlasEngine::AddressImporter::OpenAddress::GeoJsonImportJob,
|
97
|
+
job_args: {
|
98
|
+
country_code: COUNTRY_CODE,
|
99
|
+
province_code: us_state.upcase,
|
100
|
+
locale: LOCALE,
|
101
|
+
geojson_file_path: File.join(geojson_directory, "us-#{us_state.downcase}-statewide.geojson.gz"),
|
102
|
+
country_import_id: country_import.id,
|
103
|
+
},
|
104
|
+
}
|
105
|
+
end
|
106
|
+
|
107
|
+
job_list
|
108
|
+
end
|
109
|
+
|
110
|
+
def clear_records_job(country_import)
|
111
|
+
{
|
112
|
+
job_name: AtlasEngine::AddressImporter::ClearRecordsJob,
|
113
|
+
job_args: { country_code: COUNTRY_CODE, country_import_id: country_import.id },
|
114
|
+
}
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -24,7 +24,7 @@ module AtlasEngine
|
|
24
24
|
extend T::Sig
|
25
25
|
include HandlesInterruption
|
26
26
|
include PreparesGeoJsonFile
|
27
|
-
attr_reader :geojson_path, :country_import, :country_code, :loader, :transformer
|
27
|
+
attr_reader :geojson_path, :country_import, :country_code, :province_code, :loader, :transformer
|
28
28
|
|
29
29
|
CHUNK_SIZE = 10_000
|
30
30
|
REPORT_STEP = 5
|
@@ -36,10 +36,11 @@ module AtlasEngine
|
|
36
36
|
def setup_and_download(&block)
|
37
37
|
@loader = Loader.new
|
38
38
|
@country_code = argument(:country_code)
|
39
|
+
@province_code = argument(:province_code)
|
39
40
|
@geojson_path = Pathname.new(argument(:geojson_file_path))
|
40
41
|
@locale = argument(:locale)&.downcase
|
41
42
|
@country_import = CountryImport.find(argument(:country_import_id))
|
42
|
-
@transformer = Transformer.new(country_import: country_import, locale: @locale)
|
43
|
+
@transformer = Transformer.new(country_import: country_import, province_code: @province_code, locale: @locale)
|
43
44
|
|
44
45
|
import_log_info(
|
45
46
|
country_import: country_import,
|
@@ -7,9 +7,10 @@ module AtlasEngine
|
|
7
7
|
class DefaultMapper
|
8
8
|
extend T::Sig
|
9
9
|
include FeatureHelper
|
10
|
-
sig { params(country_code: String, locale: T.nilable(String)).void }
|
11
|
-
def initialize(country_code:, locale: nil)
|
10
|
+
sig { params(country_code: String, province_code: T.nilable(String), locale: T.nilable(String)).void }
|
11
|
+
def initialize(country_code:, province_code: nil, locale: nil)
|
12
12
|
@country_code = country_code
|
13
|
+
@province_code = province_code
|
13
14
|
@locale = locale
|
14
15
|
end
|
15
16
|
|
@@ -27,7 +28,7 @@ module AtlasEngine
|
|
27
28
|
source_id: openaddress_source_id(feature),
|
28
29
|
locale: @locale,
|
29
30
|
country_code: @country_code,
|
30
|
-
province_code:
|
31
|
+
province_code: @province_code,
|
31
32
|
region1: region,
|
32
33
|
# Don't titleize. The sources have proper capitalization, and it's a problem for cities like
|
33
34
|
# 's-Graveland, which would get titleized to "'S Graveland" which is wrong.
|
@@ -8,11 +8,12 @@ module AtlasEngine
|
|
8
8
|
extend T::Sig
|
9
9
|
extend T::Helpers
|
10
10
|
|
11
|
-
def initialize(country_import:, locale: nil)
|
11
|
+
def initialize(country_import:, province_code: nil, locale: nil)
|
12
12
|
@country_code = country_import.country_code
|
13
13
|
@locale = locale
|
14
|
+
@province_code = province_code
|
14
15
|
@mapper = CountryProfile.for(@country_code).ingestion.post_address_mapper("open_address").new(
|
15
|
-
country_code: @country_code, locale: @locale,
|
16
|
+
country_code: @country_code, province_code: @province_code, locale: @locale,
|
16
17
|
)
|
17
18
|
@corrector = AddressImporter::Corrections::Corrector.new(country_code: @country_code, source: "open_address")
|
18
19
|
@validator = AddressImporter::Validation::Wrapper.new(
|
@@ -114,6 +114,8 @@ module AtlasEngine
|
|
114
114
|
local_concerns = {}
|
115
115
|
cache = Validators::Predicates::Cache.new(pipeline_address)
|
116
116
|
@predicate_pipeline.pipeline.each do |config|
|
117
|
+
break if local_concerns[:country].present?
|
118
|
+
|
117
119
|
local_concerns[config.field] = [] if local_concerns[config.field].nil?
|
118
120
|
next if local_concerns[config.field].present?
|
119
121
|
|
@@ -8,7 +8,7 @@ module AtlasEngine
|
|
8
8
|
class UnmatchedFieldConcernBuilder
|
9
9
|
extend T::Sig
|
10
10
|
include ConcernFormatter
|
11
|
-
attr_reader :address, :
|
11
|
+
attr_reader :address, :unmatched_component, :matched_components, :unmatched_field
|
12
12
|
|
13
13
|
COMPONENTS_TO_LABELS = {
|
14
14
|
zip: "ZIP",
|
@@ -31,9 +31,9 @@ module AtlasEngine
|
|
31
31
|
end
|
32
32
|
def initialize(unmatched_component, matched_components, address, unmatched_field = nil)
|
33
33
|
@address = address
|
34
|
-
@
|
34
|
+
@unmatched_component = unmatched_component
|
35
35
|
@matched_components = matched_components
|
36
|
-
@unmatched_field = unmatched_field
|
36
|
+
@unmatched_field = unmatched_field || unmatched_component
|
37
37
|
end
|
38
38
|
|
39
39
|
sig do
|
@@ -61,7 +61,7 @@ module AtlasEngine
|
|
61
61
|
|
62
62
|
sig { returns(Symbol) }
|
63
63
|
def code
|
64
|
-
"#{
|
64
|
+
"#{unmatched_component_name}_inconsistent".to_sym
|
65
65
|
end
|
66
66
|
|
67
67
|
sig { returns(T::Array[Symbol]) }
|
@@ -69,21 +69,14 @@ module AtlasEngine
|
|
69
69
|
[field_name]
|
70
70
|
end
|
71
71
|
|
72
|
-
sig { returns(T::Array[String]) }
|
73
|
-
def valid_address_component_values
|
74
|
-
matched_components.last(2).map do |component|
|
75
|
-
component == :province_code ? province_name : address[component]
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
72
|
sig { returns(Symbol) }
|
80
|
-
def
|
81
|
-
SHORTENED_COMPONENT_NAMES[
|
73
|
+
def unmatched_component_name
|
74
|
+
SHORTENED_COMPONENT_NAMES[unmatched_component] || unmatched_component
|
82
75
|
end
|
83
76
|
|
84
77
|
sig { returns(Symbol) }
|
85
78
|
def field_name
|
86
|
-
unmatched_field ||
|
79
|
+
SHORTENED_COMPONENT_NAMES[unmatched_field] || unmatched_field
|
87
80
|
end
|
88
81
|
end
|
89
82
|
end
|
@@ -9,7 +9,6 @@ module AtlasEngine
|
|
9
9
|
class Present < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
12
|
return if @cache.country.field(key: :city).autofill(locale: :en).present?
|
14
13
|
|
15
14
|
build_concern if @address.city.blank?
|
@@ -9,8 +9,6 @@ module AtlasEngine
|
|
9
9
|
class Valid < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return if @address.country_code.blank?
|
13
|
-
return unless @cache.country.country?
|
14
12
|
return if @address.phone.blank?
|
15
13
|
|
16
14
|
phone = Worldwide::Phone.new(number: @address.phone, country_code: @address.country_code)
|
@@ -9,7 +9,6 @@ module AtlasEngine
|
|
9
9
|
class Exists < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
12
|
return if address.province_code.present? ||
|
14
13
|
country_has_no_provinces ||
|
15
14
|
@cache.country.province_optional?
|
data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb
CHANGED
@@ -9,7 +9,6 @@ module AtlasEngine
|
|
9
9
|
class ValidForCountry < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
12
|
return if address.province_code.blank?
|
14
13
|
return if @cache.country.zones.none?(&:province?)
|
15
14
|
return if @cache.country.hide_provinces_from_addresses
|
@@ -9,8 +9,6 @@ module AtlasEngine
|
|
9
9
|
class BuildingNumberInAddress1 < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
|
-
|
14
12
|
return unless @cache.country.building_number_required
|
15
13
|
return if @cache.country.building_number_may_be_in_address2
|
16
14
|
|
@@ -9,7 +9,6 @@ module AtlasEngine
|
|
9
9
|
class BuildingNumberInAddress1OrAddress2 < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
12
|
return unless @cache.country.building_number_required
|
14
13
|
return unless @cache.country.building_number_may_be_in_address2
|
15
14
|
return if contains_number?(T.must(@address.address1)) || contains_number?(@address.address2)
|
@@ -12,9 +12,6 @@ module Maintenance
|
|
12
12
|
# ISO3166 two-letter country code.
|
13
13
|
attribute :country_code, :string
|
14
14
|
validates :country_code, presence: true
|
15
|
-
# Filename to import. When running in staging or production, the worker expects to find
|
16
|
-
# this file in the relevant GCS bucket, configured in `config/storage/{environment}.yml`
|
17
|
-
# It must be placed under `openaddress/` with the same filename.
|
18
15
|
attribute :geojson_file_path, :string
|
19
16
|
attribute :locale, :string
|
20
17
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# typed: false
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Maintenance
|
5
|
+
module AtlasEngine
|
6
|
+
class UsGeoJsonDirectoryImportTask < MaintenanceTasks::Task
|
7
|
+
include ::AtlasEngine::HandlesBlob
|
8
|
+
|
9
|
+
no_collection
|
10
|
+
|
11
|
+
attribute :geojson_directory, :string
|
12
|
+
|
13
|
+
def process
|
14
|
+
::AtlasEngine::Us::Jobs::AddressImporter::CombinedImportJob.perform_later(
|
15
|
+
geojson_directory: geojson_directory.strip,
|
16
|
+
)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/atlas_engine/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: atlas_engine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: annex_29
|
@@ -178,6 +178,20 @@ dependencies:
|
|
178
178
|
- - ">="
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: sprockets-rails
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
181
195
|
- !ruby/object:Gem::Dependency
|
182
196
|
name: state_machines-activerecord
|
183
197
|
requirement: !ruby/object:Gem::Requirement
|
@@ -305,9 +319,11 @@ files:
|
|
305
319
|
- app/countries/atlas_engine/gb/country_profile.yml
|
306
320
|
- app/countries/atlas_engine/gb/validation_transcriber/full_address_parser.rb
|
307
321
|
- app/countries/atlas_engine/gb/validation_transcriber/parsed_address.rb
|
322
|
+
- app/countries/atlas_engine/gg/address_importer/corrections/open_address/city_corrector.rb
|
308
323
|
- app/countries/atlas_engine/gg/address_validation/validators/full_address/exclusions/city.rb
|
309
324
|
- app/countries/atlas_engine/gg/address_validation/validators/full_address/restrictions/unsupported_city.rb
|
310
325
|
- app/countries/atlas_engine/gg/country_profile.yml
|
326
|
+
- app/countries/atlas_engine/gg/synonyms.yml
|
311
327
|
- app/countries/atlas_engine/gg/validation_transcriber/address_parser.rb
|
312
328
|
- app/countries/atlas_engine/ie/country_profile.yml
|
313
329
|
- app/countries/atlas_engine/it/address_importer/corrections/open_address/city_corrector.rb
|
@@ -362,7 +378,10 @@ files:
|
|
362
378
|
- app/countries/atlas_engine/si/validation_transcriber/address_parser.rb
|
363
379
|
- app/countries/atlas_engine/tt/address_importer/open_address/mapper.rb
|
364
380
|
- app/countries/atlas_engine/tt/country_profile.yml
|
381
|
+
- app/countries/atlas_engine/us/address_importer/open_address/filter.rb
|
382
|
+
- app/countries/atlas_engine/us/address_importer/open_address/mapper.rb
|
365
383
|
- app/countries/atlas_engine/us/country_profile.yml
|
384
|
+
- app/countries/atlas_engine/us/jobs/address_importer/combined_import_job.rb
|
366
385
|
- app/countries/atlas_engine/us/synonyms.yml
|
367
386
|
- app/graphql/atlas_engine/errors/locale_unsupported_error.rb
|
368
387
|
- app/graphql/atlas_engine/schema.graphql
|
@@ -546,6 +565,7 @@ files:
|
|
546
565
|
- app/models/atlas_engine/street.rb
|
547
566
|
- app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb
|
548
567
|
- app/tasks/maintenance/atlas_engine/geo_json_import_task.rb
|
568
|
+
- app/tasks/maintenance/atlas_engine/us_geo_json_directory_import_task.rb
|
549
569
|
- app/views/atlas_engine/connectivity/index.html.erb
|
550
570
|
- app/views/atlas_engine/country_imports/index.html.erb
|
551
571
|
- app/views/atlas_engine/country_imports/show.html.erb
|
@@ -595,7 +615,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
595
615
|
- !ruby/object:Gem::Version
|
596
616
|
version: '0'
|
597
617
|
requirements: []
|
598
|
-
rubygems_version: 3.5.
|
618
|
+
rubygems_version: 3.5.10
|
599
619
|
signing_key:
|
600
620
|
specification_version: 4
|
601
621
|
summary: Address Validation API
|