atlas_engine 0.7.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +70 -10
- data/app/countries/atlas_engine/gb/address_validation/es/query_builder.rb +1 -1
- data/app/countries/atlas_engine/gg/address_importer/corrections/open_address/city_corrector.rb +38 -0
- data/app/countries/atlas_engine/gg/country_profile.yml +4 -0
- data/app/countries/atlas_engine/gg/synonyms.yml +3 -0
- data/app/countries/atlas_engine/gg/validation_transcriber/address_parser.rb +15 -1
- data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +1 -1
- data/app/countries/atlas_engine/us/address_importer/open_address/filter.rb +28 -0
- data/app/countries/atlas_engine/us/address_importer/open_address/mapper.rb +66 -0
- data/app/countries/atlas_engine/us/country_profile.yml +8 -4
- data/app/countries/atlas_engine/us/jobs/address_importer/combined_import_job.rb +120 -0
- data/app/jobs/atlas_engine/address_importer/open_address/geo_json_import_job.rb +3 -2
- data/app/models/atlas_engine/address_importer/open_address/default_mapper.rb +4 -3
- data/app/models/atlas_engine/address_importer/open_address/transformer.rb +3 -2
- data/app/models/atlas_engine/address_validation/validator.rb +2 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/unmatched_field_concern_builder.rb +7 -14
- data/app/models/atlas_engine/address_validation/validators/predicates/city/present.rb +0 -1
- data/app/models/atlas_engine/address_validation/validators/predicates/no_emojis.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/no_html_tags.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/no_url.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/not_exceed_max_length.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/phone/valid.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/province/exists.rb +0 -1
- data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb +0 -1
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +0 -1
- data/app/models/atlas_engine/address_validation/validators/predicates/street/present.rb +0 -2
- data/app/models/atlas_engine/address_validation/validators/predicates/zip/zip_base.rb +0 -1
- data/app/tasks/maintenance/atlas_engine/geo_json_import_task.rb +0 -3
- data/app/tasks/maintenance/atlas_engine/us_geo_json_directory_import_task.rb +20 -0
- data/lib/atlas_engine/version.rb +1 -1
- metadata +23 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2236db9cf72e66e7bec983fec0ce919b31e1a795e6509ac65e1f914e8cda1c2b
|
4
|
+
data.tar.gz: c4bb6b2c1c8c927abbdf22d66d7a198052ab772f9243ffbd4c02bccced29b4ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1ccff91cd2b8026fd568ba9aced9688741bf1fd107fd180a83447e9d62ce20750d1ba8d14efe690ab22db6d239dfc1af51edda770c00de31cf07bfc210dc8cf
|
7
|
+
data.tar.gz: 9ee3408a1926463cf05cef4ba3b835e71fb8f490b61033154ac47dc586e2eb8b20fa971c9ca8c23859cec7fd4f7f8e81370f506fdba16e081b6299595a517f05
|
data/README.md
CHANGED
@@ -167,17 +167,23 @@ The validation scope excludes zip because the zip was not successfully validated
|
|
167
167
|
## Rails App Installation
|
168
168
|
|
169
169
|
### Initial setup
|
170
|
-
Add the engine to your gemfile
|
170
|
+
* Add the engine to your gemfile
|
171
171
|
```
|
172
172
|
gem "atlas_engine"
|
173
173
|
```
|
174
174
|
|
175
|
-
Run the following commands to install the engine in your rails app
|
176
|
-
|
175
|
+
* Run the following commands to install the engine in your rails app
|
177
176
|
```
|
178
177
|
bundle lock
|
179
|
-
|
178
|
+
rails atlas_engine:install:migrations
|
179
|
+
rails db:migrate
|
180
180
|
```
|
181
|
+
* In `config/routes` mount AtlasEngine
|
182
|
+
* Adding the line `mount AtlasEngine::Engine => "/atlas_engine"
|
183
|
+
`
|
184
|
+
* In `app/assets/config/manifest.js`
|
185
|
+
* Adding the line `//= link atlas_engine/application.css`
|
186
|
+
* Install [maintenance_tasks](https://github.com/Shopify/maintenance_tasks?tab=readme-ov-file#installation) - a dependency for Atlas Engine that is used to ingest country data.
|
181
187
|
|
182
188
|
### Updating to a newer version of the engine
|
183
189
|
|
@@ -290,12 +296,47 @@ level address validation, your app must have a populated elasticsearch index per
|
|
290
296
|
to query.
|
291
297
|
|
292
298
|
The data we use to power atlas engine validation is free open source data from the [open addresses](https://openaddresses.io/)
|
293
|
-
project.
|
299
|
+
project.
|
300
|
+
|
301
|
+
### Supported countries
|
302
|
+
At the moment, `atlas_engine` supports advanced address validation for the following countries.
|
303
|
+
|
304
|
+
| Country/territory | Two-letter code | Locales | Street | City | Postal Code | Province/State |
|
305
|
+
|-------------------|-----------------|----------|--------|------|-------------|----------------|
|
306
|
+
| Australia | AU | | | x | x | x |
|
307
|
+
| Austria | AT | | | x | x | x |
|
308
|
+
| Belgium | BE | fr,nl,de | | x | x | |
|
309
|
+
| Bermuda | BM | | | x | x | x |
|
310
|
+
| Czechia | CZ | | | x | x | |
|
311
|
+
| Denmark | DK | | | x | x | |
|
312
|
+
| Faroe Islands | FO | | | x | x | |
|
313
|
+
| France | FR | | | x | x | |
|
314
|
+
| Italy | IT | | | | x | |
|
315
|
+
| Liechtenstein | LI | | | x | x | x |
|
316
|
+
| Luxembourg | LU | fr,lb | | x | x | |
|
317
|
+
| Netherlands | NL | nl | | x | x | x |
|
318
|
+
| Poland | PL | | | x | x | x |
|
319
|
+
| Portugal | PT | | | x | x | x |
|
320
|
+
| Slovenia | SI | | | x | x | x |
|
321
|
+
| South Korea | KR | | | x | x | x |
|
322
|
+
| Switzerland | CH | de,fr,it | | x | x | |
|
323
|
+
| United States | US | en | x | x | x | x |
|
324
|
+
|
325
|
+
### Downloading and indexing instructions
|
326
|
+
|
327
|
+
|
328
|
+
The following guide demonstrates how to ingest data with the dummy app, but the process is the same with
|
294
329
|
the engine mounted into your own rails app.
|
295
330
|
|
296
331
|
1. Go to the [open addresses](https://openaddresses.io/) download center, create an account, support the project, and
|
297
|
-
download a GeoJSON+LD file for the country or region you wish to validate.
|
298
|
-
|
332
|
+
download a GeoJSON+LD file for the country or region you wish to validate.
|
333
|
+
|
334
|
+
Restrictions on the file:
|
335
|
+
- Must be an `addresses` file, as opposed to a `buildings` or `parcels` file.
|
336
|
+
- Must be gzipped (.gz format)
|
337
|
+
- Datasets listed under the _Individual Sources_ section work fine. Those under _Data Collections_ must first be unzipped. The `addresses` geojson files within may then be gzipped and imported.
|
338
|
+
|
339
|
+
For this example, we will be using the `au/countrywide` --> `addresses - country` data for Australia, in the GeoJSON+LD format.
|
299
340
|
|
300
341
|
2. Once the file is downloaded, start your app with `rails s` and navigate to `http://localhost:3000/maintenance_tasks`
|
301
342
|
(see [the github repo](https://github.com/Shopify/maintenance_tasks) for more information about maintenance_tasks).
|
@@ -309,7 +350,7 @@ records in our mysql database and has the following parameters:
|
|
309
350
|
- **country_code: (required)** The ISO country code of the data we are ingesting.
|
310
351
|
In this example, the country code of Australia is `AU`.
|
311
352
|
|
312
|
-
- **geojson_file_path: (required)** The fully qualified path of the previously downloaded geojson data from
|
353
|
+
- **geojson_file_path: (required)** The fully qualified path of the previously downloaded geojson data from OpenAddresses. A comma-delimited list of fully-qualified paths is also accepted.
|
313
354
|
|
314
355
|
- **locale: (optional)** The language of the data in the open addresses file.
|
315
356
|
|
@@ -319,8 +360,7 @@ In this example, the country code of Australia is `AU`.
|
|
319
360
|
link for a more detailed view. Once the import status has updated from `in_progress` to `complete` we will have all of
|
320
361
|
the raw open address data imported into our mysql database's `atlas_engine_post_addresses` table.
|
321
362
|
|
322
|
-
6. Navigate back to `http://localhost:3000/maintenance_tasks` and click on the `Maintenance::AtlasEngine::ElasticsearchIndexCreateTask`. This task will ingest the data we have staged in mysql
|
323
|
-
and use it to create documents in a new elasticsearch index which Atlas Engine will ultimately use for validation.
|
363
|
+
6. Navigate back to `http://localhost:3000/maintenance_tasks` and click on the `Maintenance::AtlasEngine::ElasticsearchIndexCreateTask`. This task will ingest the data we have staged in mysql and use it to create documents in a new elasticsearch index which Atlas Engine will ultimately use for validation.
|
324
364
|
|
325
365
|
7. The `ElasticsearchIndexCreateTask` includes the following parameters:
|
326
366
|
|
@@ -346,6 +386,26 @@ If unchecked, the created index will need to be activated manually.
|
|
346
386
|
We may now use the `es` and `es_street` matching strategies with `AU` addresses. See [below](#elasticsearch-matching-strategy)
|
347
387
|
for an example of its usage.
|
348
388
|
|
389
|
+
#### Instructions for US import
|
390
|
+
|
391
|
+
1. Go to the [open addresses](https://openaddresses.io/) download center and download the collection-us-{region}.zip
|
392
|
+
files for each of the four regions (west, midwest, northeast, south).
|
393
|
+
|
394
|
+
2. Run the US create state geojson script to create a statewide geojson.gz file for each state
|
395
|
+
```
|
396
|
+
bin/us_create_state_geojson execute /path/to/us_collection_zips /path/to/output_dir
|
397
|
+
```
|
398
|
+
|
399
|
+
3. Start your app with `rails s` and navigate to `http://localhost:3000/maintenance_tasks`. There is a task only used
|
400
|
+
for the US import called `Maintenance::AtlasEngine::UsGeoJsonDirectoryImportTask`
|
401
|
+
|
402
|
+
4. Parameterize the `UsGeoJsonDirectoryImportTask` with the output directory that contains all of the `{state}-statewide.geojson.gz` files created in step 2.
|
403
|
+
|
404
|
+
5. Once properly parameterized, click run. The process will initialize a `country_import` and should succeed immediately.
|
405
|
+
|
406
|
+
6. Navigate to `http://localhost:3000/country_imports` to track the progress of the country import. Once the import is complete
|
407
|
+
and the US data is in mysql the rest of the process for creating the elasticsearch index and verifying should be the same as above.
|
408
|
+
|
349
409
|
## Elasticsearch Matching Strategy
|
350
410
|
|
351
411
|
Once we have successfully created and activated an elasticsearch index using open address data, we may now use
|
data/app/countries/atlas_engine/gg/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Gg
|
6
|
+
module AddressImporter
|
7
|
+
module Corrections
|
8
|
+
module OpenAddress
|
9
|
+
class CityCorrector
|
10
|
+
class << self
|
11
|
+
extend T::Sig
|
12
|
+
|
13
|
+
CITY_ALIASES_MAPPING = {
|
14
|
+
"St. Sampson" => ["Saint Samsaon"],
|
15
|
+
"St. Saviour" => ["Saint-Sauveur", "Saint Sauveux"],
|
16
|
+
"St. Peter Port" => ["Saint-Pierre Port"],
|
17
|
+
"St. Andrew" => ["Saint Andri", "Saint-André-de-la-Pommeraye"],
|
18
|
+
"St. Pierre Du Bois" => ["St. Peter's", "St. Pierre"],
|
19
|
+
"Castel" => ["Lé Casté", "Sainte-Marie-du-Câtel"],
|
20
|
+
"Forest" => ["Le Fôret", "La Fouarêt"],
|
21
|
+
"Torteval" => ["Tortévas"],
|
22
|
+
"Vale" => ["Lé Vale", "Le Valle"],
|
23
|
+
}
|
24
|
+
|
25
|
+
sig { params(address: Hash).void }
|
26
|
+
def apply(address)
|
27
|
+
city = address[:city].first
|
28
|
+
if CITY_ALIASES_MAPPING.include?(city)
|
29
|
+
address[:city] += CITY_ALIASES_MAPPING[city]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -6,6 +6,10 @@ validation:
|
|
6
6
|
address_parser: AtlasEngine::Gg::ValidationTranscriber::AddressParser
|
7
7
|
restrictions:
|
8
8
|
- class: AtlasEngine::Gg::AddressValidation::Validators::FullAddress::Restrictions::UnsupportedCity
|
9
|
+
ingestion:
|
10
|
+
correctors:
|
11
|
+
open_address:
|
12
|
+
- AtlasEngine::Gg::AddressImporter::Corrections::OpenAddress::CityCorrector
|
9
13
|
exclusions:
|
10
14
|
city:
|
11
15
|
- AtlasEngine::Gg::AddressValidation::Validators::FullAddress::Exclusions::City
|
@@ -10,16 +10,30 @@ module AtlasEngine
|
|
10
10
|
CITY = %r{
|
11
11
|
(?<city>
|
12
12
|
st\.?\s?saviour[']?[s]?|
|
13
|
+
saint[-|\s]sauveur|
|
14
|
+
saint\s?sauveux|
|
13
15
|
st\.?\s?sampson[']?[s]?|
|
16
|
+
saint\s?samsaon|
|
14
17
|
st\.?\s?andrew[']?[s]?|
|
18
|
+
saint\s?andri|
|
19
|
+
saint[-|\s]andr[é|e][-|\s]de[-|\s]la[-|\s]pommeraye|
|
15
20
|
st\.?\s?martin[']?[s]?|
|
16
21
|
st\.?\s?peter[']?[s]?\s?port|
|
22
|
+
saint[-|\s]pierre\s?port|
|
17
23
|
st\.?\s?peter[']?[s]?|
|
18
24
|
st\.?\s?pierre\s?du\s?bois|
|
25
|
+
st\.?\s?pierre|
|
19
26
|
vale|
|
27
|
+
l[é|e]\s?vale|
|
28
|
+
le\s?valle|
|
20
29
|
torteval|
|
30
|
+
tort[é|e]vas|
|
21
31
|
castel|
|
22
|
-
|
32
|
+
l[é|e]\s?cast[é|e]|
|
33
|
+
sainte[-|\s]marie[-|\s]du[-|\s]c[â|a]tel|
|
34
|
+
forest|
|
35
|
+
le\s?f[ô|o]ret|
|
36
|
+
la\s?fouar[ê|e]t
|
23
37
|
)
|
24
38
|
}ix
|
25
39
|
|
@@ -10,7 +10,7 @@ module AtlasEngine
|
|
10
10
|
params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
|
11
11
|
end
|
12
12
|
def map(feature)
|
13
|
-
super
|
13
|
+
super.merge(region4: feature["properties"]["district"])
|
14
14
|
end
|
15
15
|
end
|
16
16
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Us
|
6
|
+
module AddressImporter
|
7
|
+
module OpenAddress
|
8
|
+
class Filter
|
9
|
+
extend T::Sig
|
10
|
+
include AtlasEngine::AddressImporter::OpenAddress::Filter
|
11
|
+
|
12
|
+
def initialize(country_import:); end
|
13
|
+
|
14
|
+
sig { override.params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Boolean) }
|
15
|
+
def filter(feature)
|
16
|
+
# Only consider features with lat lon geometry
|
17
|
+
geometry = feature["geometry"]
|
18
|
+
if geometry.present? && geometry["type"] == "Point"
|
19
|
+
return false if geometry["coordinates"].size > 2
|
20
|
+
end
|
21
|
+
|
22
|
+
true
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Us
|
6
|
+
module AddressImporter
|
7
|
+
module OpenAddress
|
8
|
+
class Mapper < AtlasEngine::AddressImporter::OpenAddress::DefaultMapper
|
9
|
+
ORDINAL_REGEX = /\b(\d+)(?:\s+)(st|nd|rd|th)\b/i
|
10
|
+
|
11
|
+
sig do
|
12
|
+
params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
|
13
|
+
end
|
14
|
+
def map(feature)
|
15
|
+
city, street, number, unit, postcode = feature["properties"].values_at(
|
16
|
+
"city",
|
17
|
+
"street",
|
18
|
+
"number",
|
19
|
+
"unit",
|
20
|
+
"postcode",
|
21
|
+
)
|
22
|
+
{
|
23
|
+
source_id: openaddress_source_id(feature),
|
24
|
+
locale: @locale,
|
25
|
+
country_code: @country_code,
|
26
|
+
province_code: @province_code, # region is inconsistently set, override with passed in province_code
|
27
|
+
# Omitted: region1..4
|
28
|
+
city: sanitize_city(city),
|
29
|
+
suburb: nil,
|
30
|
+
zip: postcode.first(5), # truncate zip+4 data
|
31
|
+
street: sanitize_street(street),
|
32
|
+
building_and_unit_ranges: housenumber_and_unit(number, unit),
|
33
|
+
latitude: geometry(feature)&.at(1),
|
34
|
+
longitude: geometry(feature)&.at(0),
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
sig { params(street: T.nilable(String)).returns(T.nilable(String)) }
|
41
|
+
def sanitize_street(street)
|
42
|
+
combine_ordinal_string(strip_extra_spaces(street&.downcase))&.titleize
|
43
|
+
end
|
44
|
+
|
45
|
+
sig { params(city: T.nilable(String)).returns(T::Array[String]) }
|
46
|
+
def sanitize_city(city)
|
47
|
+
return [] if city.nil?
|
48
|
+
|
49
|
+
city.split("/").map { |c| c.titleize.strip }
|
50
|
+
end
|
51
|
+
|
52
|
+
sig { params(text: T.nilable(String)).returns(T.nilable(String)) }
|
53
|
+
def combine_ordinal_string(text)
|
54
|
+
text&.gsub!(ORDINAL_REGEX) { "#{::Regexp.last_match(1)}#{::Regexp.last_match(2)}" }
|
55
|
+
text
|
56
|
+
end
|
57
|
+
|
58
|
+
sig { params(text: T.nilable(String)).returns(T.nilable(String)) }
|
59
|
+
def strip_extra_spaces(text)
|
60
|
+
text&.strip&.gsub(/\s+/, " ")
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -1,10 +1,14 @@
|
|
1
1
|
id: US
|
2
|
-
validation:
|
3
|
-
enabled: true
|
4
|
-
default_matching_strategy: es_street
|
5
|
-
address_parser: AtlasEngine::ValidationTranscriber::AddressParserNorthAmerica
|
6
2
|
ingestion:
|
7
3
|
settings:
|
8
4
|
number_of_shards: "7"
|
9
5
|
min_zip_edge_ngram: "1"
|
10
6
|
max_zip_edge_ngram: "10"
|
7
|
+
post_address_mapper:
|
8
|
+
open_address: AtlasEngine::Us::AddressImporter::OpenAddress::Mapper
|
9
|
+
validation:
|
10
|
+
enabled: true
|
11
|
+
default_matching_strategy: es_street
|
12
|
+
address_parser: AtlasEngine::ValidationTranscriber::AddressParserNorthAmerica
|
13
|
+
open_address:
|
14
|
+
filter: AtlasEngine::Us::AddressImporter::OpenAddress::Filter
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# typed: false
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require "zip"
|
5
|
+
|
6
|
+
module AtlasEngine
|
7
|
+
module Us
|
8
|
+
module Jobs
|
9
|
+
module AddressImporter
|
10
|
+
class CombinedImportJob < ApplicationJob
|
11
|
+
include ::AtlasEngine::AddressImporter::ImportLogHelper
|
12
|
+
discard_on Exception
|
13
|
+
rescue_from(ArgumentError) { raise }
|
14
|
+
|
15
|
+
COUNTRY_CODE = "US"
|
16
|
+
LOCALE = "en"
|
17
|
+
US_STATES = [
|
18
|
+
"AK",
|
19
|
+
"AL",
|
20
|
+
"AR",
|
21
|
+
"AZ",
|
22
|
+
"CA",
|
23
|
+
"CO",
|
24
|
+
"CT",
|
25
|
+
"DC",
|
26
|
+
"DE",
|
27
|
+
"FL",
|
28
|
+
"GA",
|
29
|
+
"HI",
|
30
|
+
"IA",
|
31
|
+
"ID",
|
32
|
+
"IL",
|
33
|
+
"IN",
|
34
|
+
"KS",
|
35
|
+
"KY",
|
36
|
+
"LA",
|
37
|
+
"MA",
|
38
|
+
"MD",
|
39
|
+
"ME",
|
40
|
+
"MI",
|
41
|
+
"MN",
|
42
|
+
"MO",
|
43
|
+
"MS",
|
44
|
+
"MT",
|
45
|
+
"NC",
|
46
|
+
"ND",
|
47
|
+
"NE",
|
48
|
+
"NH",
|
49
|
+
"NJ",
|
50
|
+
"NM",
|
51
|
+
"NV",
|
52
|
+
"NY",
|
53
|
+
"OH",
|
54
|
+
"OK",
|
55
|
+
"OR",
|
56
|
+
"PA",
|
57
|
+
"RI",
|
58
|
+
"SC",
|
59
|
+
"SD",
|
60
|
+
"TN",
|
61
|
+
"TX",
|
62
|
+
"UT",
|
63
|
+
"VA",
|
64
|
+
"VT",
|
65
|
+
"WA",
|
66
|
+
"WI",
|
67
|
+
"WV",
|
68
|
+
"WY",
|
69
|
+
]
|
70
|
+
|
71
|
+
def perform(geojson_directory:)
|
72
|
+
country_import = AtlasEngine::CountryImport.create!(country_code: COUNTRY_CODE)
|
73
|
+
country_import.start!
|
74
|
+
|
75
|
+
import_log_info(
|
76
|
+
country_import: country_import,
|
77
|
+
message: "Starting import for #{COUNTRY_CODE} from #{geojson_directory}",
|
78
|
+
notify: true,
|
79
|
+
)
|
80
|
+
|
81
|
+
jobs_to_run = job_list(geojson_directory, country_import)
|
82
|
+
first_job = jobs_to_run.shift
|
83
|
+
first_job[:job_name].perform_later(**first_job[:job_args].merge(followed_by: jobs_to_run))
|
84
|
+
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
def job_list(geojson_directory, country_import)
|
89
|
+
job_list = [
|
90
|
+
clear_records_job(country_import),
|
91
|
+
]
|
92
|
+
|
93
|
+
US_STATES.each do |us_state|
|
94
|
+
job_list <<
|
95
|
+
{
|
96
|
+
job_name: ::AtlasEngine::AddressImporter::OpenAddress::GeoJsonImportJob,
|
97
|
+
job_args: {
|
98
|
+
country_code: COUNTRY_CODE,
|
99
|
+
province_code: us_state.upcase,
|
100
|
+
locale: LOCALE,
|
101
|
+
geojson_file_path: File.join(geojson_directory, "us-#{us_state.downcase}-statewide.geojson.gz"),
|
102
|
+
country_import_id: country_import.id,
|
103
|
+
},
|
104
|
+
}
|
105
|
+
end
|
106
|
+
|
107
|
+
job_list
|
108
|
+
end
|
109
|
+
|
110
|
+
def clear_records_job(country_import)
|
111
|
+
{
|
112
|
+
job_name: AtlasEngine::AddressImporter::ClearRecordsJob,
|
113
|
+
job_args: { country_code: COUNTRY_CODE, country_import_id: country_import.id },
|
114
|
+
}
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -24,7 +24,7 @@ module AtlasEngine
|
|
24
24
|
extend T::Sig
|
25
25
|
include HandlesInterruption
|
26
26
|
include PreparesGeoJsonFile
|
27
|
-
attr_reader :geojson_path, :country_import, :country_code, :loader, :transformer
|
27
|
+
attr_reader :geojson_path, :country_import, :country_code, :province_code, :loader, :transformer
|
28
28
|
|
29
29
|
CHUNK_SIZE = 10_000
|
30
30
|
REPORT_STEP = 5
|
@@ -36,10 +36,11 @@ module AtlasEngine
|
|
36
36
|
def setup_and_download(&block)
|
37
37
|
@loader = Loader.new
|
38
38
|
@country_code = argument(:country_code)
|
39
|
+
@province_code = argument(:province_code)
|
39
40
|
@geojson_path = Pathname.new(argument(:geojson_file_path))
|
40
41
|
@locale = argument(:locale)&.downcase
|
41
42
|
@country_import = CountryImport.find(argument(:country_import_id))
|
42
|
-
@transformer = Transformer.new(country_import: country_import, locale: @locale)
|
43
|
+
@transformer = Transformer.new(country_import: country_import, province_code: @province_code, locale: @locale)
|
43
44
|
|
44
45
|
import_log_info(
|
45
46
|
country_import: country_import,
|
@@ -7,9 +7,10 @@ module AtlasEngine
|
|
7
7
|
class DefaultMapper
|
8
8
|
extend T::Sig
|
9
9
|
include FeatureHelper
|
10
|
-
sig { params(country_code: String, locale: T.nilable(String)).void }
|
11
|
-
def initialize(country_code:, locale: nil)
|
10
|
+
sig { params(country_code: String, province_code: T.nilable(String), locale: T.nilable(String)).void }
|
11
|
+
def initialize(country_code:, province_code: nil, locale: nil)
|
12
12
|
@country_code = country_code
|
13
|
+
@province_code = province_code
|
13
14
|
@locale = locale
|
14
15
|
end
|
15
16
|
|
@@ -27,7 +28,7 @@ module AtlasEngine
|
|
27
28
|
source_id: openaddress_source_id(feature),
|
28
29
|
locale: @locale,
|
29
30
|
country_code: @country_code,
|
30
|
-
province_code:
|
31
|
+
province_code: @province_code,
|
31
32
|
region1: region,
|
32
33
|
# Don't titleize. The sources have proper capitalization, and it's a problem for cities like
|
33
34
|
# 's-Graveland, which would get titleized to "'S Graveland" which is wrong.
|
@@ -8,11 +8,12 @@ module AtlasEngine
|
|
8
8
|
extend T::Sig
|
9
9
|
extend T::Helpers
|
10
10
|
|
11
|
-
def initialize(country_import:, locale: nil)
|
11
|
+
def initialize(country_import:, province_code: nil, locale: nil)
|
12
12
|
@country_code = country_import.country_code
|
13
13
|
@locale = locale
|
14
|
+
@province_code = province_code
|
14
15
|
@mapper = CountryProfile.for(@country_code).ingestion.post_address_mapper("open_address").new(
|
15
|
-
country_code: @country_code, locale: @locale,
|
16
|
+
country_code: @country_code, province_code: @province_code, locale: @locale,
|
16
17
|
)
|
17
18
|
@corrector = AddressImporter::Corrections::Corrector.new(country_code: @country_code, source: "open_address")
|
18
19
|
@validator = AddressImporter::Validation::Wrapper.new(
|
@@ -114,6 +114,8 @@ module AtlasEngine
|
|
114
114
|
local_concerns = {}
|
115
115
|
cache = Validators::Predicates::Cache.new(pipeline_address)
|
116
116
|
@predicate_pipeline.pipeline.each do |config|
|
117
|
+
break if local_concerns[:country].present?
|
118
|
+
|
117
119
|
local_concerns[config.field] = [] if local_concerns[config.field].nil?
|
118
120
|
next if local_concerns[config.field].present?
|
119
121
|
|
@@ -8,7 +8,7 @@ module AtlasEngine
|
|
8
8
|
class UnmatchedFieldConcernBuilder
|
9
9
|
extend T::Sig
|
10
10
|
include ConcernFormatter
|
11
|
-
attr_reader :address, :
|
11
|
+
attr_reader :address, :unmatched_component, :matched_components, :unmatched_field
|
12
12
|
|
13
13
|
COMPONENTS_TO_LABELS = {
|
14
14
|
zip: "ZIP",
|
@@ -31,9 +31,9 @@ module AtlasEngine
|
|
31
31
|
end
|
32
32
|
def initialize(unmatched_component, matched_components, address, unmatched_field = nil)
|
33
33
|
@address = address
|
34
|
-
@
|
34
|
+
@unmatched_component = unmatched_component
|
35
35
|
@matched_components = matched_components
|
36
|
-
@unmatched_field = unmatched_field
|
36
|
+
@unmatched_field = unmatched_field || unmatched_component
|
37
37
|
end
|
38
38
|
|
39
39
|
sig do
|
@@ -61,7 +61,7 @@ module AtlasEngine
|
|
61
61
|
|
62
62
|
sig { returns(Symbol) }
|
63
63
|
def code
|
64
|
-
"#{
|
64
|
+
"#{unmatched_component_name}_inconsistent".to_sym
|
65
65
|
end
|
66
66
|
|
67
67
|
sig { returns(T::Array[Symbol]) }
|
@@ -69,21 +69,14 @@ module AtlasEngine
|
|
69
69
|
[field_name]
|
70
70
|
end
|
71
71
|
|
72
|
-
sig { returns(T::Array[String]) }
|
73
|
-
def valid_address_component_values
|
74
|
-
matched_components.last(2).map do |component|
|
75
|
-
component == :province_code ? province_name : address[component]
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
72
|
sig { returns(Symbol) }
|
80
|
-
def
|
81
|
-
SHORTENED_COMPONENT_NAMES[
|
73
|
+
def unmatched_component_name
|
74
|
+
SHORTENED_COMPONENT_NAMES[unmatched_component] || unmatched_component
|
82
75
|
end
|
83
76
|
|
84
77
|
sig { returns(Symbol) }
|
85
78
|
def field_name
|
86
|
-
unmatched_field ||
|
79
|
+
SHORTENED_COMPONENT_NAMES[unmatched_field] || unmatched_field
|
87
80
|
end
|
88
81
|
end
|
89
82
|
end
|
@@ -9,7 +9,6 @@ module AtlasEngine
|
|
9
9
|
class Present < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
12
|
return if @cache.country.field(key: :city).autofill(locale: :en).present?
|
14
13
|
|
15
14
|
build_concern if @address.city.blank?
|
@@ -9,8 +9,6 @@ module AtlasEngine
|
|
9
9
|
class Valid < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return if @address.country_code.blank?
|
13
|
-
return unless @cache.country.country?
|
14
12
|
return if @address.phone.blank?
|
15
13
|
|
16
14
|
phone = Worldwide::Phone.new(number: @address.phone, country_code: @address.country_code)
|
@@ -9,7 +9,6 @@ module AtlasEngine
|
|
9
9
|
class Exists < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
12
|
return if address.province_code.present? ||
|
14
13
|
country_has_no_provinces ||
|
15
14
|
@cache.country.province_optional?
|
data/app/models/atlas_engine/address_validation/validators/predicates/province/valid_for_country.rb
CHANGED
@@ -9,7 +9,6 @@ module AtlasEngine
|
|
9
9
|
class ValidForCountry < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
12
|
return if address.province_code.blank?
|
14
13
|
return if @cache.country.zones.none?(&:province?)
|
15
14
|
return if @cache.country.hide_provinces_from_addresses
|
@@ -9,8 +9,6 @@ module AtlasEngine
|
|
9
9
|
class BuildingNumberInAddress1 < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
|
-
|
14
12
|
return unless @cache.country.building_number_required
|
15
13
|
return if @cache.country.building_number_may_be_in_address2
|
16
14
|
|
@@ -9,7 +9,6 @@ module AtlasEngine
|
|
9
9
|
class BuildingNumberInAddress1OrAddress2 < Predicate
|
10
10
|
sig { override.returns(T.nilable(Concern)) }
|
11
11
|
def evaluate
|
12
|
-
return unless @cache.country.country?
|
13
12
|
return unless @cache.country.building_number_required
|
14
13
|
return unless @cache.country.building_number_may_be_in_address2
|
15
14
|
return if contains_number?(T.must(@address.address1)) || contains_number?(@address.address2)
|
@@ -12,9 +12,6 @@ module Maintenance
|
|
12
12
|
# ISO3166 two-letter country code.
|
13
13
|
attribute :country_code, :string
|
14
14
|
validates :country_code, presence: true
|
15
|
-
# Filename to import. When running in staging or production, the worker expects to find
|
16
|
-
# this file in the relevant GCS bucket, configured in `config/storage/{environment}.yml`
|
17
|
-
# It must be placed under `openaddress/` with the same filename.
|
18
15
|
attribute :geojson_file_path, :string
|
19
16
|
attribute :locale, :string
|
20
17
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# typed: false
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Maintenance
|
5
|
+
module AtlasEngine
|
6
|
+
class UsGeoJsonDirectoryImportTask < MaintenanceTasks::Task
|
7
|
+
include ::AtlasEngine::HandlesBlob
|
8
|
+
|
9
|
+
no_collection
|
10
|
+
|
11
|
+
attribute :geojson_directory, :string
|
12
|
+
|
13
|
+
def process
|
14
|
+
::AtlasEngine::Us::Jobs::AddressImporter::CombinedImportJob.perform_later(
|
15
|
+
geojson_directory: geojson_directory.strip,
|
16
|
+
)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/atlas_engine/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: atlas_engine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shopify
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: annex_29
|
@@ -178,6 +178,20 @@ dependencies:
|
|
178
178
|
- - ">="
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: sprockets-rails
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - ">="
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - ">="
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0'
|
181
195
|
- !ruby/object:Gem::Dependency
|
182
196
|
name: state_machines-activerecord
|
183
197
|
requirement: !ruby/object:Gem::Requirement
|
@@ -305,9 +319,11 @@ files:
|
|
305
319
|
- app/countries/atlas_engine/gb/country_profile.yml
|
306
320
|
- app/countries/atlas_engine/gb/validation_transcriber/full_address_parser.rb
|
307
321
|
- app/countries/atlas_engine/gb/validation_transcriber/parsed_address.rb
|
322
|
+
- app/countries/atlas_engine/gg/address_importer/corrections/open_address/city_corrector.rb
|
308
323
|
- app/countries/atlas_engine/gg/address_validation/validators/full_address/exclusions/city.rb
|
309
324
|
- app/countries/atlas_engine/gg/address_validation/validators/full_address/restrictions/unsupported_city.rb
|
310
325
|
- app/countries/atlas_engine/gg/country_profile.yml
|
326
|
+
- app/countries/atlas_engine/gg/synonyms.yml
|
311
327
|
- app/countries/atlas_engine/gg/validation_transcriber/address_parser.rb
|
312
328
|
- app/countries/atlas_engine/ie/country_profile.yml
|
313
329
|
- app/countries/atlas_engine/it/address_importer/corrections/open_address/city_corrector.rb
|
@@ -362,7 +378,10 @@ files:
|
|
362
378
|
- app/countries/atlas_engine/si/validation_transcriber/address_parser.rb
|
363
379
|
- app/countries/atlas_engine/tt/address_importer/open_address/mapper.rb
|
364
380
|
- app/countries/atlas_engine/tt/country_profile.yml
|
381
|
+
- app/countries/atlas_engine/us/address_importer/open_address/filter.rb
|
382
|
+
- app/countries/atlas_engine/us/address_importer/open_address/mapper.rb
|
365
383
|
- app/countries/atlas_engine/us/country_profile.yml
|
384
|
+
- app/countries/atlas_engine/us/jobs/address_importer/combined_import_job.rb
|
366
385
|
- app/countries/atlas_engine/us/synonyms.yml
|
367
386
|
- app/graphql/atlas_engine/errors/locale_unsupported_error.rb
|
368
387
|
- app/graphql/atlas_engine/schema.graphql
|
@@ -546,6 +565,7 @@ files:
|
|
546
565
|
- app/models/atlas_engine/street.rb
|
547
566
|
- app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb
|
548
567
|
- app/tasks/maintenance/atlas_engine/geo_json_import_task.rb
|
568
|
+
- app/tasks/maintenance/atlas_engine/us_geo_json_directory_import_task.rb
|
549
569
|
- app/views/atlas_engine/connectivity/index.html.erb
|
550
570
|
- app/views/atlas_engine/country_imports/index.html.erb
|
551
571
|
- app/views/atlas_engine/country_imports/show.html.erb
|
@@ -595,7 +615,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
595
615
|
- !ruby/object:Gem::Version
|
596
616
|
version: '0'
|
597
617
|
requirements: []
|
598
|
-
rubygems_version: 3.5.
|
618
|
+
rubygems_version: 3.5.10
|
599
619
|
signing_key:
|
600
620
|
specification_version: 4
|
601
621
|
summary: Address Validation API
|