atlas_engine 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +498 -34
- data/app/countries/atlas_engine/be/country_profile.yml +2 -0
- data/app/countries/atlas_engine/be/validation_transcriber/address_parser.rb +84 -0
- data/app/countries/atlas_engine/bm/address_importer/corrections/open_address/city_alias_corrector.rb +12 -11
- data/app/countries/atlas_engine/bm/synonyms.yml +6 -0
- data/app/countries/atlas_engine/ch/country_profile.yml +2 -0
- data/app/countries/atlas_engine/ch/locales/de/country_profile.yml +0 -1
- data/app/countries/atlas_engine/ch/locales/fr/country_profile.yml +3 -0
- data/app/countries/atlas_engine/ch/locales/fr/validation_transcriber/address_parser.rb +29 -0
- data/app/countries/atlas_engine/cz/address_validation/es/query_builder.rb +43 -0
- data/app/countries/atlas_engine/cz/country_profile.yml +2 -1
- data/app/countries/atlas_engine/cz/validation_transcriber/address_parser.rb +26 -0
- data/app/countries/atlas_engine/it/address_importer/open_address/mapper.rb +1 -1
- data/app/countries/atlas_engine/it/country_profile.yml +1 -0
- data/app/countries/atlas_engine/sa/country_profile.yml +4 -1
- data/app/countries/atlas_engine/us/country_profile.yml +0 -2
- data/app/lib/atlas_engine/validation_transcriber/address_parsings.rb +1 -1
- data/app/lib/atlas_engine/validation_transcriber/formatter.rb +2 -2
- data/app/models/atlas_engine/address_validation/datastore_base.rb +3 -0
- data/app/models/atlas_engine/address_validation/es/datastore.rb +11 -6
- data/app/models/atlas_engine/address_validation/es/query_builder.rb +40 -29
- data/app/models/atlas_engine/address_validation/es/validators/full_address.rb +1 -1
- data/app/models/atlas_engine/address_validation/log_emitter.rb +1 -0
- data/app/models/atlas_engine/address_validation/normalizer.rb +0 -9
- data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +7 -23
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +42 -16
- data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +109 -109
- data/app/models/atlas_engine/address_validation/validators/full_address/{components_to_validate.rb → relevant_components.rb} +26 -18
- data/app/models/atlas_engine/country_profile_validation_subset.rb +5 -0
- data/app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb +1 -1
- data/db/data/country_profiles/default.yml +0 -2
- data/lib/atlas_engine/version.rb +1 -1
- metadata +15 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4f78501b09c1f8bfeae5d8a5be2ff3b83d7f68f689e66294f704f3c3bcf5830b
+  data.tar.gz: d1100e782d95c89565be662da85ba69788fa457bbefcd11ccd8082c356086df2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7d2b8abdf8ae247fcb95b10a0e7f301deea99ee2505f8817c85a8bf10459e3cdedfd7a631738071cf018db7545ca0b2506ec518f31c1e5ba1f507a49915d2f25
+  data.tar.gz: d7b2177df1ca73d45a37993dfdea005d92fc3705ef422c0a1b7e90285825880f4d82f7ae3fff47d8f425abf3a2865636ec5bd8e546a2e60c113de19aeeba8df0
data/README.md
CHANGED
@@ -1,15 +1,167 @@
 # Atlas Engine
 
-
+Atlas Engine is a rails engine that provides a global end-to-end address validation API for rails apps.
+
+The validation API is powered by GraphQL; an example request, with an explanation of the parameters and response, follows:
+
+```graphql
+query validation {
+  validation(
+    address: {
+      address1: "151 O'Connor St"
+      address2: ""
+      city: "Ottawa"
+      provinceCode: "ON"
+      countryCode: CA
+      zip: "K2P 2L8"
+    }
+    locale: "en"
+    matchingStrategy: LOCAL
+  ) {
+    validationScope
+    concerns {
+      code
+      fieldNames
+      suggestionIds
+      message
+    }
+    suggestions {
+      address1
+      address2
+      city
+      provinceCode
+      zip
+    }
+  }
+}
+```
+
+Response:
+
+```json
+{
+  "data": {
+    "validation": {
+      "validationScope": [
+        "country_code",
+        "province_code",
+        "zip",
+        "city",
+        "address1"
+      ],
+      "concerns": [],
+      "suggestions": []
+    }
+  }
+}
+```
+
+**Address:** The raw input for each address line that is to be validated. Requirements for each field's format and even
+presence or absence differ per country.
+
+**Locale:** The language in which to render any messages in the validation API response.
+
+**MatchingStrategy:** The strategy used to evaluate the validity of the address input. Out of the box, Atlas Engine
+supports three different matching strategies: `local`, `es`, and `es_street`.
+- `local` matching uses the [worldwide](https://github.com/Shopify/worldwide) gem to provide the most basic level of
+address validation. This may include simple errors (required fields not populated) or more advanced errors (province
+not belonging to the country, zip code not belonging to the province). This level of matching does not require
+[ingestion](#ingestion) of country data to work, but the level of support and suggestions it can provide in its
+responses is minimal.
+- `es` matching uses data indexed in elasticsearch via our [ingestion](#ingestion) process to validate the city,
+province, country, and zip code fields of the input address, in addition to all of the basic functionality provided
+in the `local` strategy. A more detailed explanation of how this strategy works can be found [here](#elasticsearch-matching-strategy).
+- `es_street` is our most advanced matching strategy and requires the highest quality data indexed in elasticsearch
+via our [ingestion](#ingestion) process. This matching strategy provides everything that `es` and `local` do, along
+with validation of the address1 and address2 components of the address input. A more detailed explanation of how
+this strategy works can be found [here](#elasticsearch-matching-strategy).
+
+**Validation Scope:** This response object is populated with the field names from the input that have been successfully
+validated.
+
+**Concerns:** This response object is populated with a code if there is a validation error with the input address.
+A concern may also include a suggestion to fix the issue.
+
+**Suggestions:** This response object provides the corrected value for a field that has a concern, if available.
+
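The request/response pair documented above is a standard GraphQL-over-HTTP exchange, so it can also be exercised without GraphiQL. A minimal Ruby sketch follows, assuming the engine's GraphQL endpoint is exposed at `/graphql` on a locally running app — that path is an assumption, so adjust it to wherever the endpoint is mounted in your application:

```ruby
require "net/http"
require "json"

# Assumed mount point of the GraphQL endpoint; adjust to your app's routes.
endpoint = URI("http://localhost:3000/graphql")

query = <<~GRAPHQL
  query validation {
    validation(
      address: {
        address1: "151 O'Connor St"
        address2: ""
        city: "Ottawa"
        provinceCode: "ON"
        countryCode: CA
        zip: "K2P 2L8"
      }
      locale: "en"
      matchingStrategy: LOCAL
    ) {
      validationScope
      concerns { code fieldNames suggestionIds message }
      suggestions { address1 address2 city provinceCode zip }
    }
  }
GRAPHQL

response = Net::HTTP.post(endpoint, { query: query }.to_json, "Content-Type" => "application/json")
puts JSON.pretty_generate(JSON.parse(response.body))
```

A response with empty `concerns` and `suggestions` arrays, as in the example above, means the input address validated cleanly.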
+### Example request with a concern:
+
+Navigate to http://localhost:3000/graphiql and initiate the following request. Note the invalid zip field.
+
+```graphql
+query validation {
+  validation(
+    address: {
+      address1: "151 O'Connor St"
+      address2: ""
+      city: "Ottawa"
+      provinceCode: "ON"
+      countryCode: CA
+      zip: "90210"
+    }
+    locale: "en"
+    matchingStrategy: LOCAL
+  ) {
+    validationScope
+    concerns {
+      code
+      fieldNames
+      suggestionIds
+      message
+    }
+    suggestions {
+      address1
+      address2
+      city
+      provinceCode
+      zip
+    }
+  }
+}
+```
+
+Response:
+
+```json
+{
+  "data": {
+    "validation": {
+      "validationScope": [
+        "country_code",
+        "province_code",
+        "city",
+        "address1"
+      ],
+      "concerns": [
+        {
+          "code": "zip_invalid_for_province",
+          "fieldNames": [
+            "zip",
+            "country",
+            "province"
+          ],
+          "suggestionIds": [],
+          "message": "Enter a valid postal code for Ontario"
+        }
+      ],
+      "suggestions": []
+    }
+  }
+}
+```
+
+The concerns object contains a concern code `zip_invalid_for_province` to highlight the validation error of `90210`
+being an invalid zip code for the province `ON`. It also returns the human readable message
+`"Enter a valid postal code for Ontario"` in the provided language `en`.
 
-
+The validation scope excludes zip because the zip was not successfully validated.
 
-
+## Installation of Atlas Engine in your rails app
 
-
+### Initial setup
 Add the engine to your gemfile
 ```
-gem "atlas_engine"
+gem "atlas_engine"
 ```
 
 Run the following commands to install the engine in your rails app
@@ -19,7 +171,7 @@ bundle lock
 bin/rails generate atlas_engine:install
 ```
 
-
+### Updating to a newer version of the engine
 
 Working with migrations
 ```
@@ -30,9 +182,11 @@ rails atlas_engine:install:migrations
 rails db:migrate
 ```
 
-
+## Setup Atlas Engine for contribution / local development
 
-
+This setup guide is based on a macOS development environment. Your tooling may vary.
+
+### Install + Setup Docker
 
 ```
 brew install docker
@@ -46,62 +200,61 @@ colima start --cpu 4 --memory 8
 colima ssh
 sudo sysctl -w vm.max_map_count=262144
 exit
+```
+
+Verify docker is running with: `docker info`
+
+### Clone the atlas_engine git repository
 
 ```
+git clone https://github.com/Shopify/atlas-engine.git
+```
+
+### Setup Ruby and Rails
 
-
+Install ruby >= 3.2.1
 
-
+In the newly cloned repository directory run:
 
 ```
 bundle install
 
-# If you get an ssl error
+# *Note* If you get an ssl error during the puma installation run the following command:
 bundle config build.puma --with-pkg-config=$(brew --prefix openssl@3)/lib/pkgconfig
 ```
 
-
+### Set up Dockerized Elasticsearch and MySQL
 
+In a separate terminal, from the cloned atlas_engine directory run:
 ```
-bash setup
 docker-compose up
 
-# If you encounter an error getting docker credentials, remove or update the `credsStore`
+# *Note* If you encounter an error getting docker credentials, remove or update the `credsStore`
 key in your Docker configuration file:
 
 # ~/.docker/config.json
 "credsStore": "desktop", # remove this line
 ```
 
-
-
-
-
-_note: if you have updated any of the ports in your .env file then use those ports instead_
+Verify your connection to the newly created Docker services with the following commands:
+- MySQL : `mysql --host=127.0.0.1 --user=root`
+- Elasticsearch : `curl http://localhost:9200`
 
-
-#### Setting up db
+### Setup the local db
 ```
 rails db:setup
 ```
 
-#### Setting up maintenance tasks
-After locally setting up Atlas Engine:
-```
-rails app:maintenance_tasks:install:migrations
-rails db:migrate
-```
-
-## Using the App
-
 ### Infrastructure Requirements
-The elasticsearch implementation depends on the ICU analysis plugin. Refer to the [
+The elasticsearch implementation depends on the ICU analysis plugin. Refer to the [Dockerfile](./docker/elasticsearch/Dockerfile) leveraged in local setup for plugin installation.
 
-### Starting the App
+### Starting the App / Running Tests
 * `bin/rails server` to start the server
 * `bin/rails test` to run tests
+* `bundle exec rubocop` to run ruby style checks
+* `srb tc` to run sorbet typechecks
 
-###
+### Sorbet
 
 Generate rbi files for custom code
 ```
@@ -117,7 +270,318 @@ bin/tapioca gems
 bin/tapioca gems --all
 ```
 
-
+Run sorbet check
 ```
 srb tc
 ```
+
+## Ingestion
+
+In order to power the more advanced validation matching strategies that provide city / state / zip and even street
+level address validation, your app must have a populated elasticsearch index per country available for `atlas_engine`
+to query.
+
+The data we use to power atlas engine validation is free open source data from the [open addresses](https://openaddresses.io/)
+project. The following guide demonstrates how to ingest data with the dummy app, but the process is the same with
+the engine mounted into your own rails app.
+
+1. Go to the [open addresses](https://openaddresses.io/) download center, create an account, support the project, and
+download a GeoJSON+LD file for the country or region you wish to validate. For this example, we will be using the
+countrywide addresses data for Australia.
+
+2. Once the file is downloaded, start your app with `rails s` and navigate to `http://localhost:3000/maintenance_tasks`
+(see [the github repo](https://github.com/Shopify/maintenance_tasks) for more information about maintenance_tasks).
+There are two tasks available: `Maintenance::AtlasEngine::GeoJsonImportTask` and `Maintenance::AtlasEngine::ElasticsearchIndexCreateTask`. We will be using both in the ingestion process.
+
+3. Navigate to the `Maintenance::AtlasEngine::GeoJsonImportTask`. This task will transform the raw geo json file into
+records in our mysql database and has the following parameters:
+
+- **clear_records:** If checked, removes any existing records for the country in the database.
+
+- **country_code: (required)** The ISO country code of the data we are ingesting.
+In this example, the country code of Australia is `AU`.
+
+- **geojson_file_path: (required)** The fully qualified path of the previously downloaded geojson data from open addresses.
+
+- **locale: (optional)** The language of the data in the open addresses file.
+
+4. Once properly parameterized, click run. The process will initialize a `country_import` and should succeed immediately.
+
+5. Navigate to `http://localhost:3000/country_imports` to track the progress of the country import. Click the import id
+link for a more detailed view. Once the import status has updated from `in_progress` to `complete` we will have all of
+the raw open address data imported into our mysql database's `atlas_engine_post_addresses` table.
+
+6. Navigate back to `http://localhost:3000/maintenance_tasks` and click on the `Maintenance::AtlasEngine::ElasticsearchIndexCreateTask`. This task will ingest the data we have staged in mysql
+and use it to create documents in a new elasticsearch index which Atlas Engine will ultimately use for validation.
+
+7. The `ElasticsearchIndexCreateTask` includes the following parameters:
+
+- **country_code: (required)** the ISO country code of the data we are ingesting and the name of the elasticsearch index we
+will be creating. In this example, the country code of Australia is `AU`.
+
+- **locale: (optional)** the language of the documents we will be creating. This is required for multi-locale countries
+as our indexes are separated by language.
+
+- **province_codes: (optional)** an allow list of province codes to create documents for. If left blank the task will create
+documents for the entire dataset.
+
+- **shard_override: (optional)** the number of shards to create this index with. If left blank the default will be used.
+
+- **replica_override: (optional)** the number of replicas to create this index with. If left blank the default will be used.
+
+- **activate_index: (optional)** if checked, immediately promote this index to be the index queried by atlas engine.
+If unchecked, the created index will need to be activated manually.
+
+8. Once properly parameterized, click run. The maintenance task UI will track the progress of the index creation.
+
+9. When completed, the index documents may be verified manually with an elasticsearch client.
+We may now use the `es` and `es_street` matching strategies with `AU` addresses. See [below](#elasticsearch-matching-strategy)
+for an example of its usage.
+
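The verification in step 9 can be done against Elasticsearch's standard HTTP API. A minimal Ruby sketch, assuming the local Docker setup described earlier; the index name created by the task follows the engine's own convention, so read it off the `_cat/indices` output rather than guessing it:

```ruby
require "net/http"
require "json"

es = "http://localhost:9200"

# List all indices; the index created for the ingested country (AU in this
# example) should appear here once the maintenance task has completed.
puts Net::HTTP.get(URI("#{es}/_cat/indices?v"))

# Pull a single document out of the new index to eyeball the indexed fields.
# Replace INDEX_NAME with the index name shown by the call above.
body = Net::HTTP.get(URI("#{es}/INDEX_NAME/_search?size=1"))
puts JSON.pretty_generate(JSON.parse(body)["hits"])
```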
+## Elasticsearch Matching Strategy
+
+Once we have successfully created and activated an elasticsearch index using open address data, we may now use
+the more advanced elasticsearch matching strategies `es` and `es_street`.
+
+Consider the following example of an invalid `AU` address:
+
+```graphql
+query validation {
+  validation(
+    address: {
+      address1: "100 miller st"
+      address2: ""
+      city: "sydney"
+      provinceCode: "NSW"
+      countryCode: AU
+      zip: "2060"
+    }
+    locale: "en"
+    matchingStrategy: ES
+  ) {
+    validationScope
+    concerns {
+      code
+      fieldNames
+      suggestionIds
+      message
+    }
+    suggestions {
+      address1
+      address2
+      city
+      provinceCode
+      zip
+    }
+  }
+}
+```
+
+When input into `http://localhost:3000/graphiql`, this query should produce the following response:
+
+```json
+{
+  "data": {
+    "validation": {
+      "candidate": ",NSW,,,,2060,[North Sydney],,Miller Street",
+      "validationScope": [
+        "country_code",
+        "province_code",
+        "zip",
+        "city",
+        "address1"
+      ],
+      "concerns": [
+        {
+          "code": "city_inconsistent",
+          "typeLevel": 3,
+          "fieldNames": [
+            "city"
+          ],
+          "suggestionIds": [
+            "665ffd09-75b8-584d-8e4a-a0f471bfea01"
+          ],
+          "message": "Enter a valid city for New South Wales, 2060"
+        }
+      ],
+      "suggestions": [
+        {
+          "id": "665ffd09-75b8-584d-8e4a-a0f471bfea01",
+          "address1": null,
+          "address2": null,
+          "city": "North Sydney",
+          "province": null,
+          "provinceCode": null,
+          "zip": null
+        }
+      ]
+    }
+  }
+}
+```
+
+The concerns object contains a concern code `city_inconsistent` to highlight the validation error of `sydney`
+being an incorrect city for the rest of the provided address. The concern message field is the human readable
+error nudge `"Enter a valid city for New South Wales, 2060"`, pointing to the supporting pieces of evidence (province
+and zip) that were used to determine city as the inconsistent value in this address input.
+
+The suggestion object contains a corrected city field `North Sydney` which will result in no concerns or suggestions
+for the validation endpoint if applied.
+
+The candidate field contains a representation of the matching document in the elasticsearch index that was found and
+used to determine the suggestions and concerns in the api response.
+
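Applying a suggestion amounts to overlaying its non-null fields onto the original input and resubmitting the query. A minimal Ruby sketch of that merge, using the field names from the response above (the merge itself is illustrative client-side code, not an API provided by the gem):

```ruby
address = {
  address1: "100 miller st",
  address2: "",
  city: "sydney",
  provinceCode: "NSW",
  countryCode: "AU",
  zip: "2060",
}

# The single entry of the "suggestions" array above, nil fields included.
suggestion = {
  address1: nil, address2: nil, city: "North Sydney",
  provinceCode: nil, zip: nil,
}

# Keep only the fields the suggestion actually corrects, then overlay them.
corrected = address.merge(suggestion.compact)
# => { ..., city: "North Sydney", ... }
# Resubmitting `corrected` yields a response with no concerns or suggestions.
```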
+The `es_street` level of validation can also be used to correct errors in the `address1` or `address2` fields of the
+input. In the following request we have modified our query to make a second error in our input - searching for
+`miller ave` instead of `miller st`.
+
+```graphql
+query validation {
+  validation(
+    address: {
+      address1: "100 miller ave"
+      address2: ""
+      city: "sydney"
+      provinceCode: "NSW"
+      countryCode: AU
+      zip: "2060"
+    }
+    locale: "en"
+    matchingStrategy: ES_STREET
+  ) {
+    validationScope
+    concerns {
+      code
+      fieldNames
+      suggestionIds
+      message
+    }
+    suggestions {
+      address1
+      address2
+      city
+      provinceCode
+      zip
+    }
+  }
+}
+```
+
+This query produces the following response:
+
+```json
+{
+  "data": {
+    "validation": {
+      "candidate": ",NSW,,,,2060,[North Sydney],,Miller Street",
+      "validationScope": [
+        "country_code",
+        "province_code",
+        "zip",
+        "city",
+        "address1"
+      ],
+      "concerns": [
+        {
+          "code": "city_inconsistent",
+          "typeLevel": 3,
+          "fieldNames": [
+            "city"
+          ],
+          "suggestionIds": [
+            "88779db6-2c5d-5dbb-9f77-f7b07c07206a"
+          ],
+          "message": "Enter a valid city for New South Wales, 2060"
+        },
+        {
+          "code": "street_inconsistent",
+          "typeLevel": 3,
+          "fieldNames": [
+            "address1"
+          ],
+          "suggestionIds": [
+            "88779db6-2c5d-5dbb-9f77-f7b07c07206a"
+          ],
+          "message": "Enter a valid street name for New South Wales, 2060"
+        }
+      ],
+      "suggestions": [
+        {
+          "id": "88779db6-2c5d-5dbb-9f77-f7b07c07206a",
+          "address1": "100 Miller Street",
+          "address2": null,
+          "city": "North Sydney",
+          "province": null,
+          "provinceCode": null,
+          "zip": null
+        }
+      ]
+    }
+  }
+}
+```
+
+The concerns object now contains an additional concern code `street_inconsistent` to highlight the validation error of
+`miller ave` being an incorrect street for the rest of the address input. The concern message field is the human
+readable error nudge `"Enter a valid street name for New South Wales, 2060"`, pointing to the supporting pieces of
+evidence (province and zip) that were used to determine street as an inconsistent value in this address input.
+
+The suggestion object contains a corrected street field `100 Miller Street` and a corrected city field `North Sydney`.
+If both of these suggestions are applied to the input address the subsequent request will be valid.
+
+The corrected input of
+
+```graphql
+query validation {
+  validation(
+    address: {
+      address1: "100 miller st"
+      address2: ""
+      city: "north sydney"
+      provinceCode: "NSW"
+      countryCode: AU
+      zip: "2060"
+    }
+    locale: "en"
+    matchingStrategy: ES_STREET
+  ) {
+    validationScope
+    concerns {
+      code
+      fieldNames
+      suggestionIds
+      message
+    }
+    suggestions {
+      address1
+      address2
+      city
+      provinceCode
+      zip
+    }
+  }
+}
+```
+
+will produce the response:
+
+```json
+{
+  "data": {
+    "validation": {
+      "candidate": ",NSW,,,,2060,[North Sydney],,Miller Street",
+      "validationScope": [
+        "country_code",
+        "province_code",
+        "zip",
+        "city",
+        "address1"
+      ],
+      "concerns": [],
+      "suggestions": []
+    }
+  }
+}
+```
+
+This response has no concerns or suggestions, and the input address is therefore considered to be valid.
data/app/countries/atlas_engine/be/validation_transcriber/address_parser.rb
ADDED
@@ -0,0 +1,84 @@
+# typed: true
+# frozen_string_literal: true
+
+module AtlasEngine
+  module Be
+    module ValidationTranscriber
+      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
+        private
+
+        STREET = "(?<street>.+)"
+        NUMBERED_STREET = "(?<street>.+\s+[0-9]+)"
+        BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
+        UNIT_NUM = "(?<unit_num>[[:alnum:]]+)"
+        PO_BOX = /\b(?<box_type>pb|box|bte|bus|boîte|boite|postbus|antwoordnummer)\s+(?<number>\d+)\b/i
+        STREET_SUFFIXES = %r{
+          \A(
+            dwarsstraat|dwstr|dwarsweg|dwwg|dijk|dk|gracht|gr|kade|kd|kanaal|kan
+            |laan|leane|loane|ln|park|pk|plantsoen|plnts|plein|pln|singel|sngl
+            |straat|strjitte|str|straatweg|strwg|weg|wg
+          )\z
+        }ix
+
+        sig { returns(T::Array[Regexp]) }
+        def country_regex_formats
+          @country_regex_formats ||= [
+            /^#{STREET},?\s+#{BUILDING_NUM}/,
+            /^#{BUILDING_NUM}\s?(,\s?)?#{STREET}/,
+            /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}$/,
+            %r{^#{STREET},?\s+#{BUILDING_NUM}[\s,-/]+#{UNIT_NUM}$},
+            /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
+          ]
+        end
+
+        sig { override.params(address_line: String).returns(T::Array[T.nilable(String)]) }
+        def extract_po_box(address_line)
+          po_box_match = address_line.match(PO_BOX)
+
+          if po_box_match
+            po_box = po_box_match["number"]
+            address_line = address_line.gsub(PO_BOX, "").strip.delete_suffix(",")
+          else
+            po_box = nil
+          end
+
+          [address_line, po_box]
+        end
+
+        # Return true if something's obviously wrong with this regex match
+        sig do
+          override.params(
+            captures: T::Hash[Symbol, T.nilable(String)],
+            address: AddressValidation::TAddress,
+          ).returns(T::Boolean)
+        end
+        def ridiculous?(captures, address)
+          building_num = captures[:building_num]&.downcase
+          street = captures[:street]&.downcase
+          unit_num = captures[:unit_num]&.downcase
+
+          if street.present?
+            return true unless address.address1&.upcase&.include?(street.upcase) ||
+              address.address2&.upcase&.include?(street.upcase)
+          end
+
+          [building_num, unit_num].any? do |token|
+            po_box?(token) || street_suffix?(token)
+          end
+        end
+
+        sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
+        def po_box?(token)
+          return false if token.blank?
+
+          token.match?(PO_BOX)
+        end
+
+        sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
+        def street_suffix?(token)
+          token.present? && token.match?(STREET_SUFFIXES)
+        end
+      end
+    end
+  end
+end
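To make the new Belgian parsing rules concrete, here is a short, self-contained Ruby illustration (an editor's sketch, not part of the gem) that exercises the first `country_regex_formats` entry and the `PO_BOX` pattern from the class above; the sample address lines are hypothetical:

```ruby
# Constants copied verbatim from the AddressParser above.
STREET = "(?<street>.+)"
BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
PO_BOX = /\b(?<box_type>pb|box|bte|bus|boîte|boite|postbus|antwoordnummer)\s+(?<number>\d+)\b/i

# First format: "<street> <building number>", comma optional.
format = /^#{STREET},?\s+#{BUILDING_NUM}/

match = format.match("Wetstraat 16A")
match[:street]       # => "Wetstraat"
match[:building_num] # => "16A"

# The PO box pattern pulls the box number out of lines like "Grote Markt 7 bus 3".
"Grote Markt 7 bus 3".match(PO_BOX)[:number] # => "3"
```

The `ridiculous?` check above then discards matches whose captured building or unit number is itself a PO box token or a street-suffix abbreviation, or whose captured street does not actually appear in the input address lines.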