atlas_engine 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +498 -34
  3. data/app/countries/atlas_engine/be/country_profile.yml +2 -0
  4. data/app/countries/atlas_engine/be/validation_transcriber/address_parser.rb +84 -0
  5. data/app/countries/atlas_engine/bm/address_importer/corrections/open_address/city_alias_corrector.rb +12 -11
  6. data/app/countries/atlas_engine/bm/synonyms.yml +6 -0
  7. data/app/countries/atlas_engine/ch/country_profile.yml +2 -0
  8. data/app/countries/atlas_engine/ch/locales/de/country_profile.yml +0 -1
  9. data/app/countries/atlas_engine/ch/locales/fr/country_profile.yml +3 -0
  10. data/app/countries/atlas_engine/ch/locales/fr/validation_transcriber/address_parser.rb +29 -0
  11. data/app/countries/atlas_engine/cz/address_validation/es/query_builder.rb +43 -0
  12. data/app/countries/atlas_engine/cz/country_profile.yml +2 -1
  13. data/app/countries/atlas_engine/cz/validation_transcriber/address_parser.rb +26 -0
  14. data/app/countries/atlas_engine/it/address_importer/open_address/mapper.rb +1 -1
  15. data/app/countries/atlas_engine/it/country_profile.yml +1 -0
  16. data/app/countries/atlas_engine/sa/country_profile.yml +4 -1
  17. data/app/countries/atlas_engine/us/country_profile.yml +0 -2
  18. data/app/lib/atlas_engine/validation_transcriber/address_parsings.rb +1 -1
  19. data/app/lib/atlas_engine/validation_transcriber/formatter.rb +2 -2
  20. data/app/models/atlas_engine/address_validation/datastore_base.rb +3 -0
  21. data/app/models/atlas_engine/address_validation/es/datastore.rb +11 -6
  22. data/app/models/atlas_engine/address_validation/es/query_builder.rb +40 -29
  23. data/app/models/atlas_engine/address_validation/es/validators/full_address.rb +1 -1
  24. data/app/models/atlas_engine/address_validation/log_emitter.rb +1 -0
  25. data/app/models/atlas_engine/address_validation/normalizer.rb +0 -9
  26. data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +7 -23
  27. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +42 -16
  28. data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +109 -109
  29. data/app/models/atlas_engine/address_validation/validators/full_address/{components_to_validate.rb → relevant_components.rb} +26 -18
  30. data/app/models/atlas_engine/country_profile_validation_subset.rb +5 -0
  31. data/app/tasks/maintenance/atlas_engine/elasticsearch_index_create_task.rb +1 -1
  32. data/db/data/country_profiles/default.yml +0 -2
  33. data/lib/atlas_engine/version.rb +1 -1
  34. metadata +15 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e069dc1376e45c54b9a96462e06599a0081aceef2a4ab4f79f1d56356c5f2f9e
4
- data.tar.gz: baf0c1fe4a6e19df64cfd5052079bba2db8884ffbdd8f39245afad00a01fc902
3
+ metadata.gz: 4f78501b09c1f8bfeae5d8a5be2ff3b83d7f68f689e66294f704f3c3bcf5830b
4
+ data.tar.gz: d1100e782d95c89565be662da85ba69788fa457bbefcd11ccd8082c356086df2
5
5
  SHA512:
6
- metadata.gz: 860af5abf6f9f79c5fedd2b96d33cab18c216b1583cce2fb2d56c9d9ca17e07278a84d060bf95cbc3084105a9c7807544bc394200b21fb34095c1e5b1b9dca5b
7
- data.tar.gz: 671307d5dd17866af504d84015d38a6d8d94b8ddb175c5324b3404854462ee90335597c88c775ca6dc33b189b080a720742614546ccecf2cd7b5d0d6d0ba35a3
6
+ metadata.gz: 7d2b8abdf8ae247fcb95b10a0e7f301deea99ee2505f8817c85a8bf10459e3cdedfd7a631738071cf018db7545ca0b2506ec518f31c1e5ba1f507a49915d2f25
7
+ data.tar.gz: d7b2177df1ca73d45a37993dfdea005d92fc3705ef422c0a1b7e90285825880f4d82f7ae3fff47d8f425abf3a2865636ec5bd8e546a2e60c113de19aeeba8df0
data/README.md CHANGED
@@ -1,15 +1,167 @@
1
1
  # Atlas Engine
2
2
 
3
- This is a rails engine that is meant to provide end-to-end address validation for rails apps
3
+ Atlas Engine is a rails engine that provides a global end-to-end address validation API for rails apps.
4
+
5
+ The validation API is powered by GraphQL. An example request and an explanation of the parameters and response follow:
6
+
7
+ ```graphql
8
+ query validation {
9
+ validation(
10
+ address: {
11
+ address1: "151 O'Connor St"
12
+ address2: ""
13
+ city: "Ottawa"
14
+ provinceCode: "ON"
15
+ countryCode: CA
16
+ zip: "K2P 2L8"
17
+ }
18
+ locale: "en"
19
+ matchingStrategy: LOCAL
20
+ ) {
21
+ validationScope
22
+ concerns {
23
+ code
24
+ fieldNames
25
+ suggestionIds
26
+ message
27
+ }
28
+ suggestions {
29
+ address1
30
+ address2
31
+ city
32
+ provinceCode
33
+ zip
34
+ }
35
+ }
36
+ }
37
+ ```
38
+
39
+ Response:
40
+
41
+ ```json
42
+ {
43
+ "data": {
44
+ "validation": {
45
+ "validationScope": [
46
+ "country_code",
47
+ "province_code",
48
+ "zip",
49
+ "city",
50
+ "address1"
51
+ ],
52
+ "concerns": [],
53
+ "suggestions": []
54
+ }
55
+ }
56
+ }
57
+ ```
58
+
59
+ **Address:** The raw input for each address line that is to be validated. Requirements for each field's format and even
60
+ presence or absence differ per country.
61
+
62
+ **Locale:** The language in which to render any messages in the validation API response.
63
+
64
+ **MatchingStrategy:** The strategy used to evaluate the validity of the address input. Out of the box, Atlas Engine
65
+ supports three different matching strategies: `local`, `es`, and `es_street`.
66
+ - `local` matching uses the [worldwide](https://github.com/Shopify/worldwide) gem to provide the most basic level of
67
+ address validation. This may include simple errors (required fields not populated) or more advanced errors (province
68
+ not belonging to the country, zip code not belonging to the province). This level of matching does not require
69
+ [ingestion](#ingestion) of country data to work, but the level of support and suggestions it can provide in its
70
+ responses is minimal.
71
+ - `es` matching uses data indexed in elasticsearch via our [ingestion](#ingestion) process to validate the city,
72
+ province, country, and zip code fields of the input address, in addition to all of the basic functionality provided
73
+ in the `local` strategy. A more detailed explanation for how this strategy works can be found [here](#elasticsearch-matching-strategy).
74
+ - `es_street` is our most advanced matching strategy and requires the highest quality data indexed in elasticsearch
75
+ via our [ingestion](#ingestion) process. This matching strategy provides everything that `es` and `local` do along
76
+ with validation of the address1 and address2 components of the address input. A more detailed explanation of how
77
+ this strategy works can be found [here](#elasticsearch-matching-strategy).
78
+
79
+ **Validation Scope:** This response object is populated with the field names from the input that have been successfully
80
+ validated.
81
+
82
+ **Concerns:** This response object is populated with a code if there is a validation error with the input address.
83
+ A concern may also include a suggestion to fix the issue.
84
+
85
+ **Suggestions:** This response object provides the corrected value for a field that has a concern if available.
86
+
87
+ ### Example request with a concern:
88
+
89
+ Navigate to http://localhost:3000/graphiql and initiate the following request. Note the invalid zip field.
90
+
91
+ ```graphql
92
+ query validation {
93
+ validation(
94
+ address: {
95
+ address1: "151 O'Connor St"
96
+ address2: ""
97
+ city: "Ottawa"
98
+ provinceCode: "ON"
99
+ countryCode: CA
100
+ zip: "90210"
101
+ }
102
+ locale: "en"
103
+ matchingStrategy: LOCAL
104
+ ) {
105
+ validationScope
106
+ concerns {
107
+ code
108
+ fieldNames
109
+ suggestionIds
110
+ message
111
+ }
112
+ suggestions {
113
+ address1
114
+ address2
115
+ city
116
+ provinceCode
117
+ zip
118
+ }
119
+ }
120
+ }
121
+ ```
122
+
123
+ Response:
124
+
125
+ ```json
126
+ {
127
+ "data": {
128
+ "validation": {
129
+ "validationScope": [
130
+ "country_code",
131
+ "province_code",
132
+ "city",
133
+ "address1"
134
+ ],
135
+ "concerns": [
136
+ {
137
+ "code": "zip_invalid_for_province",
138
+ "fieldNames": [
139
+ "zip",
140
+ "country",
141
+ "province"
142
+ ],
143
+ "suggestionIds": [],
144
+ "message": "Enter a valid postal code for Ontario"
145
+ }
146
+ ],
147
+ "suggestions": []
148
+ }
149
+ }
150
+ }
151
+ ```
152
+
153
+ The concerns object contains a concern code `zip_invalid_for_province` to highlight the validation error of `90210`
154
+ being an invalid zip code for the province `ON`. It also returns the human readable message
155
+ `"Enter a valid postal code for Ontario"` in the provided language `en`.
4
156
 
5
- ## Local Setup
157
+ The validation scope excludes zip because the zip was not successfully validated.
6
158
 
7
- ### In your rails app
159
+ ## Installation of Atlas Engine in your rails app
8
160
 
9
- #### Initial setup
161
+ ### Initial setup
10
162
  Add the engine to your gemfile
11
163
  ```
12
- gem "atlas_engine", git: "https://github.com/Shopify/atlas-engine"
164
+ gem "atlas_engine"
13
165
  ```
14
166
 
15
167
  Run the following commands to install the engine in your rails app
@@ -19,7 +171,7 @@ bundle lock
19
171
  bin/rails generate atlas_engine:install
20
172
  ```
21
173
 
22
- #### Updating to a newer version of the engine
174
+ ### Updating to a newer version of the engine
23
175
 
24
176
  Working with migrations
25
177
  ```
@@ -30,9 +182,11 @@ rails atlas_engine:install:migrations
30
182
  rails db:migrate
31
183
  ```
32
184
 
33
- ### Developing in the engine
185
+ ## Setup Atlas Engine for contribution / local development
34
186
 
35
- #### Setup Docker
187
+ This setup guide is based on a macOS development environment. Your tooling may vary.
188
+
189
+ ### Install + Setup Docker
36
190
 
37
191
  ```
38
192
  brew install docker
@@ -46,62 +200,61 @@ colima start --cpu 4 --memory 8
46
200
  colima ssh
47
201
  sudo sysctl -w vm.max_map_count=262144
48
202
  exit
203
+ ```
204
+
205
+ Verify docker is running with: `docker info`
206
+
207
+ ### Clone the atlas_engine git repository
49
208
 
50
209
  ```
210
+ git clone https://github.com/Shopify/atlas-engine.git
211
+ ```
212
+
213
+ ### Setup Ruby and Rails
51
214
 
52
- Verify if docker is running: `docker info`
215
+ Install ruby >= 3.2.1
53
216
 
54
- #### Setup Rails
217
+ In the newly cloned repository directory run:
55
218
 
56
219
  ```
57
220
  bundle install
58
221
 
59
- # If you get an ssl error with puma installation run
222
+ # *Note* If you get an ssl error during the puma installation run the following command:
60
223
  bundle config build.puma --with-pkg-config=$(brew --prefix openssl@3)/lib/pkgconfig
61
224
  ```
62
225
 
63
- #### Setting up Elasticsearch, Mysql
226
+ ### Set up Dockerized Elasticsearch and MySQL
64
227
 
228
+ In a separate terminal, from the cloned atlas_engine directory run:
65
229
  ```
66
- bash setup
67
230
  docker-compose up
68
231
 
69
- # If you encounter an error getting docker credentials, remove or update the `credsStore`
232
+ # *Note* If you encounter an error getting docker credentials, remove or update the `credsStore`
70
233
  key in your Docker configuration file:
71
234
 
72
235
  # ~/.docker/config.json
73
236
  "credsStore": "desktop", # remove this line
74
237
  ```
75
238
 
76
- Connecting to Docker services
77
- * for Mysql : `mysql --host=127.0.0.1 --user=user --password=changeme`
78
- * for Elasticsearch : `http://localhost:9200`
79
-
80
- _note: if you have updated any of the ports in your .env file then use those ports instead_
239
+ Verify your connection to the newly created Docker services with the following commands:
240
+ - MySQL : `mysql --host=127.0.0.1 --user=root`
241
+ - Elasticsearch : `curl http://localhost:9200`
81
242
 
82
-
83
- #### Setting up db
243
+ ### Setup the local db
84
244
  ```
85
245
  rails db:setup
86
246
  ```
87
247
 
88
- #### Setting up maintenance tasks
89
- After locally setting up Atlas Engine:
90
- ```
91
- rails app:maintenance_tasks:install:migrations
92
- rails db:migrate
93
- ```
94
-
95
- ## Using the App
96
-
97
248
  ### Infrastructure Requirements
98
- The elasticsearch implementation depends on the ICU analysis plugin. Refer to the [Dockfile](./Dockfile) leveraged in local setup for plugin installation.
249
+ The elasticsearch implementation depends on the ICU analysis plugin. Refer to the [Dockerfile](./docker/elasticsearch/Dockerfile) leveraged in local setup for plugin installation.
99
250
 
100
- ### Starting the App and Testing
251
+ ### Starting the App / Running Tests
101
252
  * `bin/rails server` to start the server
102
253
  * `bin/rails test` to run tests
254
+ * `bundle exec rubocop` to run ruby style checks
255
+ * `srb tc` to run sorbet typechecks
103
256
 
104
- ### Running Sorbet
257
+ ### Sorbet
105
258
 
106
259
  Generate rbi files for custom code
107
260
  ```
@@ -117,7 +270,318 @@ bin/tapioca gems
117
270
  bin/tapioca gems --all
118
271
  ```
119
272
 
120
- Running a sorbet check
273
+ Run sorbet check
121
274
  ```
122
275
  srb tc
123
276
  ```
277
+
278
+ ## Ingestion
279
+
280
+ In order to power the more advanced validation matching strategies that provide city / state / zip and even street
281
+ level address validation, your app must have a populated elasticsearch index per country available for `atlas_engine`
282
+ to query.
283
+
284
+ The data we use to power atlas engine validation is free open source data from the [open addresses](https://openaddresses.io/)
285
+ project. The following guide demonstrates how to ingest data with the dummy app, but the process is the same with
286
+ the engine mounted into your own rails app.
287
+
288
+ 1. Go to the [open addresses](https://openaddresses.io/) download center, create an account, support the project, and
289
+ download a GeoJSON+LD file for the country or region you wish to validate. For this example, we will be using the
290
+ countrywide addresses data for Australia.
291
+
292
+ 2. Once the file is downloaded, start your app with `rails s` and navigate to `http://localhost:3000/maintenance_tasks`
293
+ (see [the github repo](https://github.com/Shopify/maintenance_tasks) for more information about maintenance_tasks).
294
+ There are two tasks available: `Maintenance::AtlasEngine::GeoJsonImportTask` and `Maintenance::AtlasEngine::ElasticsearchIndexCreateTask`. We will be using both in the ingestion process.
295
+
296
+ 3. Navigate to the `Maintenance::AtlasEngine::GeoJsonImportTask`. This task will transform the raw geo json file into
297
+ records in our mysql database and has the following parameters:
298
+
299
+ - **clear_records:** If checked, removes any existing records for the country in the database.
300
+
301
+ - **country_code: (required)** The ISO country code of the data we are ingesting.
302
+ In this example, the country code of Australia is `AU`.
303
+
304
+ - **geojson_file_path: (required)** The fully qualified path of the previously downloaded geojson data from open addresses.
305
+
306
+ - **locale: (optional)** The language of the data in the open addresses file.
307
+
308
+ 4. Once properly parameterized, click run. The process will initialize a `country_import` and should succeed immediately.
309
+
310
+ 5. Navigate to `http://localhost:3000/country_imports` to track the progress of the country import. Click the import id
311
+ link for a more detailed view. Once the import status has updated from `in_progress` to `complete` we will have all of
312
+ the raw open address data imported into our mysql database's `atlas_engine_post_addresses` table.
313
+
314
+ 6. Navigate back to `http://localhost:3000/maintenance_tasks` and click on the `Maintenance::AtlasEngine::ElasticsearchIndexCreateTask`. This task will ingest the data we have staged in mysql
315
+ and use it to create documents in a new elasticsearch index which Atlas Engine will ultimately use for validation.
316
+
317
+ 7. The `ElasticsearchIndexCreateTask` includes the following parameters:
318
+
319
+ - **country_code: (required)** the ISO country code of the data we are ingesting and the name of the elasticsearch index we
320
+ will be creating. In this example, the country code of Australia is `AU`.
321
+
322
+ - **locale: (optional)** the language of the documents we will be creating. This is required for multi-locale countries
323
+ as our indexes are separated by language.
324
+
325
+ - **province_codes: (optional)** an allow list of province codes to create documents for. If left blank the task will create
326
+ documents for the entire dataset.
327
+
328
+ - **shard_override: (optional)** the number of shards to create this index with. If left blank the default will be used.
329
+
330
+ - **replica_override: (optional)** the number of replicas to create this index with. If left blank the default will be used.
331
+
332
+ - **activate_index: (optional)** if checked, immediately promote this index to be the index queried by atlas engine.
333
+ If unchecked, the created index will need to be activated manually.
334
+
335
+ 8. Once properly parameterized, click run. The maintenance task UI will track the progress of the index creation.
336
+
337
+ 9. When completed, the index documents may be verified manually with an elasticsearch client.
338
+ We may now use the `es` and `es_street` matching strategies with `AU` addresses. See [below](#elasticsearch-matching-strategy)
339
+ for an example of its usage.
340
+
341
+ ## Elasticsearch Matching Strategy
342
+
343
+ Once we have successfully created and activated an elasticsearch index using open address data, we may now use
344
+ the more advanced elasticsearch matching strategies `es` and `es_street`.
345
+
346
+ Consider the following example of an invalid `AU` address:
347
+
348
+ ```graphql
349
+ query validation {
350
+ validation(
351
+ address: {
352
+ address1: "100 miller st"
353
+ address2: ""
354
+ city: "sydney"
355
+ provinceCode: "NSW"
356
+ countryCode: AU
357
+ zip: "2060"
358
+ }
359
+ locale: "en"
360
+ matchingStrategy: ES
361
+ ) {
362
+ validationScope
363
+ concerns {
364
+ code
365
+ fieldNames
366
+ suggestionIds
367
+ message
368
+ }
369
+ suggestions {
370
+ address1
371
+ address2
372
+ city
373
+ provinceCode
374
+ zip
375
+ }
376
+ }
377
+ }
378
+ ```
379
+
380
+ When input into `http://localhost:3000/graphiql`, this query should produce the following response:
381
+
382
+ ```json
383
+ {
384
+ "data": {
385
+ "validation": {
386
+ "candidate": ",NSW,,,,2060,[North Sydney],,Miller Street",
387
+ "validationScope": [
388
+ "country_code",
389
+ "province_code",
390
+ "zip",
391
+ "city",
392
+ "address1"
393
+ ],
394
+ "concerns": [
395
+ {
396
+ "code": "city_inconsistent",
397
+ "typeLevel": 3,
398
+ "fieldNames": [
399
+ "city"
400
+ ],
401
+ "suggestionIds": [
402
+ "665ffd09-75b8-584d-8e4a-a0f471bfea01"
403
+ ],
404
+ "message": "Enter a valid city for New South Wales, 2060"
405
+ }
406
+ ],
407
+ "suggestions": [
408
+ {
409
+ "id": "665ffd09-75b8-584d-8e4a-a0f471bfea01",
410
+ "address1": null,
411
+ "address2": null,
412
+ "city": "North Sydney",
413
+ "province": null,
414
+ "provinceCode": null,
415
+ "zip": null
416
+ }
417
+ ]
418
+ }
419
+ }
420
+ }
421
+ ```
422
+
423
+ The concerns object contains a concern code `city_inconsistent` to highlight the validation error of `sydney`
424
+ being an incorrect city for the rest of the provided address. The concern message field is the human readable
425
+ error nudge `"Enter a valid city for New South Wales, 2060"`, pointing to the supporting pieces of evidence (province
426
+ and zip) that were used to determine city as the inconsistent value in this address input.
427
+
428
+ The suggestion object contains a corrected city field `North Sydney` which will result in no concerns or suggestions
429
+ for the validation endpoint if applied.
430
+
431
+ The candidate field contains a representation of the matching document in the elasticsearch index that was found and
432
+ used to determine the suggestions and concerns in the api response.
433
+
434
+ The `es_street` level of validation can also be used to correct errors in the `address1` or `address2` fields of the
435
+ input. In the following request we have modified our query to make a second error in our input - searching for
436
+ `miller ave` instead of `miller st`.
437
+
438
+ ```graphql
439
+ query validation {
440
+ validation(
441
+ address: {
442
+ address1: "100 miller ave"
443
+ address2: ""
444
+ city: "sydney"
445
+ provinceCode: "NSW"
446
+ countryCode: AU
447
+ zip: "2060"
448
+ }
449
+ locale: "en"
450
+ matchingStrategy: ES_STREET
451
+ ) {
452
+ validationScope
453
+ concerns {
454
+ code
455
+ fieldNames
456
+ suggestionIds
457
+ message
458
+ }
459
+ suggestions {
460
+ address1
461
+ address2
462
+ city
463
+ provinceCode
464
+ zip
465
+ }
466
+ }
467
+ }
468
+ ```
469
+
470
+ This query produces the following response:
471
+
472
+ ```json
473
+ {
474
+ "data": {
475
+ "validation": {
476
+ "candidate": ",NSW,,,,2060,[North Sydney],,Miller Street",
477
+ "validationScope": [
478
+ "country_code",
479
+ "province_code",
480
+ "zip",
481
+ "city",
482
+ "address1"
483
+ ],
484
+ "concerns": [
485
+ {
486
+ "code": "city_inconsistent",
487
+ "typeLevel": 3,
488
+ "fieldNames": [
489
+ "city"
490
+ ],
491
+ "suggestionIds": [
492
+ "88779db6-2c5d-5dbb-9f77-f7b07c07206a"
493
+ ],
494
+ "message": "Enter a valid city for New South Wales, 2060"
495
+ },
496
+ {
497
+ "code": "street_inconsistent",
498
+ "typeLevel": 3,
499
+ "fieldNames": [
500
+ "address1"
501
+ ],
502
+ "suggestionIds": [
503
+ "88779db6-2c5d-5dbb-9f77-f7b07c07206a"
504
+ ],
505
+ "message": "Enter a valid street name for New South Wales, 2060"
506
+ }
507
+ ],
508
+ "suggestions": [
509
+ {
510
+ "id": "88779db6-2c5d-5dbb-9f77-f7b07c07206a",
511
+ "address1": "100 Miller Street",
512
+ "address2": null,
513
+ "city": "North Sydney",
514
+ "province": null,
515
+ "provinceCode": null,
516
+ "zip": null
517
+ }
518
+ ]
519
+ }
520
+ }
521
+ }
522
+ ```
523
+
524
+ The concerns object now contains an additional concern code `street_inconsistent` to highlight the validation error of
525
+ `miller ave` being an incorrect street for the rest of the address input. The concern message field is the human
526
+ readable error nudge `"Enter a valid street name for New South Wales, 2060"`, pointing to the supporting pieces of
527
+ evidence (province and zip) that were used to determine street as an inconsistent value in this address input.
528
+
529
+ The suggestion object contains a corrected street field `100 Miller Street` and a corrected city field `North Sydney`.
530
+ If both of these suggestions are applied to the input address the subsequent request will be valid.
531
+
532
+ The corrected input of
533
+
534
+ ```graphql
535
+ query validation {
536
+ validation(
537
+ address: {
538
+ address1: "100 miller st"
539
+ address2: ""
540
+ city: "north sydney"
541
+ provinceCode: "NSW"
542
+ countryCode: AU
543
+ zip: "2060"
544
+ }
545
+ locale: "en"
546
+ matchingStrategy: ES_STREET
547
+ ) {
548
+ validationScope
549
+ concerns {
550
+ code
551
+ fieldNames
552
+ suggestionIds
553
+ message
554
+ }
555
+ suggestions {
556
+ address1
557
+ address2
558
+ city
559
+ provinceCode
560
+ zip
561
+ }
562
+ }
563
+ }
564
+ ```
565
+
566
+ will produce the response:
567
+
568
+ ```json
569
+ {
570
+ "data": {
571
+ "validation": {
572
+ "candidate": ",NSW,,,,2060,[North Sydney],,Miller Street",
573
+ "validationScope": [
574
+ "country_code",
575
+ "province_code",
576
+ "zip",
577
+ "city",
578
+ "address1"
579
+ ],
580
+ "concerns": [],
581
+ "suggestions": []
582
+ }
583
+ }
584
+ }
585
+ ```
586
+
587
+ This response has no concerns or suggestions, and the input address is therefore considered to be valid.
@@ -6,6 +6,8 @@ ingestion:
6
6
  validation:
7
7
  enabled: true
8
8
  default_matching_strategy: local
9
+ address_parser: AtlasEngine::Be::ValidationTranscriber::AddressParser
10
+ has_provinces: false
9
11
  index_locales:
10
12
  - fr
11
13
  - nl
@@ -0,0 +1,84 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Be
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ private
9
+
10
+ STREET = "(?<street>.+)"
11
+ NUMBERED_STREET = "(?<street>.+\s+[0-9]+)"
12
+ BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
13
+ UNIT_NUM = "(?<unit_num>[[:alnum:]]+)"
14
+ PO_BOX = /\b(?<box_type>pb|box|bte|bus|boîte|boite|postbus|antwoordnummer)\s+(?<number>\d+)\b/i
15
+ STREET_SUFFIXES = %r{
16
+ \A(
17
+ dwarsstraat|dwstr|dwarsweg|dwwg|dijk|dk|gracht|gr|kade|kd|kanaal|kan
18
+ |laan|leane|loane|ln|park|pk|plantsoen|plnts|plein|pln|singel|sngl
19
+ |straat|strjitte|str|straatweg|strwg|weg|wg
20
+ )\z
21
+ }ix
22
+
23
+ sig { returns(T::Array[Regexp]) }
24
+ def country_regex_formats
25
+ @country_regex_formats ||= [
26
+ /^#{STREET},?\s+#{BUILDING_NUM}/,
27
+ /^#{BUILDING_NUM}\s?(,\s?)?#{STREET}/,
28
+ /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}$/,
29
+ %r{^#{STREET},?\s+#{BUILDING_NUM}[\s,-/]+#{UNIT_NUM}$},
30
+ /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
31
+ ]
32
+ end
33
+
34
+ sig { override.params(address_line: String).returns(T::Array[T.nilable(String)]) }
35
+ def extract_po_box(address_line)
36
+ po_box_match = address_line.match(PO_BOX)
37
+
38
+ if po_box_match
39
+ po_box = po_box_match["number"]
40
+ address_line = address_line.gsub(PO_BOX, "").strip.delete_suffix(",")
41
+ else
42
+ po_box = nil
43
+ end
44
+
45
+ [address_line, po_box]
46
+ end
47
+
48
+ # Return true if something's obviously wrong with this regex match
49
+ sig do
50
+ override.params(
51
+ captures: T::Hash[Symbol, T.nilable(String)],
52
+ address: AddressValidation::TAddress,
53
+ ).returns(T::Boolean)
54
+ end
55
+ def ridiculous?(captures, address)
56
+ building_num = captures[:building_num]&.downcase
57
+ street = captures[:street]&.downcase
58
+ unit_num = captures[:unit_num]&.downcase
59
+
60
+ if street.present?
61
+ return true unless address.address1&.upcase&.include?(street.upcase) ||
62
+ address.address2&.upcase&.include?(street.upcase)
63
+ end
64
+
65
+ [building_num, unit_num].any? do |token|
66
+ po_box?(token) || street_suffix?(token)
67
+ end
68
+ end
69
+
70
+ sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
71
+ def po_box?(token)
72
+ return false if token.blank?
73
+
74
+ token.match?(PO_BOX)
75
+ end
76
+
77
+ sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
78
+ def street_suffix?(token)
79
+ token.present? && token.match?(STREET_SUFFIXES)
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end