atlas_engine 0.2.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/README.md +12 -4
  3. data/app/countries/atlas_engine/cz/country_profile.yml +3 -0
  4. data/app/countries/atlas_engine/es/country_profile.yml +3 -0
  5. data/app/countries/atlas_engine/es/synonyms.yml +2 -0
  6. data/app/countries/atlas_engine/es/validation_transcriber/address_parser.rb +28 -0
  7. data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb +27 -0
  8. data/app/countries/atlas_engine/fo/country_profile.yml +4 -0
  9. data/app/countries/atlas_engine/fr/country_profile.yml +2 -0
  10. data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb +31 -0
  11. data/app/countries/atlas_engine/it/country_profile.yml +4 -0
  12. data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb +69 -0
  13. data/app/countries/atlas_engine/kr/country_profile.yml +7 -0
  14. data/app/countries/atlas_engine/kr/validation_transcriber/address_parser.rb +36 -0
  15. data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb +45 -0
  16. data/app/countries/atlas_engine/lu/country_profile.yml +4 -1
  17. data/app/countries/atlas_engine/lu/validation_transcriber/address_parser.rb +23 -0
  18. data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb +25 -0
  19. data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/empty_street_corrector.rb +32 -0
  20. data/app/countries/atlas_engine/pl/address_validation/exclusions/placeholder_postal_code.rb +35 -0
  21. data/app/countries/atlas_engine/pl/address_validation/exclusions/rural_address.rb +42 -0
  22. data/app/countries/atlas_engine/pl/country_profile.yml +13 -0
  23. data/app/countries/atlas_engine/pl/synonyms.yml +13 -0
  24. data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +36 -1
  25. data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb +38 -0
  26. data/app/countries/atlas_engine/pt/country_profile.yml +4 -0
  27. data/app/countries/atlas_engine/pt/synonyms.yml +12 -0
  28. data/app/countries/atlas_engine/pt/validation_transcriber/address_parser.rb +75 -0
  29. data/app/countries/atlas_engine/si/address_importer/open_address/corrections/city_district_corrector.rb +25 -0
  30. data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +19 -0
  31. data/app/countries/atlas_engine/si/address_validation/exclusions/unknown_city.rb +33 -0
  32. data/app/countries/atlas_engine/si/country_profile.yml +17 -0
  33. data/app/countries/atlas_engine/si/synonyms.yml +7 -0
  34. data/app/countries/atlas_engine/si/validation_transcriber/address_parser.rb +52 -0
  35. data/app/graphql/atlas_engine/schema.graphql +1 -1
  36. data/app/jobs/atlas_engine/concerns/address_importer/handles_errors.rb +1 -1
  37. data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +1 -1
  38. data/app/models/atlas_engine/address_validation/concern_record.rb +6 -1
  39. data/app/models/atlas_engine/address_validation/es/query_builder.rb +6 -1
  40. data/app/models/atlas_engine/address_validation/statsd_emitter.rb +6 -2
  41. data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +38 -4
  42. data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +4 -4
  43. data/app/models/atlas_engine/address_validation/token/sequence/comparison_policy.rb +33 -0
  44. data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +41 -15
  45. data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb +33 -0
  46. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +3 -3
  47. data/app/models/atlas_engine/address_validation/validators/full_address/city_comparison.rb +25 -0
  48. data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +15 -6
  49. data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +8 -2
  50. data/app/models/atlas_engine/address_validation/validators/full_address/field_comparison_base.rb +77 -0
  51. data/app/models/atlas_engine/address_validation/validators/full_address/province_code_comparison.rb +34 -0
  52. data/app/models/atlas_engine/address_validation/validators/full_address/relevant_components.rb +34 -12
  53. data/app/models/atlas_engine/address_validation/validators/full_address/street_comparison.rb +30 -0
  54. data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +1 -1
  55. data/app/models/atlas_engine/address_validation/validators/full_address/zip_comparison.rb +37 -0
  56. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +2 -2
  57. data/app/models/atlas_engine/country_profile_validation_subset.rb +35 -2
  58. data/db/data/country_profiles/default.yml +12 -0
  59. data/lib/atlas_engine/version.rb +1 -1
  60. data/lib/tasks/atlas_engine/graphql.rake +13 -0
  61. metadata +49 -6
  62. data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +0 -135
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f78501b09c1f8bfeae5d8a5be2ff3b83d7f68f689e66294f704f3c3bcf5830b
4
- data.tar.gz: d1100e782d95c89565be662da85ba69788fa457bbefcd11ccd8082c356086df2
3
+ metadata.gz: 99901e1ca20e5c32dd90945bbe7bd2199ad9cf0c11507524e7210b621f0a647c
4
+ data.tar.gz: c14b30e2629d198b5341e87190c0ccc38d016a644b9fea74e6d1bece611360b1
5
5
  SHA512:
6
- metadata.gz: 7d2b8abdf8ae247fcb95b10a0e7f301deea99ee2505f8817c85a8bf10459e3cdedfd7a631738071cf018db7545ca0b2506ec518f31c1e5ba1f507a49915d2f25
7
- data.tar.gz: d7b2177df1ca73d45a37993dfdea005d92fc3705ef422c0a1b7e90285825880f4d82f7ae3fff47d8f425abf3a2865636ec5bd8e546a2e60c113de19aeeba8df0
6
+ metadata.gz: b424ffd2640d6248aa00adfa8e1942fad3b0f954ecfeb339257eccf84e2976107151f7da4cf936f4f48eb450337582c1692e90d79b4ecf6658964cc78507b963
7
+ data.tar.gz: 463dbcc8c3ad8a9aab64ee1212fc9d9148b49c5ad44a51fece73a1d8185af58453c6d9fa4002df4f9acca3aa8ee0122a140575e4b6a90b67633fc4565d5659ce
data/README.md CHANGED
@@ -1,7 +1,15 @@
1
- # Atlas Engine
1
+ # 🌐 Atlas Engine
2
2
 
3
3
  Atlas Engine is a rails engine that provides a global end-to-end address validation API for rails apps.
4
4
 
5
+ * [Address Validation API](#address-validation-api)
6
+ * [Rails App Installation](#rails-app-installation)
7
+ * [Local Development Installation](#local-development-installation)
8
+ * [Address Data Ingestion](#address-data-ingestion)
9
+ * [Elasticsearch Matching Strategy](#elasticsearch-matching-strategy)
10
+
11
+ ## Address Validation API
12
+
5
13
  The validation API is powered by GraphQL; an example request and an explanation of the parameters and response follow:
6
14
 
7
15
  ```graphql
@@ -156,7 +164,7 @@ being an invalid zip code for the province `ON`. It also returns the human reada
156
164
 
157
165
  The validation scope excludes zip because the zip was not successfully validated.
158
166
 
159
- ## Installation of Atlas Engine in your rails app
167
+ ## Rails App Installation
160
168
 
161
169
  ### Initial setup
162
170
  Add the engine to your gemfile
@@ -182,7 +190,7 @@ rails atlas_engine:install:migrations
182
190
  rails db:migrate
183
191
  ```
184
192
 
185
- ## Setup Atlas Engine for contribution / local development
193
+ ## Local Development Installation
186
194
 
187
195
  This setup guide is based on a macOS development environment. Your tooling may vary.
188
196
 
@@ -275,7 +283,7 @@ Run sorbet check
275
283
  srb tc
276
284
  ```
277
285
 
278
- ## Ingestion
286
+ ## Address Data Ingestion
279
287
 
280
288
  In order to power the more advanced validation matching strategies that provide city / state / zip and even street
281
289
  level address validation, your app must have a populated elasticsearch index per country available for `atlas_engine`
@@ -5,3 +5,6 @@ validation:
5
5
  address_parser: AtlasEngine::Cz::ValidationTranscriber::AddressParser
6
6
  query_builder: AtlasEngine::Cz::AddressValidation::Es::QueryBuilder
7
7
  has_provinces: false
8
+ comparison_policies:
9
+ city:
10
+ unmatched: ignore_left_unmatched
@@ -0,0 +1,3 @@
1
+ id: ES
2
+ validation:
3
+ address_parser: AtlasEngine::Es::ValidationTranscriber::AddressParser
@@ -0,0 +1,2 @@
1
+ street_synonyms:
2
+ - c/, calle
@@ -0,0 +1,28 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Es
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ private
9
+
10
+ NON_NUMERIC_STREET = "(?<street>[^[:digit:]]+)"
11
+ BUILDING_NUM_DESIGNATOR = /(?i)(n|n°|número)/
12
+ CATCH_ALL = /(?:,|\s|\s*.+)/
13
+
14
+ sig { returns(T::Array[Regexp]) }
15
+ def country_regex_formats
16
+ @country_regex_formats ||= [
17
+ /^#{STREET_NO_COMMAS},?\s+#{BUILDING_NUM}$/,
18
+ /^#{STREET_NO_COMMAS},?\s+#{BUILDING_NUM}#{CATCH_ALL}$/,
19
+ /^#{NON_NUMERIC_STREET},?\s+#{BUILDING_NUM}#{CATCH_ALL}$/,
20
+ /^#{STREET_NO_COMMAS},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}/,
21
+ /^#{STREET_NO_COMMAS},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}#{CATCH_ALL}$/,
22
+ /^#{NON_NUMERIC_STREET},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}#{CATCH_ALL}$/,
23
+ ]
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,27 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Fo
6
+ module AddressImporter
7
+ module Corrections
8
+ module OpenAddress
9
+ class CityCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ sig { params(address: Hash).void }
14
+ def apply(address)
15
+ if address[:city] == ["Nes, Eysturoy"] || address[:city] == ["Nes, Vágur"]
16
+ address[:city] = ["Nes"]
17
+ elsif address[:city] == ["Syðradalur, Kalsoy"] || address[:city] == ["Syðradalur, Streymoy"]
18
+ address[:city] = ["Syðradalur"]
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -3,3 +3,7 @@ validation:
3
3
  enabled: true
4
4
  default_matching_strategy: es
5
5
  has_provinces: false
6
+ ingestion:
7
+ correctors:
8
+ open_address:
9
+ - AtlasEngine::Fo::AddressImporter::Corrections::OpenAddress::CityCorrector
@@ -11,3 +11,5 @@ validation:
11
11
  address_parser: AtlasEngine::Fr::ValidationTranscriber::AddressParser
12
12
  enabled: true
13
13
  default_matching_strategy: es
14
+ has_provinces: false
15
+ zip_prefix_length: 3
@@ -0,0 +1,31 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module It
6
+ module AddressValidation
7
+ module Validators
8
+ module FullAddress
9
+ module Exclusions
10
+ class City <
11
+ AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
12
+ class << self
13
+ sig do
14
+ override.params(
15
+ session: AtlasEngine::AddressValidation::Session,
16
+ candidate: AtlasEngine::AddressValidation::Candidate,
17
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
18
+ )
19
+ .returns(T::Boolean)
20
+ end
21
+ def apply?(session, candidate, address_comparison)
22
+ true
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -3,6 +3,10 @@ validation:
3
3
  enabled: true
4
4
  default_matching_strategy: es
5
5
  address_parser: AtlasEngine::Dk::ValidationTranscriber::AddressParser
6
+ exclusions:
7
+ city:
8
+ - AtlasEngine::It::AddressValidation::Validators::FullAddress::Exclusions::City
9
+ unmatched_components_suggestion_threshold: 1
6
10
  ingestion:
7
11
  open_address:
8
12
  feature_mapper: AtlasEngine::It::AddressImporter::OpenAddress::Mapper
@@ -0,0 +1,69 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Kr
6
+ module AddressValidation
7
+ module Validators
8
+ module FullAddress
9
+ module Exclusions
10
+ class City <
11
+ AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
12
+ extend T::Sig
13
+ class << self
14
+ COMPONENT_IDENTIFIER = {
15
+ si: "시",
16
+ gu: "구",
17
+ }.freeze
18
+
19
+ sig do
20
+ override.params(
21
+ session: AtlasEngine::AddressValidation::Session,
22
+ candidate: AtlasEngine::AddressValidation::Candidate,
23
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
24
+ )
25
+ .returns(T::Boolean)
26
+ end
27
+ def apply?(session, candidate, address_comparison)
28
+ candidate_si = extract_component_from_city(candidate, :si)
29
+ candidate_gu = extract_component_from_city(candidate, :gu)
30
+
31
+ (candidate_si.present? && contains_component?(:si, candidate_si, session)) ||
32
+ (candidate_gu.present? && contains_component?(:gu, candidate_gu, session))
33
+ end
34
+
35
+ private
36
+
37
+ sig do
38
+ params(
39
+ candidate: AtlasEngine::AddressValidation::Candidate,
40
+ component: Symbol,
41
+ ).returns(T.nilable(String))
42
+ end
43
+ def extract_component_from_city(candidate, component)
44
+ city = candidate.component(:city)&.value&.first
45
+
46
+ city_parts = city.split(" ")
47
+ city_parts.find do |part|
48
+ part.end_with?(COMPONENT_IDENTIFIER[component])
49
+ end
50
+ end
51
+
52
+ sig do
53
+ params(
54
+ type: Symbol,
55
+ value: String,
56
+ session: AtlasEngine::AddressValidation::Session,
57
+ ).returns(T::Boolean)
58
+ end
59
+ def contains_component?(type, value, session)
60
+ session.parsings.parsings.pluck(type)&.include?(value) || session.city&.include?(value)
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -5,6 +5,13 @@ ingestion:
5
5
  validation:
6
6
  enabled: true
7
7
  default_matching_strategy: es
8
+ address_parser: AtlasEngine::Kr::ValidationTranscriber::AddressParser
9
+ comparison_policies:
10
+ city:
11
+ unmatched: ignore_left_unmatched
12
+ exclusions:
13
+ city:
14
+ - AtlasEngine::Kr::AddressValidation::Validators::FullAddress::Exclusions::City
8
15
  restrictions:
9
16
  - class: AtlasEngine::Restrictions::UnsupportedScript
10
17
  params:
@@ -0,0 +1,36 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Kr
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ private
9
+
10
+ PROVINCE = "(?<province>.+기|서울)"
11
+ GU = "(?<gu>.+구)"
12
+ SI = "(?<si>.+시)"
13
+ DONG = "(?<dong>.+동)"
14
+ EUP = "(?<eup>.+읍)"
15
+ STREET = "(?<street>\\S+)"
16
+ BUILDING_NUM = "(?<building_num>\\d+(^호)?)"
17
+ UNIT_NUM = "(?<unit_num>\\d+(^동)?)"
18
+
19
+ sig { returns(T::Array[Regexp]) }
20
+ def country_regex_formats
21
+ @country_regex_formats ||= [
22
+ %r{
23
+ (#{PROVINCE}\s+)?
24
+ (#{SI}\s+)?
25
+ (#{GU}\s+)?
26
+ (#{DONG}\s+)?
27
+ (#{EUP}\s+)?
28
+ (#{STREET}\s+)?
29
+ (#{BUILDING_NUM}(-|\s)?)?#{UNIT_NUM}?
30
+ }x,
31
+ ]
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,45 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Lu
6
+ module AddressImporter
7
+ module Corrections
8
+ module OpenAddress
9
+ class CityCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ # NOTE: ensure keys match the city names we have in our db
14
+ CITY_ALIASES = {
15
+ "luxembourg" => ["Lëtzebuerg"],
16
+ "esch-sur-alzette" => ["Esch-Uelzecht", "Esch/Alzette"],
17
+ "dudelange" => ["Diddeleng", "Düdelingen"],
18
+ "schifflange" => ["Schëffleng"],
19
+ "bettembourg" => ["Beetebuerg"],
20
+ "pétange" => ["Péiteng"],
21
+ "ettelbruck" => ["Ettelbréck"],
22
+ "diekirch" => ["Dikrech"],
23
+ "strassen" => ["Stroossen"],
24
+ "bertrange" => ["Bartreng"],
25
+ "belvaux" => ["Bieles"],
26
+ "differdange" => ["Déifferdeng"],
27
+ "wiltz" => ["Wolz"],
28
+ "grevenmacher" => ["Gréiwemaacher"],
29
+ "mersch" => ["Miersch"],
30
+ "redange/attert" => ["Redange", "Réiden", "Redange-sur-Attert"],
31
+ }
32
+
33
+ sig { params(address: Hash).void }
34
+ def apply(address)
35
+ city = address[:city].first.downcase
36
+ aliases = CITY_ALIASES[city.downcase] || []
37
+ address[:city] = address[:city] + aliases
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
@@ -1,12 +1,15 @@
1
1
  id: LU
2
2
  validation:
3
3
  enabled: true
4
- has_provinces: true
4
+ has_provinces: false
5
5
  default_matching_strategy: es
6
+ address_parser: AtlasEngine::Lu::ValidationTranscriber::AddressParser
6
7
  index_locales:
7
8
  - fr
8
9
  - lb
10
+ unmatched_components_suggestion_threshold: 1
9
11
  ingestion:
10
12
  correctors:
11
13
  open_address:
12
14
  - AtlasEngine::Lu::AddressImporter::Corrections::OpenAddress::LocaleCorrector
15
+ - AtlasEngine::Lu::AddressImporter::Corrections::OpenAddress::CityCorrector
@@ -0,0 +1,23 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Lu
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ private
9
+
10
+ STREET = "(?<street>.+)"
11
+ BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)([/-][0-9])?"
12
+
13
+ sig { returns(T::Array[Regexp]) }
14
+ def country_regex_formats
15
+ @country_regex_formats ||= [
16
+ /^#{STREET},?\s+#{BUILDING_NUM}/,
17
+ /^#{BUILDING_NUM}\s?(,\s?)?#{STREET}/,
18
+ ]
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,25 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pl
6
+ module AddressImporter
7
+ module Corrections
8
+ module OpenAddress
9
+ class CityCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ sig { params(address: Hash).void }
14
+ def apply(address)
15
+ if address[:city] == ["Warszawa"]
16
+ address[:city] << "Warsaw"
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,32 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pl
6
+ module AddressImporter
7
+ module Corrections
8
+ module OpenAddress
9
+ class EmptyStreetCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ sig { params(address: Hash).void }
14
+ def apply(address)
15
+ if address[:street] == "" && address[:city].present?
16
+ # Many smaller rural towns in Poland don't have street names. Mailing addresses are
17
+ # often expressed as
18
+ # address1: <town name> <building number>
19
+ # city: <town name> OR <nearest postal town>
20
+ # postal_code: <postal code>
21
+ #
22
+ # The OpenAddresses dataset does not currently include county/postal town info.
23
+ address[:street] = Array(address[:city]).first
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,35 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pl
6
+ module AddressValidation
7
+ module Exclusions
8
+ class PlaceholderPostalCode < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
9
+ extend T::Sig
10
+
11
+ class << self
12
+ sig do
13
+ override.params(
14
+ session: AtlasEngine::AddressValidation::Session,
15
+ candidate: AtlasEngine::AddressValidation::Candidate,
16
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
17
+ ).returns(T::Boolean)
18
+ end
19
+ def apply?(session, candidate, address_comparison)
20
+ placeholder_postal_code?(candidate)
21
+ end
22
+
23
+ private
24
+
25
+ sig { params(candidate: AtlasEngine::AddressValidation::Candidate).returns(T::Boolean) }
26
+ def placeholder_postal_code?(candidate)
27
+ zip_values = T.must(candidate.component(:zip)&.values)
28
+ zip_values.all?("00-000")
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,42 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pl
6
+ module AddressValidation
7
+ module Exclusions
8
+ class RuralAddress < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
9
+ extend T::Sig
10
+
11
+ class << self
12
+ sig do
13
+ override.params(
14
+ session: AtlasEngine::AddressValidation::Session,
15
+ candidate: AtlasEngine::AddressValidation::Candidate,
16
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
17
+ ).returns(T::Boolean)
18
+ end
19
+ def apply?(session, candidate, address_comparison)
20
+ rural_address?(candidate) && poor_city_match?(address_comparison)
21
+ end
22
+
23
+ private
24
+
25
+ def poor_city_match?(address_comparison)
26
+ address_comparison.city_comparison.sequence_comparison.aggregate_distance > 2
27
+ end
28
+
29
+ sig { params(candidate: AtlasEngine::AddressValidation::Candidate).returns(T::Boolean) }
30
+ def rural_address?(candidate)
31
+ return false if candidate.component(:city)&.values.blank?
32
+
33
+ street = candidate.component(:street)&.first_value
34
+ city_values = T.must(candidate.component(:city)&.values)
35
+ city_values.any?(street)
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -1,5 +1,18 @@
1
1
  id: PL
2
+ ingestion:
3
+ correctors:
4
+ open_address:
5
+ - AtlasEngine::Pl::AddressImporter::Corrections::OpenAddress::CityCorrector
6
+ - AtlasEngine::Pl::AddressImporter::Corrections::OpenAddress::EmptyStreetCorrector
2
7
  validation:
3
8
  address_parser: AtlasEngine::Pl::ValidationTranscriber::AddressParser
4
9
  enabled: true
5
10
  default_matching_strategy: es
11
+ exclusions:
12
+ city:
13
+ - AtlasEngine::Pl::AddressValidation::Exclusions::RuralAddress
14
+ zip:
15
+ - AtlasEngine::Pl::AddressValidation::Exclusions::PlaceholderPostalCode
16
+ comparison_policies:
17
+ street:
18
+ unmatched: ignore_largest_unmatched_side
@@ -0,0 +1,13 @@
1
+ street_synonyms:
2
+ ## street suffixes
3
+ - aleja, al # avenue
4
+ - osiedle, os # housing estate
5
+ - plac, pl # square
6
+ - ulica, ul # street
7
+ ## titles
8
+ - kardynała, kard # cardinal (masculine)
9
+ - święta, św # saint (feminine)
10
+ - świętego, św # saint
11
+ - święty, św # saint (masculine)
12
+ city_synonyms:
13
+ - wielkopolska, wlkp # Greater Poland
@@ -5,14 +5,49 @@ module AtlasEngine
5
5
  module Pl
6
6
  module ValidationTranscriber
7
7
  class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ STREET = "(?<street>.+?)" # the .+ is non-greedy to allow for optional building number prefixes
9
+ BUILDING_NUM_PREFIX = "(?:nr.?\s+)"
10
+ BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
11
+ UNIT_NUM_PREFIX = "(?:\s*[/-]|\s+m.?)"
12
+ UNIT_NUM = "(?<unit_num>[[:alpha:]0-9]+)"
13
+
14
+ sig { override.returns(T::Array[AddressComponents]) }
15
+ def parse
16
+ # Polish addresses sometimes follow an abbreviation with a period and no space afterward
17
+ super.each do |components|
18
+ components[:street]&.gsub!(
19
+ /\A(?<prefix>.+?)(?<dot>\.)(?<non_space>\S)/i,
20
+ "\\k<prefix> \\k<non_space>",
21
+ )
22
+ end
23
+ end
24
+
8
25
  private
9
26
 
10
27
  sig { returns(T::Array[Regexp]) }
11
28
  def country_regex_formats
12
29
  @country_regex_formats ||= [
13
- %r{^(?<street>.+)\s+(?<building_num>[0-9][[:alpha:]0-9]*)(\s*/\s*(?<unit_num>[[:alpha:]0-9]+))?$},
30
+ /^#{STREET}\s+#{BUILDING_NUM_PREFIX}?#{BUILDING_NUM}(#{UNIT_NUM_PREFIX}\s*#{UNIT_NUM})?$/,
31
+ /^#{STREET}$/,
14
32
  ]
15
33
  end
34
+
35
+ sig do
36
+ override.params(
37
+ captures: T::Hash[Symbol, T.nilable(String)],
38
+ address: AtlasEngine::AddressValidation::AbstractAddress,
39
+ ).returns(T::Boolean)
40
+ end
41
+ def ridiculous?(captures, address)
42
+ street = captures[:street]&.downcase
43
+
44
+ if street.present?
45
+ true unless address.address1&.downcase&.include?(street) ||
46
+ address.address2&.downcase&.include?(street)
47
+ end
48
+
49
+ false
50
+ end
16
51
  end
17
52
  end
18
53
  end
@@ -0,0 +1,38 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pt
6
+ module AddressValidation
7
+ module Validators
8
+ module FullAddress
9
+ module Exclusions
10
+ class Zip < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
11
+ class << self
12
+ sig do
13
+ override.params(
14
+ session: AtlasEngine::AddressValidation::Session,
15
+ candidate: AtlasEngine::AddressValidation::Candidate,
16
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
17
+ )
18
+ .returns(T::Boolean)
19
+ end
20
+ def apply?(session, candidate, address_comparison)
21
+ street_comparison_result = address_comparison.street_comparison.sequence_comparison
22
+ building_comparison_result = address_comparison.building_comparison.sequence_comparison
23
+
24
+ return true if street_comparison_result.nil? ||
25
+ building_comparison_result.nil? ||
26
+ T.must(building_comparison_result).candidate_ranges.empty?
27
+
28
+ !T.must(street_comparison_result).match? ||
29
+ !T.must(building_comparison_result).match?
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -8,3 +8,7 @@ ingestion:
8
8
  validation:
9
9
  enabled: true
10
10
  default_matching_strategy: es
11
+ address_parser: AtlasEngine::Pt::ValidationTranscriber::AddressParser
12
+ exclusions:
13
+ zip:
14
+ - AtlasEngine::Pt::AddressValidation::Validators::FullAddress::Exclusions::Zip