atlas_engine 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/README.md +12 -4
  3. data/app/countries/atlas_engine/cz/country_profile.yml +3 -0
  4. data/app/countries/atlas_engine/es/country_profile.yml +3 -0
  5. data/app/countries/atlas_engine/es/synonyms.yml +2 -0
  6. data/app/countries/atlas_engine/es/validation_transcriber/address_parser.rb +28 -0
  7. data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb +27 -0
  8. data/app/countries/atlas_engine/fo/country_profile.yml +4 -0
  9. data/app/countries/atlas_engine/fr/country_profile.yml +2 -0
  10. data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb +31 -0
  11. data/app/countries/atlas_engine/it/country_profile.yml +4 -0
  12. data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb +69 -0
  13. data/app/countries/atlas_engine/kr/country_profile.yml +7 -0
  14. data/app/countries/atlas_engine/kr/validation_transcriber/address_parser.rb +36 -0
  15. data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb +45 -0
  16. data/app/countries/atlas_engine/lu/country_profile.yml +4 -1
  17. data/app/countries/atlas_engine/lu/validation_transcriber/address_parser.rb +23 -0
  18. data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb +25 -0
  19. data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/empty_street_corrector.rb +32 -0
  20. data/app/countries/atlas_engine/pl/address_validation/exclusions/placeholder_postal_code.rb +35 -0
  21. data/app/countries/atlas_engine/pl/address_validation/exclusions/rural_address.rb +42 -0
  22. data/app/countries/atlas_engine/pl/country_profile.yml +13 -0
  23. data/app/countries/atlas_engine/pl/synonyms.yml +13 -0
  24. data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +36 -1
  25. data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb +38 -0
  26. data/app/countries/atlas_engine/pt/country_profile.yml +4 -0
  27. data/app/countries/atlas_engine/pt/synonyms.yml +12 -0
  28. data/app/countries/atlas_engine/pt/validation_transcriber/address_parser.rb +75 -0
  29. data/app/countries/atlas_engine/si/address_importer/open_address/corrections/city_district_corrector.rb +25 -0
  30. data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +19 -0
  31. data/app/countries/atlas_engine/si/address_validation/exclusions/unknown_city.rb +33 -0
  32. data/app/countries/atlas_engine/si/country_profile.yml +17 -0
  33. data/app/countries/atlas_engine/si/synonyms.yml +7 -0
  34. data/app/countries/atlas_engine/si/validation_transcriber/address_parser.rb +52 -0
  35. data/app/graphql/atlas_engine/schema.graphql +1 -1
  36. data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +1 -1
  37. data/app/models/atlas_engine/address_validation/concern_record.rb +6 -1
  38. data/app/models/atlas_engine/address_validation/es/query_builder.rb +6 -1
  39. data/app/models/atlas_engine/address_validation/statsd_emitter.rb +6 -2
  40. data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +38 -4
  41. data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +4 -4
  42. data/app/models/atlas_engine/address_validation/token/sequence/comparison_policy.rb +33 -0
  43. data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +41 -15
  44. data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb +33 -0
  45. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +3 -3
  46. data/app/models/atlas_engine/address_validation/validators/full_address/city_comparison.rb +25 -0
  47. data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +15 -6
  48. data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +8 -2
  49. data/app/models/atlas_engine/address_validation/validators/full_address/field_comparison_base.rb +77 -0
  50. data/app/models/atlas_engine/address_validation/validators/full_address/province_code_comparison.rb +34 -0
  51. data/app/models/atlas_engine/address_validation/validators/full_address/relevant_components.rb +34 -12
  52. data/app/models/atlas_engine/address_validation/validators/full_address/street_comparison.rb +30 -0
  53. data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +1 -1
  54. data/app/models/atlas_engine/address_validation/validators/full_address/zip_comparison.rb +37 -0
  55. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +2 -2
  56. data/app/models/atlas_engine/country_profile_validation_subset.rb +35 -2
  57. data/db/data/country_profiles/default.yml +12 -0
  58. data/lib/atlas_engine/version.rb +1 -1
  59. data/lib/tasks/atlas_engine/graphql.rake +13 -0
  60. metadata +35 -6
  61. data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +0 -135
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f78501b09c1f8bfeae5d8a5be2ff3b83d7f68f689e66294f704f3c3bcf5830b
4
- data.tar.gz: d1100e782d95c89565be662da85ba69788fa457bbefcd11ccd8082c356086df2
3
+ metadata.gz: ae2dfdcc902973978f88d2d0bcd07808dbde5c48eecfe53977c4449b422bd492
4
+ data.tar.gz: 5e2d0b59cdb714a01a5e1df904390253760046151be3fecddce1f69b3fa1cc28
5
5
  SHA512:
6
- metadata.gz: 7d2b8abdf8ae247fcb95b10a0e7f301deea99ee2505f8817c85a8bf10459e3cdedfd7a631738071cf018db7545ca0b2506ec518f31c1e5ba1f507a49915d2f25
7
- data.tar.gz: d7b2177df1ca73d45a37993dfdea005d92fc3705ef422c0a1b7e90285825880f4d82f7ae3fff47d8f425abf3a2865636ec5bd8e546a2e60c113de19aeeba8df0
6
+ metadata.gz: 50b5daf2a3a65fe064d37fe3f54944b1300f6d80a5009447af0a96caaa191a67563a1f6964e96dfc3cf849e67ff9af1279bbd71f8f0dcf3cc8c64e86489b7dd6
7
+ data.tar.gz: cce398aa5d5389a01effce955b2b224b34593013064649afd833323e23afc8cf6a746b136cca3eb36b1fc33eb15c5f6cd30110f1f6c4585f6a17c935761e38c5
data/README.md CHANGED
@@ -1,7 +1,15 @@
1
- # Atlas Engine
1
+ # 🌐 Atlas Engine
2
2
 
3
3
  Atlas Engine is a rails engine that provides a global end-to-end address validation API for rails apps.
4
4
 
5
+ * [Address Validation API](#address-validation-api)
6
+ * [Rails App Installation](#rails-app-installation)
7
+ * [Local Development Installation](#local-development-installation)
8
+ * [Address Data Ingestion](#address-data-ingestion)
9
+ * [Elasticsearch Matching Strategy](#elasticsearch-matching-strategy)
10
+
11
+ ## Address Validation API
12
+
5
13
  The validation API is powered by GraphQL, an example request and explanation of the parameters and response follows:
6
14
 
7
15
  ```graphql
@@ -156,7 +164,7 @@ being an invalid zip code for the province `ON`. It also returns the human reada
156
164
 
157
165
  The validation scope excludes zip because the zip was not successfully validated.
158
166
 
159
- ## Installation of Atlas Engine in your rails app
167
+ ## Rails App Installation
160
168
 
161
169
  ### Initial setup
162
170
  Add the engine to your gemfile
@@ -182,7 +190,7 @@ rails atlas_engine:install:migrations
182
190
  rails db:migrate
183
191
  ```
184
192
 
185
- ## Setup Atlas Engine for contribution / local development
193
+ ## Local Development Installation
186
194
 
187
195
  This setup guide is based on a mac os development environment. Your tooling may vary.
188
196
 
@@ -275,7 +283,7 @@ Run sorbet check
275
283
  srb tc
276
284
  ```
277
285
 
278
- ## Ingestion
286
+ ## Address Data Ingestion
279
287
 
280
288
  In order to power the more advanced validation matching strategies that provide city / state / zip and even street
281
289
  level address validation, your app must have a populated elasticsearch index per country available for `atlas_engine`
data/app/countries/atlas_engine/cz/country_profile.yml CHANGED
@@ -5,3 +5,6 @@ validation:
5
5
  address_parser: AtlasEngine::Cz::ValidationTranscriber::AddressParser
6
6
  query_builder: AtlasEngine::Cz::AddressValidation::Es::QueryBuilder
7
7
  has_provinces: false
8
+ comparison_policies:
9
+ city:
10
+ unmatched: ignore_left_unmatched
data/app/countries/atlas_engine/es/country_profile.yml ADDED
@@ -0,0 +1,3 @@
1
+ id: ES
2
+ validation:
3
+ address_parser: AtlasEngine::Es::ValidationTranscriber::AddressParser
data/app/countries/atlas_engine/es/synonyms.yml ADDED
@@ -0,0 +1,2 @@
1
+ street_synonyms:
2
+ - c/, calle
data/app/countries/atlas_engine/es/validation_transcriber/address_parser.rb ADDED
@@ -0,0 +1,28 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Es
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ private
9
+
10
+ NON_NUMERIC_STREET = "(?<street>[^[:digit:]]+)"
11
+ BUILDING_NUM_DESIGNATOR = /(?i)(n|n°|número)/
12
+ CATCH_ALL = /(?:,|\s|\s*.+)/
13
+
14
+ sig { returns(T::Array[Regexp]) }
15
+ def country_regex_formats
16
+ @country_regex_formats ||= [
17
+ /^#{STREET_NO_COMMAS},?\s+#{BUILDING_NUM}$/,
18
+ /^#{STREET_NO_COMMAS},?\s+#{BUILDING_NUM}#{CATCH_ALL}$/,
19
+ /^#{NON_NUMERIC_STREET},?\s+#{BUILDING_NUM}#{CATCH_ALL}$/,
20
+ /^#{STREET_NO_COMMAS},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}/,
21
+ /^#{STREET_NO_COMMAS},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}#{CATCH_ALL}$/,
22
+ /^#{NON_NUMERIC_STREET},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}#{CATCH_ALL}$/,
23
+ ]
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
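data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb ADDED
@@ -0,0 +1,27 @@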
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Fo
6
+ module AddressImporter
7
+ module Corrections
8
+ module OpenAddress
9
+ class CityCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ sig { params(address: Hash).void }
14
+ def apply(address)
15
+ if address[:city] == ["Nes, Eysturoy"] || address[:city] == ["Nes, Vágur"]
16
+ address[:city] = ["Nes"]
17
+ elsif address[:city] == ["Syðradalur, Kalsoy"] || address[:city] == ["Syðradalur, Streymoy"]
18
+ address[:city] = ["Syðradalur"]
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
data/app/countries/atlas_engine/fo/country_profile.yml CHANGED
@@ -3,3 +3,7 @@ validation:
3
3
  enabled: true
4
4
  default_matching_strategy: es
5
5
  has_provinces: false
6
+ ingestion:
7
+ correctors:
8
+ open_address:
9
+ - AtlasEngine::Fo::AddressImporter::Corrections::OpenAddress::CityCorrector
data/app/countries/atlas_engine/fr/country_profile.yml CHANGED
@@ -11,3 +11,5 @@ validation:
11
11
  address_parser: AtlasEngine::Fr::ValidationTranscriber::AddressParser
12
12
  enabled: true
13
13
  default_matching_strategy: es
14
+ has_provinces: false
15
+ zip_prefix_length: 3
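data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb ADDED
@@ -0,0 +1,31 @@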
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module It
6
+ module AddressValidation
7
+ module Validators
8
+ module FullAddress
9
+ module Exclusions
10
+ class City <
11
+ AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
12
+ class << self
13
+ sig do
14
+ override.params(
15
+ session: AtlasEngine::AddressValidation::Session,
16
+ candidate: AtlasEngine::AddressValidation::Candidate,
17
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
18
+ )
19
+ .returns(T::Boolean)
20
+ end
21
+ def apply?(session, candidate, address_comparison)
22
+ true
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
data/app/countries/atlas_engine/it/country_profile.yml CHANGED
@@ -3,6 +3,10 @@ validation:
3
3
  enabled: true
4
4
  default_matching_strategy: es
5
5
  address_parser: AtlasEngine::Dk::ValidationTranscriber::AddressParser
6
+ exclusions:
7
+ city:
8
+ - AtlasEngine::It::AddressValidation::Validators::FullAddress::Exclusions::City
9
+ unmatched_components_suggestion_threshold: 1
6
10
  ingestion:
7
11
  open_address:
8
12
  feature_mapper: AtlasEngine::It::AddressImporter::OpenAddress::Mapper
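data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb ADDED
@@ -0,0 +1,69 @@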
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Kr
6
+ module AddressValidation
7
+ module Validators
8
+ module FullAddress
9
+ module Exclusions
10
+ class City <
11
+ AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
12
+ extend T::Sig
13
+ class << self
14
+ COMPONENT_IDENTIFIER = {
15
+ si: "시",
16
+ gu: "구",
17
+ }.freeze
18
+
19
+ sig do
20
+ override.params(
21
+ session: AtlasEngine::AddressValidation::Session,
22
+ candidate: AtlasEngine::AddressValidation::Candidate,
23
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
24
+ )
25
+ .returns(T::Boolean)
26
+ end
27
+ def apply?(session, candidate, address_comparison)
28
+ candidate_si = extract_component_from_city(candidate, :si)
29
+ candidate_gu = extract_component_from_city(candidate, :gu)
30
+
31
+ (candidate_si.present? && contains_component?(:si, candidate_si, session)) ||
32
+ (candidate_gu.present? && contains_component?(:gu, candidate_gu, session))
33
+ end
34
+
35
+ private
36
+
37
+ sig do
38
+ params(
39
+ candidate: AtlasEngine::AddressValidation::Candidate,
40
+ component: Symbol,
41
+ ).returns(T.nilable(String))
42
+ end
43
+ def extract_component_from_city(candidate, component)
44
+ city = candidate.component(:city)&.value&.first
45
+
46
+ city_parts = city.split(" ")
47
+ city_parts.find do |part|
48
+ part.end_with?(COMPONENT_IDENTIFIER[component])
49
+ end
50
+ end
51
+
52
+ sig do
53
+ params(
54
+ type: Symbol,
55
+ value: String,
56
+ session: AtlasEngine::AddressValidation::Session,
57
+ ).returns(T::Boolean)
58
+ end
59
+ def contains_component?(type, value, session)
60
+ session.parsings.parsings.pluck(type)&.include?(value) || session.city&.include?(value)
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
data/app/countries/atlas_engine/kr/country_profile.yml CHANGED
@@ -5,6 +5,13 @@ ingestion:
5
5
  validation:
6
6
  enabled: true
7
7
  default_matching_strategy: es
8
+ address_parser: AtlasEngine::Kr::ValidationTranscriber::AddressParser
9
+ comparison_policies:
10
+ city:
11
+ unmatched: ignore_left_unmatched
12
+ exclusions:
13
+ city:
14
+ - AtlasEngine::Kr::AddressValidation::Validators::FullAddress::Exclusions::City
8
15
  restrictions:
9
16
  - class: AtlasEngine::Restrictions::UnsupportedScript
10
17
  params:
data/app/countries/atlas_engine/kr/validation_transcriber/address_parser.rb ADDED
@@ -0,0 +1,36 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Kr
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ private
9
+
10
+ PROVINCE = "(?<province>.+기|서울)"
11
+ GU = "(?<gu>.+구)"
12
+ SI = "(?<si>.+시)"
13
+ DONG = "(?<dong>.+동)"
14
+ EUP = "(?<eup>.+읍)"
15
+ STREET = "(?<street>\\S+)"
16
+ BUILDING_NUM = "(?<building_num>\\d+(^호)?)"
17
+ UNIT_NUM = "(?<unit_num>\\d+(^동)?)"
18
+
19
+ sig { returns(T::Array[Regexp]) }
20
+ def country_regex_formats
21
+ @country_regex_formats ||= [
22
+ %r{
23
+ (#{PROVINCE}\s+)?
24
+ (#{SI}\s+)?
25
+ (#{GU}\s+)?
26
+ (#{DONG}\s+)?
27
+ (#{EUP}\s+)?
28
+ (#{STREET}\s+)?
29
+ (#{BUILDING_NUM}(-|\s)?)?#{UNIT_NUM}?
30
+ }x,
31
+ ]
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
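data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb ADDED
@@ -0,0 +1,45 @@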
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Lu
6
+ module AddressImporter
7
+ module Corrections
8
+ module OpenAddress
9
+ class CityCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ # NOTE: ensure keys match the city names we have in our db
14
+ CITY_ALIASES = {
15
+ "luxembourg" => ["Lëtzebuerg"],
16
+ "esch-sur-alzette" => ["Esch-Uelzecht", "Esch/Alzette"],
17
+ "dudelange" => ["Diddeleng", "Düdelingen"],
18
+ "schifflange" => ["Schëffleng"],
19
+ "bettembourg" => ["Beetebuerg"],
20
+ "pétange" => ["Péiteng"],
21
+ "ettelbruck" => ["Ettelbréck"],
22
+ "diekirch" => ["Dikrech"],
23
+ "strassen" => ["Stroossen"],
24
+ "bertrange" => ["Bartreng"],
25
+ "belvaux" => ["Bieles"],
26
+ "differdange" => ["Déifferdeng"],
27
+ "wiltz" => ["Wolz"],
28
+ "grevenmacher" => ["Gréiwemaacher"],
29
+ "mersch" => ["Miersch"],
30
+ "redange/attert" => ["Redange", "Réiden", "Redange-sur-Attert"],
31
+ }
32
+
33
+ sig { params(address: Hash).void }
34
+ def apply(address)
35
+ city = address[:city].first.downcase
36
+ aliases = CITY_ALIASES[city.downcase] || []
37
+ address[:city] = address[:city] + aliases
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
data/app/countries/atlas_engine/lu/country_profile.yml CHANGED
@@ -1,12 +1,15 @@
1
1
  id: LU
2
2
  validation:
3
3
  enabled: true
4
- has_provinces: true
4
+ has_provinces: false
5
5
  default_matching_strategy: es
6
+ address_parser: AtlasEngine::Lu::ValidationTranscriber::AddressParser
6
7
  index_locales:
7
8
  - fr
8
9
  - lb
10
+ unmatched_components_suggestion_threshold: 1
9
11
  ingestion:
10
12
  correctors:
11
13
  open_address:
12
14
  - AtlasEngine::Lu::AddressImporter::Corrections::OpenAddress::LocaleCorrector
15
+ - AtlasEngine::Lu::AddressImporter::Corrections::OpenAddress::CityCorrector
data/app/countries/atlas_engine/lu/validation_transcriber/address_parser.rb ADDED
@@ -0,0 +1,23 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Lu
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ private
9
+
10
+ STREET = "(?<street>.+)"
11
+ BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)([/-][0-9])?"
12
+
13
+ sig { returns(T::Array[Regexp]) }
14
+ def country_regex_formats
15
+ @country_regex_formats ||= [
16
+ /^#{STREET},?\s+#{BUILDING_NUM}/,
17
+ /^#{BUILDING_NUM}\s?(,\s?)?#{STREET}/,
18
+ ]
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
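data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb ADDED
@@ -0,0 +1,25 @@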
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pl
6
+ module AddressImporter
7
+ module Corrections
8
+ module OpenAddress
9
+ class CityCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ sig { params(address: Hash).void }
14
+ def apply(address)
15
+ if address[:city] == ["Warszawa"]
16
+ address[:city] << "Warsaw"
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
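data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/empty_street_corrector.rb ADDED
@@ -0,0 +1,32 @@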
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pl
6
+ module AddressImporter
7
+ module Corrections
8
+ module OpenAddress
9
+ class EmptyStreetCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ sig { params(address: Hash).void }
14
+ def apply(address)
15
+ if address[:street] == "" && address[:city].present?
16
+ # Many smaller rural towns in Poland don't have street names. Mailing addresses are
17
+ # often expressed as
18
+ # address1: <town name> <building number>
19
+ # city: <town name> OR <nearest postal town>
20
+ # postal_code: <postal code>
21
+ #
22
+ # The OpenAddresses dataset does not currently include county/postal town info.
23
+ address[:street] = Array(address[:city]).first
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
data/app/countries/atlas_engine/pl/address_validation/exclusions/placeholder_postal_code.rb ADDED
@@ -0,0 +1,35 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pl
6
+ module AddressValidation
7
+ module Exclusions
8
+ class PlaceholderPostalCode < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
9
+ extend T::Sig
10
+
11
+ class << self
12
+ sig do
13
+ override.params(
14
+ session: AtlasEngine::AddressValidation::Session,
15
+ candidate: AtlasEngine::AddressValidation::Candidate,
16
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
17
+ ).returns(T::Boolean)
18
+ end
19
+ def apply?(session, candidate, address_comparison)
20
+ placeholder_postal_code?(candidate)
21
+ end
22
+
23
+ private
24
+
25
+ sig { params(candidate: AtlasEngine::AddressValidation::Candidate).returns(T::Boolean) }
26
+ def placeholder_postal_code?(candidate)
27
+ zip_values = T.must(candidate.component(:zip)&.values)
28
+ zip_values.all?("00-000")
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
data/app/countries/atlas_engine/pl/address_validation/exclusions/rural_address.rb ADDED
@@ -0,0 +1,42 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pl
6
+ module AddressValidation
7
+ module Exclusions
8
+ class RuralAddress < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
9
+ extend T::Sig
10
+
11
+ class << self
12
+ sig do
13
+ override.params(
14
+ session: AtlasEngine::AddressValidation::Session,
15
+ candidate: AtlasEngine::AddressValidation::Candidate,
16
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
17
+ ).returns(T::Boolean)
18
+ end
19
+ def apply?(session, candidate, address_comparison)
20
+ rural_address?(candidate) && poor_city_match?(address_comparison)
21
+ end
22
+
23
+ private
24
+
25
+ def poor_city_match?(address_comparison)
26
+ address_comparison.city_comparison.sequence_comparison.aggregate_distance > 2
27
+ end
28
+
29
+ sig { params(candidate: AtlasEngine::AddressValidation::Candidate).returns(T::Boolean) }
30
+ def rural_address?(candidate)
31
+ return false if candidate.component(:city)&.values.blank?
32
+
33
+ street = candidate.component(:street)&.first_value
34
+ city_values = T.must(candidate.component(:city)&.values)
35
+ city_values.any?(street)
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
data/app/countries/atlas_engine/pl/country_profile.yml CHANGED
@@ -1,5 +1,18 @@
1
1
  id: PL
2
+ ingestion:
3
+ correctors:
4
+ open_address:
5
+ - AtlasEngine::Pl::AddressImporter::Corrections::OpenAddress::CityCorrector
6
+ - AtlasEngine::Pl::AddressImporter::Corrections::OpenAddress::EmptyStreetCorrector
2
7
  validation:
3
8
  address_parser: AtlasEngine::Pl::ValidationTranscriber::AddressParser
4
9
  enabled: true
5
10
  default_matching_strategy: es
11
+ exclusions:
12
+ city:
13
+ - AtlasEngine::Pl::AddressValidation::Exclusions::RuralAddress
14
+ zip:
15
+ - AtlasEngine::Pl::AddressValidation::Exclusions::PlaceholderPostalCode
16
+ comparison_policies:
17
+ street:
18
+ unmatched: ignore_largest_unmatched_side
data/app/countries/atlas_engine/pl/synonyms.yml ADDED
@@ -0,0 +1,13 @@
1
+ street_synonyms:
2
+ ## street suffixes
3
+ - aleja, al # avenue
4
+ - osiedle, os # housing estate
5
+ - plac, pl # square
6
+ - ulica, ul # street
7
+ ## titles
8
+ - kardynała, kard # cardinal (masculine)
9
+ - święta, św # saint (feminine)
10
+ - świętego, św # saint
11
+ - święty, św # saint (masculine)
12
+ city_synonyms:
13
+ - wielkopolska, wlkp # Greater Poland
data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb CHANGED
@@ -5,14 +5,49 @@ module AtlasEngine
5
5
  module Pl
6
6
  module ValidationTranscriber
7
7
  class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ STREET = "(?<street>.+?)" # the .+ is non-greedy to allow for optional building number prefixes
9
+ BUILDING_NUM_PREFIX = "(?:nr.?\s+)"
10
+ BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
11
+ UNIT_NUM_PREFIX = "(?:\s*[/-]|\s+m.?)"
12
+ UNIT_NUM = "(?<unit_num>[[:alpha:]0-9]+)"
13
+
14
+ sig { override.returns(T::Array[AddressComponents]) }
15
+ def parse
16
+ # Polish addresses sometimes follow an abbreviation with a period and no space afterward
17
+ super.each do |components|
18
+ components[:street]&.gsub!(
19
+ /\A(?<prefix>.+?)(?<dot>\.)(?<non_space>\S)/i,
20
+ "\\k<prefix> \\k<non_space>",
21
+ )
22
+ end
23
+ end
24
+
8
25
  private
9
26
 
10
27
  sig { returns(T::Array[Regexp]) }
11
28
  def country_regex_formats
12
29
  @country_regex_formats ||= [
13
- %r{^(?<street>.+)\s+(?<building_num>[0-9][[:alpha:]0-9]*)(\s*/\s*(?<unit_num>[[:alpha:]0-9]+))?$},
30
+ /^#{STREET}\s+#{BUILDING_NUM_PREFIX}?#{BUILDING_NUM}(#{UNIT_NUM_PREFIX}\s*#{UNIT_NUM})?$/,
31
+ /^#{STREET}$/,
14
32
  ]
15
33
  end
34
+
35
+ sig do
36
+ override.params(
37
+ captures: T::Hash[Symbol, T.nilable(String)],
38
+ address: AtlasEngine::AddressValidation::AbstractAddress,
39
+ ).returns(T::Boolean)
40
+ end
41
+ def ridiculous?(captures, address)
42
+ street = captures[:street]&.downcase
43
+
44
+ if street.present?
45
+ true unless address.address1&.downcase&.include?(street) ||
46
+ address.address2&.downcase&.include?(street)
47
+ end
48
+
49
+ false
50
+ end
16
51
  end
17
52
  end
18
53
  end
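data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb ADDED
@@ -0,0 +1,38 @@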
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pt
6
+ module AddressValidation
7
+ module Validators
8
+ module FullAddress
9
+ module Exclusions
10
+ class Zip < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
11
+ class << self
12
+ sig do
13
+ override.params(
14
+ session: AtlasEngine::AddressValidation::Session,
15
+ candidate: AtlasEngine::AddressValidation::Candidate,
16
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
17
+ )
18
+ .returns(T::Boolean)
19
+ end
20
+ def apply?(session, candidate, address_comparison)
21
+ street_comparison_result = address_comparison.street_comparison.sequence_comparison
22
+ building_comparison_result = address_comparison.building_comparison.sequence_comparison
23
+
24
+ return true if street_comparison_result.nil? ||
25
+ building_comparison_result.nil? ||
26
+ T.must(building_comparison_result).candidate_ranges.empty?
27
+
28
+ !T.must(street_comparison_result).match? ||
29
+ !T.must(building_comparison_result).match?
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
data/app/countries/atlas_engine/pt/country_profile.yml CHANGED
@@ -8,3 +8,7 @@ ingestion:
8
8
  validation:
9
9
  enabled: true
10
10
  default_matching_strategy: es
11
+ address_parser: AtlasEngine::Pt::ValidationTranscriber::AddressParser
12
+ exclusions:
13
+ zip:
14
+ - AtlasEngine::Pt::AddressValidation::Validators::FullAddress::Exclusions::Zip