atlas_engine 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -4
- data/app/countries/atlas_engine/cz/country_profile.yml +3 -0
- data/app/countries/atlas_engine/es/country_profile.yml +3 -0
- data/app/countries/atlas_engine/es/synonyms.yml +2 -0
- data/app/countries/atlas_engine/es/validation_transcriber/address_parser.rb +28 -0
- data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb +27 -0
- data/app/countries/atlas_engine/fo/country_profile.yml +4 -0
- data/app/countries/atlas_engine/fr/country_profile.yml +2 -0
- data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb +31 -0
- data/app/countries/atlas_engine/it/country_profile.yml +4 -0
- data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb +69 -0
- data/app/countries/atlas_engine/kr/country_profile.yml +7 -0
- data/app/countries/atlas_engine/kr/validation_transcriber/address_parser.rb +36 -0
- data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb +45 -0
- data/app/countries/atlas_engine/lu/country_profile.yml +4 -1
- data/app/countries/atlas_engine/lu/validation_transcriber/address_parser.rb +23 -0
- data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb +25 -0
- data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/empty_street_corrector.rb +32 -0
- data/app/countries/atlas_engine/pl/address_validation/exclusions/placeholder_postal_code.rb +35 -0
- data/app/countries/atlas_engine/pl/address_validation/exclusions/rural_address.rb +42 -0
- data/app/countries/atlas_engine/pl/country_profile.yml +13 -0
- data/app/countries/atlas_engine/pl/synonyms.yml +13 -0
- data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +36 -1
- data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb +38 -0
- data/app/countries/atlas_engine/pt/country_profile.yml +4 -0
- data/app/countries/atlas_engine/pt/synonyms.yml +12 -0
- data/app/countries/atlas_engine/pt/validation_transcriber/address_parser.rb +75 -0
- data/app/countries/atlas_engine/si/address_importer/open_address/corrections/city_district_corrector.rb +25 -0
- data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +19 -0
- data/app/countries/atlas_engine/si/address_validation/exclusions/unknown_city.rb +33 -0
- data/app/countries/atlas_engine/si/country_profile.yml +17 -0
- data/app/countries/atlas_engine/si/synonyms.yml +7 -0
- data/app/countries/atlas_engine/si/validation_transcriber/address_parser.rb +52 -0
- data/app/graphql/atlas_engine/schema.graphql +1 -1
- data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +1 -1
- data/app/models/atlas_engine/address_validation/concern_record.rb +6 -1
- data/app/models/atlas_engine/address_validation/es/query_builder.rb +6 -1
- data/app/models/atlas_engine/address_validation/statsd_emitter.rb +6 -2
- data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +38 -4
- data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +4 -4
- data/app/models/atlas_engine/address_validation/token/sequence/comparison_policy.rb +33 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +41 -15
- data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb +33 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +3 -3
- data/app/models/atlas_engine/address_validation/validators/full_address/city_comparison.rb +25 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +15 -6
- data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +8 -2
- data/app/models/atlas_engine/address_validation/validators/full_address/field_comparison_base.rb +77 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/province_code_comparison.rb +34 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/relevant_components.rb +34 -12
- data/app/models/atlas_engine/address_validation/validators/full_address/street_comparison.rb +30 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +1 -1
- data/app/models/atlas_engine/address_validation/validators/full_address/zip_comparison.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +2 -2
- data/app/models/atlas_engine/country_profile_validation_subset.rb +35 -2
- data/db/data/country_profiles/default.yml +12 -0
- data/lib/atlas_engine/version.rb +1 -1
- data/lib/tasks/atlas_engine/graphql.rake +13 -0
- metadata +35 -6
- data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +0 -135
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae2dfdcc902973978f88d2d0bcd07808dbde5c48eecfe53977c4449b422bd492
|
4
|
+
data.tar.gz: 5e2d0b59cdb714a01a5e1df904390253760046151be3fecddce1f69b3fa1cc28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 50b5daf2a3a65fe064d37fe3f54944b1300f6d80a5009447af0a96caaa191a67563a1f6964e96dfc3cf849e67ff9af1279bbd71f8f0dcf3cc8c64e86489b7dd6
|
7
|
+
data.tar.gz: cce398aa5d5389a01effce955b2b224b34593013064649afd833323e23afc8cf6a746b136cca3eb36b1fc33eb15c5f6cd30110f1f6c4585f6a17c935761e38c5
|
data/README.md
CHANGED
@@ -1,7 +1,15 @@
|
|
1
|
-
# Atlas Engine
|
1
|
+
# 🌐 Atlas Engine
|
2
2
|
|
3
3
|
Atlas Engine is a rails engine that provides a global end-to-end address validation API for rails apps.
|
4
4
|
|
5
|
+
* [Address Validation API](#address-validation-api)
|
6
|
+
* [Rails App Installation](#rails-app-installation)
|
7
|
+
* [Local Development Installation](#local-development-installation)
|
8
|
+
* [Address Data Ingestion](#address-data-ingestion)
|
9
|
+
* [Elasticsearch Matching Strategy](#elasticsearch-matching-strategy)
|
10
|
+
|
11
|
+
## Address Validation API
|
12
|
+
|
5
13
|
The validation API is powered by GraphQL. An example request, with an explanation of the parameters and response, follows:
|
6
14
|
|
7
15
|
```graphql
|
@@ -156,7 +164,7 @@ being an invalid zip code for the province `ON`. It also returns the human reada
|
|
156
164
|
|
157
165
|
The validation scope excludes zip because the zip was not successfully validated.
|
158
166
|
|
159
|
-
##
|
167
|
+
## Rails App Installation
|
160
168
|
|
161
169
|
### Initial setup
|
162
170
|
Add the engine to your gemfile
|
@@ -182,7 +190,7 @@ rails atlas_engine:install:migrations
|
|
182
190
|
rails db:migrate
|
183
191
|
```
|
184
192
|
|
185
|
-
##
|
193
|
+
## Local Development Installation
|
186
194
|
|
187
195
|
This setup guide is based on a macOS development environment. Your tooling may vary.
|
188
196
|
|
@@ -275,7 +283,7 @@ Run sorbet check
|
|
275
283
|
srb tc
|
276
284
|
```
|
277
285
|
|
278
|
-
## Ingestion
|
286
|
+
## Address Data Ingestion
|
279
287
|
|
280
288
|
In order to power the more advanced validation matching strategies that provide city / state / zip and even street
|
281
289
|
level address validation, your app must have a populated elasticsearch index per country available for `atlas_engine`
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Es
    module ValidationTranscriber
      # Address parser for Spain (ES).
      #
      # Supplies the country-specific regular expressions through
      # `country_regex_formats`. `STREET_NO_COMMAS` and `BUILDING_NUM` are not
      # defined in this class; presumably they are inherited from
      # AddressParserBase (not visible here).
      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
        private

        # Street capture that accepts any run of non-digit characters.
        NON_NUMERIC_STREET = "(?<street>[^[:digit:]]+)"
        # Case-insensitive marker that may precede the building number ("n", "n°", "número").
        BUILDING_NUM_DESIGNATOR = /(?i)(n|n°|número)/
        # Whatever trails the building number: a comma, a whitespace character, or any remaining text.
        CATCH_ALL = /(?:,|\s|\s*.+)/

        # Returns the memoized list of Spanish address formats: first the three
        # formats without a building-number designator, then the three with one.
        sig { returns(T::Array[Regexp]) }
        def country_regex_formats
          return @country_regex_formats if @country_regex_formats

          without_designator = [
            /^#{STREET_NO_COMMAS},?\s+#{BUILDING_NUM}$/,
            /^#{STREET_NO_COMMAS},?\s+#{BUILDING_NUM}#{CATCH_ALL}$/,
            /^#{NON_NUMERIC_STREET},?\s+#{BUILDING_NUM}#{CATCH_ALL}$/,
          ]
          with_designator = [
            /^#{STREET_NO_COMMAS},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}/,
            /^#{STREET_NO_COMMAS},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}#{CATCH_ALL}$/,
            /^#{NON_NUMERIC_STREET},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}#{CATCH_ALL}$/,
          ]

          @country_regex_formats = without_designator + with_designator
        end
      end
    end
  end
end
|
data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Fo
    module AddressImporter
      module Corrections
        module OpenAddress
          # Import-time corrector for Faroe Islands (FO) city names.
          #
          # Rewrites city lists whose single entry carries an island qualifier
          # ("Nes, Eysturoy", "Syðradalur, Kalsoy", ...) to the bare town name.
          class CityCorrector
            class << self
              extend T::Sig

              # Replaces `address[:city]` in place when it is exactly one of the
              # known qualified names; any other value is left untouched.
              sig { params(address: Hash).void }
              def apply(address)
                case address[:city]
                when ["Nes, Eysturoy"], ["Nes, Vágur"]
                  address[:city] = ["Nes"]
                when ["Syðradalur, Kalsoy"], ["Syðradalur, Streymoy"]
                  address[:city] = ["Syðradalur"]
                end
              end
            end
          end
        end
      end
    end
  end
end
|
data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module It
    module AddressValidation
      module Validators
        module FullAddress
          module Exclusions
            # City exclusion for Italy (IT) that applies unconditionally.
            #
            # NOTE(review): what the validator does when an exclusion applies is
            # decided by ExclusionBase and its callers, which are not visible here.
            class City < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
              class << self
                sig do
                  override
                    .params(
                      session: AtlasEngine::AddressValidation::Session,
                      candidate: AtlasEngine::AddressValidation::Candidate,
                      address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
                    )
                    .returns(T::Boolean)
                end
                # Always true: none of the arguments are inspected.
                def apply?(session, candidate, address_comparison)
                  true
                end
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -3,6 +3,10 @@ validation:
|
|
3
3
|
enabled: true
|
4
4
|
default_matching_strategy: es
|
5
5
|
address_parser: AtlasEngine::Dk::ValidationTranscriber::AddressParser
|
6
|
+
exclusions:
|
7
|
+
city:
|
8
|
+
- AtlasEngine::It::AddressValidation::Validators::FullAddress::Exclusions::City
|
9
|
+
unmatched_components_suggestion_threshold: 1
|
6
10
|
ingestion:
|
7
11
|
open_address:
|
8
12
|
feature_mapper: AtlasEngine::It::AddressImporter::OpenAddress::Mapper
|
data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Kr
    module AddressValidation
      module Validators
        module FullAddress
          module Exclusions
            # City exclusion for South Korea (KR).
            #
            # Applies when the candidate's city contains a "si" (시) or "gu" (구)
            # part that also appears in the session's parsings or in the session's
            # city string.
            class City <
              AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
              extend T::Sig
              class << self
                # Suffix that identifies each administrative component inside a city string.
                COMPONENT_IDENTIFIER = {
                  si: "시",
                  gu: "구",
                }.freeze

                sig do
                  override.params(
                    session: AtlasEngine::AddressValidation::Session,
                    candidate: AtlasEngine::AddressValidation::Candidate,
                    address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
                  )
                    .returns(T::Boolean)
                end
                # True when the candidate's si or gu is present and found in the session input.
                def apply?(session, candidate, address_comparison)
                  candidate_si = extract_component_from_city(candidate, :si)
                  candidate_gu = extract_component_from_city(candidate, :gu)

                  (candidate_si.present? && contains_component?(:si, candidate_si, session)) ||
                    (candidate_gu.present? && contains_component?(:gu, candidate_gu, session))
                end

                private

                sig do
                  params(
                    candidate: AtlasEngine::AddressValidation::Candidate,
                    component: Symbol,
                  ).returns(T.nilable(String))
                end
                # Returns the first space-separated part of the candidate's city that
                # ends with the suffix for `component`, or nil when there is no city
                # or no such part.
                def extract_component_from_city(candidate, component)
                  city = candidate.component(:city)&.value&.first
                  # The safe navigation above can yield nil (no city component or no
                  # value); previously that fell through to `nil.split` and raised.
                  return if city.nil?

                  suffix = COMPONENT_IDENTIFIER[component]
                  city.split(" ").find { |part| part.end_with?(suffix) }
                end

                sig do
                  params(
                    type: Symbol,
                    value: String,
                    session: AtlasEngine::AddressValidation::Session,
                  ).returns(T::Boolean)
                end
                # True when `value` is one of the parsed `type` components or is
                # contained in the session's city.
                def contains_component?(type, value, session)
                  # `session.city&.include?` evaluates to nil when the session has no
                  # city; coerce to false so the declared T::Boolean return holds.
                  session.parsings.parsings.pluck(type)&.include?(value) ||
                    session.city&.include?(value) ||
                    false
                end
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -5,6 +5,13 @@ ingestion:
|
|
5
5
|
validation:
|
6
6
|
enabled: true
|
7
7
|
default_matching_strategy: es
|
8
|
+
address_parser: AtlasEngine::Kr::ValidationTranscriber::AddressParser
|
9
|
+
comparison_policies:
|
10
|
+
city:
|
11
|
+
unmatched: ignore_left_unmatched
|
12
|
+
exclusions:
|
13
|
+
city:
|
14
|
+
- AtlasEngine::Kr::AddressValidation::Validators::FullAddress::Exclusions::City
|
8
15
|
restrictions:
|
9
16
|
- class: AtlasEngine::Restrictions::UnsupportedScript
|
10
17
|
params:
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Kr
    module ValidationTranscriber
      # Address parser for South Korea (KR).
      #
      # Provides a single extended-mode regex whose optional named groups pick
      # out province, si, gu, dong, eup, street, building number and unit number.
      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
        private

        PROVINCE = "(?<province>.+기|서울)"
        GU = "(?<gu>.+구)"
        SI = "(?<si>.+시)"
        DONG = "(?<dong>.+동)"
        EUP = "(?<eup>.+읍)"
        STREET = "(?<street>\\S+)"
        # NOTE(review): in the two patterns below, `^` inside the group is a
        # line-start anchor, so `(^호)?` / `(^동)?` can never match after one or
        # more digits and the group is always skipped. Possibly an optional
        # suffix (`호?` / `동?`) was intended — confirm before changing.
        BUILDING_NUM = "(?<building_num>\\d+(^호)?)"
        UNIT_NUM = "(?<unit_num>\\d+(^동)?)"

        # Returns the memoized, single-element list of Korean address formats.
        sig { returns(T::Array[Regexp]) }
        def country_regex_formats
          return @country_regex_formats if @country_regex_formats

          full_address = %r{
            (#{PROVINCE}\s+)?
            (#{SI}\s+)?
            (#{GU}\s+)?
            (#{DONG}\s+)?
            (#{EUP}\s+)?
            (#{STREET}\s+)?
            (#{BUILDING_NUM}(-|\s)?)?#{UNIT_NUM}?
          }x

          @country_regex_formats = [full_address]
        end
      end
    end
  end
end
|
data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Lu
    module AddressImporter
      module Corrections
        module OpenAddress
          # Import-time corrector for Luxembourg (LU) city names.
          #
          # Appends known alternative spellings of a city to the address's
          # city list.
          class CityCorrector
            class << self
              extend T::Sig

              # NOTE: ensure keys match the city names we have in our db
              # Keys are lower-cased because the lookup lower-cases the city first.
              CITY_ALIASES = {
                "luxembourg" => ["Lëtzebuerg"],
                "esch-sur-alzette" => ["Esch-Uelzecht", "Esch/Alzette"],
                "dudelange" => ["Diddeleng", "Düdelingen"],
                "schifflange" => ["Schëffleng"],
                "bettembourg" => ["Beetebuerg"],
                "pétange" => ["Péiteng"],
                "ettelbruck" => ["Ettelbréck"],
                "diekirch" => ["Dikrech"],
                "strassen" => ["Stroossen"],
                "bertrange" => ["Bartreng"],
                "belvaux" => ["Bieles"],
                "differdange" => ["Déifferdeng"],
                "wiltz" => ["Wolz"],
                "grevenmacher" => ["Gréiwemaacher"],
                "mersch" => ["Miersch"],
                "redange/attert" => ["Redange", "Réiden", "Redange-sur-Attert"],
              }.freeze

              # Looks up the first city name (case-insensitively) and appends its
              # aliases to `address[:city]`. Addresses with no city are left
              # untouched instead of raising NoMethodError.
              sig { params(address: Hash).void }
              def apply(address)
                city = address[:city]&.first
                return if city.nil?

                aliases = CITY_ALIASES.fetch(city.downcase, [])
                # `+` builds a new array, so the alias lists in the constant are never mutated.
                address[:city] = address[:city] + aliases
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -1,12 +1,15 @@
|
|
1
1
|
id: LU
|
2
2
|
validation:
|
3
3
|
enabled: true
|
4
|
-
has_provinces:
|
4
|
+
has_provinces: false
|
5
5
|
default_matching_strategy: es
|
6
|
+
address_parser: AtlasEngine::Lu::ValidationTranscriber::AddressParser
|
6
7
|
index_locales:
|
7
8
|
- fr
|
8
9
|
- lb
|
10
|
+
unmatched_components_suggestion_threshold: 1
|
9
11
|
ingestion:
|
10
12
|
correctors:
|
11
13
|
open_address:
|
12
14
|
- AtlasEngine::Lu::AddressImporter::Corrections::OpenAddress::LocaleCorrector
|
15
|
+
- AtlasEngine::Lu::AddressImporter::Corrections::OpenAddress::CityCorrector
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Lu
    module ValidationTranscriber
      # Address parser for Luxembourg (LU).
      #
      # Accepts both "street, number" and "number, street" layouts.
      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
        private

        STREET = "(?<street>.+)"
        # Digits with optional trailing letters, optionally followed by "/" or "-" and one digit.
        BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)([/-][0-9])?"

        # Returns the memoized list of Luxembourgish address formats.
        sig { returns(T::Array[Regexp]) }
        def country_regex_formats
          return @country_regex_formats if @country_regex_formats

          street_then_number = /^#{STREET},?\s+#{BUILDING_NUM}/
          number_then_street = /^#{BUILDING_NUM}\s?(,\s?)?#{STREET}/

          @country_regex_formats = [street_then_number, number_then_street]
        end
      end
    end
  end
end
|
data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pl
    module AddressImporter
      module Corrections
        module OpenAddress
          # Import-time corrector for Poland (PL) city names: adds the English
          # name "Warsaw" next to "Warszawa".
          class CityCorrector
            class << self
              extend T::Sig

              # Appends "Warsaw" to the existing city array (in place) when the
              # city list is exactly ["Warszawa"]; otherwise does nothing.
              sig { params(address: Hash).void }
              def apply(address)
                address[:city] << "Warsaw" if address[:city] == ["Warszawa"]
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pl
    module AddressImporter
      module Corrections
        module OpenAddress
          # Import-time corrector for Polish addresses that have no street name.
          #
          # Many smaller rural towns in Poland don't have street names. Mailing
          # addresses are often expressed as
          #   address1: <town name> <building number>
          #   city: <town name> OR <nearest postal town>
          #   postal_code: <postal code>
          #
          # The OpenAddresses dataset does not currently include county/postal
          # town info.
          class EmptyStreetCorrector
            class << self
              extend T::Sig

              # Copies the first city name into `address[:street]` when the street
              # is an empty string and a city is present.
              sig { params(address: Hash).void }
              def apply(address)
                return unless address[:street] == "" && address[:city].present?

                address[:street] = Array(address[:city]).first
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pl
    module AddressValidation
      module Exclusions
        # Zip exclusion for Poland (PL) that applies when every zip value on the
        # candidate is "00-000".
        class PlaceholderPostalCode < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
          extend T::Sig

          class << self
            sig do
              override.params(
                session: AtlasEngine::AddressValidation::Session,
                candidate: AtlasEngine::AddressValidation::Candidate,
                address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
              ).returns(T::Boolean)
            end
            # Only the candidate is consulted; session and comparison are unused.
            def apply?(session, candidate, address_comparison)
              placeholder_postal_code?(candidate)
            end

            private

            # True when all of the candidate's zip values equal "00-000".
            # `T.must` raises if the candidate has no zip component or no values.
            sig { params(candidate: AtlasEngine::AddressValidation::Candidate).returns(T::Boolean) }
            def placeholder_postal_code?(candidate)
              T.must(candidate.component(:zip)&.values).all?("00-000")
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pl
    module AddressValidation
      module Exclusions
        # City exclusion for Poland (PL).
        #
        # Applies when the candidate looks like a rural address (its street value
        # is also one of its city values) and the city comparison is a poor match.
        class RuralAddress < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
          extend T::Sig

          class << self
            sig do
              override.params(
                session: AtlasEngine::AddressValidation::Session,
                candidate: AtlasEngine::AddressValidation::Candidate,
                address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
              ).returns(T::Boolean)
            end
            def apply?(session, candidate, address_comparison)
              rural_address?(candidate) && poor_city_match?(address_comparison)
            end

            private

            # True when the city comparison's aggregate distance exceeds 2.
            #
            # `sequence_comparison` is treated as nilable by the PT zip exclusion
            # in this same release, so a missing comparison is handled here as
            # "no evidence of a poor match" (false) rather than raising
            # NoMethodError.
            # NOTE(review): confirm that false is the desired outcome when no
            # city comparison exists.
            def poor_city_match?(address_comparison)
              distance = address_comparison.city_comparison.sequence_comparison&.aggregate_distance
              !distance.nil? && distance > 2
            end

            # True when the candidate's first street value is also one of its
            # city values; false when the candidate has no city values.
            sig { params(candidate: AtlasEngine::AddressValidation::Candidate).returns(T::Boolean) }
            def rural_address?(candidate)
              city_values = candidate.component(:city)&.values
              return false if city_values.blank?

              street = candidate.component(:street)&.first_value
              city_values.any?(street)
            end
          end
        end
      end
    end
  end
end
|
@@ -1,5 +1,18 @@
|
|
1
1
|
id: PL
|
2
|
+
ingestion:
|
3
|
+
correctors:
|
4
|
+
open_address:
|
5
|
+
- AtlasEngine::Pl::AddressImporter::Corrections::OpenAddress::CityCorrector
|
6
|
+
- AtlasEngine::Pl::AddressImporter::Corrections::OpenAddress::EmptyStreetCorrector
|
2
7
|
validation:
|
3
8
|
address_parser: AtlasEngine::Pl::ValidationTranscriber::AddressParser
|
4
9
|
enabled: true
|
5
10
|
default_matching_strategy: es
|
11
|
+
exclusions:
|
12
|
+
city:
|
13
|
+
- AtlasEngine::Pl::AddressValidation::Exclusions::RuralAddress
|
14
|
+
zip:
|
15
|
+
- AtlasEngine::Pl::AddressValidation::Exclusions::PlaceholderPostalCode
|
16
|
+
comparison_policies:
|
17
|
+
street:
|
18
|
+
unmatched: ignore_largest_unmatched_side
|
@@ -0,0 +1,13 @@
|
|
1
|
+
street_synonyms:
|
2
|
+
## street suffixes
|
3
|
+
- aleja, al # avenue
|
4
|
+
- osiedle, os # housing estate
|
5
|
+
- plac, pl # square
|
6
|
+
- ulica, ul # street
|
7
|
+
## titles
|
8
|
+
- kardynała, kard # cardinal (masculine)
|
9
|
+
- święta, św # saint (feminine)
|
10
|
+
- świętego, św # saint
|
11
|
+
- święty, św # saint (masculine)
|
12
|
+
city_synonyms:
|
13
|
+
- wielkopolska, wlkp # Greater Poland
|
@@ -5,14 +5,49 @@ module AtlasEngine
|
|
5
5
|
module Pl
|
6
6
|
module ValidationTranscriber
|
7
7
|
class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
|
8
|
+
# Regex fragments for Polish street lines. They are plain strings that get
# interpolated into the regex literals of `country_regex_formats`, so regex
# escapes must be written as `\\s`: inside a double-quoted Ruby string a bare
# `\s` is a literal space, which silently narrowed "any whitespace" to
# "space only".
STREET = "(?<street>.+?)" # the .+ is non-greedy to allow for optional building number prefixes
# Optional "nr" / "nr." marker before the building number.
# NOTE(review): the `.` is unescaped and matches any character; `\\.` may have been intended.
BUILDING_NUM_PREFIX = "(?:nr.?\\s+)"
BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
# Separator between building and unit number: "/" or "-", or an "m" / "m." marker.
UNIT_NUM_PREFIX = "(?:\\s*[/-]|\\s+m.?)"
UNIT_NUM = "(?<unit_num>[[:alpha:]0-9]+)"
|
13
|
+
|
14
|
+
# Runs the base parser, then normalizes each parsed street in place.
#
# Polish addresses sometimes follow an abbreviation with a period and no
# space afterward; the first such "<text>.<non-space>" occurrence has its
# period replaced by a single space. Returns the array produced by `super`.
sig { override.returns(T::Array[AddressComponents]) }
def parse
  glued_abbreviation = /\A(?<prefix>.+?)(?<dot>\.)(?<non_space>\S)/i

  super.each do |components|
    street = components[:street]
    street&.gsub!(glued_abbreviation, "\\k<prefix> \\k<non_space>")
  end
end
|
24
|
+
|
8
25
|
private
|
9
26
|
|
10
27
|
# Returns the memoized list of Polish address formats: a street followed by
# a building number (with optional "nr" prefix and optional unit number),
# then a fallback that treats the whole line as the street.
sig { returns(T::Array[Regexp]) }
def country_regex_formats
  return @country_regex_formats if @country_regex_formats

  street_with_number = /^#{STREET}\s+#{BUILDING_NUM_PREFIX}?#{BUILDING_NUM}(#{UNIT_NUM_PREFIX}\s*#{UNIT_NUM})?$/
  street_only = /^#{STREET}$/

  @country_regex_formats = [street_with_number, street_only]
end
|
34
|
+
|
35
|
+
# Decides whether a parsing should be rejected as implausible.
#
# A parsing is ridiculous when it captured a street that appears in neither
# `address1` nor `address2` (compared case-insensitively). A parsing with no
# street capture is never ridiculous.
#
# The previous implementation evaluated `true unless ...` inside an `if` and
# discarded the value, so the method returned false for every input.
sig do
  override.params(
    captures: T::Hash[Symbol, T.nilable(String)],
    address: AtlasEngine::AddressValidation::AbstractAddress,
  ).returns(T::Boolean)
end
def ridiculous?(captures, address)
  street = captures[:street]&.downcase
  return false unless street.present?

  [address.address1, address.address2].none? do |line|
    line&.downcase&.include?(street)
  end
end
|
16
51
|
end
|
17
52
|
end
|
18
53
|
end
|
data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pt
    module AddressValidation
      module Validators
        module FullAddress
          module Exclusions
            # Zip exclusion for Portugal (PT).
            #
            # Applies unless both the street and the building number of the
            # candidate were compared and matched.
            class Zip < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
              class << self
                sig do
                  override
                    .params(
                      session: AtlasEngine::AddressValidation::Session,
                      candidate: AtlasEngine::AddressValidation::Candidate,
                      address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
                    )
                    .returns(T::Boolean)
                end
                # Returns true when either comparison is missing, when the building
                # comparison has no candidate ranges, or when street or building
                # fails to match.
                def apply?(session, candidate, address_comparison)
                  street_result = address_comparison.street_comparison.sequence_comparison
                  building_result = address_comparison.building_comparison.sequence_comparison

                  return true if street_result.nil? || building_result.nil?
                  return true if building_result.candidate_ranges.empty?

                  !(street_result.match? && building_result.match?)
                end
              end
            end
          end
        end
      end
    end
  end
end
|