atlas_engine 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -4
- data/app/countries/atlas_engine/cz/country_profile.yml +3 -0
- data/app/countries/atlas_engine/es/country_profile.yml +3 -0
- data/app/countries/atlas_engine/es/synonyms.yml +2 -0
- data/app/countries/atlas_engine/es/validation_transcriber/address_parser.rb +28 -0
- data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb +27 -0
- data/app/countries/atlas_engine/fo/country_profile.yml +4 -0
- data/app/countries/atlas_engine/fr/country_profile.yml +2 -0
- data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb +31 -0
- data/app/countries/atlas_engine/it/country_profile.yml +4 -0
- data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb +69 -0
- data/app/countries/atlas_engine/kr/country_profile.yml +7 -0
- data/app/countries/atlas_engine/kr/validation_transcriber/address_parser.rb +36 -0
- data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb +45 -0
- data/app/countries/atlas_engine/lu/country_profile.yml +4 -1
- data/app/countries/atlas_engine/lu/validation_transcriber/address_parser.rb +23 -0
- data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb +25 -0
- data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/empty_street_corrector.rb +32 -0
- data/app/countries/atlas_engine/pl/address_validation/exclusions/placeholder_postal_code.rb +35 -0
- data/app/countries/atlas_engine/pl/address_validation/exclusions/rural_address.rb +42 -0
- data/app/countries/atlas_engine/pl/country_profile.yml +13 -0
- data/app/countries/atlas_engine/pl/synonyms.yml +13 -0
- data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +36 -1
- data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb +38 -0
- data/app/countries/atlas_engine/pt/country_profile.yml +4 -0
- data/app/countries/atlas_engine/pt/synonyms.yml +12 -0
- data/app/countries/atlas_engine/pt/validation_transcriber/address_parser.rb +75 -0
- data/app/countries/atlas_engine/si/address_importer/open_address/corrections/city_district_corrector.rb +25 -0
- data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +19 -0
- data/app/countries/atlas_engine/si/address_validation/exclusions/unknown_city.rb +33 -0
- data/app/countries/atlas_engine/si/country_profile.yml +17 -0
- data/app/countries/atlas_engine/si/synonyms.yml +7 -0
- data/app/countries/atlas_engine/si/validation_transcriber/address_parser.rb +52 -0
- data/app/graphql/atlas_engine/schema.graphql +1 -1
- data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +1 -1
- data/app/models/atlas_engine/address_validation/concern_record.rb +6 -1
- data/app/models/atlas_engine/address_validation/es/query_builder.rb +6 -1
- data/app/models/atlas_engine/address_validation/statsd_emitter.rb +6 -2
- data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +38 -4
- data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +4 -4
- data/app/models/atlas_engine/address_validation/token/sequence/comparison_policy.rb +33 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +41 -15
- data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb +33 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +3 -3
- data/app/models/atlas_engine/address_validation/validators/full_address/city_comparison.rb +25 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +15 -6
- data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +8 -2
- data/app/models/atlas_engine/address_validation/validators/full_address/field_comparison_base.rb +77 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/province_code_comparison.rb +34 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/relevant_components.rb +34 -12
- data/app/models/atlas_engine/address_validation/validators/full_address/street_comparison.rb +30 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +1 -1
- data/app/models/atlas_engine/address_validation/validators/full_address/zip_comparison.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +2 -2
- data/app/models/atlas_engine/country_profile_validation_subset.rb +35 -2
- data/db/data/country_profiles/default.yml +12 -0
- data/lib/atlas_engine/version.rb +1 -1
- data/lib/tasks/atlas_engine/graphql.rake +13 -0
- metadata +35 -6
- data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +0 -135
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ae2dfdcc902973978f88d2d0bcd07808dbde5c48eecfe53977c4449b422bd492
|
4
|
+
data.tar.gz: 5e2d0b59cdb714a01a5e1df904390253760046151be3fecddce1f69b3fa1cc28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 50b5daf2a3a65fe064d37fe3f54944b1300f6d80a5009447af0a96caaa191a67563a1f6964e96dfc3cf849e67ff9af1279bbd71f8f0dcf3cc8c64e86489b7dd6
|
7
|
+
data.tar.gz: cce398aa5d5389a01effce955b2b224b34593013064649afd833323e23afc8cf6a746b136cca3eb36b1fc33eb15c5f6cd30110f1f6c4585f6a17c935761e38c5
|
data/README.md
CHANGED
@@ -1,7 +1,15 @@
|
|
1
|
-
# Atlas Engine
|
1
|
+
# 🌐 Atlas Engine
|
2
2
|
|
3
3
|
Atlas Engine is a rails engine that provides a global end-to-end address validation API for rails apps.
|
4
4
|
|
5
|
+
* [Address Validation API](#address-validation-api)
|
6
|
+
* [Rails App Installation](#rails-app-installation)
|
7
|
+
* [Local Development Installation](#local-development-installation)
|
8
|
+
* [Address Data Ingestion](#address-data-ingestion)
|
9
|
+
* [Elasticsearch Matching Strategy](#elasticsearch-matching-strategy)
|
10
|
+
|
11
|
+
## Address Validation API
|
12
|
+
|
5
13
|
The validation API is powered by GraphQL. An example request, with an explanation of the parameters and response, follows:
|
6
14
|
|
7
15
|
```graphql
|
@@ -156,7 +164,7 @@ being an invalid zip code for the province `ON`. It also returns the human reada
|
|
156
164
|
|
157
165
|
The validation scope excludes zip because the zip was not successfully validated.
|
158
166
|
|
159
|
-
##
|
167
|
+
## Rails App Installation
|
160
168
|
|
161
169
|
### Initial setup
|
162
170
|
Add the engine to your gemfile
|
@@ -182,7 +190,7 @@ rails atlas_engine:install:migrations
|
|
182
190
|
rails db:migrate
|
183
191
|
```
|
184
192
|
|
185
|
-
##
|
193
|
+
## Local Development Installation
|
186
194
|
|
187
195
|
This setup guide is based on a macOS development environment. Your tooling may vary.
|
188
196
|
|
@@ -275,7 +283,7 @@ Run sorbet check
|
|
275
283
|
srb tc
|
276
284
|
```
|
277
285
|
|
278
|
-
## Ingestion
|
286
|
+
## Address Data Ingestion
|
279
287
|
|
280
288
|
In order to power the more advanced validation matching strategies that provide city / state / zip and even street
|
281
289
|
level address validation, your app must have a populated elasticsearch index per country available for `atlas_engine`
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Es
|
6
|
+
module ValidationTranscriber
|
7
|
+
class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
|
8
|
+
private
|
9
|
+
|
10
|
+
# Street capture made of one or more non-digit characters.
NON_NUMERIC_STREET = "(?<street>[^[:digit:]]+)"
|
11
|
+
# Case-insensitive designator that may precede the building number: "n", "n°" or "número".
BUILDING_NUM_DESIGNATOR = /(?i)(n|n°|número)/
|
12
|
+
# Trailing remainder: a comma, a single whitespace character, or optional whitespace followed by any text.
CATCH_ALL = /(?:,|\s|\s*.+)/
|
13
|
+
|
14
|
+
sig { returns(T::Array[Regexp]) }
|
15
|
+
# Returns the memoized, ordered list of address-line patterns for this parser.
# Patterns 1-3 expect the building number directly after the street; patterns
# 4-6 additionally expect a BUILDING_NUM_DESIGNATOR before the building number.
# STREET_NO_COMMAS and BUILDING_NUM are not defined in this class (presumably
# inherited from AddressParserBase — confirm).
def country_regex_formats
|
16
|
+
@country_regex_formats ||= [
|
17
|
+
/^#{STREET_NO_COMMAS},?\s+#{BUILDING_NUM}$/,
|
18
|
+
/^#{STREET_NO_COMMAS},?\s+#{BUILDING_NUM}#{CATCH_ALL}$/,
|
19
|
+
/^#{NON_NUMERIC_STREET},?\s+#{BUILDING_NUM}#{CATCH_ALL}$/,
|
20
|
+
# NOTE(review): unlike every other pattern here, the next one has no trailing `$` — confirm this is intentional.
/^#{STREET_NO_COMMAS},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}/,
|
21
|
+
/^#{STREET_NO_COMMAS},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}#{CATCH_ALL}$/,
|
22
|
+
/^#{NON_NUMERIC_STREET},?\s+(#{BUILDING_NUM_DESIGNATOR}\s?)#{BUILDING_NUM}#{CATCH_ALL}$/,
|
23
|
+
]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Fo
|
6
|
+
module AddressImporter
|
7
|
+
module Corrections
|
8
|
+
module OpenAddress
|
9
|
+
class CityCorrector
|
10
|
+
class << self
|
11
|
+
extend T::Sig
|
12
|
+
|
13
|
+
sig { params(address: Hash).void }
|
14
|
+
# Rewrites address[:city] in place for the listed "Nes, …" and "Syðradalur, …"
# variants, dropping the part after the comma. Any other city is left untouched.
def apply(address)
  case address[:city]
  when ["Nes, Eysturoy"], ["Nes, Vágur"]
    address[:city] = ["Nes"]
  when ["Syðradalur, Kalsoy"], ["Syðradalur, Streymoy"]
    address[:city] = ["Syðradalur"]
  end
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module It
|
6
|
+
module AddressValidation
|
7
|
+
module Validators
|
8
|
+
module FullAddress
|
9
|
+
module Exclusions
|
10
|
+
class City <
|
11
|
+
AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
|
12
|
+
class << self
|
13
|
+
sig do
|
14
|
+
override.params(
|
15
|
+
session: AtlasEngine::AddressValidation::Session,
|
16
|
+
candidate: AtlasEngine::AddressValidation::Candidate,
|
17
|
+
address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
|
18
|
+
)
|
19
|
+
.returns(T::Boolean)
|
20
|
+
end
|
21
|
+
# Always returns true; none of the three arguments is consulted.
def apply?(session, candidate, address_comparison)
|
22
|
+
true
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -3,6 +3,10 @@ validation:
|
|
3
3
|
enabled: true
|
4
4
|
default_matching_strategy: es
|
5
5
|
address_parser: AtlasEngine::Dk::ValidationTranscriber::AddressParser
|
6
|
+
exclusions:
|
7
|
+
city:
|
8
|
+
- AtlasEngine::It::AddressValidation::Validators::FullAddress::Exclusions::City
|
9
|
+
unmatched_components_suggestion_threshold: 1
|
6
10
|
ingestion:
|
7
11
|
open_address:
|
8
12
|
feature_mapper: AtlasEngine::It::AddressImporter::OpenAddress::Mapper
|
data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Kr
|
6
|
+
module AddressValidation
|
7
|
+
module Validators
|
8
|
+
module FullAddress
|
9
|
+
module Exclusions
|
10
|
+
class City <
|
11
|
+
AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
|
12
|
+
extend T::Sig
|
13
|
+
class << self
|
14
|
+
# Suffix looked for at the end of a whitespace-separated city word, keyed by
# the component type passed to extract_component_from_city.
COMPONENT_IDENTIFIER = {
|
15
|
+
si: "시",
|
16
|
+
gu: "구",
|
17
|
+
}.freeze
|
18
|
+
|
19
|
+
sig do
|
20
|
+
override.params(
|
21
|
+
session: AtlasEngine::AddressValidation::Session,
|
22
|
+
candidate: AtlasEngine::AddressValidation::Candidate,
|
23
|
+
address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
|
24
|
+
)
|
25
|
+
.returns(T::Boolean)
|
26
|
+
end
|
27
|
+
# Reports whether the exclusion applies: true when the candidate city contains
# a "시"- or "구"-suffixed word that also shows up in the session (see
# contains_component?). The :si word is checked first, then the :gu word.
def apply?(session, candidate, address_comparison)
  si_part = extract_component_from_city(candidate, :si)
  return true if si_part.present? && contains_component?(:si, si_part, session)

  gu_part = extract_component_from_city(candidate, :gu)
  gu_part.present? && contains_component?(:gu, gu_part, session)
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
sig do
|
38
|
+
params(
|
39
|
+
candidate: AtlasEngine::AddressValidation::Candidate,
|
40
|
+
component: Symbol,
|
41
|
+
).returns(T.nilable(String))
|
42
|
+
end
|
43
|
+
# Finds the first whitespace-separated word of the candidate's first city value
# that ends with the suffix registered for `component` in COMPONENT_IDENTIFIER.
#
# Returns nil when no word matches, and also when the candidate has no city
# component or no city value (previously that case raised NoMethodError on
# `nil.split`, even though the signature promises a nilable String).
def extract_component_from_city(candidate, component)
  city = candidate.component(:city)&.value&.first
  return if city.nil?

  suffix = COMPONENT_IDENTIFIER[component]
  city.split(" ").find { |part| part.end_with?(suffix) }
end
|
51
|
+
|
52
|
+
sig do
|
53
|
+
params(
|
54
|
+
type: Symbol,
|
55
|
+
value: String,
|
56
|
+
session: AtlasEngine::AddressValidation::Session,
|
57
|
+
).returns(T::Boolean)
|
58
|
+
end
|
59
|
+
# True when `value` equals the `type` entry of any session parsing, or when the
# session's city string contains `value`.
#
# Always returns a strict boolean: the previous expression evaluated to nil
# when nothing matched and session.city was nil, which violates the T::Boolean
# signature. `parsing[type]` is the same lookup `pluck(type)` performed.
def contains_component?(type, value, session)
  return true if session.parsings.parsings.any? { |parsing| parsing[type] == value }

  session.city&.include?(value) || false
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
@@ -5,6 +5,13 @@ ingestion:
|
|
5
5
|
validation:
|
6
6
|
enabled: true
|
7
7
|
default_matching_strategy: es
|
8
|
+
address_parser: AtlasEngine::Kr::ValidationTranscriber::AddressParser
|
9
|
+
comparison_policies:
|
10
|
+
city:
|
11
|
+
unmatched: ignore_left_unmatched
|
12
|
+
exclusions:
|
13
|
+
city:
|
14
|
+
- AtlasEngine::Kr::AddressValidation::Validators::FullAddress::Exclusions::City
|
8
15
|
restrictions:
|
9
16
|
- class: AtlasEngine::Restrictions::UnsupportedScript
|
10
17
|
params:
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Kr
|
6
|
+
module ValidationTranscriber
|
7
|
+
class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
|
8
|
+
private
|
9
|
+
|
10
|
+
# Regex fragments (as strings) interpolated into country_regex_formats.
# Each one defines a named capture; most match "any text ending in <suffix>".
PROVINCE = "(?<province>.+기|서울)"
|
11
|
+
GU = "(?<gu>.+구)"
|
12
|
+
SI = "(?<si>.+시)"
|
13
|
+
DONG = "(?<dong>.+동)"
|
14
|
+
EUP = "(?<eup>.+읍)"
|
15
|
+
# A run of non-whitespace characters.
STREET = "(?<street>\\S+)"
|
16
|
+
# NOTE(review): `(^호)?` puts a line-start anchor right after the digits, so the optional
# group looks like it can never match; `[^호]` or a negative lookahead may have been intended — confirm.
BUILDING_NUM = "(?<building_num>\\d+(^호)?)"
|
17
|
+
# NOTE(review): same concern as BUILDING_NUM for `(^동)?` — confirm.
UNIT_NUM = "(?<unit_num>\\d+(^동)?)"
|
18
|
+
|
19
|
+
sig { returns(T::Array[Regexp]) }
|
20
|
+
# Returns the memoized list holding a single extended-mode (`x`) pattern:
# optional province, si, gu, dong, eup and street segments, each followed by
# whitespace, then an optional building number (optionally followed by "-" or
# whitespace) and an optional unit number. Every segment is optional and the
# pattern carries no anchors.
def country_regex_formats
|
21
|
+
@country_regex_formats ||= [
|
22
|
+
%r{
|
23
|
+
(#{PROVINCE}\s+)?
|
24
|
+
(#{SI}\s+)?
|
25
|
+
(#{GU}\s+)?
|
26
|
+
(#{DONG}\s+)?
|
27
|
+
(#{EUP}\s+)?
|
28
|
+
(#{STREET}\s+)?
|
29
|
+
(#{BUILDING_NUM}(-|\s)?)?#{UNIT_NUM}?
|
30
|
+
}x,
|
31
|
+
]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Lu
|
6
|
+
module AddressImporter
|
7
|
+
module Corrections
|
8
|
+
module OpenAddress
|
9
|
+
class CityCorrector
|
10
|
+
class << self
|
11
|
+
extend T::Sig
|
12
|
+
|
13
|
+
# NOTE: ensure keys match the city names we have in our db
|
14
|
+
# Alternate spellings appended to a city's name list by `apply`.
# Keys are lower-case because `apply` down-cases the city before the lookup.
# Frozen so the shared table cannot be mutated at runtime.
CITY_ALIASES = {
  "luxembourg" => ["Lëtzebuerg"],
  "esch-sur-alzette" => ["Esch-Uelzecht", "Esch/Alzette"],
  "dudelange" => ["Diddeleng", "Düdelingen"],
  "schifflange" => ["Schëffleng"],
  "bettembourg" => ["Beetebuerg"],
  "pétange" => ["Péiteng"],
  "ettelbruck" => ["Ettelbréck"],
  "diekirch" => ["Dikrech"],
  "strassen" => ["Stroossen"],
  "bertrange" => ["Bartreng"],
  "belvaux" => ["Bieles"],
  "differdange" => ["Déifferdeng"],
  "wiltz" => ["Wolz"],
  "grevenmacher" => ["Gréiwemaacher"],
  "mersch" => ["Miersch"],
  "redange/attert" => ["Redange", "Réiden", "Redange-sur-Attert"],
}.freeze
|
32
|
+
|
33
|
+
sig { params(address: Hash).void }
|
34
|
+
# Appends the known aliases of the first city name to address[:city].
#
# The first city value is down-cased and looked up in CITY_ALIASES; when there
# is no entry the list is reassigned unchanged. An address with a nil or empty
# city is left alone (previously this raised NoMethodError).
def apply(address)
  cities = Array(address[:city])
  primary = cities.first
  return if primary.nil?

  address[:city] = cities + CITY_ALIASES.fetch(primary.downcase, [])
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -1,12 +1,15 @@
|
|
1
1
|
id: LU
|
2
2
|
validation:
|
3
3
|
enabled: true
|
4
|
-
has_provinces:
|
4
|
+
has_provinces: false
|
5
5
|
default_matching_strategy: es
|
6
|
+
address_parser: AtlasEngine::Lu::ValidationTranscriber::AddressParser
|
6
7
|
index_locales:
|
7
8
|
- fr
|
8
9
|
- lb
|
10
|
+
unmatched_components_suggestion_threshold: 1
|
9
11
|
ingestion:
|
10
12
|
correctors:
|
11
13
|
open_address:
|
12
14
|
- AtlasEngine::Lu::AddressImporter::Corrections::OpenAddress::LocaleCorrector
|
15
|
+
- AtlasEngine::Lu::AddressImporter::Corrections::OpenAddress::CityCorrector
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Lu
|
6
|
+
module ValidationTranscriber
|
7
|
+
class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
|
8
|
+
private
|
9
|
+
|
10
|
+
# Street capture: any non-empty run of characters.
STREET = "(?<street>.+)"
|
11
|
+
# Building number capture: digits followed by optional letters. An optional "/" or "-"
# plus one digit may follow, but sits outside the named capture.
BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)([/-][0-9])?"
|
12
|
+
|
13
|
+
sig { returns(T::Array[Regexp]) }
|
14
|
+
# Returns the memoized, ordered pattern list: first "street[,] building number",
# then "building number[,] street". Neither pattern is anchored at the end.
def country_regex_formats
|
15
|
+
@country_regex_formats ||= [
|
16
|
+
/^#{STREET},?\s+#{BUILDING_NUM}/,
|
17
|
+
/^#{BUILDING_NUM}\s?(,\s?)?#{STREET}/,
|
18
|
+
]
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Pl
|
6
|
+
module AddressImporter
|
7
|
+
module Corrections
|
8
|
+
module OpenAddress
|
9
|
+
class CityCorrector
|
10
|
+
class << self
|
11
|
+
extend T::Sig
|
12
|
+
|
13
|
+
sig { params(address: Hash).void }
|
14
|
+
# Adds the name "Warsaw" to the city list when that list is exactly ["Warszawa"].
# The existing array is mutated in place; every other address is left untouched.
def apply(address)
  cities = address[:city]
  return unless cities == ["Warszawa"]

  cities << "Warsaw"
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Pl
|
6
|
+
module AddressImporter
|
7
|
+
module Corrections
|
8
|
+
module OpenAddress
|
9
|
+
class EmptyStreetCorrector
|
10
|
+
class << self
|
11
|
+
extend T::Sig
|
12
|
+
|
13
|
+
sig { params(address: Hash).void }
|
14
|
+
# Fills an empty street with the first city name.
#
# Many smaller rural towns in Poland have no street names, so mailing
# addresses are often written as
#   address1: <town name> <building number>
#   city: <town name> OR <nearest postal town>
#   postal_code: <postal code>
# The OpenAddresses dataset does not currently include county/postal town
# info, so the town name is the best available stand-in for the street.
#
# Only an empty-string street combined with a present city is rewritten.
def apply(address)
  return unless address[:street] == ""
  return unless address[:city].present?

  address[:street] = Array(address[:city]).first
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Pl
|
6
|
+
module AddressValidation
|
7
|
+
module Exclusions
|
8
|
+
class PlaceholderPostalCode < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
|
9
|
+
extend T::Sig
|
10
|
+
|
11
|
+
class << self
|
12
|
+
sig do
|
13
|
+
override.params(
|
14
|
+
session: AtlasEngine::AddressValidation::Session,
|
15
|
+
candidate: AtlasEngine::AddressValidation::Candidate,
|
16
|
+
address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
|
17
|
+
).returns(T::Boolean)
|
18
|
+
end
|
19
|
+
# Applies when the candidate's zip values are all the "00-000" placeholder
# (see placeholder_postal_code?). session and address_comparison are unused.
def apply?(session, candidate, address_comparison)
|
20
|
+
placeholder_postal_code?(candidate)
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
sig { params(candidate: AtlasEngine::AddressValidation::Candidate).returns(T::Boolean) }
|
26
|
+
# True when the candidate has at least one zip value and every one of them is
# the "00-000" placeholder.
#
# A candidate with no zip component, or with an empty value list, is not a
# placeholder: previously the missing component raised from T.must, and an
# empty list was vacuously reported as a placeholder by `all?`.
def placeholder_postal_code?(candidate)
  zip_values = candidate.component(:zip)&.values
  return false if zip_values.nil? || zip_values.empty?

  zip_values.all?("00-000")
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Pl
|
6
|
+
module AddressValidation
|
7
|
+
module Exclusions
|
8
|
+
class RuralAddress < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
|
9
|
+
extend T::Sig
|
10
|
+
|
11
|
+
class << self
|
12
|
+
sig do
|
13
|
+
override.params(
|
14
|
+
session: AtlasEngine::AddressValidation::Session,
|
15
|
+
candidate: AtlasEngine::AddressValidation::Candidate,
|
16
|
+
address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
|
17
|
+
).returns(T::Boolean)
|
18
|
+
end
|
19
|
+
# Applies when the candidate looks like a rural address (rural_address?) and
# the city comparison is a poor match (poor_city_match?). session is unused.
def apply?(session, candidate, address_comparison)
|
20
|
+
rural_address?(candidate) && poor_city_match?(address_comparison)
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
# True when the city sequence comparison has an aggregate distance above 2.
#
# Returns false when there is no city sequence comparison at all. Other code
# in this gem checks `sequence_comparison` for nil before using it, so it is
# guarded here too instead of raising NoMethodError.
def poor_city_match?(address_comparison)
  distance = address_comparison.city_comparison.sequence_comparison&.aggregate_distance
  !distance.nil? && distance > 2
end
|
28
|
+
|
29
|
+
sig { params(candidate: AtlasEngine::AddressValidation::Candidate).returns(T::Boolean) }
|
30
|
+
# True when the candidate's first street value is also one of its city values.
# Returns false when the candidate has no city values.
def rural_address?(candidate)
  city_values = candidate.component(:city)&.values
  return false if city_values.blank?

  street = candidate.component(:street)&.first_value
  T.must(city_values).any?(street)
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -1,5 +1,18 @@
|
|
1
1
|
id: PL
|
2
|
+
ingestion:
|
3
|
+
correctors:
|
4
|
+
open_address:
|
5
|
+
- AtlasEngine::Pl::AddressImporter::Corrections::OpenAddress::CityCorrector
|
6
|
+
- AtlasEngine::Pl::AddressImporter::Corrections::OpenAddress::EmptyStreetCorrector
|
2
7
|
validation:
|
3
8
|
address_parser: AtlasEngine::Pl::ValidationTranscriber::AddressParser
|
4
9
|
enabled: true
|
5
10
|
default_matching_strategy: es
|
11
|
+
exclusions:
|
12
|
+
city:
|
13
|
+
- AtlasEngine::Pl::AddressValidation::Exclusions::RuralAddress
|
14
|
+
zip:
|
15
|
+
- AtlasEngine::Pl::AddressValidation::Exclusions::PlaceholderPostalCode
|
16
|
+
comparison_policies:
|
17
|
+
street:
|
18
|
+
unmatched: ignore_largest_unmatched_side
|
@@ -0,0 +1,13 @@
|
|
1
|
+
street_synonyms:
|
2
|
+
## street suffixes
|
3
|
+
- aleja, al # avenue
|
4
|
+
- osiedle, os # housing estate
|
5
|
+
- plac, pl # square
|
6
|
+
- ulica, ul # street
|
7
|
+
## titles
|
8
|
+
- kardynała, kard # cardinal (masculine)
|
9
|
+
- święta, św # saint (feminine)
|
10
|
+
- świętego, św # saint
|
11
|
+
- święty, św # saint (masculine)
|
12
|
+
city_synonyms:
|
13
|
+
- wielkopolska, wlkp # Greater Poland
|
@@ -5,14 +5,49 @@ module AtlasEngine
|
|
5
5
|
module Pl
|
6
6
|
module ValidationTranscriber
|
7
7
|
class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
|
8
|
+
# Regex fragments (as strings) interpolated into country_regex_formats.
#
# Whitespace classes are written `\\s` on purpose: inside a double-quoted Ruby
# string a bare "\s" is a literal space character, so the earlier spelling
# only ever matched spaces and never tabs or other whitespace.
STREET = "(?<street>.+?)" # the .+ is non-greedy to allow for optional building number prefixes
# Optional "nr" marker (plus at most one following character) and the whitespace after it.
BUILDING_NUM_PREFIX = "(?:nr.?\\s+)"
# Digits followed by optional letters.
BUILDING_NUM = "(?<building_num>[0-9]+[[:alpha:]]*)"
# Separator before the unit number: "/" or "-" (optionally preceded by whitespace), or whitespace followed by "m".
UNIT_NUM_PREFIX = "(?:\\s*[/-]|\\s+m.?)"
# Letters and/or digits.
UNIT_NUM = "(?<unit_num>[[:alpha:]0-9]+)"
|
13
|
+
|
14
|
+
sig { override.returns(T::Array[AddressComponents]) }
|
15
|
+
# Runs the inherited parse, then normalizes each parsed street in place.
#
# Polish addresses sometimes follow an abbreviation with a period and no space
# afterward, so the first "<text>.<non-space>" at the start of the street is
# rewritten to "<text> <non-space>". Returns the array produced by the
# inherited parse.
def parse
  parsed = super
  parsed.each do |components|
    street = components[:street]
    next if street.nil?

    street.gsub!(/\A(?<prefix>.+?)(?<dot>\.)(?<non_space>\S)/i, "\\k<prefix> \\k<non_space>")
  end
end
|
24
|
+
|
8
25
|
private
|
9
26
|
|
10
27
|
sig { returns(T::Array[Regexp]) }
|
11
28
|
# Returns the memoized, ordered pattern list. The first pattern expects a street,
# whitespace, an optional building-number prefix, a building number and an optional
# unit prefix plus unit number. The second is a fallback that captures the whole
# line as the street.
def country_regex_formats
|
12
29
|
@country_regex_formats ||= [
|
13
|
-
|
30
|
+
/^#{STREET}\s+#{BUILDING_NUM_PREFIX}?#{BUILDING_NUM}(#{UNIT_NUM_PREFIX}\s*#{UNIT_NUM})?$/,
|
31
|
+
/^#{STREET}$/,
|
14
32
|
]
|
15
33
|
end
|
34
|
+
|
35
|
+
sig do
|
36
|
+
override.params(
|
37
|
+
captures: T::Hash[Symbol, T.nilable(String)],
|
38
|
+
address: AtlasEngine::AddressValidation::AbstractAddress,
|
39
|
+
).returns(T::Boolean)
|
40
|
+
end
|
41
|
+
# Decides whether a parse result should be discarded as implausible.
#
# A result is ridiculous when it captured a street that appears (compared
# case-insensitively) in neither address1 nor address2. A blank or missing
# street is never ridiculous.
#
# The earlier body evaluated `true unless ...` without `return`, so the value
# was thrown away and the method always returned false.
def ridiculous?(captures, address)
  street = captures[:street]&.downcase
  return false unless street.present?

  [address.address1, address.address2].none? do |line|
    line&.downcase&.include?(street)
  end
end
|
16
51
|
end
|
17
52
|
end
|
18
53
|
end
|
data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Pt
|
6
|
+
module AddressValidation
|
7
|
+
module Validators
|
8
|
+
module FullAddress
|
9
|
+
module Exclusions
|
10
|
+
class Zip < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
|
11
|
+
class << self
|
12
|
+
sig do
|
13
|
+
override.params(
|
14
|
+
session: AtlasEngine::AddressValidation::Session,
|
15
|
+
candidate: AtlasEngine::AddressValidation::Candidate,
|
16
|
+
address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
|
17
|
+
)
|
18
|
+
.returns(T::Boolean)
|
19
|
+
end
|
20
|
+
# Applies the exclusion unless both the street and the building number match.
#
# Returns true straight away when either sequence comparison is missing or the
# building comparison has no candidate ranges; otherwise returns true when the
# street or the building comparison is not a match.
def apply?(session, candidate, address_comparison)
  street_result = address_comparison.street_comparison.sequence_comparison
  building_result = address_comparison.building_comparison.sequence_comparison

  return true if street_result.nil? || building_result.nil?
  return true if T.must(building_result).candidate_ranges.empty?

  !(T.must(street_result).match? && T.must(building_result).match?)
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|