atlas_engine 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -4
- data/app/countries/atlas_engine/cz/country_profile.yml +3 -0
- data/app/countries/atlas_engine/es/country_profile.yml +3 -0
- data/app/countries/atlas_engine/es/synonyms.yml +2 -0
- data/app/countries/atlas_engine/es/validation_transcriber/address_parser.rb +28 -0
- data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb +27 -0
- data/app/countries/atlas_engine/fo/country_profile.yml +4 -0
- data/app/countries/atlas_engine/fr/country_profile.yml +2 -0
- data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb +31 -0
- data/app/countries/atlas_engine/it/country_profile.yml +4 -0
- data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb +69 -0
- data/app/countries/atlas_engine/kr/country_profile.yml +7 -0
- data/app/countries/atlas_engine/kr/validation_transcriber/address_parser.rb +36 -0
- data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb +45 -0
- data/app/countries/atlas_engine/lu/country_profile.yml +4 -1
- data/app/countries/atlas_engine/lu/validation_transcriber/address_parser.rb +23 -0
- data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb +25 -0
- data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/empty_street_corrector.rb +32 -0
- data/app/countries/atlas_engine/pl/address_validation/exclusions/placeholder_postal_code.rb +35 -0
- data/app/countries/atlas_engine/pl/address_validation/exclusions/rural_address.rb +42 -0
- data/app/countries/atlas_engine/pl/country_profile.yml +13 -0
- data/app/countries/atlas_engine/pl/synonyms.yml +13 -0
- data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +36 -1
- data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb +38 -0
- data/app/countries/atlas_engine/pt/country_profile.yml +4 -0
- data/app/countries/atlas_engine/pt/synonyms.yml +12 -0
- data/app/countries/atlas_engine/pt/validation_transcriber/address_parser.rb +75 -0
- data/app/countries/atlas_engine/si/address_importer/open_address/corrections/city_district_corrector.rb +25 -0
- data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +19 -0
- data/app/countries/atlas_engine/si/address_validation/exclusions/unknown_city.rb +33 -0
- data/app/countries/atlas_engine/si/country_profile.yml +17 -0
- data/app/countries/atlas_engine/si/synonyms.yml +7 -0
- data/app/countries/atlas_engine/si/validation_transcriber/address_parser.rb +52 -0
- data/app/graphql/atlas_engine/schema.graphql +1 -1
- data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +1 -1
- data/app/models/atlas_engine/address_validation/concern_record.rb +6 -1
- data/app/models/atlas_engine/address_validation/es/query_builder.rb +6 -1
- data/app/models/atlas_engine/address_validation/statsd_emitter.rb +6 -2
- data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +38 -4
- data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +4 -4
- data/app/models/atlas_engine/address_validation/token/sequence/comparison_policy.rb +33 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +41 -15
- data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb +33 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +3 -3
- data/app/models/atlas_engine/address_validation/validators/full_address/city_comparison.rb +25 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +15 -6
- data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +8 -2
- data/app/models/atlas_engine/address_validation/validators/full_address/field_comparison_base.rb +77 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/province_code_comparison.rb +34 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/relevant_components.rb +34 -12
- data/app/models/atlas_engine/address_validation/validators/full_address/street_comparison.rb +30 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +1 -1
- data/app/models/atlas_engine/address_validation/validators/full_address/zip_comparison.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +2 -2
- data/app/models/atlas_engine/country_profile_validation_subset.rb +35 -2
- data/db/data/country_profiles/default.yml +12 -0
- data/lib/atlas_engine/version.rb +1 -1
- data/lib/tasks/atlas_engine/graphql.rake +13 -0
- metadata +35 -6
- data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +0 -135
@@ -1,3 +1,14 @@
|
|
1
|
+
street_synonyms:
|
2
|
+
- rua, r
|
3
|
+
- avenida, av
|
4
|
+
- praca, pc, pca
|
5
|
+
- travessa, tv
|
6
|
+
- largo, lg, l, lgo
|
7
|
+
- beco, bc
|
8
|
+
- calcada, cc
|
9
|
+
- professor, prof
|
10
|
+
- camino, cam
|
11
|
+
- estrada, estr
|
1
12
|
city_synonyms:
|
2
13
|
- GDM, Gondomar
|
3
14
|
- GMR, Guimarães
|
@@ -5,3 +16,4 @@ city_synonyms:
|
|
5
16
|
- VGS, Vagos
|
6
17
|
- VN, Vila Nova De
|
7
18
|
- VN, Vila Nova Da
|
19
|
+
- TCS, Trancoso
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Pt
|
6
|
+
module ValidationTranscriber
|
7
|
+
class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
|
8
|
+
private
|
9
|
+
|
10
|
+
STREET = "(?<street>.+)"
|
11
|
+
NUMBERED_STREET = "(?<street>.+\\s+[0-9]+)"
|
12
|
+
BUILDING_NUM = "n?(?<building_num>[0-9]+[a-z]*)"
|
13
|
+
UNIT_NUM = "(?<unit_num>[[:alnum:]]+)"
|
14
|
+
DIRECTION = /\b(?<direction>esq|dir|dto|fte|e|d|f|esquerda|direito|frente|fundo|andar)\b\.?/i
|
15
|
+
PO_BOX = /\b(?<box_type>ap|apartado|caixa postal|cp)\s+(?<number>\d+)\b/i
|
16
|
+
|
17
|
+
sig { returns(T::Array[Regexp]) }
|
18
|
+
def country_regex_formats
|
19
|
+
@country_regex_formats ||= [
|
20
|
+
/^#{STREET},?\s+#{BUILDING_NUM}$/,
|
21
|
+
/^#{STREET},?\s+#{BUILDING_NUM},?\s.*$/,
|
22
|
+
/^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}$/,
|
23
|
+
/^#{STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
|
24
|
+
/^#{STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}[\s,-]+#{DIRECTION}$/,
|
25
|
+
/^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
|
26
|
+
/^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}[\s,-]+#{DIRECTION}$/,
|
27
|
+
]
|
28
|
+
end
|
29
|
+
|
30
|
+
sig { override.params(address_line: String).returns(T::Array[T.nilable(String)]) }
|
31
|
+
def extract_po_box(address_line)
|
32
|
+
po_box_match = address_line.match(PO_BOX)
|
33
|
+
|
34
|
+
if po_box_match
|
35
|
+
po_box = po_box_match["number"]
|
36
|
+
address_line = address_line.gsub(PO_BOX, "").strip.delete_suffix(",")
|
37
|
+
else
|
38
|
+
po_box = nil
|
39
|
+
end
|
40
|
+
|
41
|
+
[address_line, po_box]
|
42
|
+
end
|
43
|
+
|
44
|
+
# Return true if something's obviously wrong with this regex match
|
45
|
+
sig do
|
46
|
+
override.params(
|
47
|
+
captures: T::Hash[Symbol, T.nilable(String)],
|
48
|
+
address: ::AtlasEngine::AddressValidation::AbstractAddress,
|
49
|
+
).returns(T::Boolean)
|
50
|
+
end
|
51
|
+
def ridiculous?(captures, address)
|
52
|
+
building_num = captures[:building_num]&.downcase
|
53
|
+
street = captures[:street]&.downcase
|
54
|
+
unit_num = captures[:unit_num]&.downcase
|
55
|
+
|
56
|
+
if street.present?
|
57
|
+
return true unless address.address1&.upcase&.include?(street.upcase) ||
|
58
|
+
address.address2&.upcase&.include?(street.upcase)
|
59
|
+
end
|
60
|
+
|
61
|
+
[building_num, unit_num].any? do |token|
|
62
|
+
po_box?(token) || street_suffix?(token)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
|
67
|
+
def po_box?(token)
|
68
|
+
return false if token.blank?
|
69
|
+
|
70
|
+
token.match?(PO_BOX)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Si
|
6
|
+
module AddressImporter
|
7
|
+
module OpenAddress
|
8
|
+
module Corrections
|
9
|
+
class CityDistrictCorrector
|
10
|
+
class << self
|
11
|
+
extend T::Sig
|
12
|
+
|
13
|
+
sig { params(address: Hash).void }
|
14
|
+
def apply(address)
|
15
|
+
if address[:region4].present?
|
16
|
+
address[:city] << address[:region4] if address[:city].exclude?(address[:region4])
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Si
|
6
|
+
module AddressImporter
|
7
|
+
module OpenAddress
|
8
|
+
class Mapper < AtlasEngine::AddressImporter::OpenAddress::DefaultMapper
|
9
|
+
sig do
|
10
|
+
params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
|
11
|
+
end
|
12
|
+
def map(feature)
|
13
|
+
super(feature).merge(region4: feature["properties"]["district"])
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Si
|
6
|
+
module AddressValidation
|
7
|
+
module Exclusions
|
8
|
+
class UnknownCity < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
|
9
|
+
extend T::Sig
|
10
|
+
|
11
|
+
class << self
|
12
|
+
sig do
|
13
|
+
override.params(
|
14
|
+
session: AtlasEngine::AddressValidation::Session,
|
15
|
+
candidate: AtlasEngine::AddressValidation::Candidate,
|
16
|
+
address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
|
17
|
+
).returns(T::Boolean)
|
18
|
+
end
|
19
|
+
def apply?(session, candidate, address_comparison)
|
20
|
+
poor_city_match?(address_comparison)
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def poor_city_match?(address_comparison)
|
26
|
+
address_comparison.city_comparison.sequence_comparison.aggregate_distance > 2
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
id: SI
|
2
|
+
ingestion:
|
3
|
+
open_address:
|
4
|
+
feature_mapper: AtlasEngine::Si::AddressImporter::OpenAddress::Mapper
|
5
|
+
correctors:
|
6
|
+
open_address:
|
7
|
+
- AtlasEngine::Si::AddressImporter::OpenAddress::Corrections::CityDistrictCorrector
|
8
|
+
validation:
|
9
|
+
enabled: true
|
10
|
+
default_matching_strategy: es
|
11
|
+
exclusions:
|
12
|
+
city:
|
13
|
+
- AtlasEngine::Si::AddressValidation::Exclusions::UnknownCity
|
14
|
+
address_parser: AtlasEngine::Si::ValidationTranscriber::AddressParser
|
15
|
+
comparison_policies:
|
16
|
+
street:
|
17
|
+
unmatched: ignore_right_unmatched
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module Si
|
6
|
+
module ValidationTranscriber
|
7
|
+
class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
|
8
|
+
STREET = "(?<street>.+?)" # the .+ is non-greedy to allow for optional building number prefixes
|
9
|
+
BUILDING_NUM = "(?<building_num>[0-9]+(\\s?[[:alpha:]]*))"
|
10
|
+
# the current OpenAddress dataset does not include unit numbers
|
11
|
+
|
12
|
+
sig { override.returns(T::Array[AddressComponents]) }
|
13
|
+
def parse
|
14
|
+
# addresses sometimes follow an abbreviation with a period and no space afterward
|
15
|
+
super.each do |components|
|
16
|
+
components[:street]&.gsub!(
|
17
|
+
/\A(?<prefix>.+?)(?<dot>\.)(?<non_space>\S)/i,
|
18
|
+
"\\k<prefix> \\k<non_space>",
|
19
|
+
)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
sig { returns(T::Array[Regexp]) }
|
26
|
+
def country_regex_formats
|
27
|
+
@country_regex_formats ||= [
|
28
|
+
/^#{STREET}\s+#{BUILDING_NUM}$/,
|
29
|
+
/^#{STREET}$/,
|
30
|
+
]
|
31
|
+
end
|
32
|
+
|
33
|
+
sig do
|
34
|
+
override.params(
|
35
|
+
captures: T::Hash[Symbol, T.nilable(String)],
|
36
|
+
address: AtlasEngine::AddressValidation::AbstractAddress,
|
37
|
+
).returns(T::Boolean)
|
38
|
+
end
|
39
|
+
def ridiculous?(captures, address)
|
40
|
+
street = captures[:street]&.downcase
|
41
|
+
|
42
|
+
if street.present?
|
43
|
+
true unless address.address1&.downcase&.include?(street) ||
|
44
|
+
address.address2&.downcase&.include?(street)
|
45
|
+
end
|
46
|
+
|
47
|
+
false
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -53,13 +53,18 @@ module AtlasEngine
|
|
53
53
|
new(
|
54
54
|
**T.unsafe(
|
55
55
|
{
|
56
|
-
result: result,
|
56
|
+
result: duplicate(result),
|
57
57
|
**result.address,
|
58
58
|
**context.except(:client_request_id),
|
59
59
|
},
|
60
60
|
),
|
61
61
|
)
|
62
62
|
end
|
63
|
+
|
64
|
+
sig { params(obj: T.untyped).returns(T.untyped) }
|
65
|
+
def duplicate(obj)
|
66
|
+
Marshal.load(Marshal.dump(obj))
|
67
|
+
end
|
63
68
|
end
|
64
69
|
|
65
70
|
sig do
|
@@ -150,9 +150,14 @@ module AtlasEngine
|
|
150
150
|
normalized_zip = ValidationTranscriber::ZipNormalizer.normalize(
|
151
151
|
country_code: address.country_code, zip: address.zip,
|
152
152
|
)
|
153
|
+
|
153
154
|
{
|
154
155
|
"match" => {
|
155
|
-
"zip" => {
|
156
|
+
"zip" => {
|
157
|
+
"query" => normalized_zip,
|
158
|
+
"fuzziness" => "auto",
|
159
|
+
"prefix_length" => profile.validation.zip_prefix_length,
|
160
|
+
},
|
156
161
|
},
|
157
162
|
}
|
158
163
|
end
|
@@ -45,7 +45,11 @@ module AtlasEngine
|
|
45
45
|
concerns.each do |concern|
|
46
46
|
tags.merge!(concern.attributes.slice(:code, :type))
|
47
47
|
|
48
|
-
|
48
|
+
if concern.attributes[:code] == :address_unknown
|
49
|
+
StatsD.increment("AddressValidation.unknown", tags: tags.except(:component))
|
50
|
+
else
|
51
|
+
StatsD.increment("AddressValidation.#{ending_breadcrumb}", tags: tags)
|
52
|
+
end
|
49
53
|
end
|
50
54
|
end
|
51
55
|
end
|
@@ -57,7 +61,7 @@ module AtlasEngine
|
|
57
61
|
def component_concerns(component)
|
58
62
|
if component.equal?(:street)
|
59
63
|
result.concerns.select do |c|
|
60
|
-
c.attributes[:code] =~ /^(address1|address2|street).*/
|
64
|
+
c.attributes[:code] =~ /^(address1|address2|street|address_unknown).*/
|
61
65
|
end
|
62
66
|
elsif component.equal?(:building_number)
|
63
67
|
result.concerns.select do |c|
|
@@ -11,14 +11,22 @@ module AtlasEngine
|
|
11
11
|
sig { returns(Sequence) }
|
12
12
|
attr_reader :left, :right
|
13
13
|
|
14
|
+
sig { returns(T::Hash[T::Array[Token], Token::Comparison]) }
|
14
15
|
attr_reader :comparison_cache
|
15
16
|
|
16
17
|
MAX_ALLOWED_EDIT_DISTANCE_PERCENT = 0.5
|
17
18
|
|
18
|
-
sig
|
19
|
-
|
19
|
+
sig do
|
20
|
+
params(
|
21
|
+
left_sequence: Sequence,
|
22
|
+
right_sequence: Sequence,
|
23
|
+
comparison_policy: ComparisonPolicy,
|
24
|
+
).void
|
25
|
+
end
|
26
|
+
def initialize(left_sequence:, right_sequence:, comparison_policy: ComparisonPolicy::DEFAULT_POLICY)
|
20
27
|
@left = left_sequence
|
21
28
|
@right = right_sequence
|
29
|
+
@comparison_policy = comparison_policy
|
22
30
|
@comparison_cache = Hash.new do |h, (l_tok, r_tok)|
|
23
31
|
h[[l_tok, r_tok]] = AddressValidation::Token::Comparator.new(l_tok, r_tok).compare
|
24
32
|
end
|
@@ -35,6 +43,9 @@ module AtlasEngine
|
|
35
43
|
|
36
44
|
private
|
37
45
|
|
46
|
+
sig { returns(ComparisonPolicy) }
|
47
|
+
attr_reader :comparison_policy
|
48
|
+
|
38
49
|
sig do
|
39
50
|
params(
|
40
51
|
left_permutations: T::Array[Token],
|
@@ -43,7 +54,7 @@ module AtlasEngine
|
|
43
54
|
end
|
44
55
|
def token_comparisons(left_permutations, right_permutations)
|
45
56
|
left_permutations.product(right_permutations).map do |l_tok, r_tok|
|
46
|
-
comparison_cache[[l_tok, r_tok]]
|
57
|
+
T.must(comparison_cache[[l_tok, r_tok]])
|
47
58
|
end
|
48
59
|
end
|
49
60
|
|
@@ -133,7 +144,7 @@ module AtlasEngine
|
|
133
144
|
|
134
145
|
remaining_right_tokens = remove_synonyms_at_same_position(remaining_right_tokens)
|
135
146
|
|
136
|
-
remaining_left_tokens
|
147
|
+
apply_unmatched_policy(remaining_left_tokens, remaining_right_tokens)
|
137
148
|
end
|
138
149
|
|
139
150
|
sig { params(token: Token, other_token: Token).returns(T::Boolean) }
|
@@ -151,6 +162,29 @@ module AtlasEngine
|
|
151
162
|
end
|
152
163
|
.values.flatten
|
153
164
|
end
|
165
|
+
|
166
|
+
sig do
|
167
|
+
params(
|
168
|
+
left_unmatched_tokens: T::Array[Token],
|
169
|
+
right_unmatched_tokens: T::Array[Token],
|
170
|
+
).returns(T::Array[Token])
|
171
|
+
end
|
172
|
+
def apply_unmatched_policy(left_unmatched_tokens, right_unmatched_tokens)
|
173
|
+
case comparison_policy.unmatched
|
174
|
+
when :ignore_left_unmatched
|
175
|
+
right_unmatched_tokens
|
176
|
+
when :ignore_right_unmatched
|
177
|
+
left_unmatched_tokens
|
178
|
+
when :ignore_largest_unmatched_side
|
179
|
+
if right_unmatched_tokens.size > left_unmatched_tokens.size
|
180
|
+
left_unmatched_tokens
|
181
|
+
else
|
182
|
+
right_unmatched_tokens
|
183
|
+
end
|
184
|
+
else
|
185
|
+
left_unmatched_tokens.concat(right_unmatched_tokens)
|
186
|
+
end
|
187
|
+
end
|
154
188
|
end
|
155
189
|
end
|
156
190
|
end
|
@@ -45,7 +45,7 @@ module AtlasEngine
|
|
45
45
|
longest_subsequence = longest_subsequence_comparison <=> other.longest_subsequence_comparison
|
46
46
|
return -1 * longest_subsequence if longest_subsequence.nonzero?
|
47
47
|
|
48
|
-
edit_distance =
|
48
|
+
edit_distance = aggregate_distance <=> other.aggregate_distance
|
49
49
|
return edit_distance if edit_distance.nonzero?
|
50
50
|
|
51
51
|
prefixes = count_by_qualifier(:prefix) <=> other.count_by_qualifier(:prefix)
|
@@ -92,7 +92,7 @@ module AtlasEngine
|
|
92
92
|
|
93
93
|
sig { returns(T::Boolean) }
|
94
94
|
def match?
|
95
|
-
|
95
|
+
aggregate_distance == 0 && unmatched_tokens.empty?
|
96
96
|
end
|
97
97
|
|
98
98
|
sig { params(threshold_percent: Float).returns(T::Boolean) }
|
@@ -101,8 +101,8 @@ module AtlasEngine
|
|
101
101
|
end
|
102
102
|
|
103
103
|
sig { returns(Integer) }
|
104
|
-
def
|
105
|
-
token_comparisons.sum(&:edit_distance)
|
104
|
+
def aggregate_distance
|
105
|
+
token_comparisons.sum(&:edit_distance) + unmatched_tokens.map(&:value).sum(&:length)
|
106
106
|
end
|
107
107
|
|
108
108
|
sig { returns(Integer) }
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module AddressValidation
|
6
|
+
class Token
|
7
|
+
class Sequence
|
8
|
+
class ComparisonPolicy
|
9
|
+
extend T::Sig
|
10
|
+
|
11
|
+
UNMATCHED_POLICIES = [
|
12
|
+
:retain, # keep all unmatched tokens in comparison
|
13
|
+
:ignore_left_unmatched, # omit unmatched tokens from left sequence in comparison
|
14
|
+
:ignore_right_unmatched, # omit unmatched tokens from right sequence in comparison
|
15
|
+
:ignore_largest_unmatched_side, # omit unmatched tokens from the side with the most unmatched tokens,
|
16
|
+
# omit from left in case of a tie
|
17
|
+
].freeze
|
18
|
+
|
19
|
+
attr_reader :unmatched
|
20
|
+
|
21
|
+
sig { params(unmatched: Symbol).void }
|
22
|
+
def initialize(unmatched:)
|
23
|
+
raise "Unknown unmatched policy: #{unmatched}" if UNMATCHED_POLICIES.exclude?(unmatched)
|
24
|
+
|
25
|
+
@unmatched = unmatched
|
26
|
+
end
|
27
|
+
|
28
|
+
DEFAULT_POLICY = ComparisonPolicy.new(unmatched: :retain).freeze
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb
CHANGED
@@ -9,18 +9,13 @@ module AtlasEngine
|
|
9
9
|
extend T::Sig
|
10
10
|
include Comparable
|
11
11
|
|
12
|
-
attr_reader :
|
13
|
-
|
14
|
-
delegate :street_comparison,
|
15
|
-
:city_comparison,
|
16
|
-
:province_code_comparison,
|
17
|
-
:zip_comparison,
|
18
|
-
:building_comparison,
|
19
|
-
to: :comparison_helper
|
12
|
+
attr_reader :address, :candidate, :datastore
|
20
13
|
|
21
14
|
sig { params(address: AbstractAddress, candidate: Candidate, datastore: DatastoreBase).void }
|
22
15
|
def initialize(address:, candidate:, datastore:)
|
23
|
-
@
|
16
|
+
@address = address
|
17
|
+
@candidate = candidate
|
18
|
+
@datastore = datastore
|
24
19
|
end
|
25
20
|
|
26
21
|
sig { params(other: AddressComparison).returns(Integer) }
|
@@ -42,13 +37,38 @@ module AtlasEngine
|
|
42
37
|
|
43
38
|
sig { returns(T::Boolean) }
|
44
39
|
def potential_match?
|
45
|
-
street_comparison.nil? || T.must(street_comparison).potential_match?
|
40
|
+
street_comparison.sequence_comparison.nil? || T.must(street_comparison.sequence_comparison).potential_match?
|
41
|
+
end
|
42
|
+
|
43
|
+
sig { returns(ZipComparison) }
|
44
|
+
def zip_comparison
|
45
|
+
@zip_comparison ||= field_comparison(field: :zip)
|
46
|
+
end
|
47
|
+
|
48
|
+
sig { returns(StreetComparison) }
|
49
|
+
def street_comparison
|
50
|
+
@street_comparison ||= field_comparison(field: :street)
|
51
|
+
end
|
52
|
+
|
53
|
+
sig { returns(CityComparison) }
|
54
|
+
def city_comparison
|
55
|
+
@city_comparison ||= field_comparison(field: :city)
|
56
|
+
end
|
57
|
+
|
58
|
+
sig { returns(ProvinceCodeComparison) }
|
59
|
+
def province_code_comparison
|
60
|
+
@province_code_comparison ||= field_comparison(field: :province_code)
|
61
|
+
end
|
62
|
+
|
63
|
+
sig { returns(BuildingComparison) }
|
64
|
+
def building_comparison
|
65
|
+
@building_comparison ||= field_comparison(field: :building)
|
46
66
|
end
|
47
67
|
|
48
68
|
protected
|
49
69
|
|
50
70
|
sig do
|
51
|
-
returns(T::Array[
|
71
|
+
returns(T::Array[FieldComparisonBase])
|
52
72
|
end
|
53
73
|
def comparisons
|
54
74
|
[
|
@@ -63,10 +83,10 @@ module AtlasEngine
|
|
63
83
|
sig { returns(T::Array[AtlasEngine::AddressValidation::Token::Sequence::Comparison]) }
|
64
84
|
def text_comparisons
|
65
85
|
[
|
66
|
-
street_comparison,
|
67
|
-
city_comparison,
|
68
|
-
zip_comparison,
|
69
|
-
province_code_comparison,
|
86
|
+
street_comparison.sequence_comparison,
|
87
|
+
city_comparison.sequence_comparison,
|
88
|
+
zip_comparison.sequence_comparison,
|
89
|
+
province_code_comparison.sequence_comparison,
|
70
90
|
].compact_blank
|
71
91
|
end
|
72
92
|
|
@@ -74,6 +94,12 @@ module AtlasEngine
|
|
74
94
|
def merged_comparison
|
75
95
|
@merged_comparisons ||= text_comparisons.reduce(&:merge)
|
76
96
|
end
|
97
|
+
|
98
|
+
sig { params(field: Symbol).returns(FieldComparisonBase) }
|
99
|
+
def field_comparison(field:)
|
100
|
+
klass = CountryProfile.for(address.country_code).validation.address_comparison(field: field)
|
101
|
+
klass.new(address: address, candidate: candidate, datastore: datastore)
|
102
|
+
end
|
77
103
|
end
|
78
104
|
end
|
79
105
|
end
|
data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module AddressValidation
|
6
|
+
module Validators
|
7
|
+
module FullAddress
|
8
|
+
class BuildingComparison < FieldComparisonBase
|
9
|
+
extend T::Sig
|
10
|
+
|
11
|
+
sig { override.returns(T.nilable(NumberComparison)) }
|
12
|
+
def sequence_comparison
|
13
|
+
@building_comparison ||= NumberComparison.new(
|
14
|
+
numbers: datastore.parsings.potential_building_numbers,
|
15
|
+
candidate_ranges: building_ranges_from_candidate(candidate),
|
16
|
+
)
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
sig { params(candidate: Candidate).returns(T::Array[AddressNumberRange]) }
|
22
|
+
def building_ranges_from_candidate(candidate)
|
23
|
+
building_and_unit_ranges = candidate.component(:building_and_unit_ranges)&.value
|
24
|
+
return [] if building_and_unit_ranges.blank?
|
25
|
+
|
26
|
+
building_ranges = JSON.parse(building_and_unit_ranges).keys
|
27
|
+
building_ranges.map { |building_range| AddressNumberRange.new(range_string: building_range) }
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -61,7 +61,7 @@ module AtlasEngine
|
|
61
61
|
concern = InvalidZipConcernBuilder.for(session.address, [])
|
62
62
|
result.concerns << concern if concern
|
63
63
|
|
64
|
-
if ConcernBuilder.too_many_unmatched_components?(unmatched_components.keys)
|
64
|
+
if ConcernBuilder.too_many_unmatched_components?(session.address, unmatched_components.keys)
|
65
65
|
result.concerns << UnknownAddressConcern.new(session.address)
|
66
66
|
end
|
67
67
|
end
|
@@ -128,7 +128,7 @@ module AtlasEngine
|
|
128
128
|
components = {}
|
129
129
|
@matched_and_unmatched_components ||= begin
|
130
130
|
components_to_compare.each do |field|
|
131
|
-
components[field] = @address_comparison.send(:"#{field}_comparison")
|
131
|
+
components[field] = @address_comparison.send(:"#{field}_comparison").sequence_comparison
|
132
132
|
end
|
133
133
|
components
|
134
134
|
end
|
@@ -146,7 +146,7 @@ module AtlasEngine
|
|
146
146
|
|
147
147
|
sig { returns(RelevantComponents) }
|
148
148
|
def relevant_components
|
149
|
-
@relevant_components ||= RelevantComponents.new(session, candidate,
|
149
|
+
@relevant_components ||= RelevantComponents.new(session, candidate, address_comparison)
|
150
150
|
end
|
151
151
|
|
152
152
|
sig do
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
|
5
|
+
module AddressValidation
|
6
|
+
module Validators
|
7
|
+
module FullAddress
|
8
|
+
class CityComparison < FieldComparisonBase
|
9
|
+
extend T::Sig
|
10
|
+
|
11
|
+
sig { override.returns(T.nilable(Token::Sequence::Comparison)) }
|
12
|
+
def sequence_comparison
|
13
|
+
return @city_comparison if defined?(@city_comparison)
|
14
|
+
|
15
|
+
@city_comparison = best_comparison(
|
16
|
+
datastore.fetch_city_sequence,
|
17
|
+
T.must(candidate.component(:city)).sequences,
|
18
|
+
field_policy(:city),
|
19
|
+
)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|