atlas_engine 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -4
- data/app/countries/atlas_engine/cz/country_profile.yml +3 -0
- data/app/countries/atlas_engine/es/country_profile.yml +3 -0
- data/app/countries/atlas_engine/es/synonyms.yml +2 -0
- data/app/countries/atlas_engine/es/validation_transcriber/address_parser.rb +28 -0
- data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb +27 -0
- data/app/countries/atlas_engine/fo/country_profile.yml +4 -0
- data/app/countries/atlas_engine/fr/country_profile.yml +2 -0
- data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb +31 -0
- data/app/countries/atlas_engine/it/country_profile.yml +4 -0
- data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb +69 -0
- data/app/countries/atlas_engine/kr/country_profile.yml +7 -0
- data/app/countries/atlas_engine/kr/validation_transcriber/address_parser.rb +36 -0
- data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb +45 -0
- data/app/countries/atlas_engine/lu/country_profile.yml +4 -1
- data/app/countries/atlas_engine/lu/validation_transcriber/address_parser.rb +23 -0
- data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb +25 -0
- data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/empty_street_corrector.rb +32 -0
- data/app/countries/atlas_engine/pl/address_validation/exclusions/placeholder_postal_code.rb +35 -0
- data/app/countries/atlas_engine/pl/address_validation/exclusions/rural_address.rb +42 -0
- data/app/countries/atlas_engine/pl/country_profile.yml +13 -0
- data/app/countries/atlas_engine/pl/synonyms.yml +13 -0
- data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +36 -1
- data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb +38 -0
- data/app/countries/atlas_engine/pt/country_profile.yml +4 -0
- data/app/countries/atlas_engine/pt/synonyms.yml +12 -0
- data/app/countries/atlas_engine/pt/validation_transcriber/address_parser.rb +75 -0
- data/app/countries/atlas_engine/si/address_importer/open_address/corrections/city_district_corrector.rb +25 -0
- data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +19 -0
- data/app/countries/atlas_engine/si/address_validation/exclusions/unknown_city.rb +33 -0
- data/app/countries/atlas_engine/si/country_profile.yml +17 -0
- data/app/countries/atlas_engine/si/synonyms.yml +7 -0
- data/app/countries/atlas_engine/si/validation_transcriber/address_parser.rb +52 -0
- data/app/graphql/atlas_engine/schema.graphql +1 -1
- data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +1 -1
- data/app/models/atlas_engine/address_validation/concern_record.rb +6 -1
- data/app/models/atlas_engine/address_validation/es/query_builder.rb +6 -1
- data/app/models/atlas_engine/address_validation/statsd_emitter.rb +6 -2
- data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +38 -4
- data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +4 -4
- data/app/models/atlas_engine/address_validation/token/sequence/comparison_policy.rb +33 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +41 -15
- data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb +33 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +3 -3
- data/app/models/atlas_engine/address_validation/validators/full_address/city_comparison.rb +25 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +15 -6
- data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +8 -2
- data/app/models/atlas_engine/address_validation/validators/full_address/field_comparison_base.rb +77 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/province_code_comparison.rb +34 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/relevant_components.rb +34 -12
- data/app/models/atlas_engine/address_validation/validators/full_address/street_comparison.rb +30 -0
- data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +1 -1
- data/app/models/atlas_engine/address_validation/validators/full_address/zip_comparison.rb +37 -0
- data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +2 -2
- data/app/models/atlas_engine/country_profile_validation_subset.rb +35 -2
- data/db/data/country_profiles/default.yml +12 -0
- data/lib/atlas_engine/version.rb +1 -1
- data/lib/tasks/atlas_engine/graphql.rake +13 -0
- metadata +35 -6
- data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +0 -135
@@ -1,3 +1,14 @@
|
|
1
|
+
street_synonyms:
|
2
|
+
- rua, r
|
3
|
+
- avenida, av
|
4
|
+
- praca, pc, pca
|
5
|
+
- travessa, tv
|
6
|
+
- largo, lg, l, lgo
|
7
|
+
- beco, bc
|
8
|
+
- calcada, cc
|
9
|
+
- professor, prof
|
10
|
+
- camino, cam
|
11
|
+
- estrada, estr
|
1
12
|
city_synonyms:
|
2
13
|
- GDM, Gondomar
|
3
14
|
- GMR, Guimarães
|
@@ -5,3 +16,4 @@ city_synonyms:
|
|
5
16
|
- VGS, Vagos
|
6
17
|
- VN, Vila Nova De
|
7
18
|
- VN, Vila Nova Da
|
19
|
+
- TCS, Trancoso
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Pt
    module ValidationTranscriber
      # Address parser for the Pt country namespace. It supplies the regex formats used to split
      # an address line into street / building number / unit number / direction captures, pulls
      # a PO box number out of an address line, and flags regex matches that are obviously wrong.
      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
        private

        # Regex source fragments interpolated into the formats in #country_regex_formats.
        # Note: `private` above only affects methods; these constants remain publicly readable.
        STREET = "(?<street>.+)"
        # The backslash is escaped on purpose: inside a double-quoted Ruby string a bare "\s" is a
        # literal space character, so the regex whitespace class would otherwise be lost.
        NUMBERED_STREET = "(?<street>.+\\s+[0-9]+)"
        BUILDING_NUM = "n?(?<building_num>[0-9]+[a-z]*)"
        UNIT_NUM = "(?<unit_num>[[:alnum:]]+)"
        DIRECTION = /\b(?<direction>esq|dir|dto|fte|e|d|f|esquerda|direito|frente|fundo|andar)\b\.?/i
        PO_BOX = /\b(?<box_type>ap|apartado|caixa postal|cp)\s+(?<number>\d+)\b/i

        # Regex formats built from the fragments above. The array is built once and memoized
        # on the parser instance.
        sig { returns(T::Array[Regexp]) }
        def country_regex_formats
          @country_regex_formats ||= [
            /^#{STREET},?\s+#{BUILDING_NUM}$/,
            /^#{STREET},?\s+#{BUILDING_NUM},?\s.*$/,
            /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}$/,
            /^#{STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
            /^#{STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}[\s,-]+#{DIRECTION}$/,
            /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
            /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}[\s,-]+#{DIRECTION}$/,
          ]
        end

        # Splits a PO box reference out of an address line.
        #
        # Returns a two-element array [address_line, po_box]. When PO_BOX matches, po_box is the
        # captured number and the returned line has every PO_BOX match removed, is stripped, and
        # has one trailing comma dropped. Otherwise the line is returned unchanged with a nil po_box.
        sig { override.params(address_line: String).returns(T::Array[T.nilable(String)]) }
        def extract_po_box(address_line)
          po_box_match = address_line.match(PO_BOX)

          if po_box_match
            po_box = po_box_match["number"]
            address_line = address_line.gsub(PO_BOX, "").strip.delete_suffix(",")
          else
            po_box = nil
          end

          [address_line, po_box]
        end

        # Return true if something's obviously wrong with this regex match:
        # - the captured street is present but appears (case-insensitively) in neither
        #   address1 nor address2, or
        # - the captured building or unit number looks like a PO box or a street suffix.
        sig do
          override.params(
            captures: T::Hash[Symbol, T.nilable(String)],
            address: ::AtlasEngine::AddressValidation::AbstractAddress,
          ).returns(T::Boolean)
        end
        def ridiculous?(captures, address)
          building_num = captures[:building_num]&.downcase
          street = captures[:street]&.downcase
          unit_num = captures[:unit_num]&.downcase

          if street.present?
            # Upcase the needle once instead of once per address line.
            needle = street.upcase
            street_found = address.address1&.upcase&.include?(needle) ||
              address.address2&.upcase&.include?(needle)
            return true unless street_found
          end

          [building_num, unit_num].any? do |token|
            po_box?(token) || street_suffix?(token)
          end
        end

        # True when the token is non-blank and matches the PO_BOX pattern.
        sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
        def po_box?(token)
          return false if token.blank?

          token.match?(PO_BOX)
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Si
    module AddressImporter
      module OpenAddress
        module Corrections
          # Corrector that folds an address hash's :region4 value into its :city value.
          class CityDistrictCorrector
            class << self
              extend T::Sig

              # Appends address[:region4] to address[:city] unless :region4 is blank or
              # address[:city] already includes it. The :city value is modified in place.
              sig { params(address: Hash).void }
              def apply(address)
                district = address[:region4]
                return if district.blank?

                city_value = address[:city]
                city_value << district if city_value.exclude?(district)
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Si
    module AddressImporter
      module OpenAddress
        # Feature mapper that adds a :region4 entry on top of the default mapping.
        class Mapper < AtlasEngine::AddressImporter::OpenAddress::DefaultMapper
          # Returns the parent mapper's hash merged with :region4, which is read from
          # feature["properties"]["district"].
          sig do
            params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
          end
          def map(feature)
            # Bare `super` forwards the same `feature` argument to the parent implementation.
            default_mapping = super
            district = feature["properties"]["district"]

            default_mapping.merge(region4: district)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Si
    module AddressValidation
      module Exclusions
        # Exclusion that applies when the city comparison's aggregate distance is greater than 2.
        class UnknownCity < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
          extend T::Sig

          class << self
            # Decides whether this exclusion applies. Only address_comparison is consulted;
            # session and candidate are accepted to satisfy the overridden signature.
            sig do
              override.params(
                session: AtlasEngine::AddressValidation::Session,
                candidate: AtlasEngine::AddressValidation::Candidate,
                address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
              ).returns(T::Boolean)
            end
            def apply?(session, candidate, address_comparison)
              poor_city_match?(address_comparison)
            end

            private

            # True when the aggregate distance of the city sequence comparison exceeds 2.
            # NOTE(review): sequence_comparison looks nilable elsewhere in this codebase; a nil here
            # would raise NoMethodError — confirm callers only reach this with a city comparison.
            def poor_city_match?(address_comparison)
              city_sequence_comparison = address_comparison.city_comparison.sequence_comparison
              distance = city_sequence_comparison.aggregate_distance

              distance > 2
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
id: SI
|
2
|
+
ingestion:
|
3
|
+
open_address:
|
4
|
+
feature_mapper: AtlasEngine::Si::AddressImporter::OpenAddress::Mapper
|
5
|
+
correctors:
|
6
|
+
open_address:
|
7
|
+
- AtlasEngine::Si::AddressImporter::OpenAddress::Corrections::CityDistrictCorrector
|
8
|
+
validation:
|
9
|
+
enabled: true
|
10
|
+
default_matching_strategy: es
|
11
|
+
exclusions:
|
12
|
+
city:
|
13
|
+
- AtlasEngine::Si::AddressValidation::Exclusions::UnknownCity
|
14
|
+
address_parser: AtlasEngine::Si::ValidationTranscriber::AddressParser
|
15
|
+
comparison_policies:
|
16
|
+
street:
|
17
|
+
unmatched: ignore_right_unmatched
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module Si
    module ValidationTranscriber
      # Address parser for the Si country namespace. It supplies the regex formats used to split
      # an address line into street and building number captures, tidies up the parsed street,
      # and flags regex matches whose street does not come from the address lines.
      class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
        STREET = "(?<street>.+?)" # the .+ is non-greedy to allow for optional building number prefixes
        # The backslash is escaped on purpose: inside a double-quoted Ruby string a bare "\s" is a
        # literal space character, so the regex whitespace class would otherwise be lost.
        BUILDING_NUM = "(?<building_num>[0-9]+(\\s?[[:alpha:]]*))"
        # the current OpenAddress dataset does not include unit numbers

        # Runs the base parser, then rewrites each parsed street in place so that the first
        # "<text>.<non-space>" occurrence becomes "<text> <non-space>" (period replaced by a space).
        sig { override.returns(T::Array[AddressComponents]) }
        def parse
          # addresses sometimes follow an abbreviation with a period and no space afterward
          super.each do |components|
            components[:street]&.gsub!(
              /\A(?<prefix>.+?)(?<dot>\.)(?<non_space>\S)/i,
              "\\k<prefix> \\k<non_space>",
            )
          end
        end

        private

        # Regex formats built from the fragments above; built once and memoized on the instance.
        sig { returns(T::Array[Regexp]) }
        def country_regex_formats
          @country_regex_formats ||= [
            /^#{STREET}\s+#{BUILDING_NUM}$/,
            /^#{STREET}$/,
          ]
        end

        # Returns true when the captured street is present but appears (case-insensitively) in
        # neither address1 nor address2; returns false otherwise, including when no street was
        # captured.
        sig do
          override.params(
            captures: T::Hash[Symbol, T.nilable(String)],
            address: AtlasEngine::AddressValidation::AbstractAddress,
          ).returns(T::Boolean)
        end
        def ridiculous?(captures, address)
          street = captures[:street]&.downcase
          return false if street.blank?

          # The result of this check must be the method's return value; evaluating it as a bare
          # expression and then falling through to `false` would make the check a no-op.
          [address.address1, address.address2].none? do |line|
            line&.downcase&.include?(street)
          end
        end
      end
    end
  end
end
|
@@ -53,13 +53,18 @@ module AtlasEngine
|
|
53
53
|
new(
|
54
54
|
**T.unsafe(
|
55
55
|
{
|
56
|
-
result: result,
|
56
|
+
result: duplicate(result),
|
57
57
|
**result.address,
|
58
58
|
**context.except(:client_request_id),
|
59
59
|
},
|
60
60
|
),
|
61
61
|
)
|
62
62
|
end
|
63
|
+
|
64
|
+
sig { params(obj: T.untyped).returns(T.untyped) }
|
65
|
+
def duplicate(obj)
|
66
|
+
Marshal.load(Marshal.dump(obj))
|
67
|
+
end
|
63
68
|
end
|
64
69
|
|
65
70
|
sig do
|
@@ -150,9 +150,14 @@ module AtlasEngine
|
|
150
150
|
normalized_zip = ValidationTranscriber::ZipNormalizer.normalize(
|
151
151
|
country_code: address.country_code, zip: address.zip,
|
152
152
|
)
|
153
|
+
|
153
154
|
{
|
154
155
|
"match" => {
|
155
|
-
"zip" => {
|
156
|
+
"zip" => {
|
157
|
+
"query" => normalized_zip,
|
158
|
+
"fuzziness" => "auto",
|
159
|
+
"prefix_length" => profile.validation.zip_prefix_length,
|
160
|
+
},
|
156
161
|
},
|
157
162
|
}
|
158
163
|
end
|
@@ -45,7 +45,11 @@ module AtlasEngine
|
|
45
45
|
concerns.each do |concern|
|
46
46
|
tags.merge!(concern.attributes.slice(:code, :type))
|
47
47
|
|
48
|
-
|
48
|
+
if concern.attributes[:code] == :address_unknown
|
49
|
+
StatsD.increment("AddressValidation.unknown", tags: tags.except(:component))
|
50
|
+
else
|
51
|
+
StatsD.increment("AddressValidation.#{ending_breadcrumb}", tags: tags)
|
52
|
+
end
|
49
53
|
end
|
50
54
|
end
|
51
55
|
end
|
@@ -57,7 +61,7 @@ module AtlasEngine
|
|
57
61
|
def component_concerns(component)
|
58
62
|
if component.equal?(:street)
|
59
63
|
result.concerns.select do |c|
|
60
|
-
c.attributes[:code] =~ /^(address1|address2|street).*/
|
64
|
+
c.attributes[:code] =~ /^(address1|address2|street|address_unknown).*/
|
61
65
|
end
|
62
66
|
elsif component.equal?(:building_number)
|
63
67
|
result.concerns.select do |c|
|
@@ -11,14 +11,22 @@ module AtlasEngine
|
|
11
11
|
sig { returns(Sequence) }
|
12
12
|
attr_reader :left, :right
|
13
13
|
|
14
|
+
sig { returns(T::Hash[T::Array[Token], Token::Comparison]) }
|
14
15
|
attr_reader :comparison_cache
|
15
16
|
|
16
17
|
MAX_ALLOWED_EDIT_DISTANCE_PERCENT = 0.5
|
17
18
|
|
18
|
-
sig
|
19
|
-
|
19
|
+
sig do
|
20
|
+
params(
|
21
|
+
left_sequence: Sequence,
|
22
|
+
right_sequence: Sequence,
|
23
|
+
comparison_policy: ComparisonPolicy,
|
24
|
+
).void
|
25
|
+
end
|
26
|
+
def initialize(left_sequence:, right_sequence:, comparison_policy: ComparisonPolicy::DEFAULT_POLICY)
|
20
27
|
@left = left_sequence
|
21
28
|
@right = right_sequence
|
29
|
+
@comparison_policy = comparison_policy
|
22
30
|
@comparison_cache = Hash.new do |h, (l_tok, r_tok)|
|
23
31
|
h[[l_tok, r_tok]] = AddressValidation::Token::Comparator.new(l_tok, r_tok).compare
|
24
32
|
end
|
@@ -35,6 +43,9 @@ module AtlasEngine
|
|
35
43
|
|
36
44
|
private
|
37
45
|
|
46
|
+
sig { returns(ComparisonPolicy) }
|
47
|
+
attr_reader :comparison_policy
|
48
|
+
|
38
49
|
sig do
|
39
50
|
params(
|
40
51
|
left_permutations: T::Array[Token],
|
@@ -43,7 +54,7 @@ module AtlasEngine
|
|
43
54
|
end
|
44
55
|
def token_comparisons(left_permutations, right_permutations)
|
45
56
|
left_permutations.product(right_permutations).map do |l_tok, r_tok|
|
46
|
-
comparison_cache[[l_tok, r_tok]]
|
57
|
+
T.must(comparison_cache[[l_tok, r_tok]])
|
47
58
|
end
|
48
59
|
end
|
49
60
|
|
@@ -133,7 +144,7 @@ module AtlasEngine
|
|
133
144
|
|
134
145
|
remaining_right_tokens = remove_synonyms_at_same_position(remaining_right_tokens)
|
135
146
|
|
136
|
-
remaining_left_tokens
|
147
|
+
apply_unmatched_policy(remaining_left_tokens, remaining_right_tokens)
|
137
148
|
end
|
138
149
|
|
139
150
|
sig { params(token: Token, other_token: Token).returns(T::Boolean) }
|
@@ -151,6 +162,29 @@ module AtlasEngine
|
|
151
162
|
end
|
152
163
|
.values.flatten
|
153
164
|
end
|
165
|
+
|
166
|
+
# Picks which unmatched tokens are kept, according to comparison_policy.unmatched:
# - :ignore_left_unmatched          -> only the right-hand unmatched tokens are returned
# - :ignore_right_unmatched         -> only the left-hand unmatched tokens are returned
# - :ignore_largest_unmatched_side  -> the side with fewer unmatched tokens is returned
#                                      (the right-hand tokens are returned on a tie)
# - anything else                   -> left-hand tokens followed by right-hand tokens
sig do
  params(
    left_unmatched_tokens: T::Array[Token],
    right_unmatched_tokens: T::Array[Token],
  ).returns(T::Array[Token])
end
def apply_unmatched_policy(left_unmatched_tokens, right_unmatched_tokens)
  case comparison_policy.unmatched
  when :ignore_left_unmatched
    right_unmatched_tokens
  when :ignore_right_unmatched
    left_unmatched_tokens
  when :ignore_largest_unmatched_side
    if right_unmatched_tokens.size > left_unmatched_tokens.size
      left_unmatched_tokens
    else
      right_unmatched_tokens
    end
  else
    # Build a new array rather than appending to the caller's left-hand array in place.
    left_unmatched_tokens + right_unmatched_tokens
  end
end
|
154
188
|
end
|
155
189
|
end
|
156
190
|
end
|
@@ -45,7 +45,7 @@ module AtlasEngine
|
|
45
45
|
longest_subsequence = longest_subsequence_comparison <=> other.longest_subsequence_comparison
|
46
46
|
return -1 * longest_subsequence if longest_subsequence.nonzero?
|
47
47
|
|
48
|
-
edit_distance =
|
48
|
+
edit_distance = aggregate_distance <=> other.aggregate_distance
|
49
49
|
return edit_distance if edit_distance.nonzero?
|
50
50
|
|
51
51
|
prefixes = count_by_qualifier(:prefix) <=> other.count_by_qualifier(:prefix)
|
@@ -92,7 +92,7 @@ module AtlasEngine
|
|
92
92
|
|
93
93
|
sig { returns(T::Boolean) }
|
94
94
|
def match?
|
95
|
-
|
95
|
+
aggregate_distance == 0 && unmatched_tokens.empty?
|
96
96
|
end
|
97
97
|
|
98
98
|
sig { params(threshold_percent: Float).returns(T::Boolean) }
|
@@ -101,8 +101,8 @@ module AtlasEngine
|
|
101
101
|
end
|
102
102
|
|
103
103
|
sig { returns(Integer) }
|
104
|
-
def
|
105
|
-
token_comparisons.sum(&:edit_distance)
|
104
|
+
def aggregate_distance
|
105
|
+
token_comparisons.sum(&:edit_distance) + unmatched_tokens.map(&:value).sum(&:length)
|
106
106
|
end
|
107
107
|
|
108
108
|
sig { returns(Integer) }
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module AddressValidation
    class Token
      class Sequence
        # Holds the policy symbol that says how unmatched tokens are treated in a sequence
        # comparison. Construction fails for any symbol not listed in UNMATCHED_POLICIES.
        class ComparisonPolicy
          extend T::Sig

          UNMATCHED_POLICIES = [
            :retain, # keep all unmatched tokens in comparison
            :ignore_left_unmatched, # omit unmatched tokens from left sequence in comparison
            :ignore_right_unmatched, # omit unmatched tokens from right sequence in comparison
            :ignore_largest_unmatched_side, # omit unmatched tokens from the side with the most unmatched tokens,
            # omit from left in case of a tie
          ].freeze

          # The selected unmatched-token policy (one of UNMATCHED_POLICIES).
          attr_reader :unmatched

          # Raises a RuntimeError when `unmatched` is not one of UNMATCHED_POLICIES.
          sig { params(unmatched: Symbol).void }
          def initialize(unmatched:)
            unless UNMATCHED_POLICIES.include?(unmatched)
              raise "Unknown unmatched policy: #{unmatched}"
            end

            @unmatched = unmatched
          end

          # Frozen shared instance that retains all unmatched tokens.
          DEFAULT_POLICY = new(unmatched: :retain).freeze
        end
      end
    end
  end
end
|
data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb
CHANGED
@@ -9,18 +9,13 @@ module AtlasEngine
|
|
9
9
|
extend T::Sig
|
10
10
|
include Comparable
|
11
11
|
|
12
|
-
attr_reader :
|
13
|
-
|
14
|
-
delegate :street_comparison,
|
15
|
-
:city_comparison,
|
16
|
-
:province_code_comparison,
|
17
|
-
:zip_comparison,
|
18
|
-
:building_comparison,
|
19
|
-
to: :comparison_helper
|
12
|
+
attr_reader :address, :candidate, :datastore
|
20
13
|
|
21
14
|
sig { params(address: AbstractAddress, candidate: Candidate, datastore: DatastoreBase).void }
|
22
15
|
def initialize(address:, candidate:, datastore:)
|
23
|
-
@
|
16
|
+
@address = address
|
17
|
+
@candidate = candidate
|
18
|
+
@datastore = datastore
|
24
19
|
end
|
25
20
|
|
26
21
|
sig { params(other: AddressComparison).returns(Integer) }
|
@@ -42,13 +37,38 @@ module AtlasEngine
|
|
42
37
|
|
43
38
|
sig { returns(T::Boolean) }
|
44
39
|
def potential_match?
|
45
|
-
street_comparison.nil? || T.must(street_comparison).potential_match?
|
40
|
+
street_comparison.sequence_comparison.nil? || T.must(street_comparison.sequence_comparison).potential_match?
|
41
|
+
end
|
42
|
+
|
43
|
+
sig { returns(ZipComparison) }
|
44
|
+
def zip_comparison
|
45
|
+
@zip_comparison ||= field_comparison(field: :zip)
|
46
|
+
end
|
47
|
+
|
48
|
+
sig { returns(StreetComparison) }
|
49
|
+
def street_comparison
|
50
|
+
@street_comparison ||= field_comparison(field: :street)
|
51
|
+
end
|
52
|
+
|
53
|
+
sig { returns(CityComparison) }
|
54
|
+
def city_comparison
|
55
|
+
@city_comparison ||= field_comparison(field: :city)
|
56
|
+
end
|
57
|
+
|
58
|
+
sig { returns(ProvinceCodeComparison) }
|
59
|
+
def province_code_comparison
|
60
|
+
@province_code_comparison ||= field_comparison(field: :province_code)
|
61
|
+
end
|
62
|
+
|
63
|
+
sig { returns(BuildingComparison) }
|
64
|
+
def building_comparison
|
65
|
+
@building_comparison ||= field_comparison(field: :building)
|
46
66
|
end
|
47
67
|
|
48
68
|
protected
|
49
69
|
|
50
70
|
sig do
|
51
|
-
returns(T::Array[
|
71
|
+
returns(T::Array[FieldComparisonBase])
|
52
72
|
end
|
53
73
|
def comparisons
|
54
74
|
[
|
@@ -63,10 +83,10 @@ module AtlasEngine
|
|
63
83
|
sig { returns(T::Array[AtlasEngine::AddressValidation::Token::Sequence::Comparison]) }
|
64
84
|
def text_comparisons
|
65
85
|
[
|
66
|
-
street_comparison,
|
67
|
-
city_comparison,
|
68
|
-
zip_comparison,
|
69
|
-
province_code_comparison,
|
86
|
+
street_comparison.sequence_comparison,
|
87
|
+
city_comparison.sequence_comparison,
|
88
|
+
zip_comparison.sequence_comparison,
|
89
|
+
province_code_comparison.sequence_comparison,
|
70
90
|
].compact_blank
|
71
91
|
end
|
72
92
|
|
@@ -74,6 +94,12 @@ module AtlasEngine
|
|
74
94
|
def merged_comparison
|
75
95
|
@merged_comparisons ||= text_comparisons.reduce(&:merge)
|
76
96
|
end
|
97
|
+
|
98
|
+
sig { params(field: Symbol).returns(FieldComparisonBase) }
|
99
|
+
def field_comparison(field:)
|
100
|
+
klass = CountryProfile.for(address.country_code).validation.address_comparison(field: field)
|
101
|
+
klass.new(address: address, candidate: candidate, datastore: datastore)
|
102
|
+
end
|
77
103
|
end
|
78
104
|
end
|
79
105
|
end
|
data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module AddressValidation
    module Validators
      module FullAddress
        # Field comparison for the building number: compares the potential building numbers
        # parsed by the datastore against the building ranges stored on the candidate.
        class BuildingComparison < FieldComparisonBase
          extend T::Sig

          # Builds the NumberComparison on first use and returns the memoized instance afterwards.
          sig { override.returns(T.nilable(NumberComparison)) }
          def sequence_comparison
            return @building_comparison if @building_comparison

            @building_comparison = NumberComparison.new(
              numbers: datastore.parsings.potential_building_numbers,
              candidate_ranges: building_ranges_from_candidate(candidate),
            )
          end

          private

          # Reads the candidate's :building_and_unit_ranges component value, parses it as JSON and
          # wraps each top-level key in an AddressNumberRange. Returns [] when the value is blank.
          sig { params(candidate: Candidate).returns(T::Array[AddressNumberRange]) }
          def building_ranges_from_candidate(candidate)
            raw_ranges = candidate.component(:building_and_unit_ranges)&.value
            return [] if raw_ranges.blank?

            JSON.parse(raw_ranges).keys.map do |range_string|
              AddressNumberRange.new(range_string: range_string)
            end
          end
        end
      end
    end
  end
end
|
@@ -61,7 +61,7 @@ module AtlasEngine
|
|
61
61
|
concern = InvalidZipConcernBuilder.for(session.address, [])
|
62
62
|
result.concerns << concern if concern
|
63
63
|
|
64
|
-
if ConcernBuilder.too_many_unmatched_components?(unmatched_components.keys)
|
64
|
+
if ConcernBuilder.too_many_unmatched_components?(session.address, unmatched_components.keys)
|
65
65
|
result.concerns << UnknownAddressConcern.new(session.address)
|
66
66
|
end
|
67
67
|
end
|
@@ -128,7 +128,7 @@ module AtlasEngine
|
|
128
128
|
components = {}
|
129
129
|
@matched_and_unmatched_components ||= begin
|
130
130
|
components_to_compare.each do |field|
|
131
|
-
components[field] = @address_comparison.send(:"#{field}_comparison")
|
131
|
+
components[field] = @address_comparison.send(:"#{field}_comparison").sequence_comparison
|
132
132
|
end
|
133
133
|
components
|
134
134
|
end
|
@@ -146,7 +146,7 @@ module AtlasEngine
|
|
146
146
|
|
147
147
|
sig { returns(RelevantComponents) }
|
148
148
|
def relevant_components
|
149
|
-
@relevant_components ||= RelevantComponents.new(session, candidate,
|
149
|
+
@relevant_components ||= RelevantComponents.new(session, candidate, address_comparison)
|
150
150
|
end
|
151
151
|
|
152
152
|
sig do
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# typed: true
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module AtlasEngine
  module AddressValidation
    module Validators
      module FullAddress
        # Field comparison for the city: picks the best comparison between the datastore's city
        # sequence and the candidate's city sequences, using the :city field policy.
        class CityComparison < FieldComparisonBase
          extend T::Sig

          # Computes the comparison once and memoizes it. The `defined?` check (rather than `||=`)
          # means a nil result is cached too and not recomputed on later calls.
          sig { override.returns(T.nilable(Token::Sequence::Comparison)) }
          def sequence_comparison
            unless defined?(@city_comparison)
              @city_comparison = best_comparison(
                datastore.fetch_city_sequence,
                T.must(candidate.component(:city)).sequences,
                field_policy(:city),
              )
            end

            @city_comparison
          end
        end
      end
    end
  end
end
|