atlas_engine 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/README.md +12 -4
  3. data/app/countries/atlas_engine/cz/country_profile.yml +3 -0
  4. data/app/countries/atlas_engine/es/country_profile.yml +3 -0
  5. data/app/countries/atlas_engine/es/synonyms.yml +2 -0
  6. data/app/countries/atlas_engine/es/validation_transcriber/address_parser.rb +28 -0
  7. data/app/countries/atlas_engine/fo/address_importer/corrections/open_address/city_corrector.rb +27 -0
  8. data/app/countries/atlas_engine/fo/country_profile.yml +4 -0
  9. data/app/countries/atlas_engine/fr/country_profile.yml +2 -0
  10. data/app/countries/atlas_engine/it/address_validation/validators/full_address/exclusions/city.rb +31 -0
  11. data/app/countries/atlas_engine/it/country_profile.yml +4 -0
  12. data/app/countries/atlas_engine/kr/address_validation/validators/full_address/exclusions/city.rb +69 -0
  13. data/app/countries/atlas_engine/kr/country_profile.yml +7 -0
  14. data/app/countries/atlas_engine/kr/validation_transcriber/address_parser.rb +36 -0
  15. data/app/countries/atlas_engine/lu/address_importer/corrections/open_address/city_corrector.rb +45 -0
  16. data/app/countries/atlas_engine/lu/country_profile.yml +4 -1
  17. data/app/countries/atlas_engine/lu/validation_transcriber/address_parser.rb +23 -0
  18. data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/city_corrector.rb +25 -0
  19. data/app/countries/atlas_engine/pl/address_importer/corrections/open_address/empty_street_corrector.rb +32 -0
  20. data/app/countries/atlas_engine/pl/address_validation/exclusions/placeholder_postal_code.rb +35 -0
  21. data/app/countries/atlas_engine/pl/address_validation/exclusions/rural_address.rb +42 -0
  22. data/app/countries/atlas_engine/pl/country_profile.yml +13 -0
  23. data/app/countries/atlas_engine/pl/synonyms.yml +13 -0
  24. data/app/countries/atlas_engine/pl/validation_transcriber/address_parser.rb +36 -1
  25. data/app/countries/atlas_engine/pt/address_validation/validators/full_address/exclusions/zip.rb +38 -0
  26. data/app/countries/atlas_engine/pt/country_profile.yml +4 -0
  27. data/app/countries/atlas_engine/pt/synonyms.yml +12 -0
  28. data/app/countries/atlas_engine/pt/validation_transcriber/address_parser.rb +75 -0
  29. data/app/countries/atlas_engine/si/address_importer/open_address/corrections/city_district_corrector.rb +25 -0
  30. data/app/countries/atlas_engine/si/address_importer/open_address/mapper.rb +19 -0
  31. data/app/countries/atlas_engine/si/address_validation/exclusions/unknown_city.rb +33 -0
  32. data/app/countries/atlas_engine/si/country_profile.yml +17 -0
  33. data/app/countries/atlas_engine/si/synonyms.yml +7 -0
  34. data/app/countries/atlas_engine/si/validation_transcriber/address_parser.rb +52 -0
  35. data/app/graphql/atlas_engine/schema.graphql +1 -1
  36. data/app/lib/atlas_engine/validation_transcriber/address_parser_base.rb +1 -1
  37. data/app/models/atlas_engine/address_validation/concern_record.rb +6 -1
  38. data/app/models/atlas_engine/address_validation/es/query_builder.rb +6 -1
  39. data/app/models/atlas_engine/address_validation/statsd_emitter.rb +6 -2
  40. data/app/models/atlas_engine/address_validation/token/sequence/comparator.rb +38 -4
  41. data/app/models/atlas_engine/address_validation/token/sequence/comparison.rb +4 -4
  42. data/app/models/atlas_engine/address_validation/token/sequence/comparison_policy.rb +33 -0
  43. data/app/models/atlas_engine/address_validation/validators/full_address/address_comparison.rb +41 -15
  44. data/app/models/atlas_engine/address_validation/validators/full_address/building_comparison.rb +33 -0
  45. data/app/models/atlas_engine/address_validation/validators/full_address/candidate_result.rb +3 -3
  46. data/app/models/atlas_engine/address_validation/validators/full_address/city_comparison.rb +25 -0
  47. data/app/models/atlas_engine/address_validation/validators/full_address/concern_builder.rb +15 -6
  48. data/app/models/atlas_engine/address_validation/validators/full_address/exclusions/exclusion_base.rb +8 -2
  49. data/app/models/atlas_engine/address_validation/validators/full_address/field_comparison_base.rb +77 -0
  50. data/app/models/atlas_engine/address_validation/validators/full_address/province_code_comparison.rb +34 -0
  51. data/app/models/atlas_engine/address_validation/validators/full_address/relevant_components.rb +34 -12
  52. data/app/models/atlas_engine/address_validation/validators/full_address/street_comparison.rb +30 -0
  53. data/app/models/atlas_engine/address_validation/validators/full_address/suggestion_builder.rb +1 -1
  54. data/app/models/atlas_engine/address_validation/validators/full_address/zip_comparison.rb +37 -0
  55. data/app/models/atlas_engine/address_validation/validators/predicates/street/building_number_in_address1_or_address2.rb +2 -2
  56. data/app/models/atlas_engine/country_profile_validation_subset.rb +35 -2
  57. data/db/data/country_profiles/default.yml +12 -0
  58. data/lib/atlas_engine/version.rb +1 -1
  59. data/lib/tasks/atlas_engine/graphql.rake +13 -0
  60. metadata +35 -6
  61. data/app/models/atlas_engine/address_validation/validators/full_address/comparison_helper.rb +0 -135
@@ -1,3 +1,14 @@
1
+ street_synonyms:
2
+ - rua, r
3
+ - avenida, av
4
+ - praca, pc, pca
5
+ - travessa, tv
6
+ - largo, lg, l, lgo
7
+ - beco, bc
8
+ - calcada, cc
9
+ - professor, prof
10
+ - camino, cam
11
+ - estrada, estr
1
12
  city_synonyms:
2
13
  - GDM, Gondomar
3
14
  - GMR, Guimarães
@@ -5,3 +16,4 @@ city_synonyms:
5
16
  - VGS, Vagos
6
17
  - VN, Vila Nova De
7
18
  - VN, Vila Nova Da
19
+ - TCS, Trancoso
@@ -0,0 +1,75 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Pt
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ private
9
+
10
+ STREET = "(?<street>.+)"
11
+ NUMBERED_STREET = '(?<street>.+\s+[0-9]+)' # single-quoted: in double quotes "\s" is a plain space, not \s
12
+ BUILDING_NUM = "n?(?<building_num>[0-9]+[a-z]*)"
13
+ UNIT_NUM = "(?<unit_num>[[:alnum:]]+)"
14
+ DIRECTION = /\b(?<direction>esq|dir|dto|fte|e|d|f|esquerda|direito|frente|fundo|andar)\b\.?/i
15
+ PO_BOX = /\b(?<box_type>ap|apartado|caixa postal|cp)\s+(?<number>\d+)\b/i
16
+
17
+ sig { returns(T::Array[Regexp]) }
18
+ def country_regex_formats
19
+ @country_regex_formats ||= [
20
+ /^#{STREET},?\s+#{BUILDING_NUM}$/,
21
+ /^#{STREET},?\s+#{BUILDING_NUM},?\s.*$/,
22
+ /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}$/,
23
+ /^#{STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
24
+ /^#{STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}[\s,-]+#{DIRECTION}$/,
25
+ /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}$/,
26
+ /^#{NUMBERED_STREET},?\s+#{BUILDING_NUM}[\s,-]+#{UNIT_NUM}[\s,-]+#{DIRECTION}$/,
27
+ ]
28
+ end
29
+
30
+ sig { override.params(address_line: String).returns(T::Array[T.nilable(String)]) }
31
+ def extract_po_box(address_line)
32
+ po_box_match = address_line.match(PO_BOX)
33
+
34
+ if po_box_match
35
+ po_box = po_box_match["number"]
36
+ address_line = address_line.gsub(PO_BOX, "").strip.delete_suffix(",")
37
+ else
38
+ po_box = nil
39
+ end
40
+
41
+ [address_line, po_box]
42
+ end
43
+
44
+ # Return true if something's obviously wrong with this regex match
45
+ sig do
46
+ override.params(
47
+ captures: T::Hash[Symbol, T.nilable(String)],
48
+ address: ::AtlasEngine::AddressValidation::AbstractAddress,
49
+ ).returns(T::Boolean)
50
+ end
51
+ def ridiculous?(captures, address)
52
+ building_num = captures[:building_num]&.downcase
53
+ street = captures[:street]&.downcase
54
+ unit_num = captures[:unit_num]&.downcase
55
+
56
+ if street.present?
57
+ return true unless address.address1&.upcase&.include?(street.upcase) ||
58
+ address.address2&.upcase&.include?(street.upcase)
59
+ end
60
+
61
+ [building_num, unit_num].any? do |token|
62
+ po_box?(token) || street_suffix?(token)
63
+ end
64
+ end
65
+
66
+ sig { override.params(token: T.nilable(String)).returns(T::Boolean) }
67
+ def po_box?(token)
68
+ return false if token.blank?
69
+
70
+ token.match?(PO_BOX)
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,25 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Si
6
+ module AddressImporter
7
+ module OpenAddress
8
+ module Corrections
9
+ class CityDistrictCorrector
10
+ class << self
11
+ extend T::Sig
12
+
13
+ sig { params(address: Hash).void }
14
+ def apply(address)
15
+ if address[:region4].present?
16
+ address[:city] << address[:region4] if address[:city].exclude?(address[:region4])
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,19 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Si
6
+ module AddressImporter
7
+ module OpenAddress
8
+ class Mapper < AtlasEngine::AddressImporter::OpenAddress::DefaultMapper
9
+ sig do
10
+ params(feature: AtlasEngine::AddressImporter::OpenAddress::Feature).returns(T::Hash[Symbol, T.untyped])
11
+ end
12
+ def map(feature)
13
+ super(feature).merge(region4: feature["properties"]["district"])
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,33 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Si
6
+ module AddressValidation
7
+ module Exclusions
8
+ class UnknownCity < AtlasEngine::AddressValidation::Validators::FullAddress::Exclusions::ExclusionBase
9
+ extend T::Sig
10
+
11
+ class << self
12
+ sig do
13
+ override.params(
14
+ session: AtlasEngine::AddressValidation::Session,
15
+ candidate: AtlasEngine::AddressValidation::Candidate,
16
+ address_comparison: AtlasEngine::AddressValidation::Validators::FullAddress::AddressComparison,
17
+ ).returns(T::Boolean)
18
+ end
19
+ def apply?(session, candidate, address_comparison)
20
+ poor_city_match?(address_comparison)
21
+ end
22
+
23
+ private
24
+
25
+ def poor_city_match?(address_comparison)
26
+ address_comparison.city_comparison.sequence_comparison.aggregate_distance > 2
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,17 @@
1
+ id: SI
2
+ ingestion:
3
+ open_address:
4
+ feature_mapper: AtlasEngine::Si::AddressImporter::OpenAddress::Mapper
5
+ correctors:
6
+ open_address:
7
+ - AtlasEngine::Si::AddressImporter::OpenAddress::Corrections::CityDistrictCorrector
8
+ validation:
9
+ enabled: true
10
+ default_matching_strategy: es
11
+ exclusions:
12
+ city:
13
+ - AtlasEngine::Si::AddressValidation::Exclusions::UnknownCity
14
+ address_parser: AtlasEngine::Si::ValidationTranscriber::AddressParser
15
+ comparison_policies:
16
+ street:
17
+ unmatched: ignore_right_unmatched
@@ -0,0 +1,7 @@
1
+ street_synonyms:
2
+ ## street suffixes
3
+ - Slovenskih, slov
4
+ - ulica, ul # street
5
+ city_synonyms:
6
+ - Slovenskih, slov
7
+ - zgornje, zg
@@ -0,0 +1,52 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module Si
6
+ module ValidationTranscriber
7
+ class AddressParser < AtlasEngine::ValidationTranscriber::AddressParserBase
8
+ STREET = "(?<street>.+?)" # the .+ is non-greedy to allow for optional building number prefixes
9
+ BUILDING_NUM = '(?<building_num>[0-9]+(\s?[[:alpha:]]*))' # single-quoted so \s stays a regex whitespace class
10
+ # the current OpenAddress dataset does not include unit numbers
11
+
12
+ sig { override.returns(T::Array[AddressComponents]) }
13
+ def parse
14
+ # addresses sometimes follow an abbreviation with a period and no space afterward
15
+ super.each do |components|
16
+ components[:street]&.gsub!(
17
+ /\A(?<prefix>.+?)(?<dot>\.)(?<non_space>\S)/i,
18
+ "\\k<prefix> \\k<non_space>",
19
+ )
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ sig { returns(T::Array[Regexp]) }
26
+ def country_regex_formats
27
+ @country_regex_formats ||= [
28
+ /^#{STREET}\s+#{BUILDING_NUM}$/,
29
+ /^#{STREET}$/,
30
+ ]
31
+ end
32
+
33
+ sig do
34
+ override.params(
35
+ captures: T::Hash[Symbol, T.nilable(String)],
36
+ address: AtlasEngine::AddressValidation::AbstractAddress,
37
+ ).returns(T::Boolean)
38
+ end
39
+ def ridiculous?(captures, address)
40
+ street = captures[:street]&.downcase
41
+
42
+ if street.present?
43
+ true unless address.address1&.downcase&.include?(street) ||
44
+ address.address2&.downcase&.include?(street)
45
+ end
46
+
47
+ false
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -1167,7 +1167,7 @@ enum ValidationSupportedCountry {
1167
1167
  TO
1168
1168
 
1169
1169
  """
1170
- Turkey
1170
+ Türkiye
1171
1171
  """
1172
1172
  TR
1173
1173
 
@@ -146,7 +146,7 @@ module AtlasEngine
146
146
  end
147
147
 
148
148
  street_tokens_ridiculous?(
149
- street: T.must(street),
149
+ street: street,
150
150
  unit_type: unit_type,
151
151
  unit_num: unit_num,
152
152
  num_street_space: num_street_space,
@@ -53,13 +53,18 @@ module AtlasEngine
53
53
  new(
54
54
  **T.unsafe(
55
55
  {
56
- result: result,
56
+ result: duplicate(result),
57
57
  **result.address,
58
58
  **context.except(:client_request_id),
59
59
  },
60
60
  ),
61
61
  )
62
62
  end
63
+
64
+ sig { params(obj: T.untyped).returns(T.untyped) }
65
+ def duplicate(obj)
66
+ Marshal.load(Marshal.dump(obj))
67
+ end
63
68
  end
64
69
 
65
70
  sig do
@@ -150,9 +150,14 @@ module AtlasEngine
150
150
  normalized_zip = ValidationTranscriber::ZipNormalizer.normalize(
151
151
  country_code: address.country_code, zip: address.zip,
152
152
  )
153
+
153
154
  {
154
155
  "match" => {
155
- "zip" => { "query" => normalized_zip, "fuzziness" => "auto" },
156
+ "zip" => {
157
+ "query" => normalized_zip,
158
+ "fuzziness" => "auto",
159
+ "prefix_length" => profile.validation.zip_prefix_length,
160
+ },
156
161
  },
157
162
  }
158
163
  end
@@ -45,7 +45,11 @@ module AtlasEngine
45
45
  concerns.each do |concern|
46
46
  tags.merge!(concern.attributes.slice(:code, :type))
47
47
 
48
- StatsD.increment("AddressValidation.#{ending_breadcrumb}", tags: tags)
48
+ if concern.attributes[:code] == :address_unknown
49
+ StatsD.increment("AddressValidation.unknown", tags: tags.except(:component))
50
+ else
51
+ StatsD.increment("AddressValidation.#{ending_breadcrumb}", tags: tags)
52
+ end
49
53
  end
50
54
  end
51
55
  end
@@ -57,7 +61,7 @@ module AtlasEngine
57
61
  def component_concerns(component)
58
62
  if component.equal?(:street)
59
63
  result.concerns.select do |c|
60
- c.attributes[:code] =~ /^(address1|address2|street).*/
64
+ c.attributes[:code] =~ /^(address1|address2|street|address_unknown).*/
61
65
  end
62
66
  elsif component.equal?(:building_number)
63
67
  result.concerns.select do |c|
@@ -11,14 +11,22 @@ module AtlasEngine
11
11
  sig { returns(Sequence) }
12
12
  attr_reader :left, :right
13
13
 
14
+ sig { returns(T::Hash[T::Array[Token], Token::Comparison]) }
14
15
  attr_reader :comparison_cache
15
16
 
16
17
  MAX_ALLOWED_EDIT_DISTANCE_PERCENT = 0.5
17
18
 
18
- sig { params(left_sequence: Sequence, right_sequence: Sequence).void }
19
- def initialize(left_sequence:, right_sequence:)
19
+ sig do
20
+ params(
21
+ left_sequence: Sequence,
22
+ right_sequence: Sequence,
23
+ comparison_policy: ComparisonPolicy,
24
+ ).void
25
+ end
26
+ def initialize(left_sequence:, right_sequence:, comparison_policy: ComparisonPolicy::DEFAULT_POLICY)
20
27
  @left = left_sequence
21
28
  @right = right_sequence
29
+ @comparison_policy = comparison_policy
22
30
  @comparison_cache = Hash.new do |h, (l_tok, r_tok)|
23
31
  h[[l_tok, r_tok]] = AddressValidation::Token::Comparator.new(l_tok, r_tok).compare
24
32
  end
@@ -35,6 +43,9 @@ module AtlasEngine
35
43
 
36
44
  private
37
45
 
46
+ sig { returns(ComparisonPolicy) }
47
+ attr_reader :comparison_policy
48
+
38
49
  sig do
39
50
  params(
40
51
  left_permutations: T::Array[Token],
@@ -43,7 +54,7 @@ module AtlasEngine
43
54
  end
44
55
  def token_comparisons(left_permutations, right_permutations)
45
56
  left_permutations.product(right_permutations).map do |l_tok, r_tok|
46
- comparison_cache[[l_tok, r_tok]]
57
+ T.must(comparison_cache[[l_tok, r_tok]])
47
58
  end
48
59
  end
49
60
 
@@ -133,7 +144,7 @@ module AtlasEngine
133
144
 
134
145
  remaining_right_tokens = remove_synonyms_at_same_position(remaining_right_tokens)
135
146
 
136
- remaining_left_tokens.concat(remaining_right_tokens)
147
+ apply_unmatched_policy(remaining_left_tokens, remaining_right_tokens)
137
148
  end
138
149
 
139
150
  sig { params(token: Token, other_token: Token).returns(T::Boolean) }
@@ -151,6 +162,29 @@ module AtlasEngine
151
162
  end
152
163
  .values.flatten
153
164
  end
165
+
166
+ sig do
167
+ params(
168
+ left_unmatched_tokens: T::Array[Token],
169
+ right_unmatched_tokens: T::Array[Token],
170
+ ).returns(T::Array[Token])
171
+ end
172
+ def apply_unmatched_policy(left_unmatched_tokens, right_unmatched_tokens)
173
+ case comparison_policy.unmatched
174
+ when :ignore_left_unmatched
175
+ right_unmatched_tokens
176
+ when :ignore_right_unmatched
177
+ left_unmatched_tokens
178
+ when :ignore_largest_unmatched_side
179
+ if right_unmatched_tokens.size > left_unmatched_tokens.size
180
+ left_unmatched_tokens
181
+ else
182
+ right_unmatched_tokens
183
+ end
184
+ else
185
+ left_unmatched_tokens.concat(right_unmatched_tokens)
186
+ end
187
+ end
154
188
  end
155
189
  end
156
190
  end
@@ -45,7 +45,7 @@ module AtlasEngine
45
45
  longest_subsequence = longest_subsequence_comparison <=> other.longest_subsequence_comparison
46
46
  return -1 * longest_subsequence if longest_subsequence.nonzero?
47
47
 
48
- edit_distance = aggregate_edit_distance <=> other.aggregate_edit_distance
48
+ edit_distance = aggregate_distance <=> other.aggregate_distance
49
49
  return edit_distance if edit_distance.nonzero?
50
50
 
51
51
  prefixes = count_by_qualifier(:prefix) <=> other.count_by_qualifier(:prefix)
@@ -92,7 +92,7 @@ module AtlasEngine
92
92
 
93
93
  sig { returns(T::Boolean) }
94
94
  def match?
95
- aggregate_edit_distance == 0 && unmatched_tokens.empty?
95
+ aggregate_distance == 0 && unmatched_tokens.empty?
96
96
  end
97
97
 
98
98
  sig { params(threshold_percent: Float).returns(T::Boolean) }
@@ -101,8 +101,8 @@ module AtlasEngine
101
101
  end
102
102
 
103
103
  sig { returns(Integer) }
104
- def aggregate_edit_distance
105
- token_comparisons.sum(&:edit_distance)
104
+ def aggregate_distance
105
+ token_comparisons.sum(&:edit_distance) + unmatched_tokens.map(&:value).sum(&:length)
106
106
  end
107
107
 
108
108
  sig { returns(Integer) }
@@ -0,0 +1,33 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressValidation
6
+ class Token
7
+ class Sequence
8
+ class ComparisonPolicy
9
+ extend T::Sig
10
+
11
+ UNMATCHED_POLICIES = [
12
+ :retain, # keep all unmatched tokens in comparison
13
+ :ignore_left_unmatched, # omit unmatched tokens from left sequence in comparison
14
+ :ignore_right_unmatched, # omit unmatched tokens from right sequence in comparison
15
+ :ignore_largest_unmatched_side, # omit unmatched tokens from the side with the most unmatched tokens,
16
+ # omit from left in case of a tie
17
+ ].freeze
18
+
19
+ attr_reader :unmatched
20
+
21
+ sig { params(unmatched: Symbol).void }
22
+ def initialize(unmatched:)
23
+ raise "Unknown unmatched policy: #{unmatched}" if UNMATCHED_POLICIES.exclude?(unmatched)
24
+
25
+ @unmatched = unmatched
26
+ end
27
+
28
+ DEFAULT_POLICY = ComparisonPolicy.new(unmatched: :retain).freeze
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -9,18 +9,13 @@ module AtlasEngine
9
9
  extend T::Sig
10
10
  include Comparable
11
11
 
12
- attr_reader :comparison_helper
13
-
14
- delegate :street_comparison,
15
- :city_comparison,
16
- :province_code_comparison,
17
- :zip_comparison,
18
- :building_comparison,
19
- to: :comparison_helper
12
+ attr_reader :address, :candidate, :datastore
20
13
 
21
14
  sig { params(address: AbstractAddress, candidate: Candidate, datastore: DatastoreBase).void }
22
15
  def initialize(address:, candidate:, datastore:)
23
- @comparison_helper = ComparisonHelper.new(address:, candidate:, datastore:)
16
+ @address = address
17
+ @candidate = candidate
18
+ @datastore = datastore
24
19
  end
25
20
 
26
21
  sig { params(other: AddressComparison).returns(Integer) }
@@ -42,13 +37,38 @@ module AtlasEngine
42
37
 
43
38
  sig { returns(T::Boolean) }
44
39
  def potential_match?
45
- street_comparison.nil? || T.must(street_comparison).potential_match?
40
+ street_comparison.sequence_comparison.nil? || T.must(street_comparison.sequence_comparison).potential_match?
41
+ end
42
+
43
+ sig { returns(ZipComparison) }
44
+ def zip_comparison
45
+ @zip_comparison ||= field_comparison(field: :zip)
46
+ end
47
+
48
+ sig { returns(StreetComparison) }
49
+ def street_comparison
50
+ @street_comparison ||= field_comparison(field: :street)
51
+ end
52
+
53
+ sig { returns(CityComparison) }
54
+ def city_comparison
55
+ @city_comparison ||= field_comparison(field: :city)
56
+ end
57
+
58
+ sig { returns(ProvinceCodeComparison) }
59
+ def province_code_comparison
60
+ @province_code_comparison ||= field_comparison(field: :province_code)
61
+ end
62
+
63
+ sig { returns(BuildingComparison) }
64
+ def building_comparison
65
+ @building_comparison ||= field_comparison(field: :building)
46
66
  end
47
67
 
48
68
  protected
49
69
 
50
70
  sig do
51
- returns(T::Array[T.any(AtlasEngine::AddressValidation::Token::Sequence::Comparison, NumberComparison)])
71
+ returns(T::Array[FieldComparisonBase])
52
72
  end
53
73
  def comparisons
54
74
  [
@@ -63,10 +83,10 @@ module AtlasEngine
63
83
  sig { returns(T::Array[AtlasEngine::AddressValidation::Token::Sequence::Comparison]) }
64
84
  def text_comparisons
65
85
  [
66
- street_comparison,
67
- city_comparison,
68
- zip_comparison,
69
- province_code_comparison,
86
+ street_comparison.sequence_comparison,
87
+ city_comparison.sequence_comparison,
88
+ zip_comparison.sequence_comparison,
89
+ province_code_comparison.sequence_comparison,
70
90
  ].compact_blank
71
91
  end
72
92
 
@@ -74,6 +94,12 @@ module AtlasEngine
74
94
  def merged_comparison
75
95
  @merged_comparisons ||= text_comparisons.reduce(&:merge)
76
96
  end
97
+
98
+ sig { params(field: Symbol).returns(FieldComparisonBase) }
99
+ def field_comparison(field:)
100
+ klass = CountryProfile.for(address.country_code).validation.address_comparison(field: field)
101
+ klass.new(address: address, candidate: candidate, datastore: datastore)
102
+ end
77
103
  end
78
104
  end
79
105
  end
@@ -0,0 +1,33 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressValidation
6
+ module Validators
7
+ module FullAddress
8
+ class BuildingComparison < FieldComparisonBase
9
+ extend T::Sig
10
+
11
+ sig { override.returns(T.nilable(NumberComparison)) }
12
+ def sequence_comparison
13
+ @building_comparison ||= NumberComparison.new(
14
+ numbers: datastore.parsings.potential_building_numbers,
15
+ candidate_ranges: building_ranges_from_candidate(candidate),
16
+ )
17
+ end
18
+
19
+ private
20
+
21
+ sig { params(candidate: Candidate).returns(T::Array[AddressNumberRange]) }
22
+ def building_ranges_from_candidate(candidate)
23
+ building_and_unit_ranges = candidate.component(:building_and_unit_ranges)&.value
24
+ return [] if building_and_unit_ranges.blank?
25
+
26
+ building_ranges = JSON.parse(building_and_unit_ranges).keys
27
+ building_ranges.map { |building_range| AddressNumberRange.new(range_string: building_range) }
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -61,7 +61,7 @@ module AtlasEngine
61
61
  concern = InvalidZipConcernBuilder.for(session.address, [])
62
62
  result.concerns << concern if concern
63
63
 
64
- if ConcernBuilder.too_many_unmatched_components?(unmatched_components.keys)
64
+ if ConcernBuilder.too_many_unmatched_components?(session.address, unmatched_components.keys)
65
65
  result.concerns << UnknownAddressConcern.new(session.address)
66
66
  end
67
67
  end
@@ -128,7 +128,7 @@ module AtlasEngine
128
128
  components = {}
129
129
  @matched_and_unmatched_components ||= begin
130
130
  components_to_compare.each do |field|
131
- components[field] = @address_comparison.send(:"#{field}_comparison")
131
+ components[field] = @address_comparison.send(:"#{field}_comparison").sequence_comparison
132
132
  end
133
133
  components
134
134
  end
@@ -146,7 +146,7 @@ module AtlasEngine
146
146
 
147
147
  sig { returns(RelevantComponents) }
148
148
  def relevant_components
149
- @relevant_components ||= RelevantComponents.new(session, candidate, street_comparison)
149
+ @relevant_components ||= RelevantComponents.new(session, candidate, address_comparison)
150
150
  end
151
151
 
152
152
  sig do
@@ -0,0 +1,25 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ module AtlasEngine
5
+ module AddressValidation
6
+ module Validators
7
+ module FullAddress
8
+ class CityComparison < FieldComparisonBase
9
+ extend T::Sig
10
+
11
+ sig { override.returns(T.nilable(Token::Sequence::Comparison)) }
12
+ def sequence_comparison
13
+ return @city_comparison if defined?(@city_comparison)
14
+
15
+ @city_comparison = best_comparison(
16
+ datastore.fetch_city_sequence,
17
+ T.must(candidate.component(:city)).sequences,
18
+ field_policy(:city),
19
+ )
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end