lite-address 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'snail' unless defined?(Snail)
4
+
5
module Lite
  module Address

    # Names of every attribute a Format carries. Used both as the Struct
    # member list and as the key order of Format#to_h.
    FORMAT_KEYS = %i[
      number
      street street2
      street_type street_type2 redundant_street_type
      unit_prefix unit
      prefix prefix2
      suffix suffix2
      city
      state
      postal_code postal_code_ext
      country list regexp
    ].freeze

    # Keyword-initialised Struct holding the parts of one address, plus
    # helpers that render those parts as strings.
    #
    # Because Struct members are writable, nothing here is memoised: every
    # derived value is recomputed from the current members on each call.
    class Format < Struct.new(*FORMAT_KEYS, keyword_init: true)

      # @return [Object, nil] +country.alpha2+, or nil when no country is set
      def country_code
        country&.alpha2
      end

      # @return [Object, nil] +country.name+, or nil when no country is set
      def country_name
        country&.name
      end

      # @return [Boolean] true when both +street+ and +street2+ are present
      def intersection?
        !!street && !!street2
      end

      # Postal code joined with its extension by a dash.
      #
      # @return [String, nil] e.g. "62704-1234"; nil when +postal_code+ is nil
      def full_postal_code
        return if postal_code.nil?

        [postal_code, postal_code_ext].compact.join('-')
      end

      # Street line of the address.
      #
      # @param str [String] text placed in front of the rendered line
      # @return [String] +str+ followed by the space-joined street parts
      def line1(str = +'')
        parts = intersection? ? intersection_line1 : address_line1
        str + parts.compact.join(' ').strip
      end

      # "City, State PostalCode" line of the address.
      #
      # @param str [String] text placed in front of the rendered line
      # @return [String] the stripped line (the argument itself is not mutated)
      def line2(str = +'')
        str += [city, state].compact.join(', ')
        str << " #{full_postal_code}" if postal_code
        str.strip
      end

      # @return [Object, nil] the entry of +list.subdivision_map+ for +state+,
      #   or nil when no list is set
      def state_name
        list&.subdivision_map&.[](state)
      end

      # @return [Hash{Symbol => Object}] a fresh hash of every FORMAT_KEYS
      #   member and its current value
      def to_h
        Lite::Address::FORMAT_KEYS.to_h { |key| [key, public_send(key)] }
      end

      # @param format [Symbol] :line1, :line2, or anything else for both lines
      # @return [String] the requested line, or the non-empty lines joined by ", "
      def to_s(format = :default)
        case format
        when :line1 then line1
        when :line2 then line2
        else [line1, line2].reject(&:empty?).join(', ')
        end
      end

      # Renders the address through the Snail gem.
      #
      # @param options [Hash] entries merged over the default Snail parameters
      # @return [String] result of +Snail#to_s+
      def to_snail(options = {})
        Snail.new(snail_params.merge(options)).to_s
      end

      # @return [Object] whatever +Lite::Address::Ukey.generate+ returns for +to_s+
      def to_ukey
        Lite::Address::Ukey.generate(to_s)
      end

      # Two values are equal when their string forms are equal.
      def ==(other)
        to_s == other.to_s
      end

      alias alpha2 country_code
      alias state_code state

      private

      # rubocop:disable Metrics/AbcSize, Naming/VariableNumber

      # Parts of a regular (non-intersection) street line, nil entries included;
      # the caller compacts them.
      def address_line1
        [
          number,
          prefix,
          street,
          # The street type is left out when it is flagged as redundant.
          (street_type unless redundant_street_type),
          suffix,
          unit_prefix,
          # http://pe.usps.gov/cpim/ftp/pubs/Pub28/pub28.pdf pg28
          # A unit without a designator is rendered as "# <unit>".
          ((unit_prefix ? unit : "# #{unit}") if unit)
        ]
      end

      # Default parameters handed to Snail by #to_snail.
      def snail_params
        {
          line_1: line1,
          city: city,
          region: state,
          postal_code: full_postal_code,
          country: country_code
        }
      end

      # Parts of an intersection street line: first street, "and", second street.
      def intersection_line1
        [
          prefix, street, street_type, suffix,
          'and',
          prefix2, street2, street_type2, suffix2
        ]
      end
      # rubocop:enable Metrics/AbcSize, Naming/VariableNumber

    end

  end
end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml' unless defined?(YAML)
4
+
5
module Lite
  module Address
    # Lookup tables used while parsing addresses for one country: cardinal
    # directions and street types (read from YAML files next to this file),
    # the country's subdivisions, and unit-designator patterns.
    #
    # Every table is built on first use and cached on the instance.
    class List

      attr_reader :country

      # @param country [#subdivisions] object whose +subdivisions+ is a hash
      #   of code => entry responding to +name+
      def initialize(country)
        @country = country
      end

      # @return [Hash] +cardinal_types+ with keys and values swapped
      def cardinal_codes
        @cardinal_codes ||= cardinal_types.invert
      end

      # @return [Hash] contents of types/cardinal.yml
      def cardinal_types
        @cardinal_types ||= load_types('cardinal')
      end

      # @return [Hash] contents of types/street.yml (long form => abbreviation,
      #   going by how the entries are consumed in this class)
      def street_types
        @street_types ||= load_types('street')
      end

      # One case-insensitive, word-bounded pattern per street-type
      # abbreviation. Each pattern matches the abbreviation itself and EVERY
      # long form that maps to it, so several spellings sharing one
      # abbreviation are all recognised.
      #
      # @return [Hash{Object => Regexp}] abbreviation => pattern
      def street_type_regexps
        @street_type_regexps ||= begin
          forms_by_abbr = street_types.each_with_object({}) do |(type, abbr), forms|
            (forms[abbr] ||= [abbr]) << type
          end

          forms_by_abbr.transform_values do |forms|
            # Quote every form: the pattern is compiled in extended mode, where
            # unescaped whitespace or "#" would otherwise change its meaning.
            alternatives = forms.uniq.map { |form| ::Regexp.quote(form.to_s) }
            /\b(?:#{alternatives.join('|')})\b/ix
          end
        end
      end

      # @return [Hash] subdivision code => downcased subdivision name
      def subdivision_codes
        @subdivision_codes ||= subdivision_names.invert
      end

      # @return [Hash] subdivision code => subdivision name as given by the country
      def subdivision_map
        @subdivision_map ||= country.subdivisions.transform_values(&:name)
      end

      # @return [Hash] downcased subdivision name => subdivision code
      def subdivision_names
        @subdivision_names ||= country.subdivisions.each_with_object({}) do |(code, sub), hash|
          hash[sub.name.downcase] = code
        end
      end

      # All unit-designator patterns, numbered ones first.
      #
      # @return [Hash{String => Regexp}] abbreviation => pattern
      def unit_abbr_regexps
        # http://pe.usps.com/text/pub28/pub28c2_003
        @unit_abbr_regexps ||= unit_abbr_numbered_regexps.merge(unit_abbr_unnumbered_regexps)
      end

      # Designators listed separately from the unnumbered ones; judging by the
      # names they are the ones followed by a unit identifier (e.g. "Apt 4").
      #
      # @return [Hash{String => Regexp}] abbreviation => pattern
      def unit_abbr_numbered_regexps
        @unit_abbr_numbered_regexps ||= {
          'Apt' => /(?:ap|dep)(?:ar)?t(?:me?nt)?/i,
          'PO Box' => /p\W*[om]\W*b(?:ox)?/i,
          'Bldg' => /bu?i?ldi?n?g/i,
          'Dept' => /dep(artmen)?t/i,
          'Floor' => /flo*r?/i,
          'Hanger' => /ha?nga?r/i,
          'Lot' => /lo?t/i,
          'Room' => /ro*m/i,
          'Pier' => /pier/i,
          'Slip' => /slip/i,
          'Space' => /spa?ce?/i,
          'Stop' => /stop/i,
          'Drawer' => /drawer/i,
          'Suite' => /su?i?te/i,
          'Trailer' => /tra?i?le?r/i,
          'Box' => /\w*(?<!po\W)box/i,
          'Unit' => /uni?t/i
        }
      end

      # Designators listed separately from the numbered ones; judging by the
      # names they stand on their own (e.g. "Rear").
      #
      # @return [Hash{String => Regexp}] abbreviation => pattern
      def unit_abbr_unnumbered_regexps
        @unit_abbr_unnumbered_regexps ||= {
          'Basement' => /ba?se?me?n?t/i,
          'Front' => /fro?nt/i,
          'Lobby' => /lo?bby/i,
          'Lower' => /lowe?r/i,
          'Office' => /off?i?ce?/i,
          'PH' => /pe?n?t?ho?u?s?e?/i,
          'Rear' => /rear/i,
          'Side' => /side/i,
          'Upper' => /uppe?r/i
        }
      end

      alias state_codes subdivision_codes
      alias state_map subdivision_map
      alias state_names subdivision_names

      private

      # Reads types/<name>.yml from the directory containing this file.
      def load_types(name)
        file_path = File.expand_path("types/#{name}.yml", File.dirname(__FILE__))
        YAML.load_file(file_path)
      end

    end
  end
end
@@ -0,0 +1,215 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'countries' unless defined?(ISO3166::Country)
4
+
5
module Lite
  module Address
    # Parses a free-form address string into a Lite::Address::Format using the
    # patterns from Lite::Address::Regexp and the tables from Lite::Address::List.
    class Parser

      LOOKUPS = %i[any formal informal intersectional].freeze
      CAPITALIZATION_PARTS = %w[street street_type street2 street_type2 city unit_prefix].freeze
      STREET_POSITIONS = ['', '1', '2'].freeze

      attr_reader :address, :country_code

      # @param address [#to_s] raw address text; one leading "(" and one
      #   trailing ")" are removed
      # @param country_code [#to_s] country code, upcased before use
      def initialize(address, country_code: 'US')
        @address = sanitize_address(address)
        @country_code = sanitize_country_code(country_code)
      end

      class << self

        # Defines Parser.any / .formal / .informal / .intersectional.
        # Each takes the address and an options hash; +:country_code+ is taken
        # out of the options (default 'US') and the rest is forwarded to the
        # instance method of the same name.
        LOOKUPS.each do |method_name|
          define_method(method_name) do |address, args = {}|
            # Work on a copy so the caller's hash keeps its :country_code entry.
            options = args.dup
            instance = new(address, country_code: options.delete(:country_code) || 'US')
            instance.public_send(method_name, options)
          end
        end

      end

      # Parses as an intersection when the text contains a corner word
      # ("and", "at", "&", "@"); otherwise tries the formal pattern and then
      # the informal one.
      #
      # @param args [Hash] options passed on to #generate_address
      # @return [Lite::Address::Format, nil] nil when nothing matches
      def any(args = {})
        return intersectional(args) if regexp.corner.match(address)

        formal(args) || informal(args)
      end

      # @param args [Hash] options passed on to #generate_address
      # @return [Lite::Address::Format, nil] nil unless the formal pattern matches
      def formal(args = {})
        return unless (match = regexp.formal_address.match(address))

        map = match_map(match)
        generate_address(map, args)
      end

      # @param args [Hash] options passed on to #generate_address
      # @return [Lite::Address::Format, nil] nil unless the informal pattern matches
      def informal(args = {})
        return unless (match = regexp.informal_address.match(address))

        map = match_map(match)
        generate_address(map, args)
      end

      # @param args [Hash] options passed on to #generate_address
      # @return [Lite::Address::Format, nil] nil unless the intersection pattern matches
      def intersectional(args = {})
        return unless (match = regexp.intersectional_address.match(address))

        map = match_map(match)
        intersectional_submatch(match, map, 'street')
        intersectional_submatch(match, map, 'street_type')
        intersectional_rematch(match, map, 'street_type')

        generate_address(map, args)
      end

      protected

      def country
        @country ||= ISO3166::Country.new(country_code)
      end

      def list
        @list ||= Lite::Address::List.new(country)
      end

      def regexp
        @regexp ||= Lite::Address::Regexp.new(list)
      end

      private

      # Removes one wrapping "(" / ")" from the text; nil becomes "".
      def sanitize_address(value)
        value.to_s.delete_prefix('(').delete_suffix(')')
      end

      def sanitize_country_code(value)
        value.to_s.upcase
      end

      # Named captures of +match+ that actually matched, as name => text.
      def match_map(match)
        match.names.each_with_object({}) do |name, hash|
          hash[name] = match[name] if match[name]
        end
      end

      # rubocop:disable Metrics/AbcSize
      # Which lookup table normalises which address part.
      def normalization_map
        @normalization_map ||= {
          'prefix' => list.cardinal_types,
          'prefix1' => list.cardinal_types,
          'prefix2' => list.cardinal_types,
          'suffix' => list.cardinal_types,
          'suffix1' => list.cardinal_types,
          'suffix2' => list.cardinal_types,
          'street_type' => list.street_types,
          'street_type1' => list.street_types,
          'street_type2' => list.street_types,
          'state' => list.subdivision_names
        }
      end
      # rubocop:enable Metrics/AbcSize

      # The intersection pattern contains the group +part+ several times.
      # Collects the occurrences that matched, in order, and stores the first
      # under +part+ and the second under "<part>2".
      def intersectional_submatch(match, map, part)
        parts = regexp.intersectional_address.named_captures
        parts = parts[part].filter_map { |i| match[i.to_i] }
        map[part] = parts[0] if parts[0]
        map["#{part}2"] = parts[1] if parts[1]
      end

      # When only one value for +part+ was found (or both are identical), drops
      # a trailing "s" plus any non-word characters from it; if what is left
      # fully matches the pattern for +part+, that value is used for both
      # streets (e.g. a shared plural type such as "Sts").
      def intersectional_rematch(_match, map, part)
        return unless map[part] && (!map["#{part}2"] || (map[part] == map["#{part}2"]))

        type = map[part].dup
        # gsub! returns nil when nothing was removed, which ends the rematch.
        return unless type.gsub!(/s\W*$/i, '') && (/\A#{regexp.public_send(part)}\z/io =~ type)

        map[part] = map["#{part}2"] = type
      end

      # Strips surrounding whitespace and removes characters outside
      # word/space/-/#/&// from every value; "." is additionally kept in the
      # house number. Mutates the strings in place.
      def address_strip_chars(map)
        map.each do |key, string|
          string.strip!

          if key == 'number'
            string.gsub!(%r{[^\w\s\-\#&/.]}, '')
          else
            string.gsub!(%r{[^\w\s\-\#&/]}, '')
          end
        end
      end

      # When a street was captured without a street type, re-runs the street
      # pattern on the street text to pull a type out of it.
      # NOTE(review): the flag is set to true on this path even when no type
      # could be extracted - confirm that this is intended.
      def address_redundantize_street_type(map)
        map['redundant_street_type'] = false
        return unless map['street'] && !map['street_type']

        match = regexp.street.match(map['street'])
        map['street_type'] = match['street_type'] if match
        map['redundant_street_type'] = true
      end

      # Replaces the unit prefix with the abbreviation of each pattern that
      # matches it. Patterns are tried in table order against the current
      # value, so a later pattern sees an earlier replacement.
      def address_abbreviate_unit_prefixes(map)
        return unless map['unit_prefix']

        list.unit_abbr_regexps.each do |abbr, regex|
          map['unit_prefix'] = abbr if regex.match(map['unit_prefix'])
        end
      end

      # Replaces each part named in #normalization_map with its table entry
      # (looked up by the downcased value) when one exists.
      def address_normalize_values(map)
        normalization_map.each do |key, hash|
          next unless (map_key = map[key])

          mapping = hash[map_key.downcase]
          map[key] = mapping if mapping
        end
      end

      # Drops a street type that already occurs inside the street name.
      # A street type without an entry in +list.street_type_regexps+ is left alone.
      def address_avoid_redundant_street_type(map)
        STREET_POSITIONS.each do |position|
          street = map["street#{position}"]
          street_type = map["street_type#{position}"]
          next unless street && street_type

          type_regexp = list.street_type_regexps[street_type.downcase]
          next unless type_regexp&.match?(street)

          map.delete("street_type#{position}")
        end
      end

      # Expands a leading cardinal code in the city ("NE Portland") using
      # +list.cardinal_codes+.
      def address_expand_cardinals(map)
        return unless map['city']

        map['city'].gsub!(/^(#{regexp.cardinal_code})\s+(?=\S)/o) do |matched|
          # +matched+ is the whole matched String (code plus trailing
          # whitespace), so strip the whitespace to get the full code;
          # indexing it with [0] would only yield its first character.
          expansion = list.cardinal_codes[matched.rstrip.upcase]
          expansion ? "#{expansion} " : matched
        end
      end

      def address_fix_dirty_ordinals(map)
        # Sometimes parcel data will have addresses like "1 1ST ST" as "1 1 ST ST"
        return unless map['street']

        map['street'].gsub!(/\A(\d+\s+st|\d+\s+nd|\d+\s+rd|\d+\s+th)\z/i) do |ordinal|
          ordinal.gsub(/\s+/, '')
        end
      end

      # Capitalises every word of the parts listed in CAPITALIZATION_PARTS.
      def address_capitalize_parts(map)
        CAPITALIZATION_PARTS.each do |k|
          map[k] = map[k].split.map(&:capitalize).join(' ') if map[k]
        end
      end

      # Cleans and normalises the captured parts (the order of the steps
      # matters) and wraps them in a Format.
      #
      # @param map [Hash{String => String}] captured parts; mutated in place
      # @param args [Hash] +:avoid_redundant_street_type+ enables that step
      def generate_address(map, args = {})
        address_strip_chars(map)
        address_redundantize_street_type(map)
        address_abbreviate_unit_prefixes(map)
        address_normalize_values(map)
        address_avoid_redundant_street_type(map) if args[:avoid_redundant_street_type]
        address_expand_cardinals(map)
        address_fix_dirty_ordinals(map)
        address_capitalize_parts(map)

        map.merge!(country: country, list: list, regexp: regexp)
        Lite::Address::Format.new(map)
      end

    end
  end
end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
module Lite
  module Address
    # Builds the regular expressions used to take an address apart. The
    # alternations come from the tables of the given list; every pattern is
    # compiled on first use and cached on the instance.
    class Regexp

      attr_reader :list

      # @param list [Object] provider of +cardinal_codes+, +cardinal_types+,
      #   +street_types+, +subdivision_codes+ and the unit-designator tables
      def initialize(list)
        @list = list
      end

      # Separator after a street that must not swallow the start of a unit:
      # one or more characters that are neither "#" nor word characters, or
      # the end of the text.
      def avoid_unit
        @avoid_unit ||= /(?:[^\#\w]+|\Z)/ix
      end

      # Alternation of the keys of +list.cardinal_codes+.
      def cardinal_code
        @cardinal_code ||= begin
          values = list.cardinal_codes.keys
          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # Alternation of, per cardinal entry: the key, the value with a dot
      # after every character ("N.E."), and the plain value.
      def cardinal_type
        @cardinal_type ||= begin
          values = list.cardinal_types.flat_map do |key, val|
            [key, ::Regexp.quote(val.gsub(/(\w)/, '\1.')), ::Regexp.quote(val)]
          end

          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # City (no digits or commas) followed by a subdivision.
      def city_state
        @city_state ||= /(?:(?<city> [^\d,]+?)\W+(?<state> #{subdivision}))/ix
      end

      # Words and symbols that join the two streets of an intersection.
      def corner
        @corner ||= /(?:\band\b|\bat\b|&|@)/ix
      end

      # Whole-string pattern: number, street, optional unit, place.
      def formal_address
        @formal_address ||= /\A[^\w\x23]*
          #{number} \W*
          #{street}\W+
          (?:#{unit}\W+)?
          #{place}\W*\z
        /ix
      end

      # Looser pattern anchored only at the start: optional leading unit and
      # place, optional number, street, optional unit, optional place.
      def informal_address
        @informal_address ||= /\A\s*
          (?:#{unit} #{separator} #{place})?
          (?:#{number})? \W*
          #{street} #{avoid_unit}
          (?:#{unit} #{separator})?
          (?:#{place})?
        /ix
      end

      # Whole-string pattern: street, corner word, street, place.
      def intersectional_address
        @intersectional_address ||= /\A\W*
          #{street}\W*?
          \s+#{corner}\s+
          #{street}\W+
          #{place}\W*\z
        /ix
      end

      # rubocop:disable Lint/MixedRegexpCaptureTypes
      def number
        # Utah and Wisconsin have a more elaborate system of block numbering
        # http://en.wikipedia.org/wiki/House_number#Block_numbers
        @number ||= /(?<number>(n|s|e|w)?\d+[.-]?\d*)(?=\D)/ix
      end
      # rubocop:enable Lint/MixedRegexpCaptureTypes

      # Optional city/state followed by an optional postal code.
      def place
        @place ||= /(?:#{city_state}\W*)? (?:#{postal_code})?/ix
      end

      # Five digits with an optional four-digit extension (dash optional).
      def postal_code
        @postal_code ||= /(?:(?<postal_code>\d{5})(?:-?(?<postal_code_ext>\d{4}))?)/ix
      end

      # One or more non-word characters, or the end of the text.
      def separator
        @separator ||= /(?:\W+|\Z)/ix
      end

      # Street name with optional cardinal prefix, street type and cardinal
      # suffix. The group names repeat across the alternatives on purpose.
      def street
        @street ||= /(?:
          (?:
            (?<street> #{cardinal_type})\W+
            (?<street_type> #{street_type})\b
          )
          | (?:(?<prefix> #{cardinal_type})\W+)?
          (?:
            (?<street> [^,]*\d)
            (?:[^\w,]* (?<suffix> #{cardinal_type})\b)
            |
            (?<street> [^,]+)
            (?:[^\w,]+(?<street_type> #{street_type})\b)
            (?:[^\w,]+(?<suffix> #{cardinal_type})\b)?
            |
            (?<street> [^,]+?)
            (?:[^\w,]+(?<street_type> #{street_type})\b)?
            (?:[^\w,]+(?<suffix> #{cardinal_type})\b)?
          )
        )/ix
      end

      # Alternation of every street-type key and value from the list.
      def street_type
        @street_type ||= begin
          values = (list.street_types.keys + list.street_types.values).uniq
          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # Alternation of every subdivision code and name, matched as whole words.
      def subdivision
        @subdivision ||= begin
          values = list.subdivision_codes.flatten.map { |code| ::Regexp.quote(code) }
          # "\\b" is the regexp word boundary; a bare "\b" inside a
          # double-quoted string would be a backspace character. The group
          # makes both boundaries apply to every alternative.
          ::Regexp.new("\\b(?:#{values.join('|')})\\b", ::Regexp::IGNORECASE)
        end
      end

      # Either a numbered designator (or "#") followed by the unit identifier,
      # or an unnumbered designator on its own.
      def unit
        @unit ||= %r{
          (?:
            (?:
              (?:#{unit_numbered} \W*)
              | (?<unit_prefix> \#)\W*
            )
            (?<unit> [\w/-]+)
          ) | #{unit_unnumbered}
        }ix
      end

      # Any numbered designator, not directly followed by a letter.
      def unit_numbered
        @unit_numbered ||= begin
          values = list.unit_abbr_numbered_regexps.values
          /(?<unit_prefix>#{values.join('|')})(?![a-z])/ix
        end
      end

      # Any unnumbered designator, ending at a word boundary.
      def unit_unnumbered
        @unit_unnumbered ||= begin
          values = list.unit_abbr_unnumbered_regexps.values
          /(?<unit_prefix>#{values.join('|')})\b/ix
        end
      end

    end
  end
end