lite-address 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'snail' unless defined?(Snail)
4
+
5
module Lite
  module Address

    # Names of every member of the Format struct, in declaration order.
    # Besides the textual address parts, the struct also carries the
    # `country`, `list` and `regexp` objects it was built with.
    FORMAT_KEYS = %i[
      number
      street street2
      street_type street_type2 redundant_street_type
      unit_prefix unit
      prefix prefix2
      suffix suffix2
      city
      state
      postal_code postal_code_ext
      country list regexp
    ].freeze

    # Keyword-initialised struct describing a single address, plus helpers
    # that render it as text.
    #
    # The struct is mutable (every member has a setter), so derived values are
    # computed on each call instead of being cached in instance variables;
    # caching would return stale data after a setter call and would raise
    # FrozenError on a frozen instance.
    class Format < Struct.new(*FORMAT_KEYS, keyword_init: true)

      # @return the `alpha2` value of the `country` member
      def country_code
        country.alpha2
      end

      # @return the `name` value of the `country` member
      def country_name
        country.name
      end

      # @return [Boolean] true when both `street` and `street2` are set
      def intersection?
        !!street && !!street2
      end

      # Postal code joined to its extension with a dash when one is present.
      #
      # @return [String, nil] nil when there is no postal code at all
      def full_postal_code
        return if postal_code.nil?

        [postal_code, postal_code_ext].compact.join('-')
      end

      # Street line: either the intersection form ("A and B") or the regular
      # number/street/unit form.
      #
      # @param str [String] text the line is appended to (not mutated)
      # @return [String]
      def line1(str = +'')
        parts = intersection? ? intersection_line1 : address_line1
        str + parts.compact.join(' ').strip
      end

      # Locality line: "City, State" followed by the full postal code.
      #
      # @param str [String] text the line is appended to (not mutated)
      # @return [String]
      def line2(str = +'')
        line = str + [city, state].compact.join(', ')
        line << " #{full_postal_code}" if postal_code
        line.strip
      end

      # @return the entry of `list.subdivision_map` stored under `state`
      def state_name
        list.subdivision_map[state]
      end

      # @return [Hash{Symbol => Object}] a fresh hash of every member, keyed
      #   in FORMAT_KEYS order and reflecting the current member values
      def to_h
        Lite::Address::FORMAT_KEYS.each_with_object({}) do |key, hash|
          hash[key] = public_send(key)
        end
      end

      # @param format [Symbol] :line1, :line2, or anything else for both
      #   lines joined with ", " (empty lines are dropped)
      # @return [String]
      def to_s(format = :default)
        case format
        when :line1 then line1
        when :line2 then line2
        else [line1, line2].reject(&:empty?).join(', ')
        end
      end

      # Renders the address through Snail.
      #
      # @param options [Hash] merged over the generated Snail parameters
      # @return the `to_s` of the Snail instance
      def to_snail(options = {})
        Snail.new(snail_params.merge(options)).to_s
      end

      # @return the result of Lite::Address::Ukey.generate for the default
      #   string form
      def to_ukey
        Lite::Address::Ukey.generate(to_s)
      end

      # Equality is based on the default string rendering, so `other` only
      # needs to respond to `to_s`.
      def ==(other)
        to_s == other.to_s
      end

      alias alpha2 country_code
      alias state_code state

      private

      # rubocop:disable Metrics/AbcSize, Naming/VariableNumber
      # Parts of a regular street line, in output order; nil entries are
      # removed by the caller.
      def address_line1
        parts = [number, prefix, street]
        parts << street_type unless redundant_street_type
        parts << suffix
        parts << unit_prefix
        # http://pe.usps.gov/cpim/ftp/pubs/Pub28/pub28.pdf pg28
        # A unit without a designator is rendered with a leading "# ".
        parts << (unit_prefix ? unit : "\# #{unit}") if unit
        parts
      end

      # Parameters handed to Snail by #to_snail.
      def snail_params
        {
          line_1: line1,
          city: city,
          region: state,
          postal_code: full_postal_code,
          country: country_code
        }
      end

      # Parts of an intersection line ("<first street> and <second street>"),
      # in output order; nil entries are removed by the caller.
      def intersection_line1
        [
          prefix, street, street_type, suffix,
          'and',
          prefix2, street2, street_type2, suffix2
        ]
      end
      # rubocop:enable Metrics/AbcSize, Naming/VariableNumber

    end

  end
end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml' unless defined?(YAML)
4
+
5
module Lite
  module Address
    # Lookup tables used when parsing and normalising addresses: cardinal
    # directions and street types loaded from the bundled YAML files,
    # subdivisions taken from the given country object, and the regexps that
    # recognise unit designators. Every table is built on first use and then
    # memoized on the instance.
    class List

      attr_reader :country

      # @param country object responding to `subdivisions` with a hash of
      #   code => object responding to `name`
      def initialize(country)
        @country = country
      end

      # @return [Hash] the inverse of #cardinal_types
      def cardinal_codes
        @cardinal_codes ||= cardinal_types.invert
      end

      # @return the parsed contents of types/cardinal.yml
      def cardinal_types
        @cardinal_types ||= load_types('cardinal')
      end

      # @return the parsed contents of types/street.yml
      def street_types
        @street_types ||= load_types('street')
      end

      # One regexp per street-type abbreviation, matching either the
      # abbreviation or the type it was listed under as a whole word. Both
      # alternatives are quoted so that neither can inject regexp syntax nor
      # lose whitespace to the /x flag. When several types share an
      # abbreviation the last one listed wins.
      #
      # @return [Hash{String => Regexp}] keyed by abbreviation
      def street_type_regexps
        @street_type_regexps ||= street_types.each_with_object({}) do |(type, abbr), hash|
          hash[abbr] = /\b(?:#{::Regexp.quote(abbr)}|#{::Regexp.quote(type)})\b/ix
        end
      end

      # @return [Hash] code => downcased subdivision name
      def subdivision_codes
        @subdivision_codes ||= subdivision_names.invert
      end

      # @return [Hash] code => subdivision name as reported by the country
      def subdivision_map
        @subdivision_map ||= country.subdivisions.transform_values(&:name)
      end

      # @return [Hash] downcased subdivision name => code
      def subdivision_names
        @subdivision_names ||= country.subdivisions.each_with_object({}) do |(code, sub), hash|
          hash[sub.name.downcase] = code
        end
      end

      # @return [Hash{String => Regexp}] numbered and unnumbered unit
      #   designators combined
      def unit_abbr_regexps
        # http://pe.usps.com/text/pub28/pub28c2_003
        @unit_abbr_regexps ||= unit_abbr_numbered_regexps.merge(unit_abbr_unnumbered_regexps)
      end

      # Unit designators that are followed by an identifier ("Apt 4").
      #
      # @return [Hash{String => Regexp}] canonical label => matcher
      def unit_abbr_numbered_regexps
        @unit_abbr_numbered_regexps ||= {
          'Apt' => /(?:ap|dep)(?:ar)?t(?:me?nt)?/i,
          'PO Box' => /p\W*[om]\W*b(?:ox)?/i,
          'Bldg' => /bu?i?ldi?n?g/i,
          'Dept' => /dep(artmen)?t/i,
          'Floor' => /flo*r?/i,
          # USPS Pub 28 spells this designator "Hangar" (abbreviated HNGR).
          'Hangar' => /ha?nga?r/i,
          'Lot' => /lo?t/i,
          'Room' => /ro*m/i,
          'Pier' => /pier/i,
          'Slip' => /slip/i,
          'Space' => /spa?ce?/i,
          'Stop' => /stop/i,
          'Drawer' => /drawer/i,
          'Suite' => /su?i?te/i,
          'Trailer' => /tra?i?le?r/i,
          # The lookbehind keeps "PO Box" from also being read as a plain box.
          'Box' => /\w*(?<!po\W)box/i,
          'Unit' => /uni?t/i
        }
      end

      # Unit designators that stand alone, without an identifier ("Rear").
      #
      # @return [Hash{String => Regexp}] canonical label => matcher
      def unit_abbr_unnumbered_regexps
        @unit_abbr_unnumbered_regexps ||= {
          'Basement' => /ba?se?me?n?t/i,
          'Front' => /fro?nt/i,
          'Lobby' => /lo?bby/i,
          'Lower' => /lowe?r/i,
          'Office' => /off?i?ce?/i,
          'PH' => /pe?n?t?ho?u?s?e?/i,
          'Rear' => /rear/i,
          'Side' => /side/i,
          'Upper' => /uppe?r/i
        }
      end

      alias state_codes subdivision_codes
      alias state_map subdivision_map
      alias state_names subdivision_names

      private

      # Loads types/<name>.yml from the directory containing this file.
      def load_types(name)
        file_path = File.expand_path("types/#{name}.yml", File.dirname(__FILE__))
        YAML.load_file(file_path)
      end

    end
  end
end
@@ -0,0 +1,215 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'countries' unless defined?(ISO3166::Country)
4
+
5
module Lite
  module Address
    # Turns a free-form address string into a Lite::Address::Format.
    #
    # Each lookup (`any`, `formal`, `informal`, `intersectional`) is available
    # both on an instance and as a class-level shortcut that accepts the
    # country code inside its options hash. A lookup returns nil when its
    # pattern does not match the address.
    class Parser

      LOOKUPS = %i[any formal informal intersectional].freeze
      CAPITALIZATION_PARTS = %w[street street_type street2 street_type2 city unit_prefix].freeze
      STREET_POSITIONS = ['', '1', '2'].freeze

      attr_reader :address, :country_code

      # @param address [#to_s] raw address text; nil is treated as empty
      # @param country_code [#to_s] country code, upcased before use
      def initialize(address, country_code: 'US')
        @address = sanitize_address(address)
        @country_code = sanitize_country_code(country_code)
      end

      class << self

        # Defines Parser.any / .formal / .informal / .intersectional.
        # `args[:country_code]` (default 'US') selects the country; the
        # remaining options are forwarded to the instance method. The caller's
        # hash is copied first so that it is never modified.
        LOOKUPS.each do |method_name|
          define_method(method_name) do |address, args = {}|
            options = args.dup
            country_code = options.delete(:country_code) || 'US'
            new(address, country_code: country_code).public_send(method_name, options)
          end
        end

      end

      # Chooses a strategy: an address containing a corner word or symbol is
      # parsed only as an intersection; anything else is tried as a formal and
      # then as an informal address.
      def any(args = {})
        return intersectional(args) if regexp.corner.match(address)

        formal(args) || informal(args)
      end

      # Parses using the formal address pattern.
      def formal(args = {})
        parse_with(regexp.formal_address, args)
      end

      # Parses using the informal address pattern.
      def informal(args = {})
        parse_with(regexp.informal_address, args)
      end

      # Parses "<street> and <street>, <place>" style addresses.
      def intersectional(args = {})
        return unless (match = regexp.intersectional_address.match(address))

        map = match_map(match)
        intersectional_submatch(match, map, 'street')
        intersectional_submatch(match, map, 'street_type')
        intersectional_rematch(match, map, 'street_type')

        generate_address(map, args)
      end

      protected

      def country
        @country ||= ISO3166::Country.new(country_code)
      end

      def list
        @list ||= Lite::Address::List.new(country)
      end

      def regexp
        @regexp ||= Lite::Address::Regexp.new(list)
      end

      private

      # Drops one leading "(" and one trailing ")"; nil becomes "".
      def sanitize_address(value)
        value.to_s.delete_prefix('(').delete_suffix(')')
      end

      def sanitize_country_code(value)
        value.to_s.upcase
      end

      # Matches the address against `pattern` and builds the result, or
      # returns nil when there is no match.
      def parse_with(pattern, args)
        match = pattern.match(address)
        return unless match

        generate_address(match_map(match), args)
      end

      # Hash of capture name => captured text, leaving out groups that did
      # not participate in the match.
      def match_map(match)
        match.names.each_with_object({}) do |name, hash|
          value = match[name]
          hash[name] = value if value
        end
      end

      # rubocop:disable Metrics/AbcSize
      # Which lookup table normalises which part of the address.
      def normalization_map
        @normalization_map ||= {
          'prefix' => list.cardinal_types,
          'prefix1' => list.cardinal_types,
          'prefix2' => list.cardinal_types,
          'suffix' => list.cardinal_types,
          'suffix1' => list.cardinal_types,
          'suffix2' => list.cardinal_types,
          'street_type' => list.street_types,
          'street_type1' => list.street_types,
          'street_type2' => list.street_types,
          'state' => list.subdivision_names
        }
      end
      # rubocop:enable Metrics/AbcSize

      # The intersection pattern repeats the same capture name for both
      # streets. Collect every group with that name that matched and assign
      # the first to `part` and the second to `part2`.
      def intersectional_submatch(match, map, part)
        groups = regexp.intersectional_address.named_captures
        found = groups[part].filter_map { |index| match[index.to_i] }
        map[part] = found[0] if found[0]
        map["#{part}2"] = found[1] if found[1]
      end

      # When only one value was captured for `part` (or both are the same),
      # try it with a trailing "s" removed (e.g. a plural type shared by both
      # streets) and, if that form is itself a valid `part`, apply it to both
      # streets.
      def intersectional_rematch(_match, map, part)
        return unless map[part] && (!map["#{part}2"] || (map[part] == map["#{part}2"]))

        type = map[part].dup
        return unless type.gsub!(/s\W*$/i, '')
        # Built per call: the interpolated pattern depends on both `part` and
        # this instance's regexp object, so it must not be cached process-wide.
        return unless /\A#{regexp.public_send(part)}\z/i.match?(type)

        map[part] = map["#{part}2"] = type
      end

      # Trims every value in place and removes characters outside the allowed
      # set; the period is kept only for the 'number' part.
      def address_strip_chars(map)
        map.each do |key, string|
          string.strip!

          disallowed = key == 'number' ? %r{[^\w\s\-\#&/.]} : %r{[^\w\s\-\#&/]}
          string.gsub!(disallowed, '')
        end
      end

      # When no street type was captured, look for one inside the street name
      # and flag it as redundant so it is not rendered a second time.
      def address_redundantize_street_type(map)
        map['redundant_street_type'] = false
        return unless map['street'] && !map['street_type']

        match = regexp.street.match(map['street'])
        map['street_type'] = match['street_type'] if match
        map['redundant_street_type'] = true
      end

      # Replaces the unit prefix with its canonical label. Every regexp is
      # tested in turn against the current value, so a later match overrides
      # an earlier one.
      def address_abbreviate_unit_prefixes(map)
        return unless map['unit_prefix']

        list.unit_abbr_regexps.each do |abbr, regex|
          map['unit_prefix'] = abbr if regex.match?(map['unit_prefix'])
        end
      end

      # Swaps each normalisable part for its table entry (looked up by the
      # downcased value), leaving it untouched when the table has no entry.
      def address_normalize_values(map)
        normalization_map.each do |key, table|
          next unless (value = map[key])

          mapping = table[value.downcase]
          map[key] = mapping if mapping
        end
      end

      # Removes a street type that already appears inside its street name.
      # Street types without a known regexp are left alone.
      def address_avoid_redundant_street_type(map)
        STREET_POSITIONS.each do |suffix|
          street = map["street#{suffix}"]
          street_type = map["street_type#{suffix}"]
          next if !street || !street_type

          type_regexp = list.street_type_regexps[street_type.downcase]
          next unless type_regexp&.match?(street)

          map.delete("street_type#{suffix}")
        end
      end

      # Expands a leading cardinal code in the city ("NE Portland") into the
      # value stored for it in the cardinal code table.
      def address_expand_cardinals(map)
        return unless map['city']

        # gsub! yields the matched text as a String, so the code has to be
        # read from capture group 1; indexing the yielded string would return
        # only its first character.
        map['city'].gsub!(/^(#{regexp.cardinal_code})\s+(?=\S)/) do
          "#{list.cardinal_codes[::Regexp.last_match(1).upcase]} "
        end
      end

      def address_fix_dirty_ordinals(map)
        # Sometimes parcel data will have addresses like "1 1ST ST" as "1 1 ST ST"
        return unless map['street']

        map['street'].gsub!(/\A(\d+\s+st|\d+\s+nd|\d+\s+rd|\d+\s+th)\z/i) do |ordinal|
          ordinal.gsub(/\s+/, '')
        end
      end

      # Capitalises each word of the listed parts.
      def address_capitalize_parts(map)
        CAPITALIZATION_PARTS.each do |key|
          map[key] = map[key].split.map(&:capitalize).join(' ') if map[key]
        end
      end

      # Runs the clean-up pipeline over the captured parts and wraps the
      # result in a Format.
      #
      # @param args [Hash] :avoid_redundant_street_type enables the optional
      #   redundant street type removal step
      def generate_address(map, args = {})
        address_strip_chars(map)
        address_redundantize_street_type(map)
        address_abbreviate_unit_prefixes(map)
        address_normalize_values(map)
        address_avoid_redundant_street_type(map) if args[:avoid_redundant_street_type]
        address_expand_cardinals(map)
        address_fix_dirty_ordinals(map)
        address_capitalize_parts(map)

        map.merge!(country: country, list: list, regexp: regexp)
        Lite::Address::Format.new(map)
      end

    end
  end
end
@@ -0,0 +1,155 @@
1
+ # frozen_string_literal: true
2
+
3
module Lite
  module Address
    # Builds the regular expressions used to recognise addresses from the
    # lookup tables of a list object. Every pattern is compiled on first use
    # and memoized on the instance.
    #
    # Because this class is itself named Regexp, Ruby's own class is always
    # referenced as ::Regexp inside it.
    class Regexp

      attr_reader :list

      # @param list object providing the cardinal, street type, subdivision
      #   and unit tables read by the methods below
      def initialize(list)
        @list = list
      end

      # Separator following a street: a run of characters that are neither
      # word characters nor "#", or the end of the string.
      def avoid_unit
        @avoid_unit ||= /(?:[^\#\w]+|\Z)/ix
      end

      # Alternation of every key of the cardinal code table.
      def cardinal_code
        @cardinal_code ||= begin
          values = list.cardinal_codes.keys
          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # Alternation of every cardinal type key followed by its value in
      # dotted ("N.E.") and plain ("NE") form.
      def cardinal_type
        @cardinal_type ||= begin
          values = list.cardinal_types.flat_map do |key, val|
            [key, ::Regexp.quote(val.gsub(/(\w)/, '\1.')), ::Regexp.quote(val)]
          end

          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # City text followed by a subdivision; captures `city` and `state`.
      def city_state
        @city_state ||= /(?:(?<city> [^\d,]+?)\W+(?<state> #{subdivision}))/ix
      end

      # Words and symbols that join the two streets of an intersection.
      def corner
        @corner ||= /(?:\band\b|\bat\b|&|@)/ix
      end

      # Whole-string pattern: number, street, optional unit, then place.
      def formal_address
        @formal_address ||= /\A[^\w\x23]*
          #{number} \W*
          #{street}\W+
          (?:#{unit}\W+)?
          #{place}\W*\z
        /ix
      end

      # Looser pattern anchored only at the start: the unit may come before
      # or after the street, and number and place are optional.
      def informal_address
        @informal_address ||= /\A\s*
          (?:#{unit} #{separator} #{place})?
          (?:#{number})? \W*
          #{street} #{avoid_unit}
          (?:#{unit} #{separator})?
          (?:#{place})?
        /ix
      end

      # Whole-string pattern: street, corner word, street, then place.
      def intersectional_address
        @intersectional_address ||= /\A\W*
          #{street}\W*?
          \s+#{corner}\s+
          #{street}\W+
          #{place}\W*\z
        /ix
      end

      # rubocop:disable Lint/MixedRegexpCaptureTypes
      def number
        # Utah and Wisconsin have a more elaborate system of block numbering
        # http://en.wikipedia.org/wiki/House_number#Block_numbers
        @number ||= /(?<number>(n|s|e|w)?\d+[.-]?\d*)(?=\D)/ix
      end
      # rubocop:enable Lint/MixedRegexpCaptureTypes

      # Optional city/state followed by an optional postal code.
      def place
        @place ||= /(?:#{city_state}\W*)? (?:#{postal_code})?/ix
      end

      # Five digits with an optional four digit extension; captures
      # `postal_code` and `postal_code_ext`.
      def postal_code
        @postal_code ||= /(?:(?<postal_code>\d{5})(?:-?(?<postal_code_ext>\d{4}))?)/ix
      end

      # A run of non-word characters, or the end of the string.
      def separator
        @separator ||= /(?:\W+|\Z)/ix
      end

      # Street pattern. The capture names `street`, `street_type` and
      # `suffix` are repeated across the alternatives on purpose; whichever
      # alternative matches supplies the value.
      def street
        @street ||= /(?:
          (?:
            (?<street> #{cardinal_type})\W+
            (?<street_type> #{street_type})\b
          )
          | (?:(?<prefix> #{cardinal_type})\W+)?
          (?:
            (?<street> [^,]*\d)
            (?:[^\w,]* (?<suffix> #{cardinal_type})\b)
            |
            (?<street> [^,]+)
            (?:[^\w,]+(?<street_type> #{street_type})\b)
            (?:[^\w,]+(?<suffix> #{cardinal_type})\b)?
            |
            (?<street> [^,]+?)
            (?:[^\w,]+(?<street_type> #{street_type})\b)?
            (?:[^\w,]+(?<suffix> #{cardinal_type})\b)?
          )
        )/ix
      end

      # Alternation of every street type key and value.
      def street_type
        @street_type ||= begin
          values = (list.street_types.keys + list.street_types.values).uniq
          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # Alternation of every subdivision code and name, matched as a whole
      # token.
      #
      # The pattern is written as a regexp literal so that the boundary
      # checks are real regexp syntax (in a double-quoted string "\b" is a
      # backspace character), and the alternation is grouped so the checks
      # apply to every alternative. Lookarounds are used instead of \b so
      # that values beginning or ending with a non-word character still match.
      def subdivision
        @subdivision ||= begin
          values = list.subdivision_codes.flatten.map { |code| ::Regexp.quote(code) }
          /(?<!\w)(?:#{values.join('|')})(?!\w)/i
        end
      end

      # A numbered designator (or "#") followed by the unit identifier, or a
      # stand-alone unnumbered designator; captures `unit_prefix` and `unit`.
      def unit
        @unit ||= %r{
          (?:
            (?:
              (?:#{unit_numbered} \W*)
              | (?<unit_prefix> \#)\W*
            )
            (?<unit> [\w/-]+)
          ) | #{unit_unnumbered}
        }ix
      end

      # Any numbered unit designator not directly followed by a letter.
      def unit_numbered
        @unit_numbered ||= begin
          values = list.unit_abbr_numbered_regexps.values
          /(?<unit_prefix>#{values.join('|')})(?![a-z])/ix
        end
      end

      # Any unnumbered unit designator ending on a word boundary.
      def unit_unnumbered
        @unit_unnumbered ||= begin
          values = list.unit_abbr_unnumbered_regexps.values
          /(?<unit_prefix>#{values.join('|')})\b/ix
        end
      end

    end
  end
end