lite-address 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.fasterer.yml +19 -0
- data/.gitignore +11 -0
- data/.rspec +4 -0
- data/.rubocop.yml +48 -0
- data/CHANGELOG.md +11 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +97 -0
- data/LICENSE.txt +21 -0
- data/README.md +96 -0
- data/Rakefile +8 -0
- data/_config.yml +1 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/lite/address/format.rb +129 -0
- data/lib/lite/address/list.rb +100 -0
- data/lib/lite/address/parser.rb +215 -0
- data/lib/lite/address/regexp.rb +155 -0
- data/lib/lite/address/types/cardinal.yml +8 -0
- data/lib/lite/address/types/street.yml +363 -0
- data/lib/lite/address/ukey.rb +20 -0
- data/lib/lite/address/version.rb +9 -0
- data/lib/lite/address.rb +8 -0
- data/lite-address.gemspec +51 -0
- metadata +207 -0
@@ -0,0 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'snail' unless defined?(Snail)
|
4
|
+
|
5
|
+
module Lite
  module Address

    # Every member of Lite::Address::Format, in declaration order. The first
    # entries are the address parts filled from the parser's match data; the
    # last three (country, list, regexp) are the helper objects the parser
    # merges in before building the struct.
    FORMAT_KEYS = %i[
      number
      street street2
      street_type street_type2 redundant_street_type
      unit_prefix unit
      prefix prefix2
      suffix suffix2
      city
      state
      postal_code postal_code_ext
      country list regexp
    ].freeze

    # Keyword-initialised value object holding one parsed address, plus
    # helpers to render it as text.
    #
    # Struct members are writable, so every derived value below is computed
    # on demand instead of being cached: a cached value would silently go
    # stale as soon as a member is reassigned.
    class Format < Struct.new(*FORMAT_KEYS, keyword_init: true)

      # @return the +alpha2+ value of the country object
      def country_code
        country.alpha2
      end

      # @return the +name+ value of the country object
      def country_name
        country.name
      end

      # @return [Boolean] true when both +street+ and +street2+ are set
      def intersection?
        !!street && !!street2
      end

      # Postal code joined with its extension by a dash when one is present.
      #
      # @return [String, nil] nil when there is no postal code
      def full_postal_code
        return if postal_code.nil?

        [postal_code, postal_code_ext].compact.join('-')
      end

      # First address line: the intersection form when two streets are
      # present, otherwise number/street/unit.
      #
      # @param str [String] text placed in front of the rendered line
      # @return [String]
      def line1(str = +'')
        parts = intersection? ? intersection_line1 : address_line1
        str + parts.compact.join(' ').strip
      end

      # Second address line: "City, State" followed by the full postal code.
      #
      # @param str [String] text placed in front of the rendered line
      # @return [String]
      def line2(str = +'')
        # `+=` builds a new string, so the caller's argument is not modified
        # by the `<<` that follows.
        str += [city, state].compact.join(', ')
        str << " #{full_postal_code}" if postal_code
        str.strip
      end

      # @return the entry of the list's subdivision map stored under +state+
      def state_name
        list.subdivision_map[state]
      end

      # @return [Hash{Symbol => Object}] a fresh hash of every FORMAT_KEYS
      #   member and its current value
      def to_h
        Lite::Address::FORMAT_KEYS.each_with_object({}) do |key, hash|
          hash[key] = public_send(key)
        end
      end

      # @param format [Symbol] :line1, :line2, or anything else for both
      #   non-empty lines joined by ", "
      # @return [String]
      def to_s(format = :default)
        case format
        when :line1 then line1
        when :line2 then line2
        else [line1, line2].reject(&:empty?).join(', ')
        end
      end

      # Renders the address through Snail.
      #
      # @param options [Hash] merged over the generated Snail parameters
      # @return [String]
      def to_snail(options = {})
        Snail.new(snail_params.merge(options)).to_s
      end

      # @return the result of Lite::Address::Ukey.generate for +to_s+
      def to_ukey
        Lite::Address::Ukey.generate(to_s)
      end

      # Two addresses are equal when their default string renderings match.
      def ==(other)
        to_s == other.to_s
      end

      alias alpha2 country_code
      alias state_code state

      private

      # rubocop:disable Metrics/AbcSize, Naming/VariableNumber
      # Ordered parts of a regular (single street) first line; nil entries
      # are dropped by the caller.
      def address_line1
        parts = []
        parts << number
        parts << prefix
        parts << street
        parts << street_type unless redundant_street_type
        parts << suffix
        parts << unit_prefix
        # http://pe.usps.gov/cpim/ftp/pubs/Pub28/pub28.pdf pg28
        parts << (unit_prefix ? unit : "\# #{unit}") if unit
        parts
      end

      # Parameters handed to Snail by #to_snail.
      def snail_params
        {
          line_1: line1,
          city: city,
          region: state,
          postal_code: full_postal_code,
          country: country_code
        }
      end

      # Ordered parts of an intersection first line ("A and B").
      def intersection_line1
        parts = []
        parts << prefix
        parts << street
        parts << street_type
        parts << suffix
        parts << 'and'
        parts << prefix2
        parts << street2
        parts << street_type2
        parts << suffix2
        parts
      end
      # rubocop:enable Metrics/AbcSize, Naming/VariableNumber

    end

  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'yaml' unless defined?(YAML)
|
4
|
+
|
5
|
+
module Lite
  module Address
    # Lookup tables (cardinal directions, street types, subdivisions, unit
    # designators) used to build the parsing regexps and to normalise
    # matched values. Every table is built lazily and memoized per instance.
    class List

      attr_reader :country

      # @param country an object responding to +subdivisions+ with a hash of
      #   code => object responding to +name+
      def initialize(country)
        @country = country
      end

      # @return [Hash] inverse of #cardinal_types
      def cardinal_codes
        @cardinal_codes ||= cardinal_types.invert
      end

      # @return [Hash] contents of types/cardinal.yml next to this file
      def cardinal_types
        @cardinal_types ||= begin
          file_path = File.expand_path('types/cardinal.yml', File.dirname(__FILE__))
          YAML.load_file(file_path)
        end
      end

      # @return [Hash] contents of types/street.yml next to this file
      def street_types
        @street_types ||= begin
          file_path = File.expand_path('types/street.yml', File.dirname(__FILE__))
          YAML.load_file(file_path)
        end
      end

      # One whole-word, case-insensitive regexp per street type
      # abbreviation, matching the abbreviation itself and every spelling
      # that #street_types maps to it.
      #
      # Several spellings can share one abbreviation, so they are collected
      # first; assigning a regexp per pair would keep only the last spelling.
      #
      # @return [Hash{String => Regexp}]
      def street_type_regexps
        @street_type_regexps ||= begin
          spellings = street_types.each_with_object({}) do |(type, abbr), hash|
            (hash[abbr] ||= [abbr]) << type
          end

          spellings.transform_values do |words|
            alternation = words.uniq.map { |word| ::Regexp.quote(word.to_s) }.join('|')
            /\b(?:#{alternation})\b/i
          end
        end
      end

      # @return [Hash] inverse of #subdivision_names
      def subdivision_codes
        @subdivision_codes ||= subdivision_names.invert
      end

      # @return [Hash] subdivision code => subdivision name
      def subdivision_map
        @subdivision_map ||= country.subdivisions.transform_values(&:name)
      end

      # @return [Hash] downcased subdivision name => subdivision code
      def subdivision_names
        @subdivision_names ||= country.subdivisions.each_with_object({}) do |(code, sub), hash|
          hash[sub.name.downcase] = code
        end
      end

      # Numbered and unnumbered unit designators combined.
      #
      # @return [Hash{String => Regexp}]
      def unit_abbr_regexps
        # http://pe.usps.com/text/pub28/pub28c2_003
        @unit_abbr_regexps ||= unit_abbr_numbered_regexps.merge(unit_abbr_unnumbered_regexps)
      end

      # Unit designator label => regexp for the spellings normalised to it.
      # Regexp#unit_numbered uses these for designators followed by an
      # identifier.
      #
      # NOTE(review): 'Hanger' looks like a misspelling of the USPS
      # designator "Hangar"; it is emitted as the unit prefix, so confirm
      # before changing it.
      #
      # @return [Hash{String => Regexp}]
      def unit_abbr_numbered_regexps
        @unit_abbr_numbered_regexps ||= {
          'Apt' => /(?:ap|dep)(?:ar)?t(?:me?nt)?/i,
          'PO Box' => /p\W*[om]\W*b(?:ox)?/i,
          'Bldg' => /bu?i?ldi?n?g/i,
          'Dept' => /dep(artmen)?t/i,
          'Floor' => /flo*r?/i,
          'Hanger' => /ha?nga?r/i,
          'Lot' => /lo?t/i,
          'Room' => /ro*m/i,
          'Pier' => /pier/i,
          'Slip' => /slip/i,
          'Space' => /spa?ce?/i,
          'Stop' => /stop/i,
          'Drawer' => /drawer/i,
          'Suite' => /su?i?te/i,
          'Trailer' => /tra?i?le?r/i,
          'Box' => /\w*(?<!po\W)box/i,
          'Unit' => /uni?t/i
        }
      end

      # Unit designator label => regexp, for designators that
      # Regexp#unit_unnumbered matches on their own, without an identifier.
      #
      # @return [Hash{String => Regexp}]
      def unit_abbr_unnumbered_regexps
        @unit_abbr_unnumbered_regexps ||= {
          'Basement' => /ba?se?me?n?t/i,
          'Front' => /fro?nt/i,
          'Lobby' => /lo?bby/i,
          'Lower' => /lowe?r/i,
          'Office' => /off?i?ce?/i,
          'PH' => /pe?n?t?ho?u?s?e?/i,
          'Rear' => /rear/i,
          'Side' => /side/i,
          'Upper' => /uppe?r/i
        }
      end

      alias state_codes subdivision_codes
      alias state_map subdivision_map
      alias state_names subdivision_names

    end
  end
end
|
@@ -0,0 +1,215 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'countries' unless defined?(ISO3166::Country)
|
4
|
+
|
5
|
+
module Lite
  module Address
    # Parses a free-form address string into a Lite::Address::Format.
    #
    # Each lookup (#any, #formal, #informal, #intersectional) returns a
    # Format on success and nil when the corresponding pattern does not match.
    class Parser

      LOOKUPS = %i[any formal informal intersectional].freeze
      CAPITALIZATION_PARTS = %w[street street_type street2 street_type2 city unit_prefix].freeze
      STREET_POSITIONS = ['', '1', '2'].freeze

      attr_reader :address, :country_code

      # @param address [#to_s] raw address text; nil is treated as empty
      # @param country_code [#to_s] country code, upcased before use
      def initialize(address, country_code: 'US')
        @address = sanitize_address(address)
        @country_code = sanitize_country_code(country_code)
      end

      class << self

        # Class-level shortcuts: Parser.any(address, options) and friends.
        # +:country_code+ is taken out of a copy of the options so the
        # caller's hash is left untouched (and may be frozen).
        LOOKUPS.each do |method_name|
          define_method(method_name) do |address, args = {}|
            options = args.dup
            code = options.delete(:country_code) || 'US'
            new(address, country_code: code).public_send(method_name, options)
          end
        end

      end

      # Tries the intersectional pattern when the text contains a corner
      # word, otherwise the formal pattern and then the informal one.
      def any(args = {})
        return intersectional(args) if regexp.corner.match(address)

        formal(args) || informal(args)
      end

      # Parses with the fully anchored formal pattern.
      def formal(args = {})
        return unless (match = regexp.formal_address.match(address))

        map = match_map(match)
        generate_address(map, args)
      end

      # Parses with the looser informal pattern.
      def informal(args = {})
        return unless (match = regexp.informal_address.match(address))

        map = match_map(match)
        generate_address(map, args)
      end

      # Parses a two-street intersection.
      def intersectional(args = {})
        return unless (match = regexp.intersectional_address.match(address))

        map = match_map(match)
        intersectional_submatch(match, map, 'street')
        intersectional_submatch(match, map, 'street_type')
        intersectional_rematch(match, map, 'street_type')

        generate_address(map, args)
      end

      protected

      def country
        @country ||= ISO3166::Country.new(country_code)
      end

      def list
        @list ||= Lite::Address::List.new(country)
      end

      def regexp
        @regexp ||= Lite::Address::Regexp.new(list)
      end

      private

      # Removes one wrapping "(" / ")" pair; nil becomes an empty string so
      # the lookups return nil instead of raising.
      def sanitize_address(value)
        value.to_s.delete_prefix('(').delete_suffix(')')
      end

      def sanitize_country_code(value)
        value.to_s.upcase
      end

      # Named captures that actually matched, keyed by capture name.
      def match_map(match)
        match.names.each_with_object({}) do |name, hash|
          hash[name] = match[name] if match[name]
        end
      end

      # rubocop:disable Metrics/AbcSize
      # Address part => lookup table used by #address_normalize_values.
      def normalization_map
        @normalization_map ||= {
          'prefix' => list.cardinal_types,
          'prefix1' => list.cardinal_types,
          'prefix2' => list.cardinal_types,
          'suffix' => list.cardinal_types,
          'suffix1' => list.cardinal_types,
          'suffix2' => list.cardinal_types,
          'street_type' => list.street_types,
          'street_type1' => list.street_types,
          'street_type2' => list.street_types,
          'state' => list.subdivision_names
        }
      end
      # rubocop:enable Metrics/AbcSize

      # The intersectional pattern contains the same capture name more than
      # once; this assigns the first matched value to +part+ and the second
      # to "<part>2".
      def intersectional_submatch(match, map, part)
        parts = regexp.intersectional_address.named_captures
        parts = parts[part].filter_map { |i| match[i.to_i] }
        map[part] = parts[0] if parts[0]
        map["#{part}2"] = parts[1] if parts[1]
      end

      # When only one value exists for +part+ (or both are equal) and it is
      # a plural of a known type ("Sts"), applies the singular to both
      # streets.
      def intersectional_rematch(_match, map, part)
        return unless map[part] && (!map["#{part}2"] || (map[part] == map["#{part}2"]))

        type = map[part].dup
        return unless type.gsub!(/s\W*$/i, '')
        # Compiled per call: the `o` flag would freeze the pattern of the
        # first part/instance for the whole process.
        return unless /\A#{regexp.public_send(part)}\z/i.match?(type)

        map[part] = map["#{part}2"] = type
      end

      # Strips surrounding whitespace and drops punctuation from every
      # value in place; only the house number may keep a ".".
      def address_strip_chars(map)
        map.each do |key, string|
          string.strip!

          disallowed = key == 'number' ? %r{[^\w\s\-\#&/.]} : %r{[^\w\s\-\#&/]}
          string.gsub!(disallowed, '')
        end
      end

      # When no street type was captured, takes one from the street name
      # itself (if any) and flags it as redundant.
      def address_redundantize_street_type(map)
        map['redundant_street_type'] = false
        return unless map['street'] && !map['street_type']

        match = regexp.street.match(map['street'])
        map['street_type'] = match['street_type'] if match
        map['redundant_street_type'] = true
      end

      # Replaces the unit prefix by the label of every designator regexp
      # that matches it; the current value is re-read on each iteration.
      def address_abbreviate_unit_prefixes(map)
        list.unit_abbr_regexps.each do |abbr, regex|
          map['unit_prefix'] = abbr if regex.match?(map['unit_prefix'])
        end
      end

      # Replaces each mapped part by its table entry (looked up by the
      # downcased value) when one exists.
      def address_normalize_values(map)
        normalization_map.each do |key, hash|
          next unless (map_key = map[key])

          mapping = hash[map_key.downcase]
          map[key] = mapping if mapping
        end
      end

      # Drops a street type that already appears inside the street name.
      def address_avoid_redundant_street_type(map)
        STREET_POSITIONS.each do |suffix|
          street = map["street#{suffix}"]
          street_type = map["street_type#{suffix}"]
          next if !street || !street_type

          # A street type without a regexp entry is simply kept.
          type_regexp = list.street_type_regexps[street_type.downcase]
          next unless type_regexp&.match?(street)

          map.delete("street_type#{suffix}")
        end
      end

      # Expands a leading cardinal code in the city ("NE Portland").
      def address_expand_cardinals(map)
        return unless map['city']

        map['city'].gsub!(/^(#{regexp.cardinal_code})\s+(?=\S)/) do
          # The block argument of gsub! is the matched String, so indexing
          # it with [0] yields only its first character; the code itself is
          # capture group 1. Unknown codes are left as they were.
          code = ::Regexp.last_match(1)
          "#{list.cardinal_codes.fetch(code.upcase, code)} "
        end
      end

      def address_fix_dirty_ordinals(map)
        # Sometimes parcel data will have addresses like "1 1ST ST" as "1 1 ST ST"
        return unless map['street']

        map['street'].gsub!(/\A(\d+\s+st|\d+\s+nd|\d+\s+rd|\d+\s+th)\z/i) do |match|
          match.gsub(/\s+/, '')
        end
      end

      # Capitalizes each word of the parts listed in CAPITALIZATION_PARTS.
      def address_capitalize_parts(map)
        CAPITALIZATION_PARTS.each do |k|
          map[k] = map[k].split.map(&:capitalize).join(' ') if map[k]
        end
      end

      # Runs the clean-up steps over +map+ in order and wraps the result in
      # a Format. +args[:avoid_redundant_street_type]+ enables the optional
      # redundant-street-type step.
      def generate_address(map, args = {})
        address_strip_chars(map)
        address_redundantize_street_type(map)
        address_abbreviate_unit_prefixes(map)
        address_normalize_values(map)
        address_avoid_redundant_street_type(map) if args[:avoid_redundant_street_type]
        address_expand_cardinals(map)
        address_fix_dirty_ordinals(map)
        address_capitalize_parts(map)

        map.merge!(country: country, list: list, regexp: regexp)
        Lite::Address::Format.new(map)
      end

    end
  end
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lite
  module Address
    # Builds the regular expressions used by the parser from the lookup
    # tables of a list object. Every pattern is memoized per instance.
    #
    # Inside this namespace the bare constant `Regexp` is this class, so the
    # core class is always written as `::Regexp`.
    class Regexp

      attr_reader :list

      # @param list an object providing cardinal_codes, cardinal_types,
      #   street_types, subdivision_codes and the unit_abbr_*_regexps tables
      def initialize(list)
        @list = list
      end

      # Separator after a street: characters that are neither "#" nor word
      # characters, or the end of the text.
      def avoid_unit
        @avoid_unit ||= /(?:[^\#\w]+|\Z)/ix
      end

      # Alternation of the keys of the list's cardinal codes.
      def cardinal_code
        @cardinal_code ||= begin
          values = list.cardinal_codes.keys
          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # Alternation of, for every cardinal type: its key, its value with a
      # "." after each word character, and its plain value.
      def cardinal_type
        @cardinal_type ||= begin
          values = list.cardinal_types.flat_map do |key, val|
            [key, ::Regexp.quote(val.gsub(/(\w)/, '\1.')), ::Regexp.quote(val)]
          end

          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # Captures +city+ and +state+.
      def city_state
        @city_state ||= /(?:(?<city> [^\d,]+?)\W+(?<state> #{subdivision}))/ix
      end

      # Words and symbols that join the two streets of an intersection.
      def corner
        @corner ||= /(?:\band\b|\bat\b|&|@)/ix
      end

      # Whole-text pattern: number, street, optional unit, place.
      def formal_address
        @formal_address ||= /\A[^\w\x23]*
          #{number} \W*
          #{street}\W+
          (?:#{unit}\W+)?
          #{place}\W*\z
        /ix
      end

      # Start-anchored pattern in which everything except the street is
      # optional; a unit may come before or after the street.
      def informal_address
        @informal_address ||= /\A\s*
          (?:#{unit} #{separator} #{place})?
          (?:#{number})? \W*
          #{street} #{avoid_unit}
          (?:#{unit} #{separator})?
          (?:#{place})?
        /ix
      end

      # Whole-text pattern: street, corner word, street, place.
      def intersectional_address
        @intersectional_address ||= /\A\W*
          #{street}\W*?
          \s+#{corner}\s+
          #{street}\W+
          #{place}\W*\z
        /ix
      end

      # rubocop:disable Lint/MixedRegexpCaptureTypes
      def number
        # Utah and Wisconsin have a more elaborate system of block numbering
        # http://en.wikipedia.org/wiki/House_number#Block_numbers
        @number ||= /(?<number>(n|s|e|w)?\d+[.-]?\d*)(?=\D)/ix
      end
      # rubocop:enable Lint/MixedRegexpCaptureTypes

      # Optional city/state followed by an optional postal code.
      def place
        @place ||= /(?:#{city_state}\W*)? (?:#{postal_code})?/ix
      end

      # Five digits with an optional four digit extension.
      def postal_code
        @postal_code ||= /(?:(?<postal_code>\d{5})(?:-?(?<postal_code_ext>\d{4}))?)/ix
      end

      def separator
        @separator ||= /(?:\W+|\Z)/ix
      end

      # Captures +prefix+, +street+, +street_type+ and +suffix+; the
      # alternatives are tried in the order written.
      def street
        @street ||= /(?:
          (?:
            (?<street> #{cardinal_type})\W+
            (?<street_type> #{street_type})\b
          )
          | (?:(?<prefix> #{cardinal_type})\W+)?
          (?:
            (?<street> [^,]*\d)
            (?:[^\w,]* (?<suffix> #{cardinal_type})\b)
            |
            (?<street> [^,]+)
            (?:[^\w,]+(?<street_type> #{street_type})\b)
            (?:[^\w,]+(?<suffix> #{cardinal_type})\b)?
            |
            (?<street> [^,]+?)
            (?:[^\w,]+(?<street_type> #{street_type})\b)?
            (?:[^\w,]+(?<suffix> #{cardinal_type})\b)?
          )
        )/ix
      end

      # Alternation of every key and value of the list's street types.
      def street_type
        @street_type ||= begin
          values = (list.street_types.keys + list.street_types.values).uniq
          ::Regexp.new(values.join('|'), ::Regexp::IGNORECASE)
        end
      end

      # Alternation of every subdivision code and name, matched as whole
      # words.
      def subdivision
        @subdivision ||= begin
          values = list.subdivision_codes.flatten.map { |code| ::Regexp.quote(code) }
          # "\\b" is required here: inside a double-quoted string "\b" is a
          # backspace character, not a word boundary. The alternation is
          # grouped so that both boundaries apply to every alternative
          # rather than only to the first and the last one.
          ::Regexp.new("\\b(?:#{values.join('|')})\\b", ::Regexp::IGNORECASE)
        end
      end

      # Captures +unit_prefix+ and +unit+: a numbered designator or "#"
      # followed by an identifier, or an unnumbered designator on its own.
      def unit
        @unit ||= %r{
          (?:
            (?:
              (?:#{unit_numbered} \W*)
              | (?<unit_prefix> \#)\W*
            )
            (?<unit> [\w/-]+)
          ) | #{unit_unnumbered}
        }ix
      end

      # Numbered designators, not directly followed by a letter.
      def unit_numbered
        @unit_numbered ||= begin
          values = list.unit_abbr_numbered_regexps.values
          /(?<unit_prefix>#{values.join('|')})(?![a-z])/ix
        end
      end

      # Unnumbered designators, ending at a word boundary.
      def unit_unnumbered
        @unit_unnumbered ||= begin
          values = list.unit_abbr_unnumbered_regexps.values
          /(?<unit_prefix>#{values.join('|')})\b/ix
        end
      end

    end
  end
end
|