street_sweeper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2
4
+ - jruby
5
+ - rbx-3
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENCE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Derrek Long
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,70 @@
1
+ [![Build Status](https://travis-ci.org/williamatodd/street_sweeper.svg?branch=master)](https://travis-ci.org/williamatodd/street_sweeper)
2
+
3
+ # DESCRIPTION
4
+
5
+ Parses a string and returns a normalized Address object. When the string is not a US address, it returns nil.
6
+
7
+ This is a resurrected fork of [StreetAddress::US](https://github.com/street-address-rb/street-address), which was itself a port of the Perl module [Geo::StreetAddress::US](https://github.com/timbunce/Geo-StreetAddress-US) originally written by Schuyler D. Erle.
8
+
9
+ ## Installation
10
+
11
+ ```shell
12
+ gem install street_sweeper
13
+ ```
14
+
15
+ then in your code
16
+
17
+ ```ruby
18
+ require 'street_sweeper'
19
+ ```
20
+
21
+ or from Gemfile
22
+
23
+ ```ruby
24
+ gem 'street_sweeper'
25
+ ```
26
+
27
+ ## Basic Usage
28
+
29
+ ```ruby
30
+ require 'street_sweeper'
31
+
32
+ address = StreetSweeper.parse("1600 Pennsylvania Ave, Washington, DC, 20500")
33
+ address.street # Pennsylvania
34
+ address.number # 1600
35
+ address.postal_code # 20500
36
+ address.city # Washington
37
+ address.state # DC
38
+ address.state_name # District of columbia
39
+ address.street_type # Ave
40
+ address.intersection? # false
41
+ address.full_street_address # 1600 Pennsylvania Ave, Washington, DC 20500
42
+
43
+ address = StreetSweeper.parse("1600 Pennsylvania Ave")
44
+ address.street # Pennsylvania
45
+ address.number # 1600
46
+ address.state # nil
47
+
48
+ address = StreetSweeper.parse("5904 Richmond Hwy Ste 340 Alexandria VA 22303-1864")
49
+ address.street_address_1 # 5904 Richmond Hwy
50
+ address.street_address_2 # Ste 340
51
+ address.full_postal_code # 22303-1864
52
+ address.postal_code_ext # 1864
53
+ address.state_name # Virginia
54
+ address.state_fips # 51
55
+
56
+ ```
57
+ ## Stricter Parsing
58
+
59
+ ```ruby
60
+ address = StreetSweeper.parse_address("1600 Pennsylvania Avenue")
61
+ # nil - not enough information to be a full address
62
+
63
+ address = StreetSweeper.parse_address("1600 Pennsylvania Ave, Washington, DC, 20500")
64
+ # same results as above
65
+ ```
66
+
67
+ ## License
68
+ The [MIT License](http://opensource.org/licenses/MIT)
69
+
70
+ Copyright (c) 2007-2018 Contributors
@@ -0,0 +1,6 @@
1
+ require 'rspec/core/rake_task'
2
+
3
+ RSpec::Core::RakeTask.new(:spec)
4
+
5
+ # If you want to make this the default task
6
+ task default: :spec
@@ -0,0 +1,4 @@
1
+ require 'street_sweeper/constants'
2
+ require 'street_sweeper/matchers'
3
+ require 'street_sweeper/address'
4
+ require 'street_sweeper/base'
@@ -0,0 +1,103 @@
module StreetSweeper
  # Plain container for the individual components of a parsed address, plus
  # helpers that assemble those components into display strings.
  class Address
    attr_accessor :number, :street, :street_type, :unit, :unit_prefix,
                  :suffix, :prefix, :city, :state, :postal_code,
                  :postal_code_ext, :street2, :street_type2, :suffix2,
                  :prefix2, :redundant_street_type

    # @param args [Hash] attribute name => value; each pair is assigned through
    #   the matching public writer, so an unknown name raises NoMethodError.
    def initialize(args)
      args.each { |attr, val| public_send("#{attr}=", val) }
    end

    # @return [String, nil] "postal_code-postal_code_ext" when an extension is
    #   set, the bare postal code otherwise, nil when there is no postal code.
    def full_postal_code
      return unless postal_code
      return postal_code unless postal_code_ext
      "#{postal_code}-#{postal_code_ext}"
    end

    # @return [String, nil] Constants::FIPS_STATES entry for +state+.
    def state_fips
      Constants::FIPS_STATES[state]
    end

    # @return [String, nil] Constants::STATE_NAMES entry for +state+ with only
    #   its first letter upper-cased (String#capitalize).
    def state_name
      name = Constants::STATE_NAMES[state]
      name.capitalize if name
    end

    # @return [Boolean] true when a second street has been set.
    def intersection?
      !street2.nil?
    end

    # First display line: "<street> and <street2>" for intersections,
    # otherwise street_address_1 followed by street_address_2.
    #
    # @return [String]
    def line1
      return [street_address_1, street_address_2].join(' ').strip unless intersection?

      [
        *optional_part(prefix), street, *optional_part(street_type), *optional_part(suffix),
        'and',
        *optional_part(prefix2), street2, *optional_part(street_type2), *optional_part(suffix2)
      ].join(' ').strip
    end

    # Second display line: "city, state postal-code", skipping unset pieces.
    #
    # @return [String]
    def line2
      locality = [*optional_part(city), *optional_part(state)].join(', ')
      zip = full_postal_code
      locality += " #{zip}" if zip
      locality.strip
    end

    # Number, prefix, street, street type and suffix joined by spaces. The
    # street type is left out when +redundant_street_type+ is set. For an
    # intersection this is the same as line1.
    #
    # @return [String]
    def street_address_1
      return line1 if intersection?

      shown_type = street_type unless redundant_street_type
      [
        number,
        *optional_part(prefix),
        *optional_part(street),
        *optional_part(shown_type),
        *optional_part(suffix)
      ].join(' ').strip
    end

    # Unit designator: "<unit_prefix> <unit>", or "# <unit>" when only a unit
    # is known.
    #
    # @return [String]
    def street_address_2
      pieces =
        if unit_prefix
          unit ? [unit_prefix, unit] : [unit_prefix]
        elsif unit
          ["\# #{unit}"]
        else
          []
        end
      pieces.join(' ').strip
    end

    # @return [String] line1 and line2 joined by ", ", omitting empty lines.
    def full_street_address
      [line1, line2].reject { |line| line.empty? }.join(', ')
    end

    # @return [Hash{Symbol => Object}] every instance variable that has been
    #   assigned, keyed by its name without the leading "@".
    def to_h
      instance_variables.map do |ivar|
        [ivar[1..-1].to_sym, instance_variable_get(ivar)]
      end.to_h
    end

    private

    # Wraps a truthy value in a one-element array and maps nil/false to an
    # empty array, so optional components can be splatted into a parts list.
    def optional_part(value)
      value ? [value] : []
    end
  end
end
@@ -0,0 +1,121 @@
module StreetSweeper
  class << self
    # Parses a free-form location string.
    #
    # A string matching Matchers.corner_regexp is parsed as an intersection
    # only. Anything else is tried as a PO address, then a full address, then
    # an informal address; the first successful parse wins.
    #
    # @param location [String] text to parse
    # @param args [Hash] options; the only key read in this file is
    #   :avoid_redundant_street_type
    # @return [StreetSweeper::Address, nil] nil when nothing matched
    def parse(location, args = {})
      return parse_intersection(location, args) if Matchers.corner_regexp.match(location)

      parse_po_address(location, args) ||
        parse_address(location, args) ||
        parse_informal_address(location, args)
    end

    # Parses +address+ with Matchers.address_regexp.
    #
    # @return [StreetSweeper::Address, nil]
    def parse_address(address, args = {})
      parse_with(Matchers.address_regexp, address, args)
    end

    # Parses +address+ with Matchers.po_address_regexp.
    #
    # @return [StreetSweeper::Address, nil]
    def parse_po_address(address, args = {})
      parse_with(Matchers.po_address_regexp, address, args)
    end

    # Parses +address+ with Matchers.informal_address_regexp.
    #
    # @return [StreetSweeper::Address, nil]
    def parse_informal_address(address, args = {})
      parse_with(Matchers.informal_address_regexp, address, args)
    end

    # Parses an intersection of two streets with Matchers.intersection_regexp.
    #
    # +args+ now defaults to an empty hash so the signature matches the other
    # parse_* entry points.
    #
    # @return [StreetSweeper::Address, nil]
    def parse_intersection(intersection, args = {})
      matched = Matchers.intersection_regexp.match(intersection)
      return unless matched
      hash = match_to_hash(matched)

      # 'street' and 'street_type' are looked up by capture position because
      # each name can map to more than one group in the intersection regexp.
      streets = intersection_captures(matched, 'street')
      hash['street'] = streets[0] if streets[0]
      hash['street2'] = streets[1] if streets[1]

      street_types = intersection_captures(matched, 'street_type')
      hash['street_type'] = street_types[0] if street_types[0]
      hash['street_type2'] = street_types[1] if street_types[1]

      # When there is a single (or repeated) street type, try dropping a
      # trailing "s"; if what remains is itself a street type, apply it to
      # both streets. Presumably this handles a shared plural type - confirm
      # against the Matchers definitions.
      if hash['street_type'] && (!hash['street_type2'] || (hash['street_type'] == hash['street_type2']))
        type = hash['street_type'].clone
        if type.gsub!(/s\W*$/i, '') && /\A#{Matchers.street_type_regexp}\z/i =~ type
          hash['street_type'] = hash['street_type2'] = type
        end
      end

      to_address(hash, args)
    end

    private

    # Matches +address+ against +regexp+ and builds an Address from the named
    # captures; returns nil when the regexp does not match.
    def parse_with(regexp, address, args)
      matched = regexp.match(address)
      to_address(match_to_hash(matched), args) if matched
    end

    # Returns the non-nil captures of every group called +name+ in the
    # intersection regexp, in group order.
    def intersection_captures(matched, name)
      Matchers.intersection_regexp.named_captures[name].map { |pos| matched[pos.to_i] }.compact
    end

    # Copies the named captures that are present and not blank into a Hash
    # keyed by capture name.
    def match_to_hash(matched)
      matched.names.each_with_object({}) do |name, hash|
        value = matched[name]
        hash[name] = value if value && !value.strip.empty?
      end
    end

    # Normalizes the captured components in +input+ (mutating the hash and
    # its strings) and wraps them in a StreetSweeper::Address.
    #
    # @param input [Hash{String => String}] capture name => captured text
    # @param args [Hash] options; :avoid_redundant_street_type drops a street
    #   type whose pattern already matches the street name
    # @return [StreetSweeper::Address]
    def to_address(input, args)
      # strip off some punctuation and whitespace
      input.each_value do |string|
        string.strip!
        string.gsub!(/[^\w\s\-\#\&]/, '')
      end

      input['redundant_street_type'] = false
      if input['street'] && !input['street_type']
        matched = Matchers.street_regexp.match(input['street'])
        # A street that does not match (for example one left empty by the
        # punctuation stripping above) used to raise NoMethodError on nil.
        if matched
          input['street_type'] = matched['street_type']
          input['redundant_street_type'] = true
        end
      end

      ## abbreviate unit prefixes
      if input['unit_prefix']
        Constants::UNIT_ABBREVIATIONS.each_pair do |regex, abbr|
          regex.match(input['unit_prefix']) { |_m| input['unit_prefix'] = abbr }
        end
      end

      Constants::NORMALIZE_MAP.each_pair do |key, map|
        next unless input[key]
        mapping = map[input[key].downcase]
        input[key] = mapping if mapping
      end

      if args[:avoid_redundant_street_type]
        ['', '1', '2'].each do |suffix|
          street = input['street' + suffix]
          type = input['street_type' + suffix]
          next if !street || !type

          type_regexp = Matchers.street_type_matches[type.downcase]
          # Skip types that have no registered pattern instead of calling
          # #match on nil.
          input.delete('street_type' + suffix) if type_regexp && type_regexp.match(street)
        end
      end

      # attempt to expand directional prefixes on place names
      if input['city']
        input['city'].gsub!(/^(#{Matchers.dircode_regexp})\s+(?=\S)/) do |match|
          # The block argument is the whole matched String (code plus trailing
          # whitespace), so match[0] was only its first character and "NW"
          # expanded as "N". Use the captured code itself.
          expanded = Constants::DIRECTION_CODES[Regexp.last_match(1).upcase]
          expanded ? "#{expanded} " : match
        end
      end

      %w[street street_type street2 street_type2 city unit_prefix].each do |k|
        input[k] = input[k].split.map { |elem| upcase_or_capitalize(elem) }.join(' ') if input[k]
      end

      StreetSweeper::Address.new(input)
    end

    # Upper-cases the tokens po/ne/nw/sw/se and capitalizes every other token.
    def upcase_or_capitalize(elem)
      return elem.upcase if elem.downcase =~ /\A(?:po|ne|nw|sw|se)\z/
      elem.capitalize
    end
  end
end
@@ -0,0 +1,568 @@
module StreetSweeper
  # Lookup tables used to normalize parsed address components. Every table,
  # including the derived ones, is frozen so a shared constant cannot be
  # modified by accident.
  class Constants
    # Lower-case direction name => direction code.
    DIRECTIONAL = {
      'north' => 'N',
      'northeast' => 'NE',
      'east' => 'E',
      'southeast' => 'SE',
      'south' => 'S',
      'southwest' => 'SW',
      'west' => 'W',
      'northwest' => 'NW'
    }.freeze
    # Direction code => lower-case direction name.
    DIRECTION_CODES = DIRECTIONAL.invert.freeze

    # Lower-case street type spelling => abbreviation.
    STREET_TYPES = {
      'allee' => 'aly',
      'alley' => 'aly',
      'ally' => 'aly',
      'anex' => 'anx',
      'annex' => 'anx',
      'annx' => 'anx',
      'arcade' => 'arc',
      'av' => 'ave',
      'aven' => 'ave',
      'avenu' => 'ave',
      'avenue' => 'ave',
      'avn' => 'ave',
      'avnue' => 'ave',
      'bayoo' => 'byu',
      'bayou' => 'byu',
      'beach' => 'bch',
      'bend' => 'bnd',
      'bluf' => 'blf',
      'bluff' => 'blf',
      'bluffs' => 'blfs',
      'bot' => 'btm',
      'bottm' => 'btm',
      'bottom' => 'btm',
      'boul' => 'blvd',
      'boulevard' => 'blvd',
      'boulv' => 'blvd',
      'branch' => 'br',
      'brdge' => 'brg',
      'bridge' => 'brg',
      'brnch' => 'br',
      'brook' => 'brk',
      'brooks' => 'brks',
      'burg' => 'bg',
      'burgs' => 'bgs',
      'bypa' => 'byp',
      'bypas' => 'byp',
      'bypass' => 'byp',
      'byps' => 'byp',
      'camp' => 'cp',
      'canyn' => 'cyn',
      'canyon' => 'cyn',
      'cape' => 'cpe',
      'causeway' => 'cswy',
      'causway' => 'cswy',
      'cen' => 'ctr',
      'cent' => 'ctr',
      'center' => 'ctr',
      'centers' => 'ctrs',
      'centr' => 'ctr',
      'centre' => 'ctr',
      'circ' => 'cir',
      'circl' => 'cir',
      'circle' => 'cir',
      'circles' => 'cirs',
      'ck' => 'crk',
      'cliff' => 'clf',
      'cliffs' => 'clfs',
      'club' => 'clb',
      'cmp' => 'cp',
      'cnter' => 'ctr',
      'cntr' => 'ctr',
      'cnyn' => 'cyn',
      'common' => 'cmn',
      'corner' => 'cor',
      'corners' => 'cors',
      'course' => 'crse',
      'court' => 'ct',
      'courts' => 'cts',
      'cove' => 'cv',
      'coves' => 'cvs',
      'cr' => 'crk',
      'crcl' => 'cir',
      'crcle' => 'cir',
      'crecent' => 'cres',
      'creek' => 'crk',
      'crescent' => 'cres',
      'cresent' => 'cres',
      'crest' => 'crst',
      'crossing' => 'xing',
      'crossroad' => 'xrd',
      'crscnt' => 'cres',
      'crsent' => 'cres',
      'crsnt' => 'cres',
      'crssing' => 'xing',
      'crssng' => 'xing',
      'crt' => 'ct',
      'curve' => 'curv',
      'dale' => 'dl',
      'dam' => 'dm',
      'div' => 'dv',
      'divide' => 'dv',
      'driv' => 'dr',
      'drive' => 'dr',
      'drives' => 'drs',
      'drv' => 'dr',
      'dvd' => 'dv',
      'estate' => 'est',
      'estates' => 'ests',
      'exp' => 'expy',
      'expr' => 'expy',
      'express' => 'expy',
      'expressway' => 'expy',
      'expw' => 'expy',
      'extension' => 'ext',
      'extensions' => 'exts',
      'extn' => 'ext',
      'extnsn' => 'ext',
      'falls' => 'fls',
      'ferry' => 'fry',
      'field' => 'fld',
      'fields' => 'flds',
      'flat' => 'flt',
      'flats' => 'flts',
      'ford' => 'frd',
      'fords' => 'frds',
      'forest' => 'frst',
      'forests' => 'frst',
      'forg' => 'frg',
      'forge' => 'frg',
      'forges' => 'frgs',
      'fork' => 'frk',
      'forks' => 'frks',
      'fort' => 'ft',
      'freeway' => 'fwy',
      'freewy' => 'fwy',
      'frry' => 'fry',
      'frt' => 'ft',
      'frway' => 'fwy',
      'frwy' => 'fwy',
      'garden' => 'gdn',
      'gardens' => 'gdns',
      'gardn' => 'gdn',
      'gateway' => 'gtwy',
      'gatewy' => 'gtwy',
      'gatway' => 'gtwy',
      'glen' => 'gln',
      'glens' => 'glns',
      'grden' => 'gdn',
      'grdn' => 'gdn',
      'grdns' => 'gdns',
      'green' => 'grn',
      'greens' => 'grns',
      'grov' => 'grv',
      'grove' => 'grv',
      'groves' => 'grvs',
      'gtway' => 'gtwy',
      'harb' => 'hbr',
      'harbor' => 'hbr',
      'harbors' => 'hbrs',
      'harbr' => 'hbr',
      'haven' => 'hvn',
      'havn' => 'hvn',
      'height' => 'hts',
      'heights' => 'hts',
      'hgts' => 'hts',
      'highway' => 'hwy',
      'highwy' => 'hwy',
      'hill' => 'hl',
      'hills' => 'hls',
      'hiway' => 'hwy',
      'hiwy' => 'hwy',
      'hllw' => 'holw',
      'hollow' => 'holw',
      'hollows' => 'holw',
      'holws' => 'holw',
      'hrbor' => 'hbr',
      'ht' => 'hts',
      'hway' => 'hwy',
      'inlet' => 'inlt',
      'island' => 'is',
      'islands' => 'iss',
      'isles' => 'isle',
      'islnd' => 'is',
      'islnds' => 'iss',
      'jction' => 'jct',
      'jctn' => 'jct',
      'jctns' => 'jcts',
      'junction' => 'jct',
      'junctions' => 'jcts',
      'junctn' => 'jct',
      'juncton' => 'jct',
      'key' => 'ky',
      'keys' => 'kys',
      'knol' => 'knl',
      'knoll' => 'knl',
      'knolls' => 'knls',
      'la' => 'ln',
      'lake' => 'lk',
      'lakes' => 'lks',
      'landing' => 'lndg',
      'lane' => 'ln',
      'lanes' => 'ln',
      'ldge' => 'ldg',
      'light' => 'lgt',
      'lights' => 'lgts',
      'lndng' => 'lndg',
      'loaf' => 'lf',
      'lock' => 'lck',
      'locks' => 'lcks',
      'lodg' => 'ldg',
      'lodge' => 'ldg',
      'loops' => 'loop',
      'manor' => 'mnr',
      'manors' => 'mnrs',
      'meadow' => 'mdw',
      'meadows' => 'mdws',
      'medows' => 'mdws',
      'mill' => 'ml',
      'mills' => 'mls',
      'mission' => 'msn',
      'missn' => 'msn',
      'mnt' => 'mt',
      'mntain' => 'mtn',
      'mntn' => 'mtn',
      'mntns' => 'mtns',
      'motorway' => 'mtwy',
      'mount' => 'mt',
      'mountain' => 'mtn',
      'mountains' => 'mtns',
      'mountin' => 'mtn',
      'mssn' => 'msn',
      'mtin' => 'mtn',
      'neck' => 'nck',
      'orchard' => 'orch',
      'orchrd' => 'orch',
      'overpass' => 'opas',
      'ovl' => 'oval',
      'parks' => 'park',
      'parkway' => 'pkwy',
      'parkways' => 'pkwy',
      'parkwy' => 'pkwy',
      'passage' => 'psge',
      'paths' => 'path',
      'pikes' => 'pike',
      'pine' => 'pne',
      'pines' => 'pnes',
      'pk' => 'park',
      'pkway' => 'pkwy',
      'pkwys' => 'pkwy',
      'pky' => 'pkwy',
      'place' => 'pl',
      'plain' => 'pln',
      'plaines' => 'plns',
      'plains' => 'plns',
      'plaza' => 'plz',
      'plza' => 'plz',
      'point' => 'pt',
      'points' => 'pts',
      'port' => 'prt',
      'ports' => 'prts',
      'prairie' => 'pr',
      'prarie' => 'pr',
      'prk' => 'park',
      'prr' => 'pr',
      'rad' => 'radl',
      'radial' => 'radl',
      'radiel' => 'radl',
      'ranch' => 'rnch',
      'ranches' => 'rnch',
      'rapid' => 'rpd',
      'rapids' => 'rpds',
      'rdge' => 'rdg',
      'rest' => 'rst',
      'ridge' => 'rdg',
      'ridges' => 'rdgs',
      'river' => 'riv',
      'rivr' => 'riv',
      'rnchs' => 'rnch',
      'road' => 'rd',
      'roads' => 'rds',
      'route' => 'rte',
      'run' => 'run',
      'rvr' => 'riv',
      'shoal' => 'shl',
      'shoals' => 'shls',
      'shoar' => 'shr',
      'shoars' => 'shrs',
      'shore' => 'shr',
      'shores' => 'shrs',
      'skyway' => 'skwy',
      'spng' => 'spg',
      'spngs' => 'spgs',
      'spring' => 'spg',
      'springs' => 'spgs',
      'sprng' => 'spg',
      'sprngs' => 'spgs',
      'spurs' => 'spur',
      'sqr' => 'sq',
      'sqre' => 'sq',
      'sqrs' => 'sqs',
      'squ' => 'sq',
      'square' => 'sq',
      'squares' => 'sqs',
      'station' => 'sta',
      'statn' => 'sta',
      'stn' => 'sta',
      'str' => 'st',
      'strav' => 'stra',
      'strave' => 'stra',
      'straven' => 'stra',
      'stravenue' => 'stra',
      'stravn' => 'stra',
      'stream' => 'strm',
      'street' => 'st',
      'streets' => 'sts',
      'streme' => 'strm',
      'strt' => 'st',
      'strvn' => 'stra',
      'strvnue' => 'stra',
      'sumit' => 'smt',
      'sumitt' => 'smt',
      'summit' => 'smt',
      'terr' => 'ter',
      'terrace' => 'ter',
      'throughway' => 'trwy',
      'tpk' => 'tpke',
      'tr' => 'trl',
      'trace' => 'trce',
      'traces' => 'trce',
      'track' => 'trak',
      'tracks' => 'trak',
      'trafficway' => 'trfy',
      'trail' => 'trl',
      'trails' => 'trl',
      'trk' => 'trak',
      'trks' => 'trak',
      'trls' => 'trl',
      'trnpk' => 'tpke',
      'trpk' => 'tpke',
      'tunel' => 'tunl',
      'tunls' => 'tunl',
      'tunnel' => 'tunl',
      'tunnels' => 'tunl',
      'tunnl' => 'tunl',
      'turnpike' => 'tpke',
      'turnpk' => 'tpke',
      'underpass' => 'upas',
      'union' => 'un',
      'unions' => 'uns',
      'valley' => 'vly',
      'valleys' => 'vlys',
      'vally' => 'vly',
      'vdct' => 'via',
      'viadct' => 'via',
      'viaduct' => 'via',
      'view' => 'vw',
      'views' => 'vws',
      'vill' => 'vlg',
      'villag' => 'vlg',
      'village' => 'vlg',
      'villages' => 'vlgs',
      'ville' => 'vl',
      'villg' => 'vlg',
      'villiage' => 'vlg',
      'vist' => 'vis',
      'vista' => 'vis',
      'vlly' => 'vly',
      'vst' => 'vis',
      'vsta' => 'vis',
      'walks' => 'walk',
      'well' => 'wl',
      'wells' => 'wls',
      'wy' => 'way'
    }.freeze

    # Every spelling and every abbreviation from STREET_TYPES mapped to true,
    # in the order they first appear. Built in one expression so the constant
    # is never mutated after assignment.
    STREET_TYPES_LIST = STREET_TYPES.each_with_object({}) do |(name, abbreviation), list|
      list[name] = true
      list[abbreviation] = true
    end.freeze

    # Spelled-out ordinal => numeric ordinal, applied to street names.
    NUMERIC_STREET_NAMES = {
      'first' => '1st',
      'second' => '2nd',
      'third' => '3rd',
      'fourth' => '4th',
      'fifth' => '5th',
      'sixth' => '6th',
      'seventh' => '7th',
      'eighth' => '8th',
      'ninth' => '9th'
    }.freeze

    # Unit designator pattern => abbreviation.
    # NOTE(review): the patterns are unanchored and overlap (for example
    # "dept" also matches the first, 'Apt', pattern), so the result depends on
    # how a caller walks this hash - confirm the intended precedence.
    UNIT_ABBREVIATIONS_NUMBERED = {
      /(?:ap|dep)(?:ar)?t(?:me?nt)?/i => 'Apt',
      /box/i => 'Box',
      /bu?i?ldi?n?g/i => 'Bldg',
      /dep(artmen)?t/i => 'Dept',
      /flo*r?/i => 'Fl',
      /ha?nga?r/i => 'Hngr',
      /lo?t/i => 'Lot',
      /ro*m/i => 'Rm',
      /pier/i => 'Pier',
      /p\W*[om]\W*b(?:ox)?/i => 'PO Box',
      /slip/i => 'Slip',
      /spa?ce?/i => 'Spc',
      /stop/i => 'Stop',
      /su?i?te/i => 'Ste',
      /tra?i?le?r/i => 'Trlr',
      /uni?t/i => 'Unit'
    }.freeze

    UNIT_ABBREVIATIONS_UNNUMBERED = {
      /ba?se?me?n?t/i => 'Bsmt',
      /fro?nt/i => 'Frnt',
      /lo?bby/i => 'Lbby',
      /lowe?r/i => 'Lowr',
      /off?i?ce?/i => 'Ofc',
      /pe?n?t?ho?u?s?e?/i => 'PH',
      /rear/i => 'Rear',
      /side/i => 'Side',
      /uppe?r/i => 'Uppr'
    }.freeze

    # Numbered patterns first, then unnumbered ones.
    UNIT_ABBREVIATIONS = UNIT_ABBREVIATIONS_NUMBERED.merge(UNIT_ABBREVIATIONS_UNNUMBERED).freeze

    # Lower-case state or territory name => two-letter code.
    STATE_CODES = {
      'alabama' => 'AL',
      'alaska' => 'AK',
      'american samoa' => 'AS',
      'arizona' => 'AZ',
      'arkansas' => 'AR',
      'california' => 'CA',
      'colorado' => 'CO',
      'connecticut' => 'CT',
      'delaware' => 'DE',
      'district of columbia' => 'DC',
      'federated states of micronesia' => 'FM',
      'florida' => 'FL',
      'georgia' => 'GA',
      'guam' => 'GU',
      'hawaii' => 'HI',
      'idaho' => 'ID',
      'illinois' => 'IL',
      'indiana' => 'IN',
      'iowa' => 'IA',
      'kansas' => 'KS',
      'kentucky' => 'KY',
      'louisiana' => 'LA',
      'maine' => 'ME',
      'marshall islands' => 'MH',
      'maryland' => 'MD',
      'massachusetts' => 'MA',
      'michigan' => 'MI',
      'minnesota' => 'MN',
      'mississippi' => 'MS',
      'missouri' => 'MO',
      'montana' => 'MT',
      'nebraska' => 'NE',
      'nevada' => 'NV',
      'new hampshire' => 'NH',
      'new jersey' => 'NJ',
      'new mexico' => 'NM',
      'new york' => 'NY',
      'north carolina' => 'NC',
      'north dakota' => 'ND',
      'northern mariana islands' => 'MP',
      'ohio' => 'OH',
      'oklahoma' => 'OK',
      'oregon' => 'OR',
      'palau' => 'PW',
      'pennsylvania' => 'PA',
      'puerto rico' => 'PR',
      'rhode island' => 'RI',
      'south carolina' => 'SC',
      'south dakota' => 'SD',
      'tennessee' => 'TN',
      'texas' => 'TX',
      'utah' => 'UT',
      'vermont' => 'VT',
      'virgin islands' => 'VI',
      'virginia' => 'VA',
      'washington' => 'WA',
      'west virginia' => 'WV',
      'wisconsin' => 'WI',
      'wyoming' => 'WY'
    }.freeze

    # Two-letter code => lower-case state or territory name.
    STATE_NAMES = STATE_CODES.invert.freeze

    # FIPS code => two-letter state code.
    STATE_FIPS = {
      '01' => 'AL',
      '02' => 'AK',
      '04' => 'AZ',
      '05' => 'AR',
      '06' => 'CA',
      '08' => 'CO',
      '09' => 'CT',
      '10' => 'DE',
      '11' => 'DC',
      '12' => 'FL',
      '13' => 'GA',
      '15' => 'HI',
      '16' => 'ID',
      '17' => 'IL',
      '18' => 'IN',
      '19' => 'IA',
      '20' => 'KS',
      '21' => 'KY',
      '22' => 'LA',
      '23' => 'ME',
      '24' => 'MD',
      '25' => 'MA',
      '26' => 'MI',
      '27' => 'MN',
      '28' => 'MS',
      '29' => 'MO',
      '30' => 'MT',
      '31' => 'NE',
      '32' => 'NV',
      '33' => 'NH',
      '34' => 'NJ',
      '35' => 'NM',
      '36' => 'NY',
      '37' => 'NC',
      '38' => 'ND',
      '39' => 'OH',
      '40' => 'OK',
      '41' => 'OR',
      '42' => 'PA',
      '44' => 'RI',
      '45' => 'SC',
      '46' => 'SD',
      '47' => 'TN',
      '48' => 'TX',
      '49' => 'UT',
      '50' => 'VT',
      '51' => 'VA',
      '53' => 'WA',
      '54' => 'WV',
      '55' => 'WI',
      '56' => 'WY',
      '72' => 'PR',
      '78' => 'VI'
    }.freeze

    # Two-letter state code => FIPS code.
    FIPS_STATES = STATE_FIPS.invert.freeze

    # Parsed component name => table whose lower-case keys are replaced by
    # their values during normalization.
    NORMALIZE_MAP = {
      'prefix' => DIRECTIONAL,
      'prefix1' => DIRECTIONAL,
      'prefix2' => DIRECTIONAL,
      'suffix' => DIRECTIONAL,
      'suffix1' => DIRECTIONAL,
      'suffix2' => DIRECTIONAL,
      'street_type' => STREET_TYPES,
      'street_type1' => STREET_TYPES,
      'street_type2' => STREET_TYPES,
      'street' => NUMERIC_STREET_NAMES,
      'state' => STATE_CODES
    }.freeze
  end
end