street_sweeper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2
4
+ - jruby
5
+ - rbx-3
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENCE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Derrek Long
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,70 @@
1
+ [![Build Status](https://travis-ci.org/williamatodd/street_sweeper.svg?branch=master)](https://travis-ci.org/williamatodd/street_sweeper)
2
+
3
+ # DESCRIPTION
4
+
5
+ Parses a string and returns a normalized Address object. When the string is not a US address, it returns nil.
6
+
7
+ This is a resurrected fork of [StreetAddress::US](https://github.com/street-address-rb/street-address), which was itself a port of the Perl module [Geo::StreetAddress::US](https://github.com/timbunce/Geo-StreetAddress-US) originally written by Schuyler D. Erle.
8
+
9
+ ## Installation
10
+
11
+ ```shell
12
+ gem install street_sweeper
13
+ ```
14
+
15
+ then in your code
16
+
17
+ ```ruby
18
+ require 'street_sweeper'
19
+ ```
20
+
21
+ or from Gemfile
22
+
23
+ ```ruby
24
+ gem 'street_sweeper'
25
+ ```
26
+
27
+ ## Basic Usage
28
+
29
+ ```ruby
30
+ require 'street_sweeper'
31
+
32
+ address = StreetSweeper.parse("1600 Pennsylvania Ave, Washington, DC, 20500")
33
+ address.street # Pennsylvania
34
+ address.number # 1600
35
+ address.postal_code # 20500
36
+ address.city # Washington
37
+ address.state # DC
38
+ address.state_name # District of columbia
39
+ address.street_type # Ave
40
+ address.intersection? # false
41
+ address.full_street_address # 1600 Pennsylvania Ave, Washington, DC 20500
42
+
43
+ address = StreetSweeper.parse("1600 Pennsylvania Ave")
44
+ address.street # Pennsylvania
45
+ address.number # 1600
46
+ address.state # nil
47
+
48
+ address = StreetSweeper.parse("5904 Richmond Hwy Ste 340 Alexandria VA 22303-1864")
49
+ address.street_address_1 # 5904 Richmond Hwy
50
+ address.street_address_2 # Ste 340
51
+ address.full_postal_code # 22303-1864
52
+ address.postal_code_ext # 1864
53
+ address.state_name # Virginia
54
+ address.state_fips # 51
55
+
56
+ ```
57
+ ## Stricter Parsing
58
+
59
+ ```ruby
60
+ address = StreetSweeper.parse_address("1600 Pennsylvania Avenue")
61
+ # nil - not enough information to be a full address
62
+
63
+ address = StreetSweeper.parse_address("1600 Pennsylvania Ave, Washington, DC, 20500")
64
+ # same results as above
65
+ ```
66
+
67
+ ## License
68
+ The [MIT License](http://opensource.org/licenses/MIT)
69
+
70
+ Copyright (c) 2007-2018 Contributors
@@ -0,0 +1,6 @@
1
+ require 'rspec/core/rake_task'
2
+
3
+ RSpec::Core::RakeTask.new(:spec)
4
+
5
+ # If you want to make this the default task
6
+ task default: :spec
@@ -0,0 +1,4 @@
1
+ require 'street_sweeper/constants'
2
+ require 'street_sweeper/matchers'
3
+ require 'street_sweeper/address'
4
+ require 'street_sweeper/base'
@@ -0,0 +1,103 @@
1
module StreetSweeper
  # Holds the individual components of a parsed address and assembles them
  # back into display strings. Attributes ending in "2" describe the second
  # street of an intersection.
  class Address
    attr_accessor :number, :street, :street_type, :unit, :unit_prefix,
                  :suffix, :prefix, :city, :state, :postal_code,
                  :postal_code_ext, :street2, :street_type2, :suffix2,
                  :prefix2, :redundant_street_type

    # Assigns every key/value pair in +args+ through the matching public
    # writer; a key without a writer raises NoMethodError.
    def initialize(args)
      args.each { |attr, val| public_send(:"#{attr}=", val) }
    end

    # Postal code with its extension appended after a dash when both are
    # present; nil when there is no postal code.
    def full_postal_code
      return nil unless postal_code
      return postal_code unless postal_code_ext

      "#{postal_code}-#{postal_code_ext}"
    end

    # Looks the state up in Constants::FIPS_STATES.
    def state_fips
      Constants::FIPS_STATES[state]
    end

    # Looks the state up in Constants::STATE_NAMES and capitalizes the
    # result (String#capitalize, so only the first letter is upper-cased).
    def state_name
      name = Constants::STATE_NAMES[state]
      name && name.capitalize
    end

    # True when a second street has been set.
    def intersection?
      !street2.nil?
    end

    # First display line: "<street> and <street2>" for an intersection,
    # otherwise the street address followed by the unit.
    def line1
      unless intersection?
        return [street_address_1, street_address_2].join(' ').strip
      end

      # Optional components are skipped when unset; the street names
      # themselves are always included.
      optional = ->(value) { value ? [value] : [] }
      pieces = optional[prefix] + [street] + optional[street_type] + optional[suffix] +
               ['and'] +
               optional[prefix2] + [street2] + optional[street_type2] + optional[suffix2]
      pieces.join(' ').strip
    end

    # Second display line: "City, ST" followed by the full postal code.
    def line2
      locality = [city, state].select { |part| part }.join(', ')
      zip = full_postal_code
      (zip ? "#{locality} #{zip}" : locality).strip
    end

    # Number, prefix, street, street type and suffix joined by spaces.
    # The street type is left out when it is flagged as redundant. For an
    # intersection this is the same as #line1.
    def street_address_1
      return line1 if intersection?

      shown_type = street_type unless redundant_street_type
      trailing = [prefix, street, shown_type, suffix].select { |piece| piece }
      [number, *trailing].join(' ').strip
    end

    # Unit designator, e.g. "Ste 340"; a unit without a prefix is rendered
    # as "# <unit>".
    def street_address_2
      pieces =
        if unit
          unit_prefix ? [unit_prefix, unit] : ["\# #{unit}"]
        elsif unit_prefix
          [unit_prefix]
        else
          []
        end
      pieces.join(' ').strip
    end

    # Both display lines joined by ", ", skipping whichever is empty.
    def full_street_address
      [line1, line2].select { |line| !line.empty? }.join(', ')
    end

    # Hash of every instance variable that has been assigned, keyed by the
    # attribute name as a symbol.
    def to_h
      instance_variables.map do |ivar|
        [ivar.to_s[1..-1].to_sym, instance_variable_get(ivar)]
      end.to_h
    end
  end
end
@@ -0,0 +1,121 @@
1
module StreetSweeper
  class << self
    # Parses +location+ into a StreetSweeper::Address.
    #
    # Strings matching Matchers.corner_regexp are handled as intersections;
    # everything else is tried as a PO address, then a regular address, then
    # an informal address. The first parser that matches wins.
    #
    # @param location [String] free-form address text
    # @param args [Hash] options; only :avoid_redundant_street_type is read
    # @return [StreetSweeper::Address, nil] nil when nothing matches
    def parse(location, args = {})
      if Matchers.corner_regexp.match(location)
        parse_intersection(location, args)
      else
        parse_po_address(location, args) || parse_address(location, args) || parse_informal_address(location, args)
      end
    end

    # Parses +address+ with Matchers.address_regexp.
    # @return [StreetSweeper::Address, nil] nil when the pattern does not match
    def parse_address(address, args = {})
      parse_with(Matchers.address_regexp, address, args)
    end

    # Parses +address+ with Matchers.po_address_regexp.
    # @return [StreetSweeper::Address, nil] nil when the pattern does not match
    def parse_po_address(address, args = {})
      parse_with(Matchers.po_address_regexp, address, args)
    end

    # Parses +address+ with Matchers.informal_address_regexp.
    # @return [StreetSweeper::Address, nil] nil when the pattern does not match
    def parse_informal_address(address, args = {})
      parse_with(Matchers.informal_address_regexp, address, args)
    end

    # Parses +intersection+ with Matchers.intersection_regexp. The pattern
    # names its 'street' and 'street_type' groups more than once, so both
    # occurrences are read by capture position and stored as
    # street/street2 and street_type/street_type2.
    #
    # +args+ defaults to {} like the other public parsers.
    # @return [StreetSweeper::Address, nil] nil when the pattern does not match
    def parse_intersection(intersection, args = {})
      matched = Matchers.intersection_regexp.match(intersection)
      return unless matched
      hash = match_to_hash(matched)

      streets = captures_named(matched, 'street')
      hash['street'] = streets[0] if streets[0]
      hash['street2'] = streets[1] if streets[1]

      street_types = captures_named(matched, 'street_type')
      hash['street_type'] = street_types[0] if street_types[0]
      hash['street_type2'] = street_types[1] if street_types[1]

      # A single type, or the same type twice, may be a shared plural: drop a
      # trailing "s" and, if what is left is itself a street type, use it for
      # both streets.
      if hash['street_type'] && (!hash['street_type2'] || (hash['street_type'] == hash['street_type2']))
        singular = hash['street_type'].clone
        # (?:...) keeps \A and \z applied to the whole interpolated pattern
        # even if it is a bare "a|b|c" alternation.
        if singular.gsub!(/s\W*$/i, '') && /\A(?:#{Matchers.street_type_regexp})\z/i =~ singular
          hash['street_type'] = hash['street_type2'] = singular
        end
      end

      to_address(hash, args)
    end

    private

    # Matches +text+ against +regexp+ and builds an Address from the named
    # captures; returns nil when there is no match.
    def parse_with(regexp, text, args)
      matched = regexp.match(text)
      return unless matched
      to_address(match_to_hash(matched), args)
    end

    # Every non-nil capture for a group name that occurs more than once in
    # Matchers.intersection_regexp, in positional order.
    def captures_named(matched, name)
      Matchers.intersection_regexp.named_captures[name].map { |pos| matched[pos.to_i] }.select { |v| v }
    end

    # Named captures of +matched+ as a Hash, leaving out nil and blank values.
    def match_to_hash(matched)
      matched.names.each_with_object({}) do |name, hash|
        value = matched[name]
        hash[name] = value if value && !value.strip.empty?
      end
    end

    # Normalizes the raw capture hash in place and wraps it in an Address.
    # The steps run in a fixed order because later ones read values written
    # by earlier ones.
    def to_address(input, args)
      strip_punctuation!(input)
      infer_street_type!(input)
      abbreviate_unit_prefix!(input)
      normalize_values!(input)
      drop_redundant_street_types!(input) if args[:avoid_redundant_street_type]
      expand_city_direction!(input)
      titleize_values!(input)

      StreetSweeper::Address.new(input)
    end

    # Trims each value and removes every character that is not a word
    # character, whitespace, "-", "#" or "&".
    def strip_punctuation!(input)
      input.each_value do |string|
        string.strip!
        string.gsub!(/[^\w\s\-\#\&]/, '')
      end
    end

    # When no street type was captured, takes it from the street name via
    # Matchers.street_regexp and flags it as redundant. Leaves the flag false
    # when the street name does not match at all.
    def infer_street_type!(input)
      input['redundant_street_type'] = false
      return unless input['street'] && !input['street_type']

      matched = Matchers.street_regexp.match(input['street'])
      return unless matched # previously raised NoMethodError on nil

      input['street_type'] = matched['street_type']
      input['redundant_street_type'] = true
    end

    # Replaces the unit prefix with the abbreviation of each matching
    # pattern in Constants::UNIT_ABBREVIATIONS. Every pattern is tested
    # against the current value, so the last match wins.
    def abbreviate_unit_prefix!(input)
      return unless input['unit_prefix']

      Constants::UNIT_ABBREVIATIONS.each_pair do |regex, abbr|
        input['unit_prefix'] = abbr if regex =~ input['unit_prefix']
      end
    end

    # Applies the per-field lookup tables in Constants::NORMALIZE_MAP,
    # keeping the original value when the table has no entry for it.
    def normalize_values!(input)
      Constants::NORMALIZE_MAP.each_pair do |key, map|
        next unless input[key]
        mapping = map[input[key].downcase]
        input[key] = mapping if mapping
      end
    end

    # Removes a street type whose pattern from Matchers.street_type_matches
    # also matches the street name. Types without a pattern are kept.
    def drop_redundant_street_types!(input)
      ['', '1', '2'].each do |suffix|
        street = input["street#{suffix}"]
        type = input["street_type#{suffix}"]
        next unless street && type

        type_regexp = Matchers.street_type_matches[type.downcase]
        input.delete("street_type#{suffix}") if type_regexp && type_regexp.match(street)
      end
    end

    # Expands a leading directional code on the city name, e.g. "NE" becomes
    # "northeast" (capitalized later by titleize_values!).
    def expand_city_direction!(input)
      return unless input['city']

      input['city'].gsub!(/^(#{Matchers.dircode_regexp})\s+(?=\S)/) do |match|
        # Look up the whole captured code. Indexing the matched string with
        # [0] only yields its first character, which turned "NE" into "north".
        expansion = Constants::DIRECTION_CODES[Regexp.last_match(1).upcase]
        expansion ? "#{expansion} " : match
      end
    end

    # Capitalizes each word of the street, street type, city and unit prefix
    # fields.
    def titleize_values!(input)
      %w[street street_type street2 street_type2 city unit_prefix].each do |k|
        input[k] = input[k].split.map { |elem| upcase_or_capitalize(elem) }.join(' ') if input[k]
      end
    end

    # Upper-cases "po" and the two-letter compass codes; capitalizes
    # everything else.
    def upcase_or_capitalize(elem)
      return elem.upcase if elem.downcase =~ /^(po|ne|nw|sw|se)$/
      elem.capitalize
    end
  end
end
@@ -0,0 +1,568 @@
1
module StreetSweeper
  # Lookup tables used to recognise and normalize address components.
  # Every table, including the derived ones, is frozen.
  class Constants
    # Lower-case compass direction => abbreviation.
    DIRECTIONAL = {
      'north' => 'N',
      'northeast' => 'NE',
      'east' => 'E',
      'southeast' => 'SE',
      'south' => 'S',
      'southwest' => 'SW',
      'west' => 'W',
      'northwest' => 'NW'
    }.freeze
    # Abbreviation => lower-case compass direction.
    DIRECTION_CODES = DIRECTIONAL.invert.freeze

    # Lower-case street type spelling => lower-case abbreviation.
    STREET_TYPES = {
      'allee' => 'aly',
      'alley' => 'aly',
      'ally' => 'aly',
      'anex' => 'anx',
      'annex' => 'anx',
      'annx' => 'anx',
      'arcade' => 'arc',
      'av' => 'ave',
      'aven' => 'ave',
      'avenu' => 'ave',
      'avenue' => 'ave',
      'avn' => 'ave',
      'avnue' => 'ave',
      'bayoo' => 'byu',
      'bayou' => 'byu',
      'beach' => 'bch',
      'bend' => 'bnd',
      'bluf' => 'blf',
      'bluff' => 'blf',
      'bluffs' => 'blfs',
      'bot' => 'btm',
      'bottm' => 'btm',
      'bottom' => 'btm',
      'boul' => 'blvd',
      'boulevard' => 'blvd',
      'boulv' => 'blvd',
      'branch' => 'br',
      'brdge' => 'brg',
      'bridge' => 'brg',
      'brnch' => 'br',
      'brook' => 'brk',
      'brooks' => 'brks',
      'burg' => 'bg',
      'burgs' => 'bgs',
      'bypa' => 'byp',
      'bypas' => 'byp',
      'bypass' => 'byp',
      'byps' => 'byp',
      'camp' => 'cp',
      'canyn' => 'cyn',
      'canyon' => 'cyn',
      'cape' => 'cpe',
      'causeway' => 'cswy',
      'causway' => 'cswy',
      'cen' => 'ctr',
      'cent' => 'ctr',
      'center' => 'ctr',
      'centers' => 'ctrs',
      'centr' => 'ctr',
      'centre' => 'ctr',
      'circ' => 'cir',
      'circl' => 'cir',
      'circle' => 'cir',
      'circles' => 'cirs',
      'ck' => 'crk',
      'cliff' => 'clf',
      'cliffs' => 'clfs',
      'club' => 'clb',
      'cmp' => 'cp',
      'cnter' => 'ctr',
      'cntr' => 'ctr',
      'cnyn' => 'cyn',
      'common' => 'cmn',
      'corner' => 'cor',
      'corners' => 'cors',
      'course' => 'crse',
      'court' => 'ct',
      'courts' => 'cts',
      'cove' => 'cv',
      'coves' => 'cvs',
      'cr' => 'crk',
      'crcl' => 'cir',
      'crcle' => 'cir',
      'crecent' => 'cres',
      'creek' => 'crk',
      'crescent' => 'cres',
      'cresent' => 'cres',
      'crest' => 'crst',
      'crossing' => 'xing',
      'crossroad' => 'xrd',
      'crscnt' => 'cres',
      'crsent' => 'cres',
      'crsnt' => 'cres',
      'crssing' => 'xing',
      'crssng' => 'xing',
      'crt' => 'ct',
      'curve' => 'curv',
      'dale' => 'dl',
      'dam' => 'dm',
      'div' => 'dv',
      'divide' => 'dv',
      'driv' => 'dr',
      'drive' => 'dr',
      'drives' => 'drs',
      'drv' => 'dr',
      'dvd' => 'dv',
      'estate' => 'est',
      'estates' => 'ests',
      'exp' => 'expy',
      'expr' => 'expy',
      'express' => 'expy',
      'expressway' => 'expy',
      'expw' => 'expy',
      'extension' => 'ext',
      'extensions' => 'exts',
      'extn' => 'ext',
      'extnsn' => 'ext',
      'falls' => 'fls',
      'ferry' => 'fry',
      'field' => 'fld',
      'fields' => 'flds',
      'flat' => 'flt',
      'flats' => 'flts',
      'ford' => 'frd',
      'fords' => 'frds',
      'forest' => 'frst',
      'forests' => 'frst',
      'forg' => 'frg',
      'forge' => 'frg',
      'forges' => 'frgs',
      'fork' => 'frk',
      'forks' => 'frks',
      'fort' => 'ft',
      'freeway' => 'fwy',
      'freewy' => 'fwy',
      'frry' => 'fry',
      'frt' => 'ft',
      'frway' => 'fwy',
      'frwy' => 'fwy',
      'garden' => 'gdn',
      'gardens' => 'gdns',
      'gardn' => 'gdn',
      'gateway' => 'gtwy',
      'gatewy' => 'gtwy',
      'gatway' => 'gtwy',
      'glen' => 'gln',
      'glens' => 'glns',
      'grden' => 'gdn',
      'grdn' => 'gdn',
      'grdns' => 'gdns',
      'green' => 'grn',
      'greens' => 'grns',
      'grov' => 'grv',
      'grove' => 'grv',
      'groves' => 'grvs',
      'gtway' => 'gtwy',
      'harb' => 'hbr',
      'harbor' => 'hbr',
      'harbors' => 'hbrs',
      'harbr' => 'hbr',
      'haven' => 'hvn',
      'havn' => 'hvn',
      'height' => 'hts',
      'heights' => 'hts',
      'hgts' => 'hts',
      'highway' => 'hwy',
      'highwy' => 'hwy',
      'hill' => 'hl',
      'hills' => 'hls',
      'hiway' => 'hwy',
      'hiwy' => 'hwy',
      'hllw' => 'holw',
      'hollow' => 'holw',
      'hollows' => 'holw',
      'holws' => 'holw',
      'hrbor' => 'hbr',
      'ht' => 'hts',
      'hway' => 'hwy',
      'inlet' => 'inlt',
      'island' => 'is',
      'islands' => 'iss',
      'isles' => 'isle',
      'islnd' => 'is',
      'islnds' => 'iss',
      'jction' => 'jct',
      'jctn' => 'jct',
      'jctns' => 'jcts',
      'junction' => 'jct',
      'junctions' => 'jcts',
      'junctn' => 'jct',
      'juncton' => 'jct',
      'key' => 'ky',
      'keys' => 'kys',
      'knol' => 'knl',
      'knoll' => 'knl',
      'knolls' => 'knls',
      'la' => 'ln',
      'lake' => 'lk',
      'lakes' => 'lks',
      'landing' => 'lndg',
      'lane' => 'ln',
      'lanes' => 'ln',
      'ldge' => 'ldg',
      'light' => 'lgt',
      'lights' => 'lgts',
      'lndng' => 'lndg',
      'loaf' => 'lf',
      'lock' => 'lck',
      'locks' => 'lcks',
      'lodg' => 'ldg',
      'lodge' => 'ldg',
      'loops' => 'loop',
      'manor' => 'mnr',
      'manors' => 'mnrs',
      'meadow' => 'mdw',
      'meadows' => 'mdws',
      'medows' => 'mdws',
      'mill' => 'ml',
      'mills' => 'mls',
      'mission' => 'msn',
      'missn' => 'msn',
      'mnt' => 'mt',
      'mntain' => 'mtn',
      'mntn' => 'mtn',
      'mntns' => 'mtns',
      'motorway' => 'mtwy',
      'mount' => 'mt',
      'mountain' => 'mtn',
      'mountains' => 'mtns',
      'mountin' => 'mtn',
      'mssn' => 'msn',
      'mtin' => 'mtn',
      'neck' => 'nck',
      'orchard' => 'orch',
      'orchrd' => 'orch',
      'overpass' => 'opas',
      'ovl' => 'oval',
      'parks' => 'park',
      'parkway' => 'pkwy',
      'parkways' => 'pkwy',
      'parkwy' => 'pkwy',
      'passage' => 'psge',
      'paths' => 'path',
      'pikes' => 'pike',
      'pine' => 'pne',
      'pines' => 'pnes',
      'pk' => 'park',
      'pkway' => 'pkwy',
      'pkwys' => 'pkwy',
      'pky' => 'pkwy',
      'place' => 'pl',
      'plain' => 'pln',
      'plaines' => 'plns',
      'plains' => 'plns',
      'plaza' => 'plz',
      'plza' => 'plz',
      'point' => 'pt',
      'points' => 'pts',
      'port' => 'prt',
      'ports' => 'prts',
      'prairie' => 'pr',
      'prarie' => 'pr',
      'prk' => 'park',
      'prr' => 'pr',
      'rad' => 'radl',
      'radial' => 'radl',
      'radiel' => 'radl',
      'ranch' => 'rnch',
      'ranches' => 'rnch',
      'rapid' => 'rpd',
      'rapids' => 'rpds',
      'rdge' => 'rdg',
      'rest' => 'rst',
      'ridge' => 'rdg',
      'ridges' => 'rdgs',
      'river' => 'riv',
      'rivr' => 'riv',
      'rnchs' => 'rnch',
      'road' => 'rd',
      'roads' => 'rds',
      'route' => 'rte',
      'run' => 'run',
      'rvr' => 'riv',
      'shoal' => 'shl',
      'shoals' => 'shls',
      'shoar' => 'shr',
      'shoars' => 'shrs',
      'shore' => 'shr',
      'shores' => 'shrs',
      'skyway' => 'skwy',
      'spng' => 'spg',
      'spngs' => 'spgs',
      'spring' => 'spg',
      'springs' => 'spgs',
      'sprng' => 'spg',
      'sprngs' => 'spgs',
      'spurs' => 'spur',
      'sqr' => 'sq',
      'sqre' => 'sq',
      'sqrs' => 'sqs',
      'squ' => 'sq',
      'square' => 'sq',
      'squares' => 'sqs',
      'station' => 'sta',
      'statn' => 'sta',
      'stn' => 'sta',
      'str' => 'st',
      'strav' => 'stra',
      'strave' => 'stra',
      'straven' => 'stra',
      'stravenue' => 'stra',
      'stravn' => 'stra',
      'stream' => 'strm',
      'street' => 'st',
      'streets' => 'sts',
      'streme' => 'strm',
      'strt' => 'st',
      'strvn' => 'stra',
      'strvnue' => 'stra',
      'sumit' => 'smt',
      'sumitt' => 'smt',
      'summit' => 'smt',
      'terr' => 'ter',
      'terrace' => 'ter',
      'throughway' => 'trwy',
      'tpk' => 'tpke',
      'tr' => 'trl',
      'trace' => 'trce',
      'traces' => 'trce',
      'track' => 'trak',
      'tracks' => 'trak',
      'trafficway' => 'trfy',
      'trail' => 'trl',
      'trails' => 'trl',
      'trk' => 'trak',
      'trks' => 'trak',
      'trls' => 'trl',
      'trnpk' => 'tpke',
      'trpk' => 'tpke',
      'tunel' => 'tunl',
      'tunls' => 'tunl',
      'tunnel' => 'tunl',
      'tunnels' => 'tunl',
      'tunnl' => 'tunl',
      'turnpike' => 'tpke',
      'turnpk' => 'tpke',
      'underpass' => 'upas',
      'union' => 'un',
      'unions' => 'uns',
      'valley' => 'vly',
      'valleys' => 'vlys',
      'vally' => 'vly',
      'vdct' => 'via',
      'viadct' => 'via',
      'viaduct' => 'via',
      'view' => 'vw',
      'views' => 'vws',
      'vill' => 'vlg',
      'villag' => 'vlg',
      'village' => 'vlg',
      'villages' => 'vlgs',
      'ville' => 'vl',
      'villg' => 'vlg',
      'villiage' => 'vlg',
      'vist' => 'vis',
      'vista' => 'vis',
      'vlly' => 'vly',
      'vst' => 'vis',
      'vsta' => 'vis',
      'walks' => 'walk',
      'well' => 'wl',
      'wells' => 'wls',
      'wy' => 'way'
    }.freeze

    # Every spelling and every abbreviation from STREET_TYPES as a key
    # (value is always true). Keys keep the order in which they first appear
    # when walking STREET_TYPES pair by pair, spelling before abbreviation.
    STREET_TYPES_LIST = STREET_TYPES.to_a.flatten.each_with_object({}) do |type, list|
      list[type] = true
    end.freeze

    # Spelled-out ordinal street name => numeric form.
    NUMERIC_STREET_NAMES = {
      'first' => '1st',
      'second' => '2nd',
      'third' => '3rd',
      'fourth' => '4th',
      'fifth' => '5th',
      'sixth' => '6th',
      'seventh' => '7th',
      'eighth' => '8th',
      'ninth' => '9th'
    }.freeze

    # Pattern for a unit designator that is followed by a number =>
    # abbreviation.
    UNIT_ABBREVIATIONS_NUMBERED = {
      # Only the "ap..." spellings: the former (?:ap|dep) alternative also
      # matched "dept"/"department" and, coming first, shadowed 'Dept' below.
      /ap(?:ar)?t(?:me?nt)?/i => 'Apt',
      /box/i => 'Box',
      /bu?i?ldi?n?g/i => 'Bldg',
      /dep(artmen)?t/i => 'Dept',
      /flo*r?/i => 'Fl',
      /ha?nga?r/i => 'Hngr',
      /lo?t/i => 'Lot',
      /ro*m/i => 'Rm',
      /pier/i => 'Pier',
      /p\W*[om]\W*b(?:ox)?/i => 'PO Box',
      /slip/i => 'Slip',
      /spa?ce?/i => 'Spc',
      /stop/i => 'Stop',
      /su?i?te/i => 'Ste',
      /tra?i?le?r/i => 'Trlr',
      /uni?t/i => 'Unit'
    }.freeze

    # Pattern for a unit designator that stands alone => abbreviation.
    UNIT_ABBREVIATIONS_UNNUMBERED = {
      /ba?se?me?n?t/i => 'Bsmt',
      /fro?nt/i => 'Frnt',
      /lo?bby/i => 'Lbby',
      /lowe?r/i => 'Lowr',
      /off?i?ce?/i => 'Ofc',
      /pe?n?t?ho?u?s?e?/i => 'PH',
      /rear/i => 'Rear',
      /side/i => 'Side',
      /uppe?r/i => 'Uppr'
    }.freeze

    # Numbered patterns followed by the unnumbered ones.
    UNIT_ABBREVIATIONS = UNIT_ABBREVIATIONS_NUMBERED.merge(UNIT_ABBREVIATIONS_UNNUMBERED).freeze

    # Lower-case state or territory name => two-letter code.
    STATE_CODES = {
      'alabama' => 'AL',
      'alaska' => 'AK',
      'american samoa' => 'AS',
      'arizona' => 'AZ',
      'arkansas' => 'AR',
      'california' => 'CA',
      'colorado' => 'CO',
      'connecticut' => 'CT',
      'delaware' => 'DE',
      'district of columbia' => 'DC',
      'federated states of micronesia' => 'FM',
      'florida' => 'FL',
      'georgia' => 'GA',
      'guam' => 'GU',
      'hawaii' => 'HI',
      'idaho' => 'ID',
      'illinois' => 'IL',
      'indiana' => 'IN',
      'iowa' => 'IA',
      'kansas' => 'KS',
      'kentucky' => 'KY',
      'louisiana' => 'LA',
      'maine' => 'ME',
      'marshall islands' => 'MH',
      'maryland' => 'MD',
      'massachusetts' => 'MA',
      'michigan' => 'MI',
      'minnesota' => 'MN',
      'mississippi' => 'MS',
      'missouri' => 'MO',
      'montana' => 'MT',
      'nebraska' => 'NE',
      'nevada' => 'NV',
      'new hampshire' => 'NH',
      'new jersey' => 'NJ',
      'new mexico' => 'NM',
      'new york' => 'NY',
      'north carolina' => 'NC',
      'north dakota' => 'ND',
      'northern mariana islands' => 'MP',
      'ohio' => 'OH',
      'oklahoma' => 'OK',
      'oregon' => 'OR',
      'palau' => 'PW',
      'pennsylvania' => 'PA',
      'puerto rico' => 'PR',
      'rhode island' => 'RI',
      'south carolina' => 'SC',
      'south dakota' => 'SD',
      'tennessee' => 'TN',
      'texas' => 'TX',
      'utah' => 'UT',
      'vermont' => 'VT',
      'virgin islands' => 'VI',
      'virginia' => 'VA',
      'washington' => 'WA',
      'west virginia' => 'WV',
      'wisconsin' => 'WI',
      'wyoming' => 'WY'
    }.freeze

    # Two-letter code => lower-case state or territory name.
    STATE_NAMES = STATE_CODES.invert.freeze

    # Two-digit FIPS code => two-letter state code.
    STATE_FIPS = {
      '01' => 'AL',
      '02' => 'AK',
      '04' => 'AZ',
      '05' => 'AR',
      '06' => 'CA',
      '08' => 'CO',
      '09' => 'CT',
      '10' => 'DE',
      '11' => 'DC',
      '12' => 'FL',
      '13' => 'GA',
      '15' => 'HI',
      '16' => 'ID',
      '17' => 'IL',
      '18' => 'IN',
      '19' => 'IA',
      '20' => 'KS',
      '21' => 'KY',
      '22' => 'LA',
      '23' => 'ME',
      '24' => 'MD',
      '25' => 'MA',
      '26' => 'MI',
      '27' => 'MN',
      '28' => 'MS',
      '29' => 'MO',
      '30' => 'MT',
      '31' => 'NE',
      '32' => 'NV',
      '33' => 'NH',
      '34' => 'NJ',
      '35' => 'NM',
      '36' => 'NY',
      '37' => 'NC',
      '38' => 'ND',
      '39' => 'OH',
      '40' => 'OK',
      '41' => 'OR',
      '42' => 'PA',
      '44' => 'RI',
      '45' => 'SC',
      '46' => 'SD',
      '47' => 'TN',
      '48' => 'TX',
      '49' => 'UT',
      '50' => 'VT',
      '51' => 'VA',
      '53' => 'WA',
      '54' => 'WV',
      '55' => 'WI',
      '56' => 'WY',
      '72' => 'PR',
      '78' => 'VI'
    }.freeze

    # Two-letter state code => two-digit FIPS code.
    FIPS_STATES = STATE_FIPS.invert.freeze

    # Parsed field name => table used to normalize that field's value.
    NORMALIZE_MAP = {
      'prefix' => DIRECTIONAL,
      'prefix1' => DIRECTIONAL,
      'prefix2' => DIRECTIONAL,
      'suffix' => DIRECTIONAL,
      'suffix1' => DIRECTIONAL,
      'suffix2' => DIRECTIONAL,
      'street_type' => STREET_TYPES,
      'street_type1' => STREET_TYPES,
      'street_type2' => STREET_TYPES,
      'street' => NUMERIC_STREET_NAMES,
      'state' => STATE_CODES
    }.freeze
  end
end