street_sweeper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.rubocop.yml +1472 -0
- data/.travis.yml +5 -0
- data/Gemfile +2 -0
- data/LICENCE +21 -0
- data/README.md +70 -0
- data/Rakefile +6 -0
- data/lib/street_sweeper.rb +4 -0
- data/lib/street_sweeper/address.rb +103 -0
- data/lib/street_sweeper/base.rb +121 -0
- data/lib/street_sweeper/constants.rb +568 -0
- data/lib/street_sweeper/matchers.rb +173 -0
- data/lib/version.rb +3 -0
- data/spec/address_spec.rb +530 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/street_sweeper_spec.rb +637 -0
- data/street_sweeper.gemspec +26 -0
- metadata +106 -0
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENCE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Derrek Long
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
[Build Status](https://travis-ci.org/williamatodd/street_sweeper)
|
2
|
+
|
3
|
+
# DESCRIPTION
|
4
|
+
|
5
|
+
Parses a string and returns a normalized Address object. When the string is not a US address, it returns nil.
|
6
|
+
|
7
|
+
This is a resurrected fork of [StreetAddress::US](https://github.com/street-address-rb/street-address), which was itself a port of the Perl module [Geo::StreetAddress::US](https://github.com/timbunce/Geo-StreetAddress-US) originally written by Schuyler D. Erle.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
```shell
|
12
|
+
gem install street_sweeper
|
13
|
+
```
|
14
|
+
|
15
|
+
then in your code
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require 'street_sweeper'
|
19
|
+
```
|
20
|
+
|
21
|
+
or from Gemfile
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
gem 'street_sweeper', require: "street_sweeper"
|
25
|
+
```
|
26
|
+
|
27
|
+
## Basic Usage
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require 'street_sweeper'
|
31
|
+
|
32
|
+
address = StreetSweeper.parse("1600 Pennsylvania Ave, Washington, DC, 20500")
|
33
|
+
address.street # Pennsylvania
|
34
|
+
address.number # 1600
|
35
|
+
address.postal_code # 20500
|
36
|
+
address.city # Washington
|
37
|
+
address.state # DC
|
38
|
+
address.state_name # District of columbia
|
39
|
+
address.street_type # Ave
|
40
|
+
address.intersection? # false
|
41
|
+
address.full_street_address # 1600 Pennsylvania Ave, Washington, DC 20500
|
42
|
+
|
43
|
+
address = StreetSweeper.parse("1600 Pennsylvania Ave")
|
44
|
+
address.street # Pennsylvania
|
45
|
+
address.number # 1600
|
46
|
+
address.state # nil
|
47
|
+
|
48
|
+
address = StreetSweeper.parse("5904 Richmond Hwy Ste 340 Alexandria VA 22303-1864")
|
49
|
+
address.street_address_1 # 5904 Richmond Hwy
|
50
|
+
address.street_address_2 # Ste 340
|
51
|
+
address.full_postal_code # 22303-1864
|
52
|
+
address.postal_code_ext # 1864
|
53
|
+
address.state_name # Virginia
|
54
|
+
address.state_fips # 51
|
55
|
+
|
56
|
+
```
|
57
|
+
## Stricter Parsing
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
address = StreetSweeper.parse_address("1600 Pennsylvania Avenue")
|
61
|
+
# nil - not enough information to be a full address
|
62
|
+
|
63
|
+
address = StreetSweeper.parse_address("1600 Pennsylvania Ave, Washington, DC, 20500")
|
64
|
+
# same results as above
|
65
|
+
```
|
66
|
+
|
67
|
+
## License
|
68
|
+
The [MIT License](http://opensource.org/licenses/MIT)
|
69
|
+
|
70
|
+
Copyright (c) 2007-2018 Contributors
|
data/Rakefile
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
module StreetSweeper
  # Value object holding the individual components of a parsed address and
  # the helpers that render those components back into display strings.
  class Address
    attr_accessor :number, :street, :street_type, :unit, :unit_prefix,
                  :suffix, :prefix, :city, :state, :postal_code,
                  :postal_code_ext, :street2, :street_type2, :suffix2,
                  :prefix2, :redundant_street_type

    # Builds an address from attribute-name/value pairs. Each pair is applied
    # through the matching public writer, so an unknown name raises
    # NoMethodError.
    def initialize(args)
      args.each { |name, value| public_send("#{name}=", value) }
    end

    # Postal code joined to its extension with a dash when an extension is
    # present; nil when there is no postal code.
    def full_postal_code
      return nil unless postal_code
      return postal_code unless postal_code_ext

      "#{postal_code}-#{postal_code_ext}"
    end

    # Looks the state up in Constants::FIPS_STATES.
    def state_fips
      Constants::FIPS_STATES[state]
    end

    # Looks the state up in Constants::STATE_NAMES and capitalizes the result
    # (String#capitalize: only the first character is upcased).
    def state_name
      name = Constants::STATE_NAMES[state]
      name && name.capitalize
    end

    # True when a second street is set.
    def intersection?
      !street2.nil?
    end

    # First display line: "<street one> and <street two>" for intersections,
    # otherwise the street address followed by the unit designation.
    def line1
      return "#{street_address_1} #{street_address_2}".strip unless intersection?

      first_leg = intersection_leg(prefix, street, street_type, suffix)
      second_leg = intersection_leg(prefix2, street2, street_type2, suffix2)
      (first_leg + ['and'] + second_leg).join(' ').strip
    end

    # Second display line: "City, ST" followed by the full postal code.
    def line2
      locality = [city, state].select { |part| part }.join(', ')
      zip = full_postal_code
      locality += " #{zip}" if zip
      locality.strip
    end

    # Number, prefix, street, street type and suffix joined by spaces. The
    # street type is left out when it is flagged as redundant. Delegates to
    # #line1 for intersections.
    def street_address_1
      return line1 if intersection?

      shown_type = redundant_street_type ? nil : street_type
      optional = [prefix, street, shown_type, suffix].select { |piece| piece }
      ([number] + optional).join(' ').strip
    end

    # Unit designation: "<unit_prefix> <unit>", or "# <unit>" when a unit is
    # set without a prefix. Empty string when neither is set.
    def street_address_2
      pieces = []
      if unit_prefix
        pieces.push(unit_prefix)
        pieces.push(unit) if unit
      elsif unit
        pieces.push("\# #{unit}")
      end
      pieces.join(' ').strip
    end

    # Both display lines joined by ", ", skipping whichever is empty.
    def full_street_address
      [line1, line2].select { |line| !line.empty? }.join(', ')
    end

    # Hash of every instance variable that has been set, keyed by its name as
    # a symbol without the leading "@".
    def to_h
      instance_variables.map do |ivar|
        [ivar[1..-1].to_sym, instance_variable_get(ivar)]
      end.to_h
    end

    private

    # Components of one street of an intersection; the name is always kept,
    # the other parts only when set.
    def intersection_leg(pre, name, type, suf)
      head = pre ? [pre] : []
      tail = [type, suf].select { |part| part }
      head + [name] + tail
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# Module-level parsing API. Each parser matches the input against a regular
# expression supplied by Matchers, normalizes the named captures with the
# tables in Constants and wraps the result in a StreetSweeper::Address.
module StreetSweeper
  class << self
    # Parses +location+ with the most specific parser that accepts it.
    #
    # When Matchers.corner_regexp matches, the string is handled by
    # #parse_intersection. Otherwise #parse_po_address, #parse_address and
    # #parse_informal_address are tried in that order.
    #
    # @param location [String] text to parse
    # @param args [Hash] options forwarded to the parser
    #   (:avoid_redundant_street_type is the only key read here)
    # @return [StreetSweeper::Address, nil] nil when nothing matches
    def parse(location, args = {})
      return parse_intersection(location, args) if Matchers.corner_regexp.match(location)

      parse_po_address(location, args) ||
        parse_address(location, args) ||
        parse_informal_address(location, args)
    end

    # Parses +address+ with Matchers.address_regexp.
    # @return [StreetSweeper::Address, nil]
    def parse_address(address, args = {})
      parse_with(Matchers.address_regexp, address, args)
    end

    # Parses +address+ with Matchers.po_address_regexp.
    # @return [StreetSweeper::Address, nil]
    def parse_po_address(address, args = {})
      parse_with(Matchers.po_address_regexp, address, args)
    end

    # Parses +address+ with Matchers.informal_address_regexp.
    # @return [StreetSweeper::Address, nil]
    def parse_informal_address(address, args = {})
      parse_with(Matchers.informal_address_regexp, address, args)
    end

    # Parses +intersection+ with Matchers.intersection_regexp.
    #
    # The regexp reuses the group names "street" and "street_type" for both
    # streets, so the captures are read by position: the first non-nil one is
    # the first street, the second non-nil one is the second street.
    #
    # @param args [Hash] same options as #parse (defaults to none, matching
    #   the other parsers)
    # @return [StreetSweeper::Address, nil]
    def parse_intersection(intersection, args = {})
      regexp = Matchers.intersection_regexp
      matched = regexp.match(intersection)
      return unless matched

      hash = match_to_hash(matched)
      captures = regexp.named_captures

      streets = captures['street'].map { |pos| matched[pos.to_i] }.compact
      hash['street'] = streets[0] if streets[0]
      hash['street2'] = streets[1] if streets[1]

      street_types = captures['street_type'].map { |pos| matched[pos.to_i] }.compact
      hash['street_type'] = street_types[0] if street_types[0]
      hash['street_type2'] = street_types[1] if street_types[1]

      # A single (or repeated) street type with a trailing "s" whose singular
      # form is itself a street type is applied to both streets in singular
      # form.
      if hash['street_type'] && (!hash['street_type2'] || (hash['street_type'] == hash['street_type2']))
        type = hash['street_type'].clone
        hash['street_type'] = hash['street_type2'] = type if type.gsub!(/s\W*$/i, '') && /\A#{Matchers.street_type_regexp}\z/i =~ type
      end

      to_address(hash, args)
    end

    private

    # Shared body of the single-regexp parsers: match, collect the named
    # captures, normalize. Returns nil when +regexp+ does not match.
    def parse_with(regexp, text, args)
      matched = regexp.match(text)
      return unless matched

      to_address(match_to_hash(matched), args)
    end

    # Named captures of +matched+ as a String-keyed hash, leaving out groups
    # that did not participate or captured only whitespace.
    def match_to_hash(matched)
      matched.names.each_with_object({}) do |name, hash|
        value = matched[name]
        hash[name] = value if value && !value.strip.empty?
      end
    end

    # Normalizes the captured components in +input+ (mutating both the hash
    # and its string values) and builds the Address.
    #
    # @param input [Hash{String => String}] captures from #match_to_hash
    # @param args [Hash] :avoid_redundant_street_type drops a street type
    #   whose entry in Matchers.street_type_matches matches the street name
    # @return [StreetSweeper::Address]
    def to_address(input, args)
      # strip off some punctuation and whitespace
      input.each_value do |string|
        string.strip!
        string.gsub!(/[^\w\s\-\#\&]/, '')
      end

      input['redundant_street_type'] = false
      if input['street'] && !input['street_type']
        # No separate type was captured: look for one inside the street name
        # and flag it so it is not rendered twice.
        matched = Matchers.street_regexp.match(input['street'])
        input['street_type'] = matched && matched['street_type']
        input['redundant_street_type'] = true
      end

      ## abbreviate unit prefixes
      if input['unit_prefix']
        # Every pattern is tried against the current value; the last one that
        # matches decides the abbreviation.
        Constants::UNIT_ABBREVIATIONS.each_pair do |regex, abbr|
          regex.match(input['unit_prefix']) { |_m| input['unit_prefix'] = abbr }
        end
      end

      Constants::NORMALIZE_MAP.each_pair do |key, map|
        next unless input[key]

        mapping = map[input[key].downcase]
        input[key] = mapping if mapping
      end

      if args[:avoid_redundant_street_type]
        ['', '1', '2'].each do |suffix|
          street = input['street' + suffix]
          type = input['street_type' + suffix]
          next if !street || !type

          type_regexp = Matchers.street_type_matches[type.downcase]
          input.delete('street_type' + suffix) if type_regexp && type_regexp.match(street)
        end
      end

      # attempt to expand directional prefixes on place names
      if input['city']
        input['city'].gsub!(/^(#{Matchers.dircode_regexp})\s+(?=\S)/) do
          # Use the whole captured code: indexing the matched text with [0]
          # yields only its first character, which turned "NE" into "north".
          code = Regexp.last_match(1)
          "#{Constants::DIRECTION_CODES.fetch(code.upcase, code)} "
        end
      end

      %w[street street_type street2 street_type2 city unit_prefix].each do |k|
        input[k] = input[k].split.map { |elem| upcase_or_capitalize(elem) }.join(' ') if input[k]
      end

      StreetSweeper::Address.new(input)
    end

    # Upcases the tokens "po", "ne", "nw", "sw" and "se" (any case);
    # capitalizes every other token.
    def upcase_or_capitalize(elem)
      return elem.upcase if elem.downcase =~ /\A(po|ne|nw|sw|se)\z/

      elem.capitalize
    end
  end
end
|
@@ -0,0 +1,568 @@
|
|
1
|
+
module StreetSweeper
|
2
|
+
class Constants
|
3
|
+
DIRECTIONAL = {
|
4
|
+
'north' => 'N',
|
5
|
+
'northeast' => 'NE',
|
6
|
+
'east' => 'E',
|
7
|
+
'southeast' => 'SE',
|
8
|
+
'south' => 'S',
|
9
|
+
'southwest' => 'SW',
|
10
|
+
'west' => 'W',
|
11
|
+
'northwest' => 'NW'
|
12
|
+
}.freeze
|
13
|
+
DIRECTION_CODES = DIRECTIONAL.invert
|
14
|
+
|
15
|
+
STREET_TYPES = {
|
16
|
+
'allee' => 'aly',
|
17
|
+
'alley' => 'aly',
|
18
|
+
'ally' => 'aly',
|
19
|
+
'anex' => 'anx',
|
20
|
+
'annex' => 'anx',
|
21
|
+
'annx' => 'anx',
|
22
|
+
'arcade' => 'arc',
|
23
|
+
'av' => 'ave',
|
24
|
+
'aven' => 'ave',
|
25
|
+
'avenu' => 'ave',
|
26
|
+
'avenue' => 'ave',
|
27
|
+
'avn' => 'ave',
|
28
|
+
'avnue' => 'ave',
|
29
|
+
'bayoo' => 'byu',
|
30
|
+
'bayou' => 'byu',
|
31
|
+
'beach' => 'bch',
|
32
|
+
'bend' => 'bnd',
|
33
|
+
'bluf' => 'blf',
|
34
|
+
'bluff' => 'blf',
|
35
|
+
'bluffs' => 'blfs',
|
36
|
+
'bot' => 'btm',
|
37
|
+
'bottm' => 'btm',
|
38
|
+
'bottom' => 'btm',
|
39
|
+
'boul' => 'blvd',
|
40
|
+
'boulevard' => 'blvd',
|
41
|
+
'boulv' => 'blvd',
|
42
|
+
'branch' => 'br',
|
43
|
+
'brdge' => 'brg',
|
44
|
+
'bridge' => 'brg',
|
45
|
+
'brnch' => 'br',
|
46
|
+
'brook' => 'brk',
|
47
|
+
'brooks' => 'brks',
|
48
|
+
'burg' => 'bg',
|
49
|
+
'burgs' => 'bgs',
|
50
|
+
'bypa' => 'byp',
|
51
|
+
'bypas' => 'byp',
|
52
|
+
'bypass' => 'byp',
|
53
|
+
'byps' => 'byp',
|
54
|
+
'camp' => 'cp',
|
55
|
+
'canyn' => 'cyn',
|
56
|
+
'canyon' => 'cyn',
|
57
|
+
'cape' => 'cpe',
|
58
|
+
'causeway' => 'cswy',
|
59
|
+
'causway' => 'cswy',
|
60
|
+
'cen' => 'ctr',
|
61
|
+
'cent' => 'ctr',
|
62
|
+
'center' => 'ctr',
|
63
|
+
'centers' => 'ctrs',
|
64
|
+
'centr' => 'ctr',
|
65
|
+
'centre' => 'ctr',
|
66
|
+
'circ' => 'cir',
|
67
|
+
'circl' => 'cir',
|
68
|
+
'circle' => 'cir',
|
69
|
+
'circles' => 'cirs',
|
70
|
+
'ck' => 'crk',
|
71
|
+
'cliff' => 'clf',
|
72
|
+
'cliffs' => 'clfs',
|
73
|
+
'club' => 'clb',
|
74
|
+
'cmp' => 'cp',
|
75
|
+
'cnter' => 'ctr',
|
76
|
+
'cntr' => 'ctr',
|
77
|
+
'cnyn' => 'cyn',
|
78
|
+
'common' => 'cmn',
|
79
|
+
'corner' => 'cor',
|
80
|
+
'corners' => 'cors',
|
81
|
+
'course' => 'crse',
|
82
|
+
'court' => 'ct',
|
83
|
+
'courts' => 'cts',
|
84
|
+
'cove' => 'cv',
|
85
|
+
'coves' => 'cvs',
|
86
|
+
'cr' => 'crk',
|
87
|
+
'crcl' => 'cir',
|
88
|
+
'crcle' => 'cir',
|
89
|
+
'crecent' => 'cres',
|
90
|
+
'creek' => 'crk',
|
91
|
+
'crescent' => 'cres',
|
92
|
+
'cresent' => 'cres',
|
93
|
+
'crest' => 'crst',
|
94
|
+
'crossing' => 'xing',
|
95
|
+
'crossroad' => 'xrd',
|
96
|
+
'crscnt' => 'cres',
|
97
|
+
'crsent' => 'cres',
|
98
|
+
'crsnt' => 'cres',
|
99
|
+
'crssing' => 'xing',
|
100
|
+
'crssng' => 'xing',
|
101
|
+
'crt' => 'ct',
|
102
|
+
'curve' => 'curv',
|
103
|
+
'dale' => 'dl',
|
104
|
+
'dam' => 'dm',
|
105
|
+
'div' => 'dv',
|
106
|
+
'divide' => 'dv',
|
107
|
+
'driv' => 'dr',
|
108
|
+
'drive' => 'dr',
|
109
|
+
'drives' => 'drs',
|
110
|
+
'drv' => 'dr',
|
111
|
+
'dvd' => 'dv',
|
112
|
+
'estate' => 'est',
|
113
|
+
'estates' => 'ests',
|
114
|
+
'exp' => 'expy',
|
115
|
+
'expr' => 'expy',
|
116
|
+
'express' => 'expy',
|
117
|
+
'expressway' => 'expy',
|
118
|
+
'expw' => 'expy',
|
119
|
+
'extension' => 'ext',
|
120
|
+
'extensions' => 'exts',
|
121
|
+
'extn' => 'ext',
|
122
|
+
'extnsn' => 'ext',
|
123
|
+
'falls' => 'fls',
|
124
|
+
'ferry' => 'fry',
|
125
|
+
'field' => 'fld',
|
126
|
+
'fields' => 'flds',
|
127
|
+
'flat' => 'flt',
|
128
|
+
'flats' => 'flts',
|
129
|
+
'ford' => 'frd',
|
130
|
+
'fords' => 'frds',
|
131
|
+
'forest' => 'frst',
|
132
|
+
'forests' => 'frst',
|
133
|
+
'forg' => 'frg',
|
134
|
+
'forge' => 'frg',
|
135
|
+
'forges' => 'frgs',
|
136
|
+
'fork' => 'frk',
|
137
|
+
'forks' => 'frks',
|
138
|
+
'fort' => 'ft',
|
139
|
+
'freeway' => 'fwy',
|
140
|
+
'freewy' => 'fwy',
|
141
|
+
'frry' => 'fry',
|
142
|
+
'frt' => 'ft',
|
143
|
+
'frway' => 'fwy',
|
144
|
+
'frwy' => 'fwy',
|
145
|
+
'garden' => 'gdn',
|
146
|
+
'gardens' => 'gdns',
|
147
|
+
'gardn' => 'gdn',
|
148
|
+
'gateway' => 'gtwy',
|
149
|
+
'gatewy' => 'gtwy',
|
150
|
+
'gatway' => 'gtwy',
|
151
|
+
'glen' => 'gln',
|
152
|
+
'glens' => 'glns',
|
153
|
+
'grden' => 'gdn',
|
154
|
+
'grdn' => 'gdn',
|
155
|
+
'grdns' => 'gdns',
|
156
|
+
'green' => 'grn',
|
157
|
+
'greens' => 'grns',
|
158
|
+
'grov' => 'grv',
|
159
|
+
'grove' => 'grv',
|
160
|
+
'groves' => 'grvs',
|
161
|
+
'gtway' => 'gtwy',
|
162
|
+
'harb' => 'hbr',
|
163
|
+
'harbor' => 'hbr',
|
164
|
+
'harbors' => 'hbrs',
|
165
|
+
'harbr' => 'hbr',
|
166
|
+
'haven' => 'hvn',
|
167
|
+
'havn' => 'hvn',
|
168
|
+
'height' => 'hts',
|
169
|
+
'heights' => 'hts',
|
170
|
+
'hgts' => 'hts',
|
171
|
+
'highway' => 'hwy',
|
172
|
+
'highwy' => 'hwy',
|
173
|
+
'hill' => 'hl',
|
174
|
+
'hills' => 'hls',
|
175
|
+
'hiway' => 'hwy',
|
176
|
+
'hiwy' => 'hwy',
|
177
|
+
'hllw' => 'holw',
|
178
|
+
'hollow' => 'holw',
|
179
|
+
'hollows' => 'holw',
|
180
|
+
'holws' => 'holw',
|
181
|
+
'hrbor' => 'hbr',
|
182
|
+
'ht' => 'hts',
|
183
|
+
'hway' => 'hwy',
|
184
|
+
'inlet' => 'inlt',
|
185
|
+
'island' => 'is',
|
186
|
+
'islands' => 'iss',
|
187
|
+
'isles' => 'isle',
|
188
|
+
'islnd' => 'is',
|
189
|
+
'islnds' => 'iss',
|
190
|
+
'jction' => 'jct',
|
191
|
+
'jctn' => 'jct',
|
192
|
+
'jctns' => 'jcts',
|
193
|
+
'junction' => 'jct',
|
194
|
+
'junctions' => 'jcts',
|
195
|
+
'junctn' => 'jct',
|
196
|
+
'juncton' => 'jct',
|
197
|
+
'key' => 'ky',
|
198
|
+
'keys' => 'kys',
|
199
|
+
'knol' => 'knl',
|
200
|
+
'knoll' => 'knl',
|
201
|
+
'knolls' => 'knls',
|
202
|
+
'la' => 'ln',
|
203
|
+
'lake' => 'lk',
|
204
|
+
'lakes' => 'lks',
|
205
|
+
'landing' => 'lndg',
|
206
|
+
'lane' => 'ln',
|
207
|
+
'lanes' => 'ln',
|
208
|
+
'ldge' => 'ldg',
|
209
|
+
'light' => 'lgt',
|
210
|
+
'lights' => 'lgts',
|
211
|
+
'lndng' => 'lndg',
|
212
|
+
'loaf' => 'lf',
|
213
|
+
'lock' => 'lck',
|
214
|
+
'locks' => 'lcks',
|
215
|
+
'lodg' => 'ldg',
|
216
|
+
'lodge' => 'ldg',
|
217
|
+
'loops' => 'loop',
|
218
|
+
'manor' => 'mnr',
|
219
|
+
'manors' => 'mnrs',
|
220
|
+
'meadow' => 'mdw',
|
221
|
+
'meadows' => 'mdws',
|
222
|
+
'medows' => 'mdws',
|
223
|
+
'mill' => 'ml',
|
224
|
+
'mills' => 'mls',
|
225
|
+
'mission' => 'msn',
|
226
|
+
'missn' => 'msn',
|
227
|
+
'mnt' => 'mt',
|
228
|
+
'mntain' => 'mtn',
|
229
|
+
'mntn' => 'mtn',
|
230
|
+
'mntns' => 'mtns',
|
231
|
+
'motorway' => 'mtwy',
|
232
|
+
'mount' => 'mt',
|
233
|
+
'mountain' => 'mtn',
|
234
|
+
'mountains' => 'mtns',
|
235
|
+
'mountin' => 'mtn',
|
236
|
+
'mssn' => 'msn',
|
237
|
+
'mtin' => 'mtn',
|
238
|
+
'neck' => 'nck',
|
239
|
+
'orchard' => 'orch',
|
240
|
+
'orchrd' => 'orch',
|
241
|
+
'overpass' => 'opas',
|
242
|
+
'ovl' => 'oval',
|
243
|
+
'parks' => 'park',
|
244
|
+
'parkway' => 'pkwy',
|
245
|
+
'parkways' => 'pkwy',
|
246
|
+
'parkwy' => 'pkwy',
|
247
|
+
'passage' => 'psge',
|
248
|
+
'paths' => 'path',
|
249
|
+
'pikes' => 'pike',
|
250
|
+
'pine' => 'pne',
|
251
|
+
'pines' => 'pnes',
|
252
|
+
'pk' => 'park',
|
253
|
+
'pkway' => 'pkwy',
|
254
|
+
'pkwys' => 'pkwy',
|
255
|
+
'pky' => 'pkwy',
|
256
|
+
'place' => 'pl',
|
257
|
+
'plain' => 'pln',
|
258
|
+
'plaines' => 'plns',
|
259
|
+
'plains' => 'plns',
|
260
|
+
'plaza' => 'plz',
|
261
|
+
'plza' => 'plz',
|
262
|
+
'point' => 'pt',
|
263
|
+
'points' => 'pts',
|
264
|
+
'port' => 'prt',
|
265
|
+
'ports' => 'prts',
|
266
|
+
'prairie' => 'pr',
|
267
|
+
'prarie' => 'pr',
|
268
|
+
'prk' => 'park',
|
269
|
+
'prr' => 'pr',
|
270
|
+
'rad' => 'radl',
|
271
|
+
'radial' => 'radl',
|
272
|
+
'radiel' => 'radl',
|
273
|
+
'ranch' => 'rnch',
|
274
|
+
'ranches' => 'rnch',
|
275
|
+
'rapid' => 'rpd',
|
276
|
+
'rapids' => 'rpds',
|
277
|
+
'rdge' => 'rdg',
|
278
|
+
'rest' => 'rst',
|
279
|
+
'ridge' => 'rdg',
|
280
|
+
'ridges' => 'rdgs',
|
281
|
+
'river' => 'riv',
|
282
|
+
'rivr' => 'riv',
|
283
|
+
'rnchs' => 'rnch',
|
284
|
+
'road' => 'rd',
|
285
|
+
'roads' => 'rds',
|
286
|
+
'route' => 'rte',
|
287
|
+
'run' => 'run',
|
288
|
+
'rvr' => 'riv',
|
289
|
+
'shoal' => 'shl',
|
290
|
+
'shoals' => 'shls',
|
291
|
+
'shoar' => 'shr',
|
292
|
+
'shoars' => 'shrs',
|
293
|
+
'shore' => 'shr',
|
294
|
+
'shores' => 'shrs',
|
295
|
+
'skyway' => 'skwy',
|
296
|
+
'spng' => 'spg',
|
297
|
+
'spngs' => 'spgs',
|
298
|
+
'spring' => 'spg',
|
299
|
+
'springs' => 'spgs',
|
300
|
+
'sprng' => 'spg',
|
301
|
+
'sprngs' => 'spgs',
|
302
|
+
'spurs' => 'spur',
|
303
|
+
'sqr' => 'sq',
|
304
|
+
'sqre' => 'sq',
|
305
|
+
'sqrs' => 'sqs',
|
306
|
+
'squ' => 'sq',
|
307
|
+
'square' => 'sq',
|
308
|
+
'squares' => 'sqs',
|
309
|
+
'station' => 'sta',
|
310
|
+
'statn' => 'sta',
|
311
|
+
'stn' => 'sta',
|
312
|
+
'str' => 'st',
|
313
|
+
'strav' => 'stra',
|
314
|
+
'strave' => 'stra',
|
315
|
+
'straven' => 'stra',
|
316
|
+
'stravenue' => 'stra',
|
317
|
+
'stravn' => 'stra',
|
318
|
+
'stream' => 'strm',
|
319
|
+
'street' => 'st',
|
320
|
+
'streets' => 'sts',
|
321
|
+
'streme' => 'strm',
|
322
|
+
'strt' => 'st',
|
323
|
+
'strvn' => 'stra',
|
324
|
+
'strvnue' => 'stra',
|
325
|
+
'sumit' => 'smt',
|
326
|
+
'sumitt' => 'smt',
|
327
|
+
'summit' => 'smt',
|
328
|
+
'terr' => 'ter',
|
329
|
+
'terrace' => 'ter',
|
330
|
+
'throughway' => 'trwy',
|
331
|
+
'tpk' => 'tpke',
|
332
|
+
'tr' => 'trl',
|
333
|
+
'trace' => 'trce',
|
334
|
+
'traces' => 'trce',
|
335
|
+
'track' => 'trak',
|
336
|
+
'tracks' => 'trak',
|
337
|
+
'trafficway' => 'trfy',
|
338
|
+
'trail' => 'trl',
|
339
|
+
'trails' => 'trl',
|
340
|
+
'trk' => 'trak',
|
341
|
+
'trks' => 'trak',
|
342
|
+
'trls' => 'trl',
|
343
|
+
'trnpk' => 'tpke',
|
344
|
+
'trpk' => 'tpke',
|
345
|
+
'tunel' => 'tunl',
|
346
|
+
'tunls' => 'tunl',
|
347
|
+
'tunnel' => 'tunl',
|
348
|
+
'tunnels' => 'tunl',
|
349
|
+
'tunnl' => 'tunl',
|
350
|
+
'turnpike' => 'tpke',
|
351
|
+
'turnpk' => 'tpke',
|
352
|
+
'underpass' => 'upas',
|
353
|
+
'union' => 'un',
|
354
|
+
'unions' => 'uns',
|
355
|
+
'valley' => 'vly',
|
356
|
+
'valleys' => 'vlys',
|
357
|
+
'vally' => 'vly',
|
358
|
+
'vdct' => 'via',
|
359
|
+
'viadct' => 'via',
|
360
|
+
'viaduct' => 'via',
|
361
|
+
'view' => 'vw',
|
362
|
+
'views' => 'vws',
|
363
|
+
'vill' => 'vlg',
|
364
|
+
'villag' => 'vlg',
|
365
|
+
'village' => 'vlg',
|
366
|
+
'villages' => 'vlgs',
|
367
|
+
'ville' => 'vl',
|
368
|
+
'villg' => 'vlg',
|
369
|
+
'villiage' => 'vlg',
|
370
|
+
'vist' => 'vis',
|
371
|
+
'vista' => 'vis',
|
372
|
+
'vlly' => 'vly',
|
373
|
+
'vst' => 'vis',
|
374
|
+
'vsta' => 'vis',
|
375
|
+
'walks' => 'walk',
|
376
|
+
'well' => 'wl',
|
377
|
+
'wells' => 'wls',
|
378
|
+
'wy' => 'way'
|
379
|
+
}.freeze
|
380
|
+
|
381
|
+
STREET_TYPES_LIST = {}
|
382
|
+
STREET_TYPES.to_a.each do |item|
|
383
|
+
STREET_TYPES_LIST[item[0]] = true
|
384
|
+
STREET_TYPES_LIST[item[1]] = true
|
385
|
+
end
|
386
|
+
|
387
|
+
NUMERIC_STREET_NAMES = {
|
388
|
+
'first' => '1st',
|
389
|
+
'second' => '2nd',
|
390
|
+
'third' => '3rd',
|
391
|
+
'fourth' => '4th',
|
392
|
+
'fifth' => '5th',
|
393
|
+
'sixth' => '6th',
|
394
|
+
'seventh' => '7th',
|
395
|
+
'eighth' => '8th',
|
396
|
+
'ninth' => '9th'
|
397
|
+
}.freeze
|
398
|
+
|
399
|
+
UNIT_ABBREVIATIONS_NUMBERED = {
|
400
|
+
/(?:ap|dep)(?:ar)?t(?:me?nt)?/i => 'Apt',
|
401
|
+
/box/i => 'Box',
|
402
|
+
/bu?i?ldi?n?g/i => 'Bldg',
|
403
|
+
/dep(artmen)?t/i => 'Dept',
|
404
|
+
/flo*r?/i => 'Fl',
|
405
|
+
/ha?nga?r/i => 'Hngr',
|
406
|
+
/lo?t/i => 'Lot',
|
407
|
+
/ro*m/i => 'Rm',
|
408
|
+
/pier/i => 'Pier',
|
409
|
+
/p\W*[om]\W*b(?:ox)?/i => 'PO Box',
|
410
|
+
/slip/i => 'Slip',
|
411
|
+
/spa?ce?/i => 'Spc',
|
412
|
+
/stop/i => 'Stop',
|
413
|
+
/su?i?te/i => 'Ste',
|
414
|
+
/tra?i?le?r/i => 'Trlr',
|
415
|
+
/uni?t/i => 'Unit'
|
416
|
+
}.freeze
|
417
|
+
|
418
|
+
UNIT_ABBREVIATIONS_UNNUMBERED = {
|
419
|
+
/ba?se?me?n?t/i => 'Bsmt',
|
420
|
+
/fro?nt/i => 'Frnt',
|
421
|
+
/lo?bby/i => 'Lbby',
|
422
|
+
/lowe?r/i => 'Lowr',
|
423
|
+
/off?i?ce?/i => 'Ofc',
|
424
|
+
/pe?n?t?ho?u?s?e?/i => 'PH',
|
425
|
+
/rear/i => 'Rear',
|
426
|
+
/side/i => 'Side',
|
427
|
+
/uppe?r/i => 'Uppr'
|
428
|
+
}.freeze
|
429
|
+
|
430
|
+
UNIT_ABBREVIATIONS = UNIT_ABBREVIATIONS_NUMBERED.merge(UNIT_ABBREVIATIONS_UNNUMBERED)
|
431
|
+
|
432
|
+
STATE_CODES = {
|
433
|
+
'alabama' => 'AL',
|
434
|
+
'alaska' => 'AK',
|
435
|
+
'american samoa' => 'AS',
|
436
|
+
'arizona' => 'AZ',
|
437
|
+
'arkansas' => 'AR',
|
438
|
+
'california' => 'CA',
|
439
|
+
'colorado' => 'CO',
|
440
|
+
'connecticut' => 'CT',
|
441
|
+
'delaware' => 'DE',
|
442
|
+
'district of columbia' => 'DC',
|
443
|
+
'federated states of micronesia' => 'FM',
|
444
|
+
'florida' => 'FL',
|
445
|
+
'georgia' => 'GA',
|
446
|
+
'guam' => 'GU',
|
447
|
+
'hawaii' => 'HI',
|
448
|
+
'idaho' => 'ID',
|
449
|
+
'illinois' => 'IL',
|
450
|
+
'indiana' => 'IN',
|
451
|
+
'iowa' => 'IA',
|
452
|
+
'kansas' => 'KS',
|
453
|
+
'kentucky' => 'KY',
|
454
|
+
'louisiana' => 'LA',
|
455
|
+
'maine' => 'ME',
|
456
|
+
'marshall islands' => 'MH',
|
457
|
+
'maryland' => 'MD',
|
458
|
+
'massachusetts' => 'MA',
|
459
|
+
'michigan' => 'MI',
|
460
|
+
'minnesota' => 'MN',
|
461
|
+
'mississippi' => 'MS',
|
462
|
+
'missouri' => 'MO',
|
463
|
+
'montana' => 'MT',
|
464
|
+
'nebraska' => 'NE',
|
465
|
+
'nevada' => 'NV',
|
466
|
+
'new hampshire' => 'NH',
|
467
|
+
'new jersey' => 'NJ',
|
468
|
+
'new mexico' => 'NM',
|
469
|
+
'new york' => 'NY',
|
470
|
+
'north carolina' => 'NC',
|
471
|
+
'north dakota' => 'ND',
|
472
|
+
'northern mariana islands' => 'MP',
|
473
|
+
'ohio' => 'OH',
|
474
|
+
'oklahoma' => 'OK',
|
475
|
+
'oregon' => 'OR',
|
476
|
+
'palau' => 'PW',
|
477
|
+
'pennsylvania' => 'PA',
|
478
|
+
'puerto rico' => 'PR',
|
479
|
+
'rhode island' => 'RI',
|
480
|
+
'south carolina' => 'SC',
|
481
|
+
'south dakota' => 'SD',
|
482
|
+
'tennessee' => 'TN',
|
483
|
+
'texas' => 'TX',
|
484
|
+
'utah' => 'UT',
|
485
|
+
'vermont' => 'VT',
|
486
|
+
'virgin islands' => 'VI',
|
487
|
+
'virginia' => 'VA',
|
488
|
+
'washington' => 'WA',
|
489
|
+
'west virginia' => 'WV',
|
490
|
+
'wisconsin' => 'WI',
|
491
|
+
'wyoming' => 'WY'
|
492
|
+
}.freeze
|
493
|
+
|
494
|
+
STATE_NAMES = STATE_CODES.invert
|
495
|
+
|
496
|
+
STATE_FIPS = {
|
497
|
+
'01' => 'AL',
|
498
|
+
'02' => 'AK',
|
499
|
+
'04' => 'AZ',
|
500
|
+
'05' => 'AR',
|
501
|
+
'06' => 'CA',
|
502
|
+
'08' => 'CO',
|
503
|
+
'09' => 'CT',
|
504
|
+
'10' => 'DE',
|
505
|
+
'11' => 'DC',
|
506
|
+
'12' => 'FL',
|
507
|
+
'13' => 'GA',
|
508
|
+
'15' => 'HI',
|
509
|
+
'16' => 'ID',
|
510
|
+
'17' => 'IL',
|
511
|
+
'18' => 'IN',
|
512
|
+
'19' => 'IA',
|
513
|
+
'20' => 'KS',
|
514
|
+
'21' => 'KY',
|
515
|
+
'22' => 'LA',
|
516
|
+
'23' => 'ME',
|
517
|
+
'24' => 'MD',
|
518
|
+
'25' => 'MA',
|
519
|
+
'26' => 'MI',
|
520
|
+
'27' => 'MN',
|
521
|
+
'28' => 'MS',
|
522
|
+
'29' => 'MO',
|
523
|
+
'30' => 'MT',
|
524
|
+
'31' => 'NE',
|
525
|
+
'32' => 'NV',
|
526
|
+
'33' => 'NH',
|
527
|
+
'34' => 'NJ',
|
528
|
+
'35' => 'NM',
|
529
|
+
'36' => 'NY',
|
530
|
+
'37' => 'NC',
|
531
|
+
'38' => 'ND',
|
532
|
+
'39' => 'OH',
|
533
|
+
'40' => 'OK',
|
534
|
+
'41' => 'OR',
|
535
|
+
'42' => 'PA',
|
536
|
+
'44' => 'RI',
|
537
|
+
'45' => 'SC',
|
538
|
+
'46' => 'SD',
|
539
|
+
'47' => 'TN',
|
540
|
+
'48' => 'TX',
|
541
|
+
'49' => 'UT',
|
542
|
+
'50' => 'VT',
|
543
|
+
'51' => 'VA',
|
544
|
+
'53' => 'WA',
|
545
|
+
'54' => 'WV',
|
546
|
+
'55' => 'WI',
|
547
|
+
'56' => 'WY',
|
548
|
+
'72' => 'PR',
|
549
|
+
'78' => 'VI'
|
550
|
+
}.freeze
|
551
|
+
|
552
|
+
FIPS_STATES = STATE_FIPS.invert
|
553
|
+
|
554
|
+
NORMALIZE_MAP = {
|
555
|
+
'prefix' => DIRECTIONAL,
|
556
|
+
'prefix1' => DIRECTIONAL,
|
557
|
+
'prefix2' => DIRECTIONAL,
|
558
|
+
'suffix' => DIRECTIONAL,
|
559
|
+
'suffix1' => DIRECTIONAL,
|
560
|
+
'suffix2' => DIRECTIONAL,
|
561
|
+
'street_type' => STREET_TYPES,
|
562
|
+
'street_type1' => STREET_TYPES,
|
563
|
+
'street_type2' => STREET_TYPES,
|
564
|
+
'street' => NUMERIC_STREET_NAMES,
|
565
|
+
'state' => STATE_CODES
|
566
|
+
}.freeze
|
567
|
+
end
|
568
|
+
end
|