street_sweeper 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.rubocop.yml +1472 -0
- data/.travis.yml +5 -0
- data/Gemfile +2 -0
- data/LICENCE +21 -0
- data/README.md +70 -0
- data/Rakefile +6 -0
- data/lib/street_sweeper.rb +4 -0
- data/lib/street_sweeper/address.rb +103 -0
- data/lib/street_sweeper/base.rb +121 -0
- data/lib/street_sweeper/constants.rb +568 -0
- data/lib/street_sweeper/matchers.rb +173 -0
- data/lib/version.rb +3 -0
- data/spec/address_spec.rb +530 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/street_sweeper_spec.rb +637 -0
- data/street_sweeper.gemspec +26 -0
- metadata +106 -0
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENCE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Derrek Long
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/williamatodd/street_sweeper.svg?branch=master)](https://travis-ci.org/williamatodd/street_sweeper)
|
2
|
+
|
3
|
+
# DESCRIPTION
|
4
|
+
|
5
|
+
Parses a string and returns a normalized Address object. When the string is not a US address, it returns nil.
|
6
|
+
|
7
|
+
This is a resurrected fork of [StreetAddress::US](https://github.com/street-address-rb/street-address), which was itself a port of the Perl module [Geo::StreetAddress::US](https://github.com/timbunce/Geo-StreetAddress-US) originally written by Schuyler D. Erle.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
```shell
|
12
|
+
gem install street_sweeper
|
13
|
+
```
|
14
|
+
|
15
|
+
then in your code
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
require 'street_sweeper'
|
19
|
+
```
|
20
|
+
|
21
|
+
or from Gemfile
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
gem 'street_sweeper', require: "street_sweeper"
|
25
|
+
```
|
26
|
+
|
27
|
+
## Basic Usage
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require 'street_sweeper'
|
31
|
+
|
32
|
+
address = StreetSweeper.parse("1600 Pennsylvania Ave, Washington, DC, 20500")
|
33
|
+
address.street # Pennsylvania
|
34
|
+
address.number # 1600
|
35
|
+
address.postal_code # 20500
|
36
|
+
address.city # Washington
|
37
|
+
address.state # DC
|
38
|
+
address.state_name # District of columbia
|
39
|
+
address.street_type # Ave
|
40
|
+
address.intersection? # false
|
41
|
+
address.full_street_address # 1600 Pennsylvania Ave, Washington, DC 20500
|
42
|
+
|
43
|
+
address = StreetSweeper.parse("1600 Pennsylvania Ave")
|
44
|
+
address.street # Pennsylvania
|
45
|
+
address.number # 1600
|
46
|
+
address.state # nil
|
47
|
+
|
48
|
+
address = StreetSweeper.parse("5904 Richmond Hwy Ste 340 Alexandria VA 22303-1864")
|
49
|
+
address.street_address_1 # 5904 Richmond Hwy
|
50
|
+
address.street_address_2 # Ste 340
|
51
|
+
address.full_postal_code # 22303-1864
|
52
|
+
address.postal_code_ext # 1864
|
53
|
+
address.state_name # Virginia
|
54
|
+
address.state_fips # 51
|
55
|
+
|
56
|
+
```
|
57
|
+
## Stricter Parsing
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
address = StreetSweeper.parse_address("1600 Pennsylvania Avenue")
|
61
|
+
# nil - not enough information to be a full address
|
62
|
+
|
63
|
+
address = StreetSweeper.parse_address("1600 Pennsylvania Ave, Washington, DC, 20500")
|
64
|
+
# same results as above
|
65
|
+
```
|
66
|
+
|
67
|
+
## License
|
68
|
+
The [MIT License](http://opensource.org/licenses/MIT)
|
69
|
+
|
70
|
+
Copyright (c) 2007-2018 Contributors
|
data/Rakefile
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
module StreetSweeper
  # Holds the individual components of one parsed address (or intersection)
  # and assembles them into display strings.
  class Address
    attr_accessor :number, :street, :street_type, :unit, :unit_prefix,
                  :suffix, :prefix, :city, :state, :postal_code,
                  :postal_code_ext, :street2, :street_type2, :suffix2,
                  :prefix2, :redundant_street_type

    # Assigns every attribute/value pair in +args+ through the matching
    # public writer; a key without a writer raises NoMethodError.
    def initialize(args)
      args.each { |name, value| public_send(:"#{name}=", value) }
    end

    # Returns the postal code, followed by "-<extension>" when an extension
    # is set, or nil when there is no postal code.
    def full_postal_code
      return unless postal_code
      return postal_code unless postal_code_ext

      "#{postal_code}-#{postal_code_ext}"
    end

    # Looks up +state+ in Constants::FIPS_STATES.
    def state_fips
      Constants::FIPS_STATES[state]
    end

    # Looks up +state+ in Constants::STATE_NAMES and capitalizes the result
    # (String#capitalize: only the first letter ends up upper-case).
    def state_name
      full_name = Constants::STATE_NAMES[state]
      full_name && full_name.capitalize
    end

    # True when a second street is present.
    def intersection?
      !street2.nil?
    end

    # First display line: "<street> and <street2>" (with their optional
    # prefix/type/suffix) for an intersection, otherwise street_address_1
    # followed by street_address_2.
    def line1
      pieces =
        if intersection?
          present(prefix) + [street] + present(street_type, suffix) +
            ['and'] +
            present(prefix2) + [street2] + present(street_type2, suffix2)
        else
          [street_address_1, street_address_2]
        end
      pieces.join(' ').strip
    end

    # Second display line: "<city>, <state> <full postal code>", leaving out
    # whichever parts are missing.
    def line2
      locality = present(city, state).join(', ')
      zip = full_postal_code
      (zip ? "#{locality} #{zip}" : locality).strip
    end

    # Number, prefix, street, street type and suffix joined by spaces. The
    # street type is left out when redundant_street_type is set. For an
    # intersection this is the same as line1.
    def street_address_1
      return line1 if intersection?

      shown_type = street_type unless redundant_street_type
      ([number] + present(prefix, street, shown_type, suffix)).join(' ').strip
    end

    # Unit prefix and unit joined by a space; a unit without a prefix is
    # rendered as "# <unit>".
    def street_address_2
      unit_text = (unit_prefix ? unit : "\# #{unit}") if unit
      present(unit_prefix, unit_text).join(' ').strip
    end

    # line1 and line2 joined by ", ", skipping whichever is empty.
    def full_street_address
      lines = [line1, line2]
      lines.delete_if(&:empty?)
      lines.join(', ')
    end

    # Hash of every instance variable that has been assigned, keyed by its
    # name (as a Symbol, without the leading "@").
    def to_h
      instance_variables.map do |ivar|
        [ivar.to_s.sub(/\A@/, '').to_sym, instance_variable_get(ivar)]
      end.to_h
    end

    private

    # Returns the given values with nil/false entries removed, order kept.
    def present(*values)
      values.select { |value| value }
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# Parsing entry points. Each public method turns a free-form address string
# into a StreetSweeper::Address, or returns nil when its pattern from
# Matchers does not match.
module StreetSweeper
  class << self
    # Parses +location+ as an intersection when it matches
    # Matchers.corner_regexp; otherwise tries the PO-box, regular and informal
    # address parsers in that order and returns the first non-nil result.
    #
    # @param location [String] text to parse
    # @param args [Hash] options; only :avoid_redundant_street_type is read
    # @return [StreetSweeper::Address, nil]
    def parse(location, args = {})
      return parse_intersection(location, args) if Matchers.corner_regexp.match(location)

      parse_po_address(location, args) ||
        parse_address(location, args) ||
        parse_informal_address(location, args)
    end

    # Parses +address+ with Matchers.address_regexp.
    #
    # @return [StreetSweeper::Address, nil] nil when the pattern does not match
    def parse_address(address, args = {})
      parse_with(Matchers.address_regexp, address, args)
    end

    # Parses +address+ with Matchers.po_address_regexp.
    #
    # @return [StreetSweeper::Address, nil] nil when the pattern does not match
    def parse_po_address(address, args = {})
      parse_with(Matchers.po_address_regexp, address, args)
    end

    # Parses +address+ with Matchers.informal_address_regexp.
    #
    # @return [StreetSweeper::Address, nil] nil when the pattern does not match
    def parse_informal_address(address, args = {})
      parse_with(Matchers.informal_address_regexp, address, args)
    end

    # Parses +intersection+ with Matchers.intersection_regexp. The pattern
    # uses the group names "street" and "street_type" more than once; the
    # first two non-nil captures of each become street/street2 and
    # street_type/street_type2.
    #
    # @param args [Hash] same options as #parse (defaults to {} like the
    #   other parse_* methods)
    # @return [StreetSweeper::Address, nil] nil when the pattern does not match
    def parse_intersection(intersection, args = {})
      regexp = Matchers.intersection_regexp
      matched = regexp.match(intersection)
      return unless matched

      hash = match_to_hash(matched)

      streets = captures_named(regexp, matched, 'street')
      hash['street'] = streets[0] if streets[0]
      hash['street2'] = streets[1] if streets[1]

      street_types = captures_named(regexp, matched, 'street_type')
      hash['street_type'] = street_types[0] if street_types[0]
      hash['street_type2'] = street_types[1] if street_types[1]

      # When only one type was captured, or both are the same, try dropping a
      # trailing "s": if the remainder is itself a street type, use it for
      # both streets.
      if hash['street_type'] && (!hash['street_type2'] || hash['street_type'] == hash['street_type2'])
        type = hash['street_type'].clone
        if type.gsub!(/s\W*$/i, '') && /\A#{Matchers.street_type_regexp}\z/i =~ type
          hash['street_type'] = hash['street_type2'] = type
        end
      end

      to_address(hash, args)
    end

    private

    # Shared body of the single-address parsers: match, then build.
    def parse_with(regexp, address, args)
      matched = regexp.match(address)
      return unless matched

      to_address(match_to_hash(matched), args)
    end

    # Every non-nil capture of the (possibly repeated) group +name+, in
    # group order.
    def captures_named(regexp, matched, name)
      regexp.named_captures[name].map { |position| matched[position] }.compact
    end

    # Named captures of +matched+ as a Hash, skipping nil and blank values.
    def match_to_hash(matched)
      matched.names.each_with_object({}) do |name, hash|
        value = matched[name]
        hash[name] = value if value && !value.strip.empty?
      end
    end

    # Cleans and normalizes the captured components in +input+ (mutating the
    # hash and its strings in place) and wraps them in an Address.
    def to_address(input, args)
      # Trim whitespace and drop every character that is not a word
      # character, whitespace, "-", "#" or "&".
      input.each_value do |string|
        string.strip!
        string.gsub!(/[^\w\s\-\#\&]/, '')
      end

      # Set only after the loop above, which expects string values.
      input['redundant_street_type'] = false
      if input['street'] && !input['street_type']
        matched = Matchers.street_regexp.match(input['street'])
        # The street may not match the pattern at all; leave the type unset
        # in that case instead of calling [] on nil.
        if matched
          input['street_type'] = matched['street_type']
          input['redundant_street_type'] = true
        end
      end

      # Abbreviate unit prefixes.
      if input['unit_prefix']
        Constants::UNIT_ABBREVIATIONS.each_pair do |regex, abbr|
          regex.match(input['unit_prefix']) { |_m| input['unit_prefix'] = abbr }
        end
      end

      Constants::NORMALIZE_MAP.each_pair do |key, map|
        next unless input[key]

        mapping = map[input[key].downcase]
        input[key] = mapping if mapping
      end

      if args[:avoid_redundant_street_type]
        ['', '1', '2'].each do |suffix|
          street = input['street' + suffix]
          type = input['street_type' + suffix]
          next if !street || !type

          # The lookup yields nil for a type it does not know; skip then.
          type_regexp = Matchers.street_type_matches[type.downcase]
          input.delete('street_type' + suffix) if type_regexp && type_regexp.match(street)
        end
      end

      # Expand a leading direction code on the city name. The whole captured
      # code is looked up (not just its first character), so "NE" expands to
      # the DIRECTION_CODES entry for "NE" rather than the one for "N".
      if input['city']
        input['city'].gsub!(/^(#{Matchers.dircode_regexp})\s+(?=\S)/) do |match|
          expanded = Constants::DIRECTION_CODES[Regexp.last_match(1).upcase]
          expanded ? "#{expanded} " : match
        end
      end

      %w[street street_type street2 street_type2 city unit_prefix].each do |k|
        input[k] = input[k].split.map { |elem| upcase_or_capitalize(elem) }.join(' ') if input[k]
      end

      StreetSweeper::Address.new(input)
    end

    # Upper-cases "po" and the two-letter direction codes ne/nw/sw/se;
    # capitalizes any other word.
    def upcase_or_capitalize(elem)
      return elem.upcase if elem.downcase =~ /\A(po|ne|nw|sw|se)\z/

      elem.capitalize
    end
  end
end
|
@@ -0,0 +1,568 @@
|
|
1
|
+
module StreetSweeper
|
2
|
+
class Constants
|
3
|
+
DIRECTIONAL = {
|
4
|
+
'north' => 'N',
|
5
|
+
'northeast' => 'NE',
|
6
|
+
'east' => 'E',
|
7
|
+
'southeast' => 'SE',
|
8
|
+
'south' => 'S',
|
9
|
+
'southwest' => 'SW',
|
10
|
+
'west' => 'W',
|
11
|
+
'northwest' => 'NW'
|
12
|
+
}.freeze
|
13
|
+
# Inverse of DIRECTIONAL: direction code => lower-case direction word.
# Frozen like the table it is derived from.
DIRECTION_CODES = DIRECTIONAL.invert.freeze
|
14
|
+
|
15
|
+
STREET_TYPES = {
|
16
|
+
'allee' => 'aly',
|
17
|
+
'alley' => 'aly',
|
18
|
+
'ally' => 'aly',
|
19
|
+
'anex' => 'anx',
|
20
|
+
'annex' => 'anx',
|
21
|
+
'annx' => 'anx',
|
22
|
+
'arcade' => 'arc',
|
23
|
+
'av' => 'ave',
|
24
|
+
'aven' => 'ave',
|
25
|
+
'avenu' => 'ave',
|
26
|
+
'avenue' => 'ave',
|
27
|
+
'avn' => 'ave',
|
28
|
+
'avnue' => 'ave',
|
29
|
+
'bayoo' => 'byu',
|
30
|
+
'bayou' => 'byu',
|
31
|
+
'beach' => 'bch',
|
32
|
+
'bend' => 'bnd',
|
33
|
+
'bluf' => 'blf',
|
34
|
+
'bluff' => 'blf',
|
35
|
+
'bluffs' => 'blfs',
|
36
|
+
'bot' => 'btm',
|
37
|
+
'bottm' => 'btm',
|
38
|
+
'bottom' => 'btm',
|
39
|
+
'boul' => 'blvd',
|
40
|
+
'boulevard' => 'blvd',
|
41
|
+
'boulv' => 'blvd',
|
42
|
+
'branch' => 'br',
|
43
|
+
'brdge' => 'brg',
|
44
|
+
'bridge' => 'brg',
|
45
|
+
'brnch' => 'br',
|
46
|
+
'brook' => 'brk',
|
47
|
+
'brooks' => 'brks',
|
48
|
+
'burg' => 'bg',
|
49
|
+
'burgs' => 'bgs',
|
50
|
+
'bypa' => 'byp',
|
51
|
+
'bypas' => 'byp',
|
52
|
+
'bypass' => 'byp',
|
53
|
+
'byps' => 'byp',
|
54
|
+
'camp' => 'cp',
|
55
|
+
'canyn' => 'cyn',
|
56
|
+
'canyon' => 'cyn',
|
57
|
+
'cape' => 'cpe',
|
58
|
+
'causeway' => 'cswy',
|
59
|
+
'causway' => 'cswy',
|
60
|
+
'cen' => 'ctr',
|
61
|
+
'cent' => 'ctr',
|
62
|
+
'center' => 'ctr',
|
63
|
+
'centers' => 'ctrs',
|
64
|
+
'centr' => 'ctr',
|
65
|
+
'centre' => 'ctr',
|
66
|
+
'circ' => 'cir',
|
67
|
+
'circl' => 'cir',
|
68
|
+
'circle' => 'cir',
|
69
|
+
'circles' => 'cirs',
|
70
|
+
'ck' => 'crk',
|
71
|
+
'cliff' => 'clf',
|
72
|
+
'cliffs' => 'clfs',
|
73
|
+
'club' => 'clb',
|
74
|
+
'cmp' => 'cp',
|
75
|
+
'cnter' => 'ctr',
|
76
|
+
'cntr' => 'ctr',
|
77
|
+
'cnyn' => 'cyn',
|
78
|
+
'common' => 'cmn',
|
79
|
+
'corner' => 'cor',
|
80
|
+
'corners' => 'cors',
|
81
|
+
'course' => 'crse',
|
82
|
+
'court' => 'ct',
|
83
|
+
'courts' => 'cts',
|
84
|
+
'cove' => 'cv',
|
85
|
+
'coves' => 'cvs',
|
86
|
+
'cr' => 'crk',
|
87
|
+
'crcl' => 'cir',
|
88
|
+
'crcle' => 'cir',
|
89
|
+
'crecent' => 'cres',
|
90
|
+
'creek' => 'crk',
|
91
|
+
'crescent' => 'cres',
|
92
|
+
'cresent' => 'cres',
|
93
|
+
'crest' => 'crst',
|
94
|
+
'crossing' => 'xing',
|
95
|
+
'crossroad' => 'xrd',
|
96
|
+
'crscnt' => 'cres',
|
97
|
+
'crsent' => 'cres',
|
98
|
+
'crsnt' => 'cres',
|
99
|
+
'crssing' => 'xing',
|
100
|
+
'crssng' => 'xing',
|
101
|
+
'crt' => 'ct',
|
102
|
+
'curve' => 'curv',
|
103
|
+
'dale' => 'dl',
|
104
|
+
'dam' => 'dm',
|
105
|
+
'div' => 'dv',
|
106
|
+
'divide' => 'dv',
|
107
|
+
'driv' => 'dr',
|
108
|
+
'drive' => 'dr',
|
109
|
+
'drives' => 'drs',
|
110
|
+
'drv' => 'dr',
|
111
|
+
'dvd' => 'dv',
|
112
|
+
'estate' => 'est',
|
113
|
+
'estates' => 'ests',
|
114
|
+
'exp' => 'expy',
|
115
|
+
'expr' => 'expy',
|
116
|
+
'express' => 'expy',
|
117
|
+
'expressway' => 'expy',
|
118
|
+
'expw' => 'expy',
|
119
|
+
'extension' => 'ext',
|
120
|
+
'extensions' => 'exts',
|
121
|
+
'extn' => 'ext',
|
122
|
+
'extnsn' => 'ext',
|
123
|
+
'falls' => 'fls',
|
124
|
+
'ferry' => 'fry',
|
125
|
+
'field' => 'fld',
|
126
|
+
'fields' => 'flds',
|
127
|
+
'flat' => 'flt',
|
128
|
+
'flats' => 'flts',
|
129
|
+
'ford' => 'frd',
|
130
|
+
'fords' => 'frds',
|
131
|
+
'forest' => 'frst',
|
132
|
+
'forests' => 'frst',
|
133
|
+
'forg' => 'frg',
|
134
|
+
'forge' => 'frg',
|
135
|
+
'forges' => 'frgs',
|
136
|
+
'fork' => 'frk',
|
137
|
+
'forks' => 'frks',
|
138
|
+
'fort' => 'ft',
|
139
|
+
'freeway' => 'fwy',
|
140
|
+
'freewy' => 'fwy',
|
141
|
+
'frry' => 'fry',
|
142
|
+
'frt' => 'ft',
|
143
|
+
'frway' => 'fwy',
|
144
|
+
'frwy' => 'fwy',
|
145
|
+
'garden' => 'gdn',
|
146
|
+
'gardens' => 'gdns',
|
147
|
+
'gardn' => 'gdn',
|
148
|
+
'gateway' => 'gtwy',
|
149
|
+
'gatewy' => 'gtwy',
|
150
|
+
'gatway' => 'gtwy',
|
151
|
+
'glen' => 'gln',
|
152
|
+
'glens' => 'glns',
|
153
|
+
'grden' => 'gdn',
|
154
|
+
'grdn' => 'gdn',
|
155
|
+
'grdns' => 'gdns',
|
156
|
+
'green' => 'grn',
|
157
|
+
'greens' => 'grns',
|
158
|
+
'grov' => 'grv',
|
159
|
+
'grove' => 'grv',
|
160
|
+
'groves' => 'grvs',
|
161
|
+
'gtway' => 'gtwy',
|
162
|
+
'harb' => 'hbr',
|
163
|
+
'harbor' => 'hbr',
|
164
|
+
'harbors' => 'hbrs',
|
165
|
+
'harbr' => 'hbr',
|
166
|
+
'haven' => 'hvn',
|
167
|
+
'havn' => 'hvn',
|
168
|
+
'height' => 'hts',
|
169
|
+
'heights' => 'hts',
|
170
|
+
'hgts' => 'hts',
|
171
|
+
'highway' => 'hwy',
|
172
|
+
'highwy' => 'hwy',
|
173
|
+
'hill' => 'hl',
|
174
|
+
'hills' => 'hls',
|
175
|
+
'hiway' => 'hwy',
|
176
|
+
'hiwy' => 'hwy',
|
177
|
+
'hllw' => 'holw',
|
178
|
+
'hollow' => 'holw',
|
179
|
+
'hollows' => 'holw',
|
180
|
+
'holws' => 'holw',
|
181
|
+
'hrbor' => 'hbr',
|
182
|
+
'ht' => 'hts',
|
183
|
+
'hway' => 'hwy',
|
184
|
+
'inlet' => 'inlt',
|
185
|
+
'island' => 'is',
|
186
|
+
'islands' => 'iss',
|
187
|
+
'isles' => 'isle',
|
188
|
+
'islnd' => 'is',
|
189
|
+
'islnds' => 'iss',
|
190
|
+
'jction' => 'jct',
|
191
|
+
'jctn' => 'jct',
|
192
|
+
'jctns' => 'jcts',
|
193
|
+
'junction' => 'jct',
|
194
|
+
'junctions' => 'jcts',
|
195
|
+
'junctn' => 'jct',
|
196
|
+
'juncton' => 'jct',
|
197
|
+
'key' => 'ky',
|
198
|
+
'keys' => 'kys',
|
199
|
+
'knol' => 'knl',
|
200
|
+
'knoll' => 'knl',
|
201
|
+
'knolls' => 'knls',
|
202
|
+
'la' => 'ln',
|
203
|
+
'lake' => 'lk',
|
204
|
+
'lakes' => 'lks',
|
205
|
+
'landing' => 'lndg',
|
206
|
+
'lane' => 'ln',
|
207
|
+
'lanes' => 'ln',
|
208
|
+
'ldge' => 'ldg',
|
209
|
+
'light' => 'lgt',
|
210
|
+
'lights' => 'lgts',
|
211
|
+
'lndng' => 'lndg',
|
212
|
+
'loaf' => 'lf',
|
213
|
+
'lock' => 'lck',
|
214
|
+
'locks' => 'lcks',
|
215
|
+
'lodg' => 'ldg',
|
216
|
+
'lodge' => 'ldg',
|
217
|
+
'loops' => 'loop',
|
218
|
+
'manor' => 'mnr',
|
219
|
+
'manors' => 'mnrs',
|
220
|
+
'meadow' => 'mdw',
|
221
|
+
'meadows' => 'mdws',
|
222
|
+
'medows' => 'mdws',
|
223
|
+
'mill' => 'ml',
|
224
|
+
'mills' => 'mls',
|
225
|
+
'mission' => 'msn',
|
226
|
+
'missn' => 'msn',
|
227
|
+
'mnt' => 'mt',
|
228
|
+
'mntain' => 'mtn',
|
229
|
+
'mntn' => 'mtn',
|
230
|
+
'mntns' => 'mtns',
|
231
|
+
'motorway' => 'mtwy',
|
232
|
+
'mount' => 'mt',
|
233
|
+
'mountain' => 'mtn',
|
234
|
+
'mountains' => 'mtns',
|
235
|
+
'mountin' => 'mtn',
|
236
|
+
'mssn' => 'msn',
|
237
|
+
'mtin' => 'mtn',
|
238
|
+
'neck' => 'nck',
|
239
|
+
'orchard' => 'orch',
|
240
|
+
'orchrd' => 'orch',
|
241
|
+
'overpass' => 'opas',
|
242
|
+
'ovl' => 'oval',
|
243
|
+
'parks' => 'park',
|
244
|
+
'parkway' => 'pkwy',
|
245
|
+
'parkways' => 'pkwy',
|
246
|
+
'parkwy' => 'pkwy',
|
247
|
+
'passage' => 'psge',
|
248
|
+
'paths' => 'path',
|
249
|
+
'pikes' => 'pike',
|
250
|
+
'pine' => 'pne',
|
251
|
+
'pines' => 'pnes',
|
252
|
+
'pk' => 'park',
|
253
|
+
'pkway' => 'pkwy',
|
254
|
+
'pkwys' => 'pkwy',
|
255
|
+
'pky' => 'pkwy',
|
256
|
+
'place' => 'pl',
|
257
|
+
'plain' => 'pln',
|
258
|
+
'plaines' => 'plns',
|
259
|
+
'plains' => 'plns',
|
260
|
+
'plaza' => 'plz',
|
261
|
+
'plza' => 'plz',
|
262
|
+
'point' => 'pt',
|
263
|
+
'points' => 'pts',
|
264
|
+
'port' => 'prt',
|
265
|
+
'ports' => 'prts',
|
266
|
+
'prairie' => 'pr',
|
267
|
+
'prarie' => 'pr',
|
268
|
+
'prk' => 'park',
|
269
|
+
'prr' => 'pr',
|
270
|
+
'rad' => 'radl',
|
271
|
+
'radial' => 'radl',
|
272
|
+
'radiel' => 'radl',
|
273
|
+
'ranch' => 'rnch',
|
274
|
+
'ranches' => 'rnch',
|
275
|
+
'rapid' => 'rpd',
|
276
|
+
'rapids' => 'rpds',
|
277
|
+
'rdge' => 'rdg',
|
278
|
+
'rest' => 'rst',
|
279
|
+
'ridge' => 'rdg',
|
280
|
+
'ridges' => 'rdgs',
|
281
|
+
'river' => 'riv',
|
282
|
+
'rivr' => 'riv',
|
283
|
+
'rnchs' => 'rnch',
|
284
|
+
'road' => 'rd',
|
285
|
+
'roads' => 'rds',
|
286
|
+
'route' => 'rte',
|
287
|
+
'run' => 'run',
|
288
|
+
'rvr' => 'riv',
|
289
|
+
'shoal' => 'shl',
|
290
|
+
'shoals' => 'shls',
|
291
|
+
'shoar' => 'shr',
|
292
|
+
'shoars' => 'shrs',
|
293
|
+
'shore' => 'shr',
|
294
|
+
'shores' => 'shrs',
|
295
|
+
'skyway' => 'skwy',
|
296
|
+
'spng' => 'spg',
|
297
|
+
'spngs' => 'spgs',
|
298
|
+
'spring' => 'spg',
|
299
|
+
'springs' => 'spgs',
|
300
|
+
'sprng' => 'spg',
|
301
|
+
'sprngs' => 'spgs',
|
302
|
+
'spurs' => 'spur',
|
303
|
+
'sqr' => 'sq',
|
304
|
+
'sqre' => 'sq',
|
305
|
+
'sqrs' => 'sqs',
|
306
|
+
'squ' => 'sq',
|
307
|
+
'square' => 'sq',
|
308
|
+
'squares' => 'sqs',
|
309
|
+
'station' => 'sta',
|
310
|
+
'statn' => 'sta',
|
311
|
+
'stn' => 'sta',
|
312
|
+
'str' => 'st',
|
313
|
+
'strav' => 'stra',
|
314
|
+
'strave' => 'stra',
|
315
|
+
'straven' => 'stra',
|
316
|
+
'stravenue' => 'stra',
|
317
|
+
'stravn' => 'stra',
|
318
|
+
'stream' => 'strm',
|
319
|
+
'street' => 'st',
|
320
|
+
'streets' => 'sts',
|
321
|
+
'streme' => 'strm',
|
322
|
+
'strt' => 'st',
|
323
|
+
'strvn' => 'stra',
|
324
|
+
'strvnue' => 'stra',
|
325
|
+
'sumit' => 'smt',
|
326
|
+
'sumitt' => 'smt',
|
327
|
+
'summit' => 'smt',
|
328
|
+
'terr' => 'ter',
|
329
|
+
'terrace' => 'ter',
|
330
|
+
'throughway' => 'trwy',
|
331
|
+
'tpk' => 'tpke',
|
332
|
+
'tr' => 'trl',
|
333
|
+
'trace' => 'trce',
|
334
|
+
'traces' => 'trce',
|
335
|
+
'track' => 'trak',
|
336
|
+
'tracks' => 'trak',
|
337
|
+
'trafficway' => 'trfy',
|
338
|
+
'trail' => 'trl',
|
339
|
+
'trails' => 'trl',
|
340
|
+
'trk' => 'trak',
|
341
|
+
'trks' => 'trak',
|
342
|
+
'trls' => 'trl',
|
343
|
+
'trnpk' => 'tpke',
|
344
|
+
'trpk' => 'tpke',
|
345
|
+
'tunel' => 'tunl',
|
346
|
+
'tunls' => 'tunl',
|
347
|
+
'tunnel' => 'tunl',
|
348
|
+
'tunnels' => 'tunl',
|
349
|
+
'tunnl' => 'tunl',
|
350
|
+
'turnpike' => 'tpke',
|
351
|
+
'turnpk' => 'tpke',
|
352
|
+
'underpass' => 'upas',
|
353
|
+
'union' => 'un',
|
354
|
+
'unions' => 'uns',
|
355
|
+
'valley' => 'vly',
|
356
|
+
'valleys' => 'vlys',
|
357
|
+
'vally' => 'vly',
|
358
|
+
'vdct' => 'via',
|
359
|
+
'viadct' => 'via',
|
360
|
+
'viaduct' => 'via',
|
361
|
+
'view' => 'vw',
|
362
|
+
'views' => 'vws',
|
363
|
+
'vill' => 'vlg',
|
364
|
+
'villag' => 'vlg',
|
365
|
+
'village' => 'vlg',
|
366
|
+
'villages' => 'vlgs',
|
367
|
+
'ville' => 'vl',
|
368
|
+
'villg' => 'vlg',
|
369
|
+
'villiage' => 'vlg',
|
370
|
+
'vist' => 'vis',
|
371
|
+
'vista' => 'vis',
|
372
|
+
'vlly' => 'vly',
|
373
|
+
'vst' => 'vis',
|
374
|
+
'vsta' => 'vis',
|
375
|
+
'walks' => 'walk',
|
376
|
+
'well' => 'wl',
|
377
|
+
'wells' => 'wls',
|
378
|
+
'wy' => 'way'
|
379
|
+
}.freeze
|
380
|
+
|
381
|
+
# Lookup table whose keys are every key and every value of STREET_TYPES
# (in the order key, value, key, value, ...); each maps to true. Built in
# one expression and frozen so the constant is never observed half-filled
# or modified later.
STREET_TYPES_LIST = STREET_TYPES
                    .flat_map { |name, abbreviation| [name, abbreviation] }
                    .each_with_object({}) { |street_type, list| list[street_type] = true }
                    .freeze
|
386
|
+
|
387
|
+
NUMERIC_STREET_NAMES = {
|
388
|
+
'first' => '1st',
|
389
|
+
'second' => '2nd',
|
390
|
+
'third' => '3rd',
|
391
|
+
'fourth' => '4th',
|
392
|
+
'fifth' => '5th',
|
393
|
+
'sixth' => '6th',
|
394
|
+
'seventh' => '7th',
|
395
|
+
'eighth' => '8th',
|
396
|
+
'ninth' => '9th'
|
397
|
+
}.freeze
|
398
|
+
|
399
|
+
# Unit-designator patterns mapped to the abbreviation that replaces them.
#
# The patterns are unanchored and are applied one after another in insertion
# order, each against the current value, so the first pattern that matches
# decides the result. The "apartment" pattern therefore must not also match
# "dept"/"department": those spellings belong to the 'Dept' entry, which
# accepts every "dep..." spelling the 'Apt' pattern used to accept.
UNIT_ABBREVIATIONS_NUMBERED = {
  /ap(?:ar)?t(?:me?nt)?/i => 'Apt',
  /box/i => 'Box',
  /bu?i?ldi?n?g/i => 'Bldg',
  /dep(?:ar)?t(?:me?nt)?/i => 'Dept',
  /flo*r?/i => 'Fl',
  /ha?nga?r/i => 'Hngr',
  /lo?t/i => 'Lot',
  /ro*m/i => 'Rm',
  /pier/i => 'Pier',
  /p\W*[om]\W*b(?:ox)?/i => 'PO Box',
  /slip/i => 'Slip',
  /spa?ce?/i => 'Spc',
  /stop/i => 'Stop',
  /su?i?te/i => 'Ste',
  /tra?i?le?r/i => 'Trlr',
  /uni?t/i => 'Unit'
}.freeze
|
417
|
+
|
418
|
+
UNIT_ABBREVIATIONS_UNNUMBERED = {
|
419
|
+
/ba?se?me?n?t/i => 'Bsmt',
|
420
|
+
/fro?nt/i => 'Frnt',
|
421
|
+
/lo?bby/i => 'Lbby',
|
422
|
+
/lowe?r/i => 'Lowr',
|
423
|
+
/off?i?ce?/i => 'Ofc',
|
424
|
+
/pe?n?t?ho?u?s?e?/i => 'PH',
|
425
|
+
/rear/i => 'Rear',
|
426
|
+
/side/i => 'Side',
|
427
|
+
/uppe?r/i => 'Uppr'
|
428
|
+
}.freeze
|
429
|
+
|
430
|
+
# All unit-designator patterns: the numbered entries followed by the
# unnumbered ones. Frozen like the two tables it merges.
UNIT_ABBREVIATIONS = UNIT_ABBREVIATIONS_NUMBERED.merge(UNIT_ABBREVIATIONS_UNNUMBERED).freeze
|
431
|
+
|
432
|
+
STATE_CODES = {
|
433
|
+
'alabama' => 'AL',
|
434
|
+
'alaska' => 'AK',
|
435
|
+
'american samoa' => 'AS',
|
436
|
+
'arizona' => 'AZ',
|
437
|
+
'arkansas' => 'AR',
|
438
|
+
'california' => 'CA',
|
439
|
+
'colorado' => 'CO',
|
440
|
+
'connecticut' => 'CT',
|
441
|
+
'delaware' => 'DE',
|
442
|
+
'district of columbia' => 'DC',
|
443
|
+
'federated states of micronesia' => 'FM',
|
444
|
+
'florida' => 'FL',
|
445
|
+
'georgia' => 'GA',
|
446
|
+
'guam' => 'GU',
|
447
|
+
'hawaii' => 'HI',
|
448
|
+
'idaho' => 'ID',
|
449
|
+
'illinois' => 'IL',
|
450
|
+
'indiana' => 'IN',
|
451
|
+
'iowa' => 'IA',
|
452
|
+
'kansas' => 'KS',
|
453
|
+
'kentucky' => 'KY',
|
454
|
+
'louisiana' => 'LA',
|
455
|
+
'maine' => 'ME',
|
456
|
+
'marshall islands' => 'MH',
|
457
|
+
'maryland' => 'MD',
|
458
|
+
'massachusetts' => 'MA',
|
459
|
+
'michigan' => 'MI',
|
460
|
+
'minnesota' => 'MN',
|
461
|
+
'mississippi' => 'MS',
|
462
|
+
'missouri' => 'MO',
|
463
|
+
'montana' => 'MT',
|
464
|
+
'nebraska' => 'NE',
|
465
|
+
'nevada' => 'NV',
|
466
|
+
'new hampshire' => 'NH',
|
467
|
+
'new jersey' => 'NJ',
|
468
|
+
'new mexico' => 'NM',
|
469
|
+
'new york' => 'NY',
|
470
|
+
'north carolina' => 'NC',
|
471
|
+
'north dakota' => 'ND',
|
472
|
+
'northern mariana islands' => 'MP',
|
473
|
+
'ohio' => 'OH',
|
474
|
+
'oklahoma' => 'OK',
|
475
|
+
'oregon' => 'OR',
|
476
|
+
'palau' => 'PW',
|
477
|
+
'pennsylvania' => 'PA',
|
478
|
+
'puerto rico' => 'PR',
|
479
|
+
'rhode island' => 'RI',
|
480
|
+
'south carolina' => 'SC',
|
481
|
+
'south dakota' => 'SD',
|
482
|
+
'tennessee' => 'TN',
|
483
|
+
'texas' => 'TX',
|
484
|
+
'utah' => 'UT',
|
485
|
+
'vermont' => 'VT',
|
486
|
+
'virgin islands' => 'VI',
|
487
|
+
'virginia' => 'VA',
|
488
|
+
'washington' => 'WA',
|
489
|
+
'west virginia' => 'WV',
|
490
|
+
'wisconsin' => 'WI',
|
491
|
+
'wyoming' => 'WY'
|
492
|
+
}.freeze
|
493
|
+
|
494
|
+
# Inverse of STATE_CODES: two-letter code => lower-case state name.
# Frozen like the table it is derived from.
STATE_NAMES = STATE_CODES.invert.freeze
|
495
|
+
|
496
|
+
STATE_FIPS = {
|
497
|
+
'01' => 'AL',
|
498
|
+
'02' => 'AK',
|
499
|
+
'04' => 'AZ',
|
500
|
+
'05' => 'AR',
|
501
|
+
'06' => 'CA',
|
502
|
+
'08' => 'CO',
|
503
|
+
'09' => 'CT',
|
504
|
+
'10' => 'DE',
|
505
|
+
'11' => 'DC',
|
506
|
+
'12' => 'FL',
|
507
|
+
'13' => 'GA',
|
508
|
+
'15' => 'HI',
|
509
|
+
'16' => 'ID',
|
510
|
+
'17' => 'IL',
|
511
|
+
'18' => 'IN',
|
512
|
+
'19' => 'IA',
|
513
|
+
'20' => 'KS',
|
514
|
+
'21' => 'KY',
|
515
|
+
'22' => 'LA',
|
516
|
+
'23' => 'ME',
|
517
|
+
'24' => 'MD',
|
518
|
+
'25' => 'MA',
|
519
|
+
'26' => 'MI',
|
520
|
+
'27' => 'MN',
|
521
|
+
'28' => 'MS',
|
522
|
+
'29' => 'MO',
|
523
|
+
'30' => 'MT',
|
524
|
+
'31' => 'NE',
|
525
|
+
'32' => 'NV',
|
526
|
+
'33' => 'NH',
|
527
|
+
'34' => 'NJ',
|
528
|
+
'35' => 'NM',
|
529
|
+
'36' => 'NY',
|
530
|
+
'37' => 'NC',
|
531
|
+
'38' => 'ND',
|
532
|
+
'39' => 'OH',
|
533
|
+
'40' => 'OK',
|
534
|
+
'41' => 'OR',
|
535
|
+
'42' => 'PA',
|
536
|
+
'44' => 'RI',
|
537
|
+
'45' => 'SC',
|
538
|
+
'46' => 'SD',
|
539
|
+
'47' => 'TN',
|
540
|
+
'48' => 'TX',
|
541
|
+
'49' => 'UT',
|
542
|
+
'50' => 'VT',
|
543
|
+
'51' => 'VA',
|
544
|
+
'53' => 'WA',
|
545
|
+
'54' => 'WV',
|
546
|
+
'55' => 'WI',
|
547
|
+
'56' => 'WY',
|
548
|
+
'72' => 'PR',
|
549
|
+
'78' => 'VI'
|
550
|
+
}.freeze
|
551
|
+
|
552
|
+
# Inverse of STATE_FIPS: two-letter state code => FIPS code string.
# Frozen like the table it is derived from.
FIPS_STATES = STATE_FIPS.invert.freeze
|
553
|
+
|
554
|
+
NORMALIZE_MAP = {
|
555
|
+
'prefix' => DIRECTIONAL,
|
556
|
+
'prefix1' => DIRECTIONAL,
|
557
|
+
'prefix2' => DIRECTIONAL,
|
558
|
+
'suffix' => DIRECTIONAL,
|
559
|
+
'suffix1' => DIRECTIONAL,
|
560
|
+
'suffix2' => DIRECTIONAL,
|
561
|
+
'street_type' => STREET_TYPES,
|
562
|
+
'street_type1' => STREET_TYPES,
|
563
|
+
'street_type2' => STREET_TYPES,
|
564
|
+
'street' => NUMERIC_STREET_NAMES,
|
565
|
+
'state' => STATE_CODES
|
566
|
+
}.freeze
|
567
|
+
end
|
568
|
+
end
|