dburkes-people_places_things 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/lib/people_places_things.rb +7 -7
- data/lib/people_places_things/ansi_counties.rb +68 -0
- data/lib/people_places_things/{ansi_counties/data → data}/data.yml +0 -0
- data/lib/people_places_things/{ansi_counties/data → data}/process_data.rb +0 -0
- data/lib/people_places_things/{ansi_counties/data → data}/raw.txt +0 -0
- data/lib/people_places_things/location.rb +30 -0
- data/lib/people_places_things/person_name.rb +121 -0
- data/lib/people_places_things/phone_number.rb +48 -0
- data/lib/people_places_things/state.rb +83 -0
- data/lib/people_places_things/street_address.rb +180 -0
- data/lib/people_places_things/zip_code.rb +17 -0
- data/people_places_things.gemspec +12 -12
- data/spec/helper.rb +2 -1
- metadata +13 -14
- data/lib/people_places_things/ansi_counties/ansi_counties.rb +0 -66
- data/lib/people_places_things/location/location.rb +0 -28
- data/lib/people_places_things/person_name/person_name.rb +0 -119
- data/lib/people_places_things/phone_number/phone_number.rb +0 -46
- data/lib/people_places_things/state/state.rb +0 -81
- data/lib/people_places_things/street_address/street_address.rb +0 -178
- data/lib/people_places_things/zip_code/zip_code.rb +0 -15
data/Rakefile
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.1.0
|
data/lib/people_places_things.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
1
|
+
require 'people_places_things/street_address'
|
2
|
+
require 'people_places_things/person_name'
|
3
|
+
require 'people_places_things/ansi_counties'
|
4
|
+
require 'people_places_things/phone_number'
|
5
|
+
require 'people_places_things/zip_code'
|
6
|
+
require 'people_places_things/state'
|
7
|
+
require 'people_places_things/location'
|
8
8
|
|
9
9
|
module PeoplePlacesThings
|
10
10
|
VERSION = File.read('VERSION').chomp.strip rescue "Unknown"
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module PeoplePlacesThings
|
4
|
+
# Provides two-way mapping between U.S. state and county names and their associated ANSI codes (formerly known as FIPS codes).
|
5
|
+
#
|
6
|
+
# == Examples
|
7
|
+
#
|
8
|
+
# To get the ANSI code for a state and county, you call ANSICounties.code_for, like so:
|
9
|
+
#
|
10
|
+
# code = ANSICounties.code_for('GA', 'FULTON')
|
11
|
+
# # => 13121
|
12
|
+
#
|
13
|
+
# You can also pass a single Hash argument:
|
14
|
+
#
|
15
|
+
# code = ANSICounties.code_for(:state => 'ga', :county => 'fulton')
|
16
|
+
# # => 13121
|
17
|
+
#
|
18
|
+
# Conversely, to get the state and county for an ANSI code, you call ANSICounties.data_for:
|
19
|
+
#
|
20
|
+
# hash = ANSICounties.data_for(13121)
|
21
|
+
# # => { :state => 'GA', :county => 'FULTON' }
|
22
|
+
#
|
23
|
+
# == Data source
|
24
|
+
#
|
25
|
+
# The data that makes up <tt>lib/ansi-counties/data/data.yml</tt> was generated from <tt>lib/ansi-counties/data/raw.txt</tt>, which was downloaded from
|
26
|
+
# the {US Census website}[http://www.census.gov/geo/www/ansi/download.html].
|
27
|
+
class ANSICounties
|
28
|
+
|
29
|
+
# Get the ANSI code for the given state and county. If _data_or_state_ is a Hash, then it must contain <em>state</em> and <em>county</em> keys, otherwise,
|
30
|
+
# it is assumbed to be a String containing the state name.
|
31
|
+
def self.code_for(data_or_state, county=nil)
|
32
|
+
if data_or_state.is_a?(Hash)
|
33
|
+
state, county = data_or_state[:state], data_or_state[:county]
|
34
|
+
else
|
35
|
+
state = data_or_state
|
36
|
+
end
|
37
|
+
|
38
|
+
forward_hash[key_for(state, county)] rescue nil
|
39
|
+
end
|
40
|
+
|
41
|
+
# Get the state and county names for a given ANSI code. Returns a Hash containing <em>state</em> and <em>county</em> keys
|
42
|
+
def self.data_for(code)
|
43
|
+
reverse_hash[code]
|
44
|
+
end
|
45
|
+
|
46
|
+
def self.normalize_county_name(name) #:nodoc:
|
47
|
+
name.upcase.gsub("ST ", "ST. ").gsub("SAINT ", "ST. ")
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.key_for(state, county) #:nodoc:
|
51
|
+
"#{state.upcase}/#{normalize_county_name(county)}"
|
52
|
+
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
def self.forward_hash
|
57
|
+
@@forward_hash ||= File.open(File.join(File.dirname(__FILE__), 'data', 'data.yml')) {|yf| YAML::load(yf)}
|
58
|
+
end
|
59
|
+
|
60
|
+
def self.reverse_hash
|
61
|
+
@@reverse_hash ||= forward_hash.inject({}) do |h, kv|
|
62
|
+
state_county = kv[0].split('/')
|
63
|
+
h[kv[1]] = { :state => state_county[0], :county => state_county[1]}
|
64
|
+
h
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module PeoplePlacesThings
|
2
|
+
class Location
|
3
|
+
attr_accessor :city, :state, :zip, :raw
|
4
|
+
|
5
|
+
def initialize(str)
|
6
|
+
self.raw = str
|
7
|
+
|
8
|
+
tokens = str.split(/\s|,/).collect {|t| t.strip}
|
9
|
+
|
10
|
+
# try to parse last token as zip
|
11
|
+
#
|
12
|
+
self.zip = ZipCode.new(tokens.last) rescue nil
|
13
|
+
tokens = tokens.slice(0..-2) if self.zip
|
14
|
+
|
15
|
+
# try to parse last token as state
|
16
|
+
#
|
17
|
+
self.state = State.new(tokens.last) rescue nil
|
18
|
+
tokens = tokens.slice(0..-2) if self.state
|
19
|
+
|
20
|
+
# remainder must be city
|
21
|
+
#
|
22
|
+
self.city = tokens.join(' ').strip
|
23
|
+
self.city = nil if self.city.empty?
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_s
|
27
|
+
[[self.city, (self.state.to_s(:abbr) rescue nil)].compact.join(','), self.zip.to_s].compact.join(' ')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module PeoplePlacesThings
|
2
|
+
class PersonName
|
3
|
+
attr_accessor :first, :middle, :last, :suffix, :raw
|
4
|
+
|
5
|
+
def initialize(str, fmt = :auto_detect)
|
6
|
+
raise "Unsupported Format" if !PARSE_FORMATS.include?(fmt)
|
7
|
+
|
8
|
+
if fmt == :auto_detect
|
9
|
+
fmt = str.include?(',') ? :last_first_middle : :first_middle_last
|
10
|
+
end
|
11
|
+
|
12
|
+
self.raw = str
|
13
|
+
|
14
|
+
parts = str.split(/\s|,/).collect {|p| p.strip}.reject {|p| PersonName.blank?(p) || p == ',' }
|
15
|
+
|
16
|
+
if parts.size == 1
|
17
|
+
self.last = parts.first
|
18
|
+
else
|
19
|
+
case fmt
|
20
|
+
when :first_middle_last
|
21
|
+
if parts.size > 2 and SUPPORTED_SUFFIXES.detect {|s| s.casecmp(parts.last) == 0}
|
22
|
+
self.suffix = PersonName.normalize_suffix(parts.last)
|
23
|
+
parts.delete_at(parts.size - 1)
|
24
|
+
end
|
25
|
+
|
26
|
+
self.first = parts.first if parts.size > 0
|
27
|
+
self.last = parts.last if parts.size > 1
|
28
|
+
|
29
|
+
if parts.size > 2 && ODD_LAST_NAME_PREFIXES.detect {|s| s.casecmp(parts[-2]) == 0}
|
30
|
+
self.last = "#{parts[-2]}#{self.last}"
|
31
|
+
parts.delete_at(parts.size - 2)
|
32
|
+
end
|
33
|
+
|
34
|
+
self.middle = parts[1..(parts.size - 2)].join(' ') if parts.size > 2
|
35
|
+
|
36
|
+
when :last_first_middle
|
37
|
+
self.last = parts.first if parts.size > 0
|
38
|
+
|
39
|
+
if parts.size > 1 && ODD_LAST_NAME_PREFIXES.detect {|s| s.casecmp(self.last) == 0}
|
40
|
+
self.last << parts[1]
|
41
|
+
parts.delete_at(1)
|
42
|
+
end
|
43
|
+
|
44
|
+
if parts.size > 2 and SUPPORTED_SUFFIXES.detect {|s| s.casecmp(parts[1]) == 0}
|
45
|
+
self.suffix = PersonName.normalize_suffix(parts[1])
|
46
|
+
parts.delete_at(1)
|
47
|
+
end
|
48
|
+
|
49
|
+
self.first = parts[1] if parts.size > 1
|
50
|
+
self.middle = parts[2..(parts.size - 1)].join(' ') if parts.size > 2
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def to_s(fmt = :full)
|
56
|
+
raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)
|
57
|
+
|
58
|
+
case fmt
|
59
|
+
when :first, :middle, :last
|
60
|
+
self.send(fmt)
|
61
|
+
|
62
|
+
when :full
|
63
|
+
[self.first, self.middle, self.last, self.suffix].compact.join(' ')
|
64
|
+
|
65
|
+
when :full_reverse
|
66
|
+
[self.last, self.first, self.middle, self.suffix].compact.join(' ')
|
67
|
+
|
68
|
+
when :first_space_last
|
69
|
+
[self.first, self.last].compact.join(' ')
|
70
|
+
|
71
|
+
when :last_space_first
|
72
|
+
[self.last, self.first].compact.join(' ')
|
73
|
+
|
74
|
+
when :last_comma_first
|
75
|
+
[self.last, self.first].compact.join(',')
|
76
|
+
|
77
|
+
when :last_comma_space_first
|
78
|
+
[(self.first ? "#{self.last}," : self.last), self.first].compact.join(' ')
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
def first_i
|
83
|
+
self.first[0,1] rescue nil
|
84
|
+
end
|
85
|
+
|
86
|
+
def middle_i
|
87
|
+
self.middle[0,1] rescue nil
|
88
|
+
end
|
89
|
+
|
90
|
+
def last_i
|
91
|
+
self.last[0,1] rescue nil
|
92
|
+
end
|
93
|
+
|
94
|
+
def eql?(other, initials_only=false)
|
95
|
+
if other.is_a?(PersonName)
|
96
|
+
[:first, :middle, :last].all? do |k|
|
97
|
+
msg = (k != :last && initials_only) ? "#{k}_i" : k
|
98
|
+
me = self.send(msg)
|
99
|
+
them = other.send(msg)
|
100
|
+
me && them ? me.casecmp(them) == 0 : true
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
PARSE_FORMATS = [:first_middle_last, :last_first_middle, :auto_detect]
|
106
|
+
OUTPUT_FORMATS = [:first, :middle, :last, :full, :full_reverse, :first_space_last, :last_space_first, :last_comma_first, :last_comma_space_first]
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
def self.blank?(string_or_nil)
|
111
|
+
string_or_nil.nil? || string_or_nil !~ /\S/
|
112
|
+
end
|
113
|
+
|
114
|
+
def self.normalize_suffix(suffix)
|
115
|
+
suffix.match(/\w+/)[0] rescue suffix
|
116
|
+
end
|
117
|
+
|
118
|
+
SUPPORTED_SUFFIXES = %w(II III IV V JR JR. SR SR.)
|
119
|
+
ODD_LAST_NAME_PREFIXES = %w(MC ST ST.)
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module PeoplePlacesThings
|
2
|
+
class PhoneNumber
|
3
|
+
attr_accessor :country_code, :area_code, :number, :exchange, :suffix, :raw
|
4
|
+
|
5
|
+
def initialize(str)
|
6
|
+
extract = str.strip.match(/^([-+()\d ]+)$/)[0].gsub(/[^\d]/, '') rescue nil
|
7
|
+
raise "Unsupported Format" if !extract || extract.length < 10 || extract.length > 11
|
8
|
+
|
9
|
+
if extract.length == 11
|
10
|
+
self.country_code = extract.slice!(0..0)
|
11
|
+
else
|
12
|
+
self.country_code = '1'
|
13
|
+
end
|
14
|
+
|
15
|
+
raise "Unsupported Format" if self.country_code != '1'
|
16
|
+
|
17
|
+
self.area_code = extract.slice!(0..2)
|
18
|
+
|
19
|
+
self.number = extract.dup
|
20
|
+
|
21
|
+
self.exchange = extract.slice!(0..2)
|
22
|
+
|
23
|
+
self.suffix = extract
|
24
|
+
|
25
|
+
raise "Unsupported Format" if !self.exchange || !self.suffix
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_s(fmt = :full_formatted)
|
29
|
+
raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)
|
30
|
+
|
31
|
+
case fmt
|
32
|
+
when :full_digits
|
33
|
+
"#{self.country_code}#{self.area_code}#{self.exchange}#{self.suffix}"
|
34
|
+
|
35
|
+
when :local_digits
|
36
|
+
"#{self.exchange}#{self.suffix}"
|
37
|
+
|
38
|
+
when :full_formatted
|
39
|
+
"#{self.country_code} (#{self.area_code}) #{self.exchange}-#{self.suffix}"
|
40
|
+
|
41
|
+
when :local_formatted
|
42
|
+
"#{self.exchange}-#{self.suffix}"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
OUTPUT_FORMATS = [:full_digits, :local_digits, :full_formatted, :local_formatted]
|
47
|
+
end
|
48
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module PeoplePlacesThings
|
2
|
+
class State
|
3
|
+
attr_accessor :sym, :raw
|
4
|
+
|
5
|
+
def initialize(str)
|
6
|
+
self.raw = str
|
7
|
+
token = str.strip.downcase
|
8
|
+
|
9
|
+
if FORWARD.has_key?(token.to_sym)
|
10
|
+
self.sym = token.to_sym
|
11
|
+
elsif REVERSE.has_key?(token)
|
12
|
+
self.sym = REVERSE[token]
|
13
|
+
end
|
14
|
+
|
15
|
+
raise "Unsupported Format" if !self.sym
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_s(fmt = :full)
|
19
|
+
raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)
|
20
|
+
fmt == :full ? FORWARD[self.sym].capitalize : self.sym.to_s.upcase
|
21
|
+
end
|
22
|
+
|
23
|
+
OUTPUT_FORMATS = [:abbr, :full]
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
FORWARD = {
|
28
|
+
:al => "alabama",
|
29
|
+
:ak => "alaska",
|
30
|
+
:az => "arizona",
|
31
|
+
:ar => "arkansas",
|
32
|
+
:ca => "california",
|
33
|
+
:co => "colorado",
|
34
|
+
:ct => "connecticut",
|
35
|
+
:de => "delaware",
|
36
|
+
:dc => "district of columbia",
|
37
|
+
:fl => "florida",
|
38
|
+
:ga => "georgia",
|
39
|
+
:hi => "hawaii",
|
40
|
+
:id => "idaho",
|
41
|
+
:il => "illinois",
|
42
|
+
:in => "indiana",
|
43
|
+
:ia => "iowa",
|
44
|
+
:ks => "kansas",
|
45
|
+
:ky => "kentucky",
|
46
|
+
:la => "louisiana",
|
47
|
+
:me => "maine",
|
48
|
+
:md => "maryland",
|
49
|
+
:ma => "massachusetts",
|
50
|
+
:mi => "michigan",
|
51
|
+
:mn => "minnesota",
|
52
|
+
:ms => "mississippi",
|
53
|
+
:mo => "missouri",
|
54
|
+
:mt => "montana",
|
55
|
+
:ne => "nebraska",
|
56
|
+
:nv => "nevada",
|
57
|
+
:nh => "new hampshire",
|
58
|
+
:nj => "new jersey",
|
59
|
+
:nm => "new mexico",
|
60
|
+
:ny => "new york",
|
61
|
+
:nc => "north carolina",
|
62
|
+
:nd => "north dakota",
|
63
|
+
:oh => "ohio",
|
64
|
+
:ok => "oklahoma",
|
65
|
+
:or => "oregon",
|
66
|
+
:pa => "pennsylvania",
|
67
|
+
:ri => "Rhode island",
|
68
|
+
:sc => "south carolina",
|
69
|
+
:sd => "south dakota",
|
70
|
+
:tn => "tennessee",
|
71
|
+
:tx => "texas",
|
72
|
+
:ut => "utah",
|
73
|
+
:vt => "vermont",
|
74
|
+
:va => "virginia",
|
75
|
+
:wa => "washington",
|
76
|
+
:wv => "west virginia",
|
77
|
+
:wi => "wisconsin",
|
78
|
+
:wy => "wyoming",
|
79
|
+
}
|
80
|
+
|
81
|
+
REVERSE = FORWARD.inject({}) {|r, f| r[f[1]] = f[0]; r}
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
module PeoplePlacesThings
|
2
|
+
class StreetAddress
|
3
|
+
attr_accessor :number, :pre_direction, :name, :suffix, :post_direction, :unit_type, :unit, :raw
|
4
|
+
|
5
|
+
def initialize(str)
|
6
|
+
tokens = str.split(/[\s,]/).select {|s| !s.empty?}
|
7
|
+
|
8
|
+
# Check the first token for leading numericality. If so, set number to the first token, and delete it
|
9
|
+
#
|
10
|
+
if tokens.first =~ /(^\d+.*)/
|
11
|
+
self.number = $1
|
12
|
+
tokens.shift
|
13
|
+
end
|
14
|
+
|
15
|
+
# If at least two tokens remain, check next-to-last token as unit type. If so, set unit_type and unit, and delete the tokens
|
16
|
+
#
|
17
|
+
if tokens.size > 1
|
18
|
+
self.unit_type = StreetAddress.find_token(tokens[-2], UNIT_TYPES)
|
19
|
+
if self.unit_type
|
20
|
+
self.unit = tokens[-1]
|
21
|
+
tokens.slice!(tokens.size - 2, 2)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
# If at least one token remains, check last token for directionality. If so, set post_direction and delete the token
|
26
|
+
#
|
27
|
+
if tokens.size > 0
|
28
|
+
self.post_direction = StreetAddress.find_token(tokens[-1], DIRECTIONS)
|
29
|
+
if self.post_direction
|
30
|
+
post_direction_token = tokens[-1]
|
31
|
+
tokens.slice!(tokens.size - 1)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# If at least one token remains, check last token for suffix. If so, self set.suffix and delete the token
|
36
|
+
#
|
37
|
+
if tokens.size > 0
|
38
|
+
self.suffix = StreetAddress.find_token(tokens[-1], SUFFIXES)
|
39
|
+
tokens.slice!(tokens.size - 1) if self.suffix
|
40
|
+
end
|
41
|
+
|
42
|
+
# If at least two tokens remain, check first for directionality. If so, set pre_direction and delete token
|
43
|
+
#
|
44
|
+
if tokens.size > 1
|
45
|
+
self.pre_direction = StreetAddress.find_token(tokens.first, DIRECTIONS)
|
46
|
+
tokens.shift if self.pre_direction
|
47
|
+
end
|
48
|
+
|
49
|
+
# if any tokens remain, set joined remaining tokens as name, otherwise, set name to post_direction, if set, and set post_direction to nil
|
50
|
+
#
|
51
|
+
if tokens.size > 0
|
52
|
+
self.name = tokens.join(' ')
|
53
|
+
else
|
54
|
+
self.name = post_direction_token
|
55
|
+
self.post_direction = nil
|
56
|
+
end
|
57
|
+
|
58
|
+
validate_parts
|
59
|
+
end
|
60
|
+
|
61
|
+
def to_s
|
62
|
+
parts = []
|
63
|
+
parts << self.number if self.number
|
64
|
+
parts << DIRECTIONS[self.pre_direction].first if self.pre_direction
|
65
|
+
parts << self.name if self.name
|
66
|
+
parts << SUFFIXES[self.suffix].first if self.suffix
|
67
|
+
parts << DIRECTIONS[self.post_direction].first if self.post_direction
|
68
|
+
parts << UNIT_TYPES[self.unit_type].first if self.unit_type
|
69
|
+
parts << self.unit if self.unit
|
70
|
+
parts.join(' ')
|
71
|
+
end
|
72
|
+
|
73
|
+
def self.string_for(symbol, form)
|
74
|
+
raise "Requested unknown form \"#{type}\" for :#{symbol}" if !SUPPORTED_FORMS.include?(form)
|
75
|
+
|
76
|
+
val = DIRECTIONS[symbol] || SUFFIXES[symbol] || UNIT_TYPES[symbol]
|
77
|
+
|
78
|
+
if val
|
79
|
+
val = ((val[SUPPORTED_FORMS.index(form)] rescue nil) || (val.first rescue val))
|
80
|
+
end
|
81
|
+
|
82
|
+
val
|
83
|
+
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
def validate_parts
|
88
|
+
[:pre_direction, :suffix, :post_direction, :unit_type].each do |p|
|
89
|
+
if self.send(p)
|
90
|
+
legal_values = p == :suffix ? SUFFIXES : p == :unit_type ? UNIT_TYPES : DIRECTIONS
|
91
|
+
raise "Invalid #{p.to_s} \"#{self.send(p)}\"" if !legal_values.include?(self.send(p))
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def self.find_token(token, values)
|
97
|
+
values.keys.each do |k|
|
98
|
+
return k if values[k].detect {|v| v.casecmp(token) == 0}
|
99
|
+
end
|
100
|
+
|
101
|
+
nil
|
102
|
+
end
|
103
|
+
|
104
|
+
DIRECTIONS = {
|
105
|
+
:north => %w(north n n.),
|
106
|
+
:northeast => %w(northeast ne ne. n.e.),
|
107
|
+
:east => %w(east e e.),
|
108
|
+
:southeast => %w(southeast se se. s.e.),
|
109
|
+
:south => %w(south s s.),
|
110
|
+
:southwest => %w(southwest sw sw. s.w.),
|
111
|
+
:west => %w(west w w.),
|
112
|
+
:northwest => %w(northwest nw nw. n.w.)
|
113
|
+
}
|
114
|
+
|
115
|
+
SUFFIXES = {
|
116
|
+
:alley => %w(alley al al.),
|
117
|
+
:avenue => %w(avenue ave ave. av av.),
|
118
|
+
:beach => %w(beach bch bch.),
|
119
|
+
:bend => %w(bend),
|
120
|
+
:boulevard => %w(boulevard blvd blvd. blv blv.),
|
121
|
+
:center => %w(center ctr ctr.),
|
122
|
+
:circle => %w(circle cir cir.),
|
123
|
+
:cliff => %w(cliff clf clf.),
|
124
|
+
:club => %w(club),
|
125
|
+
:condo => %w(condo con con.),
|
126
|
+
:court => %w(court ct ct. cor cor.),
|
127
|
+
:cove => %w(cove),
|
128
|
+
:creek => %w(creek crk crk.),
|
129
|
+
:crossing => %w(crossing xing xing. crs crs.),
|
130
|
+
:drive => %w(drive dr dr.),
|
131
|
+
:extension => %w(extension ext ext.),
|
132
|
+
:freeway => %w(freeway fwy fwy.),
|
133
|
+
:gardens => %w(gardens gdns gdns.),
|
134
|
+
:glen => %w(glen gl gl.),
|
135
|
+
:green => %w(green grn grn.),
|
136
|
+
:heights => %w(heights hts hts.),
|
137
|
+
:highway => %w(highway hwy hwy. hgwy hgwy.),
|
138
|
+
:hill => %w(hill),
|
139
|
+
:knoll => %w(knoll knl knl.),
|
140
|
+
:lake => %w(lake),
|
141
|
+
:lane => %w(lane ln ln.),
|
142
|
+
:landing => %w(landing lndg lndg.),
|
143
|
+
:loop => %w(loop),
|
144
|
+
:meadows => %w(meadows mdws mdws.),
|
145
|
+
:manor => %w(manor mnr mnr.),
|
146
|
+
:mountain => %w(mountain mtn mtn. mnt mnt.),
|
147
|
+
:oaks => %w(oaks),
|
148
|
+
:oval => %w(oval),
|
149
|
+
:park => %w(park pk pk. prk prk.),
|
150
|
+
:parkway => %w(parkway pkwy pkwy. pky pky.),
|
151
|
+
:pier => %w(pier),
|
152
|
+
:place => %w(place pl pl.),
|
153
|
+
:plaza => %w(plaza plz plz.),
|
154
|
+
:point => %w(point pt pt. pnt pnt.),
|
155
|
+
:ridge => %w(ridge ri ri.),
|
156
|
+
:road => %w(road rd rd.),
|
157
|
+
:row => %w(row),
|
158
|
+
:run => %w(run),
|
159
|
+
:springs => %w(springs spgs spgs.),
|
160
|
+
:square => %w(square sq sq.),
|
161
|
+
:street => %w(street st st.),
|
162
|
+
:station => %w(station sta sta.),
|
163
|
+
:terrace => %w(terrace ter ter. te te.),
|
164
|
+
:turnpike => %w(turnpike tpke tpke.),
|
165
|
+
:trace => %w(trace trc trc.),
|
166
|
+
:trail => %w(trail trl trl. tl tl.),
|
167
|
+
:valley => %w(valley vly vly.),
|
168
|
+
:walk => %w(walk),
|
169
|
+
:way => %w(way)
|
170
|
+
}
|
171
|
+
|
172
|
+
UNIT_TYPES = {
|
173
|
+
:suite => %w(suite ste ste.),
|
174
|
+
:number => %w(number # nbr nbr.),
|
175
|
+
:apartment => %w(apartment apt apt.)
|
176
|
+
}
|
177
|
+
|
178
|
+
SUPPORTED_FORMS = [:long, :short]
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module PeoplePlacesThings
|
2
|
+
class ZipCode
|
3
|
+
attr_accessor :base, :plus_four, :raw
|
4
|
+
|
5
|
+
def initialize(str)
|
6
|
+
tokens = str.strip.match(/^(\d{5})(-\d{4})?$/)[0].split('-') rescue nil
|
7
|
+
raise "Unsupported Format" if !tokens
|
8
|
+
|
9
|
+
self.base = tokens.first
|
10
|
+
self.plus_four = tokens[1] rescue nil
|
11
|
+
end
|
12
|
+
|
13
|
+
def to_s
|
14
|
+
[self.base, self.plus_four].compact.join('-')
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{people_places_things}
|
8
|
-
s.version = "2.
|
8
|
+
s.version = "2.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Danny Burkes"]
|
12
|
-
s.date = %q{2009-
|
12
|
+
s.date = %q{2009-09-01}
|
13
13
|
s.description = %q{Parsers and formatters for person names, street addresses, city/state/zip, phone numbers, etc.}
|
14
14
|
s.email = %q{dburkes@netable.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -21,16 +21,16 @@ Gem::Specification.new do |s|
|
|
21
21
|
"Rakefile",
|
22
22
|
"VERSION",
|
23
23
|
"lib/people_places_things.rb",
|
24
|
-
"lib/people_places_things/ansi_counties
|
25
|
-
"lib/people_places_things/
|
26
|
-
"lib/people_places_things/
|
27
|
-
"lib/people_places_things/
|
28
|
-
"lib/people_places_things/location
|
29
|
-
"lib/people_places_things/person_name
|
30
|
-
"lib/people_places_things/phone_number
|
31
|
-
"lib/people_places_things/state
|
32
|
-
"lib/people_places_things/street_address
|
33
|
-
"lib/people_places_things/zip_code
|
24
|
+
"lib/people_places_things/ansi_counties.rb",
|
25
|
+
"lib/people_places_things/data/data.yml",
|
26
|
+
"lib/people_places_things/data/process_data.rb",
|
27
|
+
"lib/people_places_things/data/raw.txt",
|
28
|
+
"lib/people_places_things/location.rb",
|
29
|
+
"lib/people_places_things/person_name.rb",
|
30
|
+
"lib/people_places_things/phone_number.rb",
|
31
|
+
"lib/people_places_things/state.rb",
|
32
|
+
"lib/people_places_things/street_address.rb",
|
33
|
+
"lib/people_places_things/zip_code.rb",
|
34
34
|
"people_places_things.gemspec",
|
35
35
|
"spec/ansi_counties_spec.rb",
|
36
36
|
"spec/helper.rb",
|
data/spec/helper.rb
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
require 'lib/people_places_things'
|
1
|
+
require 'lib/people_places_things'
|
2
|
+
include PeoplePlacesThings
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dburkes-people_places_things
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Danny Burkes
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-01 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -27,16 +27,16 @@ files:
|
|
27
27
|
- Rakefile
|
28
28
|
- VERSION
|
29
29
|
- lib/people_places_things.rb
|
30
|
-
- lib/people_places_things/ansi_counties
|
31
|
-
- lib/people_places_things/
|
32
|
-
- lib/people_places_things/
|
33
|
-
- lib/people_places_things/
|
34
|
-
- lib/people_places_things/location
|
35
|
-
- lib/people_places_things/person_name
|
36
|
-
- lib/people_places_things/phone_number
|
37
|
-
- lib/people_places_things/state
|
38
|
-
- lib/people_places_things/street_address
|
39
|
-
- lib/people_places_things/zip_code
|
30
|
+
- lib/people_places_things/ansi_counties.rb
|
31
|
+
- lib/people_places_things/data/data.yml
|
32
|
+
- lib/people_places_things/data/process_data.rb
|
33
|
+
- lib/people_places_things/data/raw.txt
|
34
|
+
- lib/people_places_things/location.rb
|
35
|
+
- lib/people_places_things/person_name.rb
|
36
|
+
- lib/people_places_things/phone_number.rb
|
37
|
+
- lib/people_places_things/state.rb
|
38
|
+
- lib/people_places_things/street_address.rb
|
39
|
+
- lib/people_places_things/zip_code.rb
|
40
40
|
- people_places_things.gemspec
|
41
41
|
- spec/ansi_counties_spec.rb
|
42
42
|
- spec/helper.rb
|
@@ -48,7 +48,6 @@ files:
|
|
48
48
|
- spec/zip_code_spec.rb
|
49
49
|
has_rdoc: true
|
50
50
|
homepage: http://github.com/dburkes/people_places_things
|
51
|
-
licenses:
|
52
51
|
post_install_message:
|
53
52
|
rdoc_options:
|
54
53
|
- --charset=UTF-8
|
@@ -69,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
69
68
|
requirements: []
|
70
69
|
|
71
70
|
rubyforge_project:
|
72
|
-
rubygems_version: 1.
|
71
|
+
rubygems_version: 1.2.0
|
73
72
|
signing_key:
|
74
73
|
specification_version: 2
|
75
74
|
summary: Parsers and formatters for person names, street addresses, city/state/zip, phone numbers, etc.
|
@@ -1,66 +0,0 @@
|
|
1
|
-
require 'yaml'
|
2
|
-
|
3
|
-
# Provides two-way mapping between U.S. state and county names and their associated ANSI codes (formerly known as FIPS codes).
|
4
|
-
#
|
5
|
-
# == Examples
|
6
|
-
#
|
7
|
-
# To get the ANSI code for a state and county, you call ANSICounties.code_for, like so:
|
8
|
-
#
|
9
|
-
# code = ANSICounties.code_for('GA', 'FULTON')
|
10
|
-
# # => 13121
|
11
|
-
#
|
12
|
-
# You can also pass a single Hash argument:
|
13
|
-
#
|
14
|
-
# code = ANSICounties.code_for(:state => 'ga', :county => 'fulton')
|
15
|
-
# # => 13121
|
16
|
-
#
|
17
|
-
# Conversely, to get the state and county for an ANSI code, you call ANSICounties.data_for:
|
18
|
-
#
|
19
|
-
# hash = ANSICounties.data_for(13121)
|
20
|
-
# # => { :state => 'GA', :county => 'FULTON' }
|
21
|
-
#
|
22
|
-
# == Data source
|
23
|
-
#
|
24
|
-
# The data that makes up <tt>lib/ansi-counties/data/data.yml</tt> was generated from <tt>lib/ansi-counties/data/raw.txt</tt>, which was downloaded from
|
25
|
-
# the {US Census website}[http://www.census.gov/geo/www/ansi/download.html].
|
26
|
-
class ANSICounties
|
27
|
-
|
28
|
-
# Get the ANSI code for the given state and county. If _data_or_state_ is a Hash, then it must contain <em>state</em> and <em>county</em> keys, otherwise,
|
29
|
-
# it is assumbed to be a String containing the state name.
|
30
|
-
def self.code_for(data_or_state, county=nil)
|
31
|
-
if data_or_state.is_a?(Hash)
|
32
|
-
state, county = data_or_state[:state], data_or_state[:county]
|
33
|
-
else
|
34
|
-
state = data_or_state
|
35
|
-
end
|
36
|
-
|
37
|
-
forward_hash[key_for(state, county)] rescue nil
|
38
|
-
end
|
39
|
-
|
40
|
-
# Get the state and county names for a given ANSI code. Returns a Hash containing <em>state</em> and <em>county</em> keys
|
41
|
-
def self.data_for(code)
|
42
|
-
reverse_hash[code]
|
43
|
-
end
|
44
|
-
|
45
|
-
def self.normalize_county_name(name) #:nodoc:
|
46
|
-
name.upcase.gsub("ST ", "ST. ").gsub("SAINT ", "ST. ")
|
47
|
-
end
|
48
|
-
|
49
|
-
def self.key_for(state, county) #:nodoc:
|
50
|
-
"#{state.upcase}/#{normalize_county_name(county)}"
|
51
|
-
end
|
52
|
-
|
53
|
-
private
|
54
|
-
|
55
|
-
def self.forward_hash
|
56
|
-
@@forward_hash ||= File.open(File.join(File.dirname(__FILE__), 'data', 'data.yml')) {|yf| YAML::load(yf)}
|
57
|
-
end
|
58
|
-
|
59
|
-
def self.reverse_hash
|
60
|
-
@@reverse_hash ||= forward_hash.inject({}) do |h, kv|
|
61
|
-
state_county = kv[0].split('/')
|
62
|
-
h[kv[1]] = { :state => state_county[0], :county => state_county[1]}
|
63
|
-
h
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
@@ -1,28 +0,0 @@
|
|
1
|
-
class Location
|
2
|
-
attr_accessor :city, :state, :zip, :raw
|
3
|
-
|
4
|
-
def initialize(str)
|
5
|
-
self.raw = str
|
6
|
-
|
7
|
-
tokens = str.split(/\s|,/).collect {|t| t.strip}
|
8
|
-
|
9
|
-
# try to parse last token as zip
|
10
|
-
#
|
11
|
-
self.zip = ZipCode.new(tokens.last) rescue nil
|
12
|
-
tokens = tokens.slice(0..-2) if self.zip
|
13
|
-
|
14
|
-
# try to parse last token as state
|
15
|
-
#
|
16
|
-
self.state = State.new(tokens.last) rescue nil
|
17
|
-
tokens = tokens.slice(0..-2) if self.state
|
18
|
-
|
19
|
-
# remainder must be city
|
20
|
-
#
|
21
|
-
self.city = tokens.join(' ').strip
|
22
|
-
self.city = nil if self.city.empty?
|
23
|
-
end
|
24
|
-
|
25
|
-
def to_s
|
26
|
-
[[self.city, (self.state.to_s(:abbr) rescue nil)].compact.join(','), self.zip.to_s].compact.join(' ')
|
27
|
-
end
|
28
|
-
end
|
@@ -1,119 +0,0 @@
|
|
1
|
-
class PersonName
|
2
|
-
attr_accessor :first, :middle, :last, :suffix, :raw
|
3
|
-
|
4
|
-
def initialize(str, fmt = :auto_detect)
|
5
|
-
raise "Unsupported Format" if !PARSE_FORMATS.include?(fmt)
|
6
|
-
|
7
|
-
if fmt == :auto_detect
|
8
|
-
fmt = str.include?(',') ? :last_first_middle : :first_middle_last
|
9
|
-
end
|
10
|
-
|
11
|
-
self.raw = str
|
12
|
-
|
13
|
-
parts = str.split(/\s|,/).collect {|p| p.strip}.reject {|p| PersonName.blank?(p) || p == ',' }
|
14
|
-
|
15
|
-
if parts.size == 1
|
16
|
-
self.last = parts.first
|
17
|
-
else
|
18
|
-
case fmt
|
19
|
-
when :first_middle_last
|
20
|
-
if parts.size > 2 and SUPPORTED_SUFFIXES.detect {|s| s.casecmp(parts.last) == 0}
|
21
|
-
self.suffix = PersonName.normalize_suffix(parts.last)
|
22
|
-
parts.delete_at(parts.size - 1)
|
23
|
-
end
|
24
|
-
|
25
|
-
self.first = parts.first if parts.size > 0
|
26
|
-
self.last = parts.last if parts.size > 1
|
27
|
-
|
28
|
-
if parts.size > 2 && ODD_LAST_NAME_PREFIXES.detect {|s| s.casecmp(parts[-2]) == 0}
|
29
|
-
self.last = "#{parts[-2]}#{self.last}"
|
30
|
-
parts.delete_at(parts.size - 2)
|
31
|
-
end
|
32
|
-
|
33
|
-
self.middle = parts[1..(parts.size - 2)].join(' ') if parts.size > 2
|
34
|
-
|
35
|
-
when :last_first_middle
|
36
|
-
self.last = parts.first if parts.size > 0
|
37
|
-
|
38
|
-
if parts.size > 1 && ODD_LAST_NAME_PREFIXES.detect {|s| s.casecmp(self.last) == 0}
|
39
|
-
self.last << parts[1]
|
40
|
-
parts.delete_at(1)
|
41
|
-
end
|
42
|
-
|
43
|
-
if parts.size > 2 and SUPPORTED_SUFFIXES.detect {|s| s.casecmp(parts[1]) == 0}
|
44
|
-
self.suffix = PersonName.normalize_suffix(parts[1])
|
45
|
-
parts.delete_at(1)
|
46
|
-
end
|
47
|
-
|
48
|
-
self.first = parts[1] if parts.size > 1
|
49
|
-
self.middle = parts[2..(parts.size - 1)].join(' ') if parts.size > 2
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
|
54
|
-
def to_s(fmt = :full)
|
55
|
-
raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)
|
56
|
-
|
57
|
-
case fmt
|
58
|
-
when :first, :middle, :last
|
59
|
-
self.send(fmt)
|
60
|
-
|
61
|
-
when :full
|
62
|
-
[self.first, self.middle, self.last, self.suffix].compact.join(' ')
|
63
|
-
|
64
|
-
when :full_reverse
|
65
|
-
[self.last, self.first, self.middle, self.suffix].compact.join(' ')
|
66
|
-
|
67
|
-
when :first_space_last
|
68
|
-
[self.first, self.last].compact.join(' ')
|
69
|
-
|
70
|
-
when :last_space_first
|
71
|
-
[self.last, self.first].compact.join(' ')
|
72
|
-
|
73
|
-
when :last_comma_first
|
74
|
-
[self.last, self.first].compact.join(',')
|
75
|
-
|
76
|
-
when :last_comma_space_first
|
77
|
-
[(self.first ? "#{self.last}," : self.last), self.first].compact.join(' ')
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def first_i
|
82
|
-
self.first[0,1] rescue nil
|
83
|
-
end
|
84
|
-
|
85
|
-
def middle_i
|
86
|
-
self.middle[0,1] rescue nil
|
87
|
-
end
|
88
|
-
|
89
|
-
def last_i
|
90
|
-
self.last[0,1] rescue nil
|
91
|
-
end
|
92
|
-
|
93
|
-
# Case-insensitively compares this name with another PersonName.
#
# other         - the object to compare against.
# initials_only - when true, first and middle names are compared by their
#                 initials (first_i / middle_i); the last name is always
#                 compared in full.
#
# A part that is nil on either side is treated as matching.
#
# Returns true or false; always false when other is not a PersonName.
def eql?(other, initials_only=false)
  return false unless other.is_a?(PersonName)

  [:first, :middle, :last].all? do |part|
    reader = (initials_only && part != :last) ? "#{part}_i" : part
    mine = self.send(reader)
    theirs = other.send(reader)
    mine && theirs ? mine.casecmp(theirs) == 0 : true
  end
end
|
103
|
-
|
104
|
-
# Parse-format identifiers. Frozen so the shared list cannot be mutated.
PARSE_FORMATS = [:first_middle_last, :last_first_middle, :auto_detect].freeze

# Formats accepted by #to_s. Frozen so the shared list cannot be mutated.
OUTPUT_FORMATS = [:first, :middle, :last, :full, :full_reverse, :first_space_last, :last_space_first, :last_comma_first, :last_comma_space_first].freeze
|
106
|
-
|
107
|
-
private
|
108
|
-
|
109
|
-
# True when the argument is nil or has no non-whitespace character.
def self.blank?(string_or_nil)
  return true if string_or_nil.nil?

  string_or_nil !~ /\S/
end
|
112
|
-
|
113
|
-
# Reduces a suffix to its first run of word characters (e.g. "JR." => "JR").
#
# Returns the argument unchanged when it contains no word characters
# (including when it is nil).
def self.normalize_suffix(suffix)
  # Explicit fallback instead of rescuing the failed match.
  suffix.to_s[/\w+/] || suffix
end
|
116
|
-
|
117
|
-
# Name suffixes recognised while parsing (matched case-insensitively).
# Frozen so the shared list cannot be mutated.
SUPPORTED_SUFFIXES = %w(II III IV V JR JR. SR SR.).freeze

# Prefixes that are glued onto the following token to form the last name.
# Frozen so the shared list cannot be mutated.
ODD_LAST_NAME_PREFIXES = %w(MC ST ST.).freeze
|
119
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
# Parses a 10- or 11-digit phone number whose country code is 1 and renders
# it in several formats.
class PhoneNumber
  # Formats accepted by #to_s.
  OUTPUT_FORMATS = [:full_digits, :local_digits, :full_formatted, :local_formatted]

  attr_accessor :country_code, :area_code, :number, :exchange, :suffix, :raw

  # Parses str into country code, area code, exchange and suffix.
  #
  # str - the text to parse. After trimming surrounding whitespace it may
  #       contain only digits, spaces, "-", "+", "(" and ")", and must hold
  #       10 digits, or 11 digits starting with the country code 1.
  #
  # Raises RuntimeError ("Unsupported Format") for anything else, including
  # non-String input.
  def initialize(str)
    self.raw = str

    digits = digits_in(str)
    raise "Unsupported Format" if digits.nil? || digits.length < 10 || digits.length > 11

    # An 11-digit number carries its own country code; otherwise assume 1.
    self.country_code = digits.length == 11 ? digits.slice!(0..0) : '1'
    raise "Unsupported Format" if self.country_code != '1'

    self.area_code = digits.slice!(0..2)

    # The seven digits left at this point are exchange + suffix.
    self.number = digits.dup

    self.exchange = digits.slice!(0..2)
    self.suffix = digits
  end

  # Renders the number.
  #
  # fmt - one of OUTPUT_FORMATS (defaults to :full_formatted).
  #
  # Raises RuntimeError ("Unsupported Format") for any other fmt.
  def to_s(fmt = :full_formatted)
    raise "Unsupported Format" unless OUTPUT_FORMATS.include?(fmt)

    case fmt
    when :full_digits
      "#{self.country_code}#{self.area_code}#{self.exchange}#{self.suffix}"
    when :local_digits
      "#{self.exchange}#{self.suffix}"
    when :full_formatted
      "#{self.country_code} (#{self.area_code}) #{self.exchange}-#{self.suffix}"
    when :local_formatted
      "#{self.exchange}-#{self.suffix}"
    end
  end

  private

  # Returns only the digits of str, or nil when str is not a String or holds
  # characters outside the accepted set. \A and \z anchor the whole string,
  # so input spanning several lines is rejected.
  def digits_in(str)
    return nil unless str.is_a?(String)

    candidate = str.strip
    return nil unless candidate =~ /\A[-+()\d ]+\z/

    candidate.gsub(/[^\d]/, '')
  end
end
|
@@ -1,81 +0,0 @@
|
|
1
|
-
# Recognises a US state (or DC) from its two-letter abbreviation or its full
# name, case-insensitively, and renders it in either form.
class State
  # Formats accepted by #to_s.
  OUTPUT_FORMATS = [:abbr, :full]

  # Abbreviation symbol => lowercase full name. Every name must be lowercase
  # because lookups are done on a downcased token.
  FORWARD = {
    :al => "alabama",
    :ak => "alaska",
    :az => "arizona",
    :ar => "arkansas",
    :ca => "california",
    :co => "colorado",
    :ct => "connecticut",
    :de => "delaware",
    :dc => "district of columbia",
    :fl => "florida",
    :ga => "georgia",
    :hi => "hawaii",
    :id => "idaho",
    :il => "illinois",
    :in => "indiana",
    :ia => "iowa",
    :ks => "kansas",
    :ky => "kentucky",
    :la => "louisiana",
    :me => "maine",
    :md => "maryland",
    :ma => "massachusetts",
    :mi => "michigan",
    :mn => "minnesota",
    :ms => "mississippi",
    :mo => "missouri",
    :mt => "montana",
    :ne => "nebraska",
    :nv => "nevada",
    :nh => "new hampshire",
    :nj => "new jersey",
    :nm => "new mexico",
    :ny => "new york",
    :nc => "north carolina",
    :nd => "north dakota",
    :oh => "ohio",
    :ok => "oklahoma",
    :or => "oregon",
    :pa => "pennsylvania",
    :ri => "rhode island",
    :sc => "south carolina",
    :sd => "south dakota",
    :tn => "tennessee",
    :tx => "texas",
    :ut => "utah",
    :vt => "vermont",
    :va => "virginia",
    :wa => "washington",
    :wv => "west virginia",
    :wi => "wisconsin",
    :wy => "wyoming"
  }

  # Lowercase full name => abbreviation symbol.
  REVERSE = FORWARD.invert

  attr_accessor :sym, :raw

  # str - a state abbreviation or full name; surrounding whitespace and
  #       letter case are ignored.
  #
  # Raises RuntimeError ("Unsupported Format") when str matches neither.
  def initialize(str)
    self.raw = str
    token = str.strip.downcase

    # Compare against the known abbreviations as strings so arbitrary input
    # is never converted into a symbol.
    self.sym = FORWARD.keys.detect { |abbr| abbr.to_s == token } || REVERSE[token]

    raise "Unsupported Format" unless self.sym
  end

  # Renders the state.
  #
  # fmt - :full (default) for the name, :abbr for the upper-case abbreviation.
  #
  # NOTE(review): String#capitalize upcases only the first character, so
  # multi-word names come out as e.g. "New york"; kept as-is for callers.
  #
  # Raises RuntimeError ("Unsupported Format") for any other fmt.
  def to_s(fmt = :full)
    raise "Unsupported Format" unless OUTPUT_FORMATS.include?(fmt)

    fmt == :full ? FORWARD[self.sym].capitalize : self.sym.to_s.upcase
  end
end
|
@@ -1,178 +0,0 @@
|
|
1
|
-
# Splits a single-line street address into number, pre-direction, street
# name, suffix, post-direction, unit type and unit.
#
# Directions, suffixes and unit types are stored as the symbol keys of
# DIRECTIONS, SUFFIXES and UNIT_TYPES; each key maps to its accepted
# spellings, the long form first.
class StreetAddress
  DIRECTIONS = {
    :north => %w(north n n.),
    :northeast => %w(northeast ne ne. n.e.),
    :east => %w(east e e.),
    :southeast => %w(southeast se se. s.e.),
    :south => %w(south s s.),
    :southwest => %w(southwest sw sw. s.w.),
    :west => %w(west w w.),
    :northwest => %w(northwest nw nw. n.w.)
  }

  SUFFIXES = {
    :alley => %w(alley al al.),
    :avenue => %w(avenue ave ave. av av.),
    :beach => %w(beach bch bch.),
    :bend => %w(bend),
    :boulevard => %w(boulevard blvd blvd. blv blv.),
    :center => %w(center ctr ctr.),
    :circle => %w(circle cir cir.),
    :cliff => %w(cliff clf clf.),
    :club => %w(club),
    :condo => %w(condo con con.),
    :court => %w(court ct ct. cor cor.),
    :cove => %w(cove),
    :creek => %w(creek crk crk.),
    :crossing => %w(crossing xing xing. crs crs.),
    :drive => %w(drive dr dr.),
    :extension => %w(extension ext ext.),
    :freeway => %w(freeway fwy fwy.),
    :gardens => %w(gardens gdns gdns.),
    :glen => %w(glen gl gl.),
    :green => %w(green grn grn.),
    :heights => %w(heights hts hts.),
    :highway => %w(highway hwy hwy. hgwy hgwy.),
    :hill => %w(hill),
    :knoll => %w(knoll knl knl.),
    :lake => %w(lake),
    :lane => %w(lane ln ln.),
    :landing => %w(landing lndg lndg.),
    :loop => %w(loop),
    :meadows => %w(meadows mdws mdws.),
    :manor => %w(manor mnr mnr.),
    :mountain => %w(mountain mtn mtn. mnt mnt.),
    :oaks => %w(oaks),
    :oval => %w(oval),
    :park => %w(park pk pk. prk prk.),
    :parkway => %w(parkway pkwy pkwy. pky pky.),
    :pier => %w(pier),
    :place => %w(place pl pl.),
    :plaza => %w(plaza plz plz.),
    :point => %w(point pt pt. pnt pnt.),
    :ridge => %w(ridge ri ri.),
    :road => %w(road rd rd.),
    :row => %w(row),
    :run => %w(run),
    :springs => %w(springs spgs spgs.),
    :square => %w(square sq sq.),
    :street => %w(street st st.),
    :station => %w(station sta sta.),
    :terrace => %w(terrace ter ter. te te.),
    :turnpike => %w(turnpike tpke tpke.),
    :trace => %w(trace trc trc.),
    :trail => %w(trail trl trl. tl tl.),
    :valley => %w(valley vly vly.),
    :walk => %w(walk),
    :way => %w(way)
  }

  UNIT_TYPES = {
    :suite => %w(suite ste ste.),
    :number => %w(number # nbr nbr.),
    :apartment => %w(apartment apt apt.)
  }

  # Forms accepted by StreetAddress.string_for; the position of a form here
  # is the index of the matching spelling in each list above.
  SUPPORTED_FORMS = [:long, :short]

  attr_accessor :number, :pre_direction, :name, :suffix, :post_direction, :unit_type, :unit, :raw

  # Parses str, which is split into tokens on whitespace and commas.
  def initialize(str)
    self.raw = str
    tokens = str.split(/[\s,]/).reject { |s| s.empty? }

    # If the first token starts with a digit it is the house number.
    self.number = tokens.shift if tokens.first =~ /\A\d/

    # If at least two tokens remain and the next-to-last one is a unit type,
    # the last two tokens are the unit type and the unit.
    if tokens.size > 1
      self.unit_type = StreetAddress.find_token(tokens[-2], UNIT_TYPES)
      if self.unit_type
        self.unit = tokens[-1]
        tokens.slice!(tokens.size - 2, 2)
      end
    end

    # If the last remaining token is a direction, it is the post-direction.
    # The token itself is remembered in case it turns out to be the name.
    post_direction_token = nil
    if tokens.size > 0
      self.post_direction = StreetAddress.find_token(tokens[-1], DIRECTIONS)
      post_direction_token = tokens.pop if self.post_direction
    end

    # If the last remaining token is a street suffix, set suffix and drop it.
    if tokens.size > 0
      self.suffix = StreetAddress.find_token(tokens[-1], SUFFIXES)
      tokens.pop if self.suffix
    end

    # If at least two tokens remain and the first is a direction, it is the
    # pre-direction.
    if tokens.size > 1
      self.pre_direction = StreetAddress.find_token(tokens.first, DIRECTIONS)
      tokens.shift if self.pre_direction
    end

    # Whatever remains is the street name. If nothing remains, the token
    # taken as post-direction (if any) was really the name.
    if tokens.size > 0
      self.name = tokens.join(' ')
    else
      self.name = post_direction_token
      self.post_direction = nil
    end

    validate_parts
  end

  # Renders the address with the long form of every direction, suffix and
  # unit type, joining the parts that are present with single spaces.
  def to_s
    parts = []
    parts << self.number if self.number
    parts << DIRECTIONS[self.pre_direction].first if self.pre_direction
    parts << self.name if self.name
    parts << SUFFIXES[self.suffix].first if self.suffix
    parts << DIRECTIONS[self.post_direction].first if self.post_direction
    parts << UNIT_TYPES[self.unit_type].first if self.unit_type
    parts << self.unit if self.unit
    parts.join(' ')
  end

  # Returns the spelling of a direction, suffix or unit-type symbol.
  #
  # symbol - a key of DIRECTIONS, SUFFIXES or UNIT_TYPES.
  # form   - one of SUPPORTED_FORMS.
  #
  # Returns the spelling for that form, falling back to the long form when
  # the symbol has no separate spelling for it, or nil for an unknown symbol.
  # Raises RuntimeError when form is not supported.
  def self.string_for(symbol, form)
    raise "Requested unknown form \"#{form}\" for :#{symbol}" unless SUPPORTED_FORMS.include?(form)

    spellings = DIRECTIONS[symbol] || SUFFIXES[symbol] || UNIT_TYPES[symbol]
    return nil unless spellings

    spellings[SUPPORTED_FORMS.index(form)] || spellings.first
  end

  # Returns the key of values whose spellings include token (compared
  # case-insensitively), or nil when none does.
  def self.find_token(token, values)
    values.keys.detect do |key|
      values[key].any? { |spelling| spelling.casecmp(token) == 0 }
    end
  end

  private

  # Raises RuntimeError when a direction, suffix or unit type that has been
  # set is not a key of the corresponding table.
  def validate_parts
    [:pre_direction, :suffix, :post_direction, :unit_type].each do |part|
      value = self.send(part)
      next unless value

      legal_values = part == :suffix ? SUFFIXES : part == :unit_type ? UNIT_TYPES : DIRECTIONS
      raise "Invalid #{part.to_s} \"#{value}\"" unless legal_values.include?(value)
    end
  end
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
# Parses a 5-digit ZIP code with an optional "-NNNN" extension.
class ZipCode
  attr_accessor :base, :plus_four, :raw

  # str - the text to parse; surrounding whitespace is ignored.
  #
  # Raises RuntimeError ("Unsupported Format") unless str is a String of the
  # form "NNNNN" or "NNNNN-NNNN".
  def initialize(str)
    self.raw = str

    # \A and \z anchor the whole string, so input spanning several lines is
    # rejected.
    match = str.is_a?(String) ? /\A(\d{5})(?:-(\d{4}))?\z/.match(str.strip) : nil
    raise "Unsupported Format" unless match

    self.base = match[1]
    self.plus_four = match[2] # nil when no extension was given
  end

  # Returns "NNNNN" or, when an extension is present, "NNNNN-NNNN".
  def to_s
    [self.base, self.plus_four].compact.join('-')
  end
end
|