people_places_things 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/README.textile +66 -0
- data/Rakefile +40 -0
- data/VERSION +1 -0
- data/lib/people_places_things/VERSION +1 -0
- data/lib/people_places_things/ansi_counties.rb +68 -0
- data/lib/people_places_things/data/data.yml +3228 -0
- data/lib/people_places_things/data/process_data.rb +33 -0
- data/lib/people_places_things/data/raw.txt +3235 -0
- data/lib/people_places_things/location.rb +30 -0
- data/lib/people_places_things/person_name.rb +121 -0
- data/lib/people_places_things/phone_number.rb +48 -0
- data/lib/people_places_things/state.rb +83 -0
- data/lib/people_places_things/street_address.rb +180 -0
- data/lib/people_places_things/zip_code.rb +17 -0
- data/lib/people_places_things.rb +11 -0
- data/people_places_things.gemspec +71 -0
- data/spec/ansi_counties_spec.rb +27 -0
- data/spec/helper.rb +2 -0
- data/spec/location_spec.rb +52 -0
- data/spec/person_name_spec.rb +175 -0
- data/spec/phone_number_spec.rb +59 -0
- data/spec/state_spec.rb +29 -0
- data/spec/street_address_spec.rb +159 -0
- data/spec/zip_code_spec.rb +27 -0
- metadata +86 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
module PeoplePlacesThings
  # Parses a free-form "City, ST 12345" style string into city, state and zip.
  #
  # Parsing works from the right-hand side: the last token is tried as a zip
  # code, then the (new) last token is tried as a state, and whatever remains
  # is taken to be the city. Any component that cannot be recognised is nil.
  class Location
    attr_accessor :city, :state, :zip, :raw

    # str - the raw location text, e.g. "Boston, MA 02101".
    #
    # Never raises for unparseable components; they are simply left nil.
    def initialize(str)
      self.raw = str

      # Splitting on every single whitespace/comma character yields empty
      # tokens for runs such as ", ". Drop them so a blank token is never
      # mistaken for the zip or state slot (e.g. "Boston, MA, 02101").
      tokens = str.split(/\s|,/).collect { |t| t.strip }.reject { |t| t.empty? }

      # try to parse last token as zip
      #
      self.zip = parse_trailing(tokens, ZipCode)
      tokens = tokens.slice(0..-2) if self.zip

      # try to parse last token as state
      #
      self.state = parse_trailing(tokens, State)
      tokens = tokens.slice(0..-2) if self.state

      # remainder must be city
      #
      remainder = tokens.join(' ').strip
      self.city = remainder.empty? ? nil : remainder
    end

    # Returns "City,ST 12345", omitting whichever parts are missing. Missing
    # parts never leave stray separators or padding behind.
    def to_s
      city_state = [self.city, state_abbreviation].compact.join(',')
      zip_text = self.zip ? self.zip.to_s : nil

      [city_state, zip_text].reject { |part| part.nil? || part.empty? }.join(' ')
    end

    private

    # Builds klass from the last token. Returns nil when there is no token or
    # when klass rejects it (the parser classes signal rejection by raising).
    def parse_trailing(tokens, klass)
      return nil if tokens.empty?
      klass.new(tokens.last)
    rescue StandardError
      nil
    end

    # Abbreviated state, or nil when no state is set. The rescue keeps to_s
    # usable when state has been assigned an object whose to_s does not accept
    # a format argument.
    def state_abbreviation
      return nil unless self.state
      self.state.to_s(:abbr)
    rescue StandardError
      nil
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module PeoplePlacesThings
  # Splits a personal name into first / middle / last / suffix components and
  # renders it back in a number of formats.
  class PersonName
    PARSE_FORMATS = [:first_middle_last, :last_first_middle, :auto_detect]
    OUTPUT_FORMATS = [:first, :middle, :last, :full, :full_reverse, :first_space_last, :last_space_first, :last_comma_first, :last_comma_space_first]

    SUPPORTED_SUFFIXES = %w(II III IV V JR JR. SR SR.)
    ODD_LAST_NAME_PREFIXES = %w(MC ST ST.)

    attr_accessor :first, :middle, :last, :suffix, :raw

    # str - the raw name text.
    # fmt - one of PARSE_FORMATS. With :auto_detect the presence of a comma
    #       selects :last_first_middle, otherwise :first_middle_last.
    #
    # Raises RuntimeError ("Unsupported Format") for an unknown fmt.
    def initialize(str, fmt = :auto_detect)
      raise "Unsupported Format" if !PARSE_FORMATS.include?(fmt)

      if fmt == :auto_detect
        fmt = str.include?(',') ? :last_first_middle : :first_middle_last
      end

      self.raw = str

      # Commas are consumed by the split itself, so only blank tokens need
      # to be discarded here.
      parts = str.split(/\s|,/).collect { |p| p.strip }.reject { |p| PersonName.blank?(p) }

      if parts.size == 1
        # A single word is treated as a last name.
        self.last = parts.first
      elsif fmt == :first_middle_last
        parse_first_middle_last(parts)
      else
        parse_last_first_middle(parts)
      end
    end

    # Renders the name in one of OUTPUT_FORMATS. Missing components are
    # skipped. For :first, :middle and :last the bare attribute is returned
    # (which may be nil).
    #
    # Raises RuntimeError ("Unsupported Format") for an unknown fmt.
    def to_s(fmt = :full)
      raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)

      case fmt
      when :first, :middle, :last
        self.send(fmt)

      when :full
        [self.first, self.middle, self.last, self.suffix].compact.join(' ')

      when :full_reverse
        [self.last, self.first, self.middle, self.suffix].compact.join(' ')

      when :first_space_last
        [self.first, self.last].compact.join(' ')

      when :last_space_first
        [self.last, self.first].compact.join(' ')

      when :last_comma_first
        [self.last, self.first].compact.join(',')

      when :last_comma_space_first
        [(self.first ? "#{self.last}," : self.last), self.first].compact.join(' ')
      end
    end

    # First character of the first name, or nil when there is none.
    def first_i
      initial_of(self.first)
    end

    # First character of the middle name, or nil when there is none.
    def middle_i
      initial_of(self.middle)
    end

    # First character of the last name, or nil when there is none.
    def last_i
      initial_of(self.last)
    end

    # Case-insensitive comparison of first, middle and last names. A component
    # missing on either side is treated as matching. With initials_only the
    # first and middle names are compared by initial only; the last name is
    # always compared in full.
    #
    # Returns false when other is not a PersonName.
    def eql?(other, initials_only = false)
      return false unless other.is_a?(PersonName)

      [:first, :middle, :last].all? do |k|
        msg = (k != :last && initials_only) ? "#{k}_i" : k
        me = self.send(msg)
        them = other.send(msg)
        me && them ? me.casecmp(them) == 0 : true
      end
    end

    # True when the argument is nil or contains no non-whitespace character.
    # Kept public: it is invoked with an explicit receiver.
    def self.blank?(string_or_nil)
      string_or_nil.nil? || string_or_nil !~ /\S/
    end

    # Reduces a suffix to its first run of word characters ("Jr." -> "Jr").
    # Returns the argument unchanged when it has no word characters.
    # Kept public: it is invoked with an explicit receiver.
    def self.normalize_suffix(suffix)
      suffix.to_s[/\w+/] || suffix
    end

    private

    # Handles "First [Middle ...] Last [Suffix]". Mutates parts.
    def parse_first_middle_last(parts)
      # A trailing suffix is only recognised when at least two other words
      # remain to serve as first and last name.
      if parts.size > 2 && supported_suffix?(parts.last)
        self.suffix = PersonName.normalize_suffix(parts.pop)
      end

      self.first = parts.first if parts.size > 0
      self.last = parts.last if parts.size > 1

      # Glue a detached prefix onto the last name: "Mc Donald" -> "McDonald".
      if parts.size > 2 && odd_last_name_prefix?(parts[-2])
        self.last = "#{parts[-2]}#{self.last}"
        parts.delete_at(parts.size - 2)
      end

      self.middle = parts[1..(parts.size - 2)].join(' ') if parts.size > 2
    end

    # Handles "Last [Suffix], First [Middle ...]". Mutates parts.
    def parse_last_first_middle(parts)
      self.last = parts.first if parts.size > 0

      # Glue a detached prefix onto the last name: "Mc Donald" -> "McDonald".
      if parts.size > 1 && odd_last_name_prefix?(self.last)
        self.last = "#{self.last}#{parts.delete_at(1)}"
      end

      # In this format the suffix directly follows the last name.
      if parts.size > 2 && supported_suffix?(parts[1])
        self.suffix = PersonName.normalize_suffix(parts.delete_at(1))
      end

      self.first = parts[1] if parts.size > 1
      self.middle = parts[2..(parts.size - 1)].join(' ') if parts.size > 2
    end

    # Case-insensitive membership test against SUPPORTED_SUFFIXES.
    def supported_suffix?(token)
      SUPPORTED_SUFFIXES.any? { |s| s.casecmp(token) == 0 }
    end

    # Case-insensitive membership test against ODD_LAST_NAME_PREFIXES.
    def odd_last_name_prefix?(token)
      ODD_LAST_NAME_PREFIXES.any? { |s| s.casecmp(token) == 0 }
    end

    # Leading character of value, or nil when value is not set.
    def initial_of(value)
      value ? value[0, 1] : nil
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module PeoplePlacesThings
  # Parses a 10- or 11-digit North American phone number (country code 1)
  # and renders it in several formats.
  class PhoneNumber
    OUTPUT_FORMATS = [:full_digits, :local_digits, :full_formatted, :local_formatted]

    attr_accessor :country_code, :area_code, :number, :exchange, :suffix, :raw

    # str - the raw phone text. After trimming it may contain only digits,
    #       spaces and the characters "-", "+", "(" and ")".
    #
    # Raises RuntimeError ("Unsupported Format") when the text contains other
    # characters, does not hold 10 or 11 digits, or has a country code other
    # than 1.
    def initialize(str)
      digits = extract_digits(str)
      raise "Unsupported Format" if !digits || digits.length < 10 || digits.length > 11

      # Recorded for parity with the other parser classes in this library.
      self.raw = str

      # An eleventh leading digit is the country code; otherwise assume 1.
      self.country_code = digits.length == 11 ? digits[0, 1] : '1'
      raise "Unsupported Format" if self.country_code != '1'

      national = digits[-10, 10]

      self.area_code = national[0, 3]
      self.number = national[3, 7]
      self.exchange = national[3, 3]
      self.suffix = national[6, 4]
    end

    # Renders the number in one of OUTPUT_FORMATS:
    #   :full_digits     -> "15135551234"
    #   :local_digits    -> "5551234"
    #   :full_formatted  -> "1 (513) 555-1234"
    #   :local_formatted -> "555-1234"
    #
    # Raises RuntimeError ("Unsupported Format") for an unknown fmt.
    def to_s(fmt = :full_formatted)
      raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)

      case fmt
      when :full_digits
        "#{self.country_code}#{self.area_code}#{self.exchange}#{self.suffix}"

      when :local_digits
        "#{self.exchange}#{self.suffix}"

      when :full_formatted
        "#{self.country_code} (#{self.area_code}) #{self.exchange}-#{self.suffix}"

      when :local_formatted
        "#{self.exchange}-#{self.suffix}"
      end
    end

    private

    # Returns only the digits of str, or nil when str is not string-like or
    # contains anything besides digits, spaces, "-", "+", "(" and ")".
    def extract_digits(str)
      return nil unless str.respond_to?(:strip)

      candidate = str.strip
      # \A / \z anchor the whole string. With ^ / $ a single acceptable line
      # inside multi-line input would be enough to pass validation.
      return nil unless candidate =~ /\A[-+()\d ]+\z/

      candidate.gsub(/\D/, '')
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module PeoplePlacesThings
  # Recognises a US state (or DC) from either its two-letter abbreviation or
  # its full name, case-insensitively.
  class State
    OUTPUT_FORMATS = [:abbr, :full]

    # Abbreviation symbol => lower-case full name. Every name must be lower
    # case, because lookups are done against a downcased token.
    FORWARD = {
      :al => "alabama",
      :ak => "alaska",
      :az => "arizona",
      :ar => "arkansas",
      :ca => "california",
      :co => "colorado",
      :ct => "connecticut",
      :de => "delaware",
      :dc => "district of columbia",
      :fl => "florida",
      :ga => "georgia",
      :hi => "hawaii",
      :id => "idaho",
      :il => "illinois",
      :in => "indiana",
      :ia => "iowa",
      :ks => "kansas",
      :ky => "kentucky",
      :la => "louisiana",
      :me => "maine",
      :md => "maryland",
      :ma => "massachusetts",
      :mi => "michigan",
      :mn => "minnesota",
      :ms => "mississippi",
      :mo => "missouri",
      :mt => "montana",
      :ne => "nebraska",
      :nv => "nevada",
      :nh => "new hampshire",
      :nj => "new jersey",
      :nm => "new mexico",
      :ny => "new york",
      :nc => "north carolina",
      :nd => "north dakota",
      :oh => "ohio",
      :ok => "oklahoma",
      :or => "oregon",
      :pa => "pennsylvania",
      :ri => "rhode island",
      :sc => "south carolina",
      :sd => "south dakota",
      :tn => "tennessee",
      :tx => "texas",
      :ut => "utah",
      :vt => "vermont",
      :va => "virginia",
      :wa => "washington",
      :wv => "west virginia",
      :wi => "wisconsin",
      :wy => "wyoming"
    }

    # Lower-case full name => abbreviation symbol.
    REVERSE = FORWARD.inject({}) { |r, f| r[f[1]] = f[0]; r }

    attr_accessor :sym, :raw

    # str - a state abbreviation ("OH") or full name ("Ohio"); surrounding
    #       whitespace and letter case are ignored.
    #
    # Raises RuntimeError ("Unsupported Format") when str names no known state.
    def initialize(str)
      self.raw = str
      token = str.strip.downcase

      # Compare as strings so arbitrary input is never converted to a symbol.
      # Abbreviations are tried before full names.
      self.sym = FORWARD.keys.detect { |abbr| abbr.to_s == token } || REVERSE[token]

      raise "Unsupported Format" if !self.sym
    end

    # Renders the state as its upper-case abbreviation (:abbr) or its
    # title-cased full name (:full), e.g. "NY" / "New York".
    #
    # Raises RuntimeError ("Unsupported Format") for an unknown fmt.
    def to_s(fmt = :full)
      raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)
      return self.sym.to_s.upcase unless fmt == :full

      # Capitalise every word; "of" stays lower-case ("District of Columbia").
      FORWARD[self.sym].split(' ').map { |word| word == 'of' ? word : word.capitalize }.join(' ')
    end
  end
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
module PeoplePlacesThings
  # Splits a street address line into number, pre-direction, name, suffix,
  # post-direction, unit type and unit.
  #
  # Directions, suffixes and unit types are stored as the symbol keys of
  # DIRECTIONS, SUFFIXES and UNIT_TYPES respectively.
  class StreetAddress
    # Each table maps a symbol to its accepted spellings; the first spelling
    # is the long form and the second (when present) the short form.
    DIRECTIONS = {
      :north => %w(north n n.),
      :northeast => %w(northeast ne ne. n.e.),
      :east => %w(east e e.),
      :southeast => %w(southeast se se. s.e.),
      :south => %w(south s s.),
      :southwest => %w(southwest sw sw. s.w.),
      :west => %w(west w w.),
      :northwest => %w(northwest nw nw. n.w.)
    }

    SUFFIXES = {
      :alley => %w(alley al al.),
      :avenue => %w(avenue ave ave. av av.),
      :beach => %w(beach bch bch.),
      :bend => %w(bend),
      :boulevard => %w(boulevard blvd blvd. blv blv.),
      :center => %w(center ctr ctr.),
      :circle => %w(circle cir cir.),
      :cliff => %w(cliff clf clf.),
      :club => %w(club),
      :condo => %w(condo con con.),
      :court => %w(court ct ct. cor cor.),
      :cove => %w(cove),
      :creek => %w(creek crk crk.),
      :crossing => %w(crossing xing xing. crs crs.),
      :drive => %w(drive dr dr.),
      :extension => %w(extension ext ext.),
      :freeway => %w(freeway fwy fwy.),
      :gardens => %w(gardens gdns gdns.),
      :glen => %w(glen gl gl.),
      :green => %w(green grn grn.),
      :heights => %w(heights hts hts.),
      :highway => %w(highway hwy hwy. hgwy hgwy.),
      :hill => %w(hill),
      :knoll => %w(knoll knl knl.),
      :lake => %w(lake),
      :lane => %w(lane ln ln.),
      :landing => %w(landing lndg lndg.),
      :loop => %w(loop),
      :meadows => %w(meadows mdws mdws.),
      :manor => %w(manor mnr mnr.),
      :mountain => %w(mountain mtn mtn. mnt mnt.),
      :oaks => %w(oaks),
      :oval => %w(oval),
      :park => %w(park pk pk. prk prk.),
      :parkway => %w(parkway pkwy pkwy. pky pky.),
      :pier => %w(pier),
      :place => %w(place pl pl.),
      :plaza => %w(plaza plz plz.),
      :point => %w(point pt pt. pnt pnt.),
      :ridge => %w(ridge ri ri.),
      :road => %w(road rd rd.),
      :row => %w(row),
      :run => %w(run),
      :springs => %w(springs spgs spgs.),
      :square => %w(square sq sq.),
      :street => %w(street st st.),
      :station => %w(station sta sta.),
      :terrace => %w(terrace ter ter. te te.),
      :turnpike => %w(turnpike tpke tpke.),
      :trace => %w(trace trc trc.),
      :trail => %w(trail trl trl. tl tl.),
      :valley => %w(valley vly vly.),
      :walk => %w(walk),
      :way => %w(way)
    }

    UNIT_TYPES = {
      :suite => %w(suite ste ste.),
      :number => %w(number # nbr nbr.),
      :apartment => %w(apartment apt apt.)
    }

    SUPPORTED_FORMS = [:long, :short]

    attr_accessor :number, :pre_direction, :name, :suffix, :post_direction, :unit_type, :unit, :raw

    # str - the raw street address line, e.g. "123 N Main St Apt 4".
    #
    # Tokens are peeled off the ends in a fixed order (number, unit, post
    # direction, suffix, pre direction); whatever is left becomes the name.
    def initialize(str)
      # Recorded for parity with the other parser classes in this library.
      self.raw = str

      tokens = str.split(/[\s,]/).reject { |t| t.empty? }

      # A first token that starts with a digit is the house number.
      #
      self.number = tokens.shift if tokens.first =~ /\A\d/

      # If at least two tokens remain, check next-to-last token as unit type.
      # If so, set unit_type and unit, and delete both tokens.
      #
      if tokens.size > 1
        self.unit_type = StreetAddress.find_token(tokens[-2], UNIT_TYPES)
        if self.unit_type
          self.unit = tokens.pop
          tokens.pop
        end
      end

      # If at least one token remains, check last token for directionality.
      # If so, set post_direction and delete the token, remembering its
      # original text in case it turns out to be the street name.
      #
      post_direction_token = nil
      if tokens.size > 0
        self.post_direction = StreetAddress.find_token(tokens.last, DIRECTIONS)
        post_direction_token = tokens.pop if self.post_direction
      end

      # If at least one token remains, check last token for suffix.
      # If so, set suffix and delete the token.
      #
      if tokens.size > 0
        self.suffix = StreetAddress.find_token(tokens.last, SUFFIXES)
        tokens.pop if self.suffix
      end

      # If at least two tokens remain, check first for directionality.
      # If so, set pre_direction and delete the token.
      #
      if tokens.size > 1
        self.pre_direction = StreetAddress.find_token(tokens.first, DIRECTIONS)
        tokens.shift if self.pre_direction
      end

      # If any tokens remain they form the name. Otherwise the token taken as
      # post_direction (if any) was really the name, e.g. "123 N".
      #
      if tokens.size > 0
        self.name = tokens.join(' ')
      else
        self.name = post_direction_token
        self.post_direction = nil
      end

      validate_parts
    end

    # Renders the address with the long form of every recognised component,
    # e.g. "123 north Main street apartment 4".
    def to_s
      parts = []
      parts << self.number if self.number
      parts << DIRECTIONS[self.pre_direction].first if self.pre_direction
      parts << self.name if self.name
      parts << SUFFIXES[self.suffix].first if self.suffix
      parts << DIRECTIONS[self.post_direction].first if self.post_direction
      parts << UNIT_TYPES[self.unit_type].first if self.unit_type
      parts << self.unit if self.unit
      parts.join(' ')
    end

    # Returns the :long or :short spelling for a direction, suffix or unit
    # type symbol. Falls back to the long spelling when no short one exists,
    # and returns nil for an unknown symbol.
    #
    # Raises RuntimeError when form is not one of SUPPORTED_FORMS.
    def self.string_for(symbol, form)
      raise "Requested unknown form \"#{form}\" for :#{symbol}" if !SUPPORTED_FORMS.include?(form)

      spellings = DIRECTIONS[symbol] || SUFFIXES[symbol] || UNIT_TYPES[symbol]
      return nil unless spellings

      spellings[SUPPORTED_FORMS.index(form)] || spellings.first
    end

    # Returns the key of values whose spelling list contains token
    # (case-insensitively), or nil when there is none.
    # Kept public: it is invoked with an explicit receiver.
    def self.find_token(token, values)
      values.each do |key, spellings|
        return key if spellings.any? { |v| v.casecmp(token) == 0 }
      end

      nil
    end

    private

    # Raises RuntimeError when a direction, suffix or unit type attribute
    # holds a value that is not a key of the corresponding table.
    def validate_parts
      [:pre_direction, :suffix, :post_direction, :unit_type].each do |p|
        value = self.send(p)
        next unless value

        legal_values = p == :suffix ? SUFFIXES : p == :unit_type ? UNIT_TYPES : DIRECTIONS
        raise "Invalid #{p.to_s} \"#{value}\"" if !legal_values.include?(value)
      end
    end
  end
end
|