dburkes-people_places_things 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -6,6 +6,7 @@ require 'spec/rake/spectask'
6
6
 
7
7
  desc "Run all specs"
8
8
  Spec::Rake::SpecTask.new('specs') do |t|
9
+ t.libs << 'lib'
9
10
  t.spec_files = FileList['spec/**/*.rb']
10
11
  end
11
12
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.0.0
1
+ 2.1.0
@@ -1,10 +1,10 @@
1
- require File.join(File.dirname(__FILE__), 'people_places_things', 'street_address', 'street_address')
2
- require File.join(File.dirname(__FILE__), 'people_places_things', 'person_name', 'person_name')
3
- require File.join(File.dirname(__FILE__), 'people_places_things', 'ansi_counties', 'ansi_counties')
4
- require File.join(File.dirname(__FILE__), 'people_places_things', 'phone_number', 'phone_number')
5
- require File.join(File.dirname(__FILE__), 'people_places_things', 'zip_code', 'zip_code')
6
- require File.join(File.dirname(__FILE__), 'people_places_things', 'state', 'state')
7
- require File.join(File.dirname(__FILE__), 'people_places_things', 'location', 'location')
1
+ require 'people_places_things/street_address'
2
+ require 'people_places_things/person_name'
3
+ require 'people_places_things/ansi_counties'
4
+ require 'people_places_things/phone_number'
5
+ require 'people_places_things/zip_code'
6
+ require 'people_places_things/state'
7
+ require 'people_places_things/location'
8
8
 
9
9
  module PeoplePlacesThings
10
10
  VERSION = File.read('VERSION').chomp.strip rescue "Unknown"
@@ -0,0 +1,68 @@
1
+ require 'yaml'
2
+
3
module PeoplePlacesThings
  # Provides two-way mapping between U.S. state and county names and their associated ANSI codes (formerly known as FIPS codes).
  #
  # == Examples
  #
  # To get the ANSI code for a state and county, you call ANSICounties.code_for, like so:
  #
  #   code = ANSICounties.code_for('GA', 'FULTON')
  #   # => 13121
  #
  # You can also pass a single Hash argument:
  #
  #   code = ANSICounties.code_for(:state => 'ga', :county => 'fulton')
  #   # => 13121
  #
  # Conversely, to get the state and county for an ANSI code, you call ANSICounties.data_for:
  #
  #   hash = ANSICounties.data_for(13121)
  #   # => { :state => 'GA', :county => 'FULTON' }
  #
  # == Data source
  #
  # The data that makes up <tt>lib/people_places_things/data/data.yml</tt> was generated from
  # <tt>lib/people_places_things/data/raw.txt</tt>, which was downloaded from
  # the {US Census website}[http://www.census.gov/geo/www/ansi/download.html].
  class ANSICounties

    # Get the ANSI code for the given state and county. If _data_or_state_ is a Hash, then it must contain
    # <em>state</em> and <em>county</em> keys, otherwise, it is assumed to be a String containing the state name.
    #
    # Returns nil when the pair is unknown or when the lookup fails for any reason
    # (nil arguments, unreadable data file, ...); the lookup is deliberately best-effort.
    def self.code_for(data_or_state, county = nil)
      if data_or_state.is_a?(Hash)
        state  = data_or_state[:state]
        county = data_or_state[:county]
      else
        state = data_or_state
      end

      begin
        forward_hash[key_for(state, county)]
      rescue StandardError
        nil
      end
    end

    # Get the state and county names for a given ANSI code. Returns a Hash containing <em>state</em> and
    # <em>county</em> keys, or nil when the code is unknown.
    def self.data_for(code)
      reverse_hash[code]
    end

    # Upper-cases the county name and rewrites "ST " / "SAINT " as "ST. ".
    #
    # NOTE(review): the "ST " replacement is a plain substring match, so it also fires inside words
    # ending in "ST" (e.g. "EAST BATON ROUGE" becomes "EAST. BATON ROUGE"). It is left untouched here
    # because the keys stored in data.yml may have been generated with the same rule -- confirm against
    # the data file before tightening it.
    def self.normalize_county_name(name) #:nodoc:
      name.upcase.gsub("ST ", "ST. ").gsub("SAINT ", "ST. ")
    end

    # Builds the "STATE/COUNTY" key used to index the forward mapping.
    def self.key_for(state, county) #:nodoc:
      "#{state.upcase}/#{normalize_county_name(county)}"
    end

    # Lazily loads and memoizes the "STATE/COUNTY" => code mapping from data/data.yml, which lives next
    # to this file. Memoized in a class-level instance variable rather than a class variable so the
    # cache is not shared across an inheritance tree.
    def self.forward_hash #:nodoc:
      @forward_hash ||= File.open(File.join(File.dirname(__FILE__), 'data', 'data.yml')) { |yf| YAML::load(yf) }
    end

    # Lazily builds and memoizes the inverse mapping: code => { :state => ..., :county => ... }.
    def self.reverse_hash #:nodoc:
      @reverse_hash ||= forward_hash.inject({}) do |inverse, (key, code)|
        state, county = key.split('/')
        inverse[code] = { :state => state, :county => county }
        inverse
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module PeoplePlacesThings
  # Parses a free-form "city, state zip" string into its parts.
  #
  # Parsing works from the right: the last token is tried as a ZipCode, then the (new) last token is
  # tried as a State, and whatever remains is taken to be the city. Any part that cannot be recognised
  # is left nil. The original string is kept in +raw+.
  class Location
    attr_accessor :city, :state, :zip, :raw

    # str:: the location text, e.g. "Atlanta, GA 30305". Must respond to +split+.
    def initialize(str)
      self.raw = str

      # Split on runs of whitespace/commas and drop empty tokens, so that input such as
      # "Atlanta, GA, 30305" does not leave an empty token in front of the zip that would
      # prevent the state from being recognised.
      tokens = str.split(/[\s,]+/).reject { |t| t.empty? }

      # try to parse last token as zip
      self.zip = attempt { ZipCode.new(tokens.last) }
      tokens.pop if zip

      # try to parse last token as state
      self.state = attempt { State.new(tokens.last) }
      tokens.pop if state

      # remainder must be city
      remainder = tokens.join(' ')
      self.city = remainder.empty? ? nil : remainder
    end

    # Formats the location as "City,ST 12345", omitting any part that is missing.
    # No leading or trailing separator is produced when the city/state or the zip is absent.
    def to_s
      city_state = [city, (state.to_s(:abbr) if state)].compact.join(',')
      zip_text   = zip ? zip.to_s : nil
      [city_state, zip_text].compact.reject { |part| part.empty? }.join(' ')
    end

    private

    # Runs the block and returns its value, or nil if it raises. Used because the ZipCode and State
    # constructors signal "not parseable" by raising.
    def attempt
      yield
    rescue StandardError
      nil
    end
  end
end
@@ -0,0 +1,121 @@
1
module PeoplePlacesThings
  # Parses a person's name into first / middle / last / suffix parts and formats it back out.
  #
  # Two input layouts are understood: "First Middle Last [Suffix]" and "Last[, Suffix], First Middle".
  # With the default <tt>:auto_detect</tt> format, the presence of a comma selects the second layout.
  class PersonName
    PARSE_FORMATS  = [:first_middle_last, :last_first_middle, :auto_detect].freeze
    OUTPUT_FORMATS = [:first, :middle, :last, :full, :full_reverse, :first_space_last, :last_space_first,
                      :last_comma_first, :last_comma_space_first].freeze

    # Generational suffixes recognised (case-insensitively) when parsing.
    SUPPORTED_SUFFIXES = %w(II III IV V JR JR. SR SR.).freeze

    # Tokens that, when they stand alone next to the last name, are glued onto it ("Mc Donald" => "McDonald").
    ODD_LAST_NAME_PREFIXES = %w(MC ST ST.).freeze

    attr_accessor :first, :middle, :last, :suffix, :raw

    # str:: the name text; must respond to +include?+ and +split+.
    # fmt:: one of PARSE_FORMATS.
    #
    # Raises RuntimeError ("Unsupported Format") when +fmt+ is not one of PARSE_FORMATS.
    # A single-token name is stored as the last name regardless of format.
    def initialize(str, fmt = :auto_detect)
      raise "Unsupported Format" unless PARSE_FORMATS.include?(fmt)

      if fmt == :auto_detect
        fmt = str.include?(',') ? :last_first_middle : :first_middle_last
      end

      self.raw = str

      parts = str.split(/\s|,/).collect { |p| p.strip }.reject { |p| PersonName.blank?(p) || p == ',' }

      if parts.size == 1
        self.last = parts.first
      elsif fmt == :first_middle_last
        parse_first_middle_last(parts)
      else
        parse_last_first_middle(parts)
      end
    end

    # Formats the name. +fmt+ must be one of OUTPUT_FORMATS, otherwise RuntimeError is raised.
    # For <tt>:first</tt>, <tt>:middle</tt> and <tt>:last</tt> the stored part is returned as-is
    # (which may be nil); the other formats join the available parts and skip missing ones.
    def to_s(fmt = :full)
      raise "Unsupported Format" unless OUTPUT_FORMATS.include?(fmt)

      case fmt
      when :first, :middle, :last
        send(fmt)
      when :full
        [first, middle, last, suffix].compact.join(' ')
      when :full_reverse
        [last, first, middle, suffix].compact.join(' ')
      when :first_space_last
        [first, last].compact.join(' ')
      when :last_space_first
        [last, first].compact.join(' ')
      when :last_comma_first
        [last, first].compact.join(',')
      when :last_comma_space_first
        # Only append the comma when there is a first name to follow it.
        [(first ? "#{last}," : last), first].compact.join(' ')
      end
    end

    # First character of the first name, or nil when it is not set.
    def first_i
      first[0, 1] rescue nil
    end

    # First character of the middle name, or nil when it is not set.
    def middle_i
      middle[0, 1] rescue nil
    end

    # First character of the last name, or nil when it is not set.
    def last_i
      last[0, 1] rescue nil
    end

    # Loose, case-insensitive comparison against another PersonName. A part that is missing on either
    # side is treated as matching. With +initials_only+, first and middle names are compared by their
    # initial only; the last name is always compared in full.
    #
    # Returns false (not nil) when +other+ is not a PersonName.
    def eql?(other, initials_only = false)
      return false unless other.is_a?(PersonName)

      [:first, :middle, :last].all? do |k|
        msg  = (k != :last && initials_only) ? "#{k}_i" : k
        me   = send(msg)
        them = other.send(msg)
        me && them ? me.casecmp(them) == 0 : true
      end
    end

    # True for nil or a string with no non-whitespace character.
    def self.blank?(string_or_nil) #:nodoc:
      string_or_nil.nil? || string_or_nil !~ /\S/
    end

    # Returns the first run of word characters of +suffix+ (drops a trailing period: "Jr." => "Jr").
    def self.normalize_suffix(suffix) #:nodoc:
      suffix.match(/\w+/)[0] rescue suffix
    end

    private

    # Case-insensitive membership test of +token+ in +list+.
    def listed?(list, token)
      list.any? { |s| s.casecmp(token) == 0 }
    end

    # "First [Middle ...] [Prefix] Last [Suffix]"
    def parse_first_middle_last(parts)
      # A trailing suffix is only recognised when at least a first and a last name remain.
      if parts.size > 2 && listed?(SUPPORTED_SUFFIXES, parts.last)
        self.suffix = PersonName.normalize_suffix(parts.last)
        parts.pop
      end

      self.first = parts.first if parts.size > 0
      self.last  = parts.last if parts.size > 1

      # Glue a detached prefix ("Mc", "St") onto the last name.
      if parts.size > 2 && listed?(ODD_LAST_NAME_PREFIXES, parts[-2])
        self.last = "#{parts[-2]}#{last}"
        parts.delete_at(parts.size - 2)
      end

      self.middle = parts[1..(parts.size - 2)].join(' ') if parts.size > 2
    end

    # "[Prefix] Last [Suffix] First [Middle ...]"
    def parse_last_first_middle(parts)
      self.last = parts.first if parts.size > 0

      # The leading token is a detached prefix: join it with the token that follows.
      if parts.size > 1 && listed?(ODD_LAST_NAME_PREFIXES, last)
        self.last = "#{last}#{parts[1]}"
        parts.delete_at(1)
      end

      if parts.size > 2 && listed?(SUPPORTED_SUFFIXES, parts[1])
        self.suffix = PersonName.normalize_suffix(parts[1])
        parts.delete_at(1)
      end

      self.first  = parts[1] if parts.size > 1
      self.middle = parts[2..(parts.size - 1)].join(' ') if parts.size > 2
    end
  end
end
@@ -0,0 +1,48 @@
1
module PeoplePlacesThings
  # Parses a North American phone number (10 digits, or 11 digits with a leading country code of 1)
  # into country code, area code, exchange and suffix, and formats it back out.
  class PhoneNumber
    OUTPUT_FORMATS = [:full_digits, :local_digits, :full_formatted, :local_formatted].freeze

    attr_accessor :country_code, :area_code, :number, :exchange, :suffix, :raw

    # str:: the phone number text. After stripping surrounding whitespace it may contain only digits,
    #       spaces and the characters <tt>- + ( )</tt>.
    #
    # Raises RuntimeError ("Unsupported Format") when +str+ is not a string-like value, contains other
    # characters, does not hold 10 or 11 digits, or carries a country code other than 1.
    def initialize(str)
      self.raw = str

      stripped = str.respond_to?(:strip) ? str.strip : nil
      # \A / \z anchor the whole string; ^ / $ would accept a valid first line followed by anything.
      digits = (stripped =~ /\A[-+()\d ]+\z/) ? stripped.gsub(/\D/, '') : nil
      raise "Unsupported Format" if digits.nil? || digits.length < 10 || digits.length > 11

      self.country_code = digits.length == 11 ? digits[0, 1] : '1'
      raise "Unsupported Format" if country_code != '1'

      national = digits[-10, 10]          # the ten digits after any country code
      self.area_code = national[0, 3]
      self.number    = national[3, 7]     # exchange + suffix, as one string
      self.exchange  = national[3, 3]
      self.suffix    = national[6, 4]
    end

    # Formats the number. +fmt+ must be one of OUTPUT_FORMATS, otherwise RuntimeError is raised.
    #
    # :full_digits::     "14045551212"
    # :local_digits::    "5551212"
    # :full_formatted::  "1 (404) 555-1212"
    # :local_formatted:: "555-1212"
    def to_s(fmt = :full_formatted)
      raise "Unsupported Format" unless OUTPUT_FORMATS.include?(fmt)

      case fmt
      when :full_digits
        "#{country_code}#{area_code}#{exchange}#{suffix}"
      when :local_digits
        "#{exchange}#{suffix}"
      when :full_formatted
        "#{country_code} (#{area_code}) #{exchange}-#{suffix}"
      when :local_formatted
        "#{exchange}-#{suffix}"
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
module PeoplePlacesThings
  # Recognises a U.S. state (or DC) from either its two-letter abbreviation or its full name,
  # case-insensitively, and formats it as an abbreviation or a full name.
  class State
    OUTPUT_FORMATS = [:abbr, :full].freeze

    # Abbreviation symbol => lower-case full name. Every name must be lower-case, because lookups
    # by full name are performed on the down-cased input.
    FORWARD = {
      :al => "alabama",
      :ak => "alaska",
      :az => "arizona",
      :ar => "arkansas",
      :ca => "california",
      :co => "colorado",
      :ct => "connecticut",
      :de => "delaware",
      :dc => "district of columbia",
      :fl => "florida",
      :ga => "georgia",
      :hi => "hawaii",
      :id => "idaho",
      :il => "illinois",
      :in => "indiana",
      :ia => "iowa",
      :ks => "kansas",
      :ky => "kentucky",
      :la => "louisiana",
      :me => "maine",
      :md => "maryland",
      :ma => "massachusetts",
      :mi => "michigan",
      :mn => "minnesota",
      :ms => "mississippi",
      :mo => "missouri",
      :mt => "montana",
      :ne => "nebraska",
      :nv => "nevada",
      :nh => "new hampshire",
      :nj => "new jersey",
      :nm => "new mexico",
      :ny => "new york",
      :nc => "north carolina",
      :nd => "north dakota",
      :oh => "ohio",
      :ok => "oklahoma",
      :or => "oregon",
      :pa => "pennsylvania",
      :ri => "rhode island",
      :sc => "south carolina",
      :sd => "south dakota",
      :tn => "tennessee",
      :tx => "texas",
      :ut => "utah",
      :vt => "vermont",
      :va => "virginia",
      :wa => "washington",
      :wv => "west virginia",
      :wi => "wisconsin",
      :wy => "wyoming"
    }

    # Lower-case full name => abbreviation symbol.
    REVERSE = FORWARD.inject({}) { |r, (abbr, name)| r[name] = abbr; r }

    # sym:: the abbreviation as a lower-case Symbol (e.g. <tt>:ga</tt>)
    # raw:: the string the instance was built from
    attr_accessor :sym, :raw

    # str:: a state abbreviation ("GA") or full name ("Georgia"), in any case, with optional
    #       surrounding whitespace.
    #
    # Raises RuntimeError ("Unsupported Format") when +str+ names no known state.
    def initialize(str)
      self.raw = str
      token = str.strip.downcase

      if !token.empty? && FORWARD.has_key?(token.to_sym)
        self.sym = token.to_sym
      elsif REVERSE.has_key?(token)
        self.sym = REVERSE[token]
      end

      raise "Unsupported Format" unless sym
    end

    # Formats the state. +fmt+ must be one of OUTPUT_FORMATS, otherwise RuntimeError is raised.
    #
    # :full:: the full name with each word capitalised ("New York", "District of Columbia")
    # :abbr:: the upper-case two-letter abbreviation ("NY")
    def to_s(fmt = :full)
      raise "Unsupported Format" unless OUTPUT_FORMATS.include?(fmt)
      return sym.to_s.upcase unless fmt == :full

      # Capitalise word by word; String#capitalize alone would yield "New york".
      FORWARD[sym].split(' ').map { |word| word == 'of' ? word : word.capitalize }.join(' ')
    end
  end
end
@@ -0,0 +1,180 @@
1
module PeoplePlacesThings
  # Parses a single-line street address into number, pre-direction, street name, suffix,
  # post-direction, unit type and unit, and formats it back out using the long form of each part.
  #
  # Directions, suffixes and unit types are stored as Symbols that key into DIRECTIONS, SUFFIXES and
  # UNIT_TYPES; each table lists the accepted spellings with the long form first.
  class StreetAddress
    SUPPORTED_FORMS = [:long, :short].freeze

    DIRECTIONS = {
      :north => %w(north n n.),
      :northeast => %w(northeast ne ne. n.e.),
      :east => %w(east e e.),
      :southeast => %w(southeast se se. s.e.),
      :south => %w(south s s.),
      :southwest => %w(southwest sw sw. s.w.),
      :west => %w(west w w.),
      :northwest => %w(northwest nw nw. n.w.)
    }

    SUFFIXES = {
      :alley => %w(alley al al.),
      :avenue => %w(avenue ave ave. av av.),
      :beach => %w(beach bch bch.),
      :bend => %w(bend),
      :boulevard => %w(boulevard blvd blvd. blv blv.),
      :center => %w(center ctr ctr.),
      :circle => %w(circle cir cir.),
      :cliff => %w(cliff clf clf.),
      :club => %w(club),
      :condo => %w(condo con con.),
      :court => %w(court ct ct. cor cor.),
      :cove => %w(cove),
      :creek => %w(creek crk crk.),
      :crossing => %w(crossing xing xing. crs crs.),
      :drive => %w(drive dr dr.),
      :extension => %w(extension ext ext.),
      :freeway => %w(freeway fwy fwy.),
      :gardens => %w(gardens gdns gdns.),
      :glen => %w(glen gl gl.),
      :green => %w(green grn grn.),
      :heights => %w(heights hts hts.),
      :highway => %w(highway hwy hwy. hgwy hgwy.),
      :hill => %w(hill),
      :knoll => %w(knoll knl knl.),
      :lake => %w(lake),
      :lane => %w(lane ln ln.),
      :landing => %w(landing lndg lndg.),
      :loop => %w(loop),
      :meadows => %w(meadows mdws mdws.),
      :manor => %w(manor mnr mnr.),
      :mountain => %w(mountain mtn mtn. mnt mnt.),
      :oaks => %w(oaks),
      :oval => %w(oval),
      :park => %w(park pk pk. prk prk.),
      :parkway => %w(parkway pkwy pkwy. pky pky.),
      :pier => %w(pier),
      :place => %w(place pl pl.),
      :plaza => %w(plaza plz plz.),
      :point => %w(point pt pt. pnt pnt.),
      :ridge => %w(ridge ri ri.),
      :road => %w(road rd rd.),
      :row => %w(row),
      :run => %w(run),
      :springs => %w(springs spgs spgs.),
      :square => %w(square sq sq.),
      :street => %w(street st st.),
      :station => %w(station sta sta.),
      :terrace => %w(terrace ter ter. te te.),
      :turnpike => %w(turnpike tpke tpke.),
      :trace => %w(trace trc trc.),
      :trail => %w(trail trl trl. tl tl.),
      :valley => %w(valley vly vly.),
      :walk => %w(walk),
      :way => %w(way)
    }

    UNIT_TYPES = {
      :suite => %w(suite ste ste.),
      :number => %w(number # nbr nbr.),
      :apartment => %w(apartment apt apt.)
    }

    attr_accessor :number, :pre_direction, :name, :suffix, :post_direction, :unit_type, :unit, :raw

    # str:: the address text, e.g. "123 N Main St Apt 4". Tokens are separated by whitespace or commas.
    #
    # Parts are peeled off in a fixed order: leading number, trailing unit type + unit, trailing
    # direction, trailing suffix, leading direction; whatever is left is the street name.
    def initialize(str)
      self.raw = str
      tokens = str.split(/[\s,]/).reject { |s| s.empty? }

      # A first token that starts with a digit is the house number (kept whole, e.g. "123B").
      self.number = tokens.shift if tokens.first =~ /\A\d/

      # If at least two tokens remain, the next-to-last may be a unit type; the last is then the unit.
      if tokens.size > 1
        self.unit_type = StreetAddress.find_token(tokens[-2], UNIT_TYPES)
        if unit_type
          self.unit = tokens[-1]
          tokens.slice!(tokens.size - 2, 2)
        end
      end

      # The last remaining token may be a post-direction. Remember its original spelling in case it
      # turns out to be the street name (see below).
      post_direction_token = nil
      if tokens.size > 0
        self.post_direction = StreetAddress.find_token(tokens[-1], DIRECTIONS)
        post_direction_token = tokens.pop if post_direction
      end

      # The last remaining token may be a street suffix.
      if tokens.size > 0
        self.suffix = StreetAddress.find_token(tokens[-1], SUFFIXES)
        tokens.pop if suffix
      end

      # A leading direction is only a pre-direction when a name token follows it.
      if tokens.size > 1
        self.pre_direction = StreetAddress.find_token(tokens.first, DIRECTIONS)
        tokens.shift if pre_direction
      end

      # Whatever remains is the name. If nothing remains, the token taken as post-direction was
      # really the name (e.g. "123 North"), so move it back.
      if tokens.size > 0
        self.name = tokens.join(' ')
      else
        self.name = post_direction_token
        self.post_direction = nil
      end

      validate_parts
    end

    # Formats the address with the long form of every direction, suffix and unit type,
    # skipping parts that are not set.
    def to_s
      [
        number,
        (DIRECTIONS[pre_direction].first if pre_direction),
        name,
        (SUFFIXES[suffix].first if suffix),
        (DIRECTIONS[post_direction].first if post_direction),
        (UNIT_TYPES[unit_type].first if unit_type),
        unit
      ].compact.join(' ')
    end

    # Returns the spelling of +symbol+ (a key of DIRECTIONS, SUFFIXES or UNIT_TYPES) in the requested
    # +form+: <tt>:long</tt> is the first listed spelling, <tt>:short</tt> the second. Falls back to
    # the long spelling when no short one is listed. Returns nil for an unknown symbol.
    #
    # Raises RuntimeError when +form+ is not one of SUPPORTED_FORMS.
    def self.string_for(symbol, form)
      raise "Requested unknown form \"#{form}\" for :#{symbol}" unless SUPPORTED_FORMS.include?(form)

      spellings = DIRECTIONS[symbol] || SUFFIXES[symbol] || UNIT_TYPES[symbol]
      return nil unless spellings

      spellings[SUPPORTED_FORMS.index(form)] || spellings.first
    end

    # Returns the key in +values+ whose spelling list contains +token+ (case-insensitively),
    # or nil when none does.
    def self.find_token(token, values) #:nodoc:
      values.keys.each do |k|
        return k if values[k].detect { |v| v.casecmp(token) == 0 }
      end

      nil
    end

    private

    # Raises RuntimeError if any of the symbolic parts is not a key of its lookup table.
    def validate_parts
      [:pre_direction, :suffix, :post_direction, :unit_type].each do |part|
        value = send(part)
        next unless value

        legal_values = part == :suffix ? SUFFIXES : part == :unit_type ? UNIT_TYPES : DIRECTIONS
        raise "Invalid #{part} \"#{value}\"" unless legal_values.include?(value)
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module PeoplePlacesThings
  # Parses a U.S. ZIP code in either 5-digit ("30305") or ZIP+4 ("30305-1234") form.
  class ZipCode
    # base::      the five-digit ZIP, as a String
    # plus_four:: the four-digit extension as a String, or nil when absent
    # raw::       the string the instance was built from
    attr_accessor :base, :plus_four, :raw

    # str:: the ZIP code text; surrounding whitespace is ignored.
    #
    # Raises RuntimeError ("Unsupported Format") when +str+ is not a string-like value or is not
    # exactly a 5-digit ZIP optionally followed by "-" and four digits.
    def initialize(str)
      self.raw = str

      # \A / \z anchor the whole string; ^ / $ would accept a valid first line followed by anything.
      match = str.respond_to?(:strip) ? str.strip.match(/\A(\d{5})(?:-(\d{4}))?\z/) : nil
      raise "Unsupported Format" unless match

      self.base      = match[1]
      self.plus_four = match[2]
    end

    # Formats as "12345" or "12345-6789".
    def to_s
      [base, plus_four].compact.join('-')
    end
  end
end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{people_places_things}
8
- s.version = "2.0.0"
8
+ s.version = "2.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Danny Burkes"]
12
- s.date = %q{2009-08-31}
12
+ s.date = %q{2009-09-01}
13
13
  s.description = %q{Parsers and formatters for person names, street addresses, city/state/zip, phone numbers, etc.}
14
14
  s.email = %q{dburkes@netable.com}
15
15
  s.extra_rdoc_files = [
@@ -21,16 +21,16 @@ Gem::Specification.new do |s|
21
21
  "Rakefile",
22
22
  "VERSION",
23
23
  "lib/people_places_things.rb",
24
- "lib/people_places_things/ansi_counties/ansi_counties.rb",
25
- "lib/people_places_things/ansi_counties/data/data.yml",
26
- "lib/people_places_things/ansi_counties/data/process_data.rb",
27
- "lib/people_places_things/ansi_counties/data/raw.txt",
28
- "lib/people_places_things/location/location.rb",
29
- "lib/people_places_things/person_name/person_name.rb",
30
- "lib/people_places_things/phone_number/phone_number.rb",
31
- "lib/people_places_things/state/state.rb",
32
- "lib/people_places_things/street_address/street_address.rb",
33
- "lib/people_places_things/zip_code/zip_code.rb",
24
+ "lib/people_places_things/ansi_counties.rb",
25
+ "lib/people_places_things/data/data.yml",
26
+ "lib/people_places_things/data/process_data.rb",
27
+ "lib/people_places_things/data/raw.txt",
28
+ "lib/people_places_things/location.rb",
29
+ "lib/people_places_things/person_name.rb",
30
+ "lib/people_places_things/phone_number.rb",
31
+ "lib/people_places_things/state.rb",
32
+ "lib/people_places_things/street_address.rb",
33
+ "lib/people_places_things/zip_code.rb",
34
34
  "people_places_things.gemspec",
35
35
  "spec/ansi_counties_spec.rb",
36
36
  "spec/helper.rb",
data/spec/helper.rb CHANGED
@@ -1 +1,2 @@
1
- require 'lib/people_places_things'
1
+ require 'lib/people_places_things'
2
+ include PeoplePlacesThings
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dburkes-people_places_things
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Burkes
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-31 00:00:00 -07:00
12
+ date: 2009-09-01 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -27,16 +27,16 @@ files:
27
27
  - Rakefile
28
28
  - VERSION
29
29
  - lib/people_places_things.rb
30
- - lib/people_places_things/ansi_counties/ansi_counties.rb
31
- - lib/people_places_things/ansi_counties/data/data.yml
32
- - lib/people_places_things/ansi_counties/data/process_data.rb
33
- - lib/people_places_things/ansi_counties/data/raw.txt
34
- - lib/people_places_things/location/location.rb
35
- - lib/people_places_things/person_name/person_name.rb
36
- - lib/people_places_things/phone_number/phone_number.rb
37
- - lib/people_places_things/state/state.rb
38
- - lib/people_places_things/street_address/street_address.rb
39
- - lib/people_places_things/zip_code/zip_code.rb
30
+ - lib/people_places_things/ansi_counties.rb
31
+ - lib/people_places_things/data/data.yml
32
+ - lib/people_places_things/data/process_data.rb
33
+ - lib/people_places_things/data/raw.txt
34
+ - lib/people_places_things/location.rb
35
+ - lib/people_places_things/person_name.rb
36
+ - lib/people_places_things/phone_number.rb
37
+ - lib/people_places_things/state.rb
38
+ - lib/people_places_things/street_address.rb
39
+ - lib/people_places_things/zip_code.rb
40
40
  - people_places_things.gemspec
41
41
  - spec/ansi_counties_spec.rb
42
42
  - spec/helper.rb
@@ -48,7 +48,6 @@ files:
48
48
  - spec/zip_code_spec.rb
49
49
  has_rdoc: true
50
50
  homepage: http://github.com/dburkes/people_places_things
51
- licenses:
52
51
  post_install_message:
53
52
  rdoc_options:
54
53
  - --charset=UTF-8
@@ -69,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
69
68
  requirements: []
70
69
 
71
70
  rubyforge_project:
72
- rubygems_version: 1.3.5
71
+ rubygems_version: 1.2.0
73
72
  signing_key:
74
73
  specification_version: 2
75
74
  summary: Parsers and formatters for person names, street addresses, city/state/zip, phone numbers, etc.
@@ -1,66 +0,0 @@
1
- require 'yaml'
2
-
3
- # Provides two-way mapping between U.S. state and county names and their associated ANSI codes (formerly known as FIPS codes).
4
- #
5
- # == Examples
6
- #
7
- # To get the ANSI code for a state and county, you call ANSICounties.code_for, like so:
8
- #
9
- # code = ANSICounties.code_for('GA', 'FULTON')
10
- # # => 13121
11
- #
12
- # You can also pass a single Hash argument:
13
- #
14
- # code = ANSICounties.code_for(:state => 'ga', :county => 'fulton')
15
- # # => 13121
16
- #
17
- # Conversely, to get the state and county for an ANSI code, you call ANSICounties.data_for:
18
- #
19
- # hash = ANSICounties.data_for(13121)
20
- # # => { :state => 'GA', :county => 'FULTON' }
21
- #
22
- # == Data source
23
- #
24
- # The data that makes up <tt>lib/ansi-counties/data/data.yml</tt> was generated from <tt>lib/ansi-counties/data/raw.txt</tt>, which was downloaded from
25
- # the {US Census website}[http://www.census.gov/geo/www/ansi/download.html].
26
- class ANSICounties
27
-
28
- # Get the ANSI code for the given state and county. If _data_or_state_ is a Hash, then it must contain <em>state</em> and <em>county</em> keys, otherwise,
29
- # it is assumbed to be a String containing the state name.
30
- def self.code_for(data_or_state, county=nil)
31
- if data_or_state.is_a?(Hash)
32
- state, county = data_or_state[:state], data_or_state[:county]
33
- else
34
- state = data_or_state
35
- end
36
-
37
- forward_hash[key_for(state, county)] rescue nil
38
- end
39
-
40
- # Get the state and county names for a given ANSI code. Returns a Hash containing <em>state</em> and <em>county</em> keys
41
- def self.data_for(code)
42
- reverse_hash[code]
43
- end
44
-
45
- def self.normalize_county_name(name) #:nodoc:
46
- name.upcase.gsub("ST ", "ST. ").gsub("SAINT ", "ST. ")
47
- end
48
-
49
- def self.key_for(state, county) #:nodoc:
50
- "#{state.upcase}/#{normalize_county_name(county)}"
51
- end
52
-
53
- private
54
-
55
- def self.forward_hash
56
- @@forward_hash ||= File.open(File.join(File.dirname(__FILE__), 'data', 'data.yml')) {|yf| YAML::load(yf)}
57
- end
58
-
59
- def self.reverse_hash
60
- @@reverse_hash ||= forward_hash.inject({}) do |h, kv|
61
- state_county = kv[0].split('/')
62
- h[kv[1]] = { :state => state_county[0], :county => state_county[1]}
63
- h
64
- end
65
- end
66
- end
@@ -1,28 +0,0 @@
1
- class Location
2
- attr_accessor :city, :state, :zip, :raw
3
-
4
- def initialize(str)
5
- self.raw = str
6
-
7
- tokens = str.split(/\s|,/).collect {|t| t.strip}
8
-
9
- # try to parse last token as zip
10
- #
11
- self.zip = ZipCode.new(tokens.last) rescue nil
12
- tokens = tokens.slice(0..-2) if self.zip
13
-
14
- # try to parse last token as state
15
- #
16
- self.state = State.new(tokens.last) rescue nil
17
- tokens = tokens.slice(0..-2) if self.state
18
-
19
- # remainder must be city
20
- #
21
- self.city = tokens.join(' ').strip
22
- self.city = nil if self.city.empty?
23
- end
24
-
25
- def to_s
26
- [[self.city, (self.state.to_s(:abbr) rescue nil)].compact.join(','), self.zip.to_s].compact.join(' ')
27
- end
28
- end
@@ -1,119 +0,0 @@
1
- class PersonName
2
- attr_accessor :first, :middle, :last, :suffix, :raw
3
-
4
- def initialize(str, fmt = :auto_detect)
5
- raise "Unsupported Format" if !PARSE_FORMATS.include?(fmt)
6
-
7
- if fmt == :auto_detect
8
- fmt = str.include?(',') ? :last_first_middle : :first_middle_last
9
- end
10
-
11
- self.raw = str
12
-
13
- parts = str.split(/\s|,/).collect {|p| p.strip}.reject {|p| PersonName.blank?(p) || p == ',' }
14
-
15
- if parts.size == 1
16
- self.last = parts.first
17
- else
18
- case fmt
19
- when :first_middle_last
20
- if parts.size > 2 and SUPPORTED_SUFFIXES.detect {|s| s.casecmp(parts.last) == 0}
21
- self.suffix = PersonName.normalize_suffix(parts.last)
22
- parts.delete_at(parts.size - 1)
23
- end
24
-
25
- self.first = parts.first if parts.size > 0
26
- self.last = parts.last if parts.size > 1
27
-
28
- if parts.size > 2 && ODD_LAST_NAME_PREFIXES.detect {|s| s.casecmp(parts[-2]) == 0}
29
- self.last = "#{parts[-2]}#{self.last}"
30
- parts.delete_at(parts.size - 2)
31
- end
32
-
33
- self.middle = parts[1..(parts.size - 2)].join(' ') if parts.size > 2
34
-
35
- when :last_first_middle
36
- self.last = parts.first if parts.size > 0
37
-
38
- if parts.size > 1 && ODD_LAST_NAME_PREFIXES.detect {|s| s.casecmp(self.last) == 0}
39
- self.last << parts[1]
40
- parts.delete_at(1)
41
- end
42
-
43
- if parts.size > 2 and SUPPORTED_SUFFIXES.detect {|s| s.casecmp(parts[1]) == 0}
44
- self.suffix = PersonName.normalize_suffix(parts[1])
45
- parts.delete_at(1)
46
- end
47
-
48
- self.first = parts[1] if parts.size > 1
49
- self.middle = parts[2..(parts.size - 1)].join(' ') if parts.size > 2
50
- end
51
- end
52
- end
53
-
54
- def to_s(fmt = :full)
55
- raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)
56
-
57
- case fmt
58
- when :first, :middle, :last
59
- self.send(fmt)
60
-
61
- when :full
62
- [self.first, self.middle, self.last, self.suffix].compact.join(' ')
63
-
64
- when :full_reverse
65
- [self.last, self.first, self.middle, self.suffix].compact.join(' ')
66
-
67
- when :first_space_last
68
- [self.first, self.last].compact.join(' ')
69
-
70
- when :last_space_first
71
- [self.last, self.first].compact.join(' ')
72
-
73
- when :last_comma_first
74
- [self.last, self.first].compact.join(',')
75
-
76
- when :last_comma_space_first
77
- [(self.first ? "#{self.last}," : self.last), self.first].compact.join(' ')
78
- end
79
- end
80
-
81
- def first_i
82
- self.first[0,1] rescue nil
83
- end
84
-
85
- def middle_i
86
- self.middle[0,1] rescue nil
87
- end
88
-
89
- def last_i
90
- self.last[0,1] rescue nil
91
- end
92
-
93
- def eql?(other, initials_only=false)
94
- if other.is_a?(PersonName)
95
- [:first, :middle, :last].all? do |k|
96
- msg = (k != :last && initials_only) ? "#{k}_i" : k
97
- me = self.send(msg)
98
- them = other.send(msg)
99
- me && them ? me.casecmp(them) == 0 : true
100
- end
101
- end
102
- end
103
-
104
- PARSE_FORMATS = [:first_middle_last, :last_first_middle, :auto_detect]
105
- OUTPUT_FORMATS = [:first, :middle, :last, :full, :full_reverse, :first_space_last, :last_space_first, :last_comma_first, :last_comma_space_first]
106
-
107
- private
108
-
109
- def self.blank?(string_or_nil)
110
- string_or_nil.nil? || string_or_nil !~ /\S/
111
- end
112
-
113
- def self.normalize_suffix(suffix)
114
- suffix.match(/\w+/)[0] rescue suffix
115
- end
116
-
117
- SUPPORTED_SUFFIXES = %w(II III IV V JR JR. SR SR.)
118
- ODD_LAST_NAME_PREFIXES = %w(MC ST ST.)
119
- end
@@ -1,46 +0,0 @@
1
- class PhoneNumber
2
- attr_accessor :country_code, :area_code, :number, :exchange, :suffix, :raw
3
-
4
- def initialize(str)
5
- extract = str.strip.match(/^([-+()\d ]+)$/)[0].gsub(/[^\d]/, '') rescue nil
6
- raise "Unsupported Format" if !extract || extract.length < 10 || extract.length > 11
7
-
8
- if extract.length == 11
9
- self.country_code = extract.slice!(0..0)
10
- else
11
- self.country_code = '1'
12
- end
13
-
14
- raise "Unsupported Format" if self.country_code != '1'
15
-
16
- self.area_code = extract.slice!(0..2)
17
-
18
- self.number = extract.dup
19
-
20
- self.exchange = extract.slice!(0..2)
21
-
22
- self.suffix = extract
23
-
24
- raise "Unsupported Format" if !self.exchange || !self.suffix
25
- end
26
-
27
- def to_s(fmt = :full_formatted)
28
- raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)
29
-
30
- case fmt
31
- when :full_digits
32
- "#{self.country_code}#{self.area_code}#{self.exchange}#{self.suffix}"
33
-
34
- when :local_digits
35
- "#{self.exchange}#{self.suffix}"
36
-
37
- when :full_formatted
38
- "#{self.country_code} (#{self.area_code}) #{self.exchange}-#{self.suffix}"
39
-
40
- when :local_formatted
41
- "#{self.exchange}-#{self.suffix}"
42
- end
43
- end
44
-
45
- OUTPUT_FORMATS = [:full_digits, :local_digits, :full_formatted, :local_formatted]
46
- end
@@ -1,81 +0,0 @@
1
- class State
2
- attr_accessor :sym, :raw
3
-
4
- def initialize(str)
5
- self.raw = str
6
- token = str.strip.downcase
7
-
8
- if FORWARD.has_key?(token.to_sym)
9
- self.sym = token.to_sym
10
- elsif REVERSE.has_key?(token)
11
- self.sym = REVERSE[token]
12
- end
13
-
14
- raise "Unsupported Format" if !self.sym
15
- end
16
-
17
- def to_s(fmt = :full)
18
- raise "Unsupported Format" if !OUTPUT_FORMATS.include?(fmt)
19
- fmt == :full ? FORWARD[self.sym].capitalize : self.sym.to_s.upcase
20
- end
21
-
22
- OUTPUT_FORMATS = [:abbr, :full]
23
-
24
- private
25
-
26
- FORWARD = {
27
- :al => "alabama",
28
- :ak => "alaska",
29
- :az => "arizona",
30
- :ar => "arkansas",
31
- :ca => "california",
32
- :co => "colorado",
33
- :ct => "connecticut",
34
- :de => "delaware",
35
- :dc => "district of columbia",
36
- :fl => "florida",
37
- :ga => "georgia",
38
- :hi => "hawaii",
39
- :id => "idaho",
40
- :il => "illinois",
41
- :in => "indiana",
42
- :ia => "iowa",
43
- :ks => "kansas",
44
- :ky => "kentucky",
45
- :la => "louisiana",
46
- :me => "maine",
47
- :md => "maryland",
48
- :ma => "massachusetts",
49
- :mi => "michigan",
50
- :mn => "minnesota",
51
- :ms => "mississippi",
52
- :mo => "missouri",
53
- :mt => "montana",
54
- :ne => "nebraska",
55
- :nv => "nevada",
56
- :nh => "new hampshire",
57
- :nj => "new jersey",
58
- :nm => "new mexico",
59
- :ny => "new york",
60
- :nc => "north carolina",
61
- :nd => "north dakota",
62
- :oh => "ohio",
63
- :ok => "oklahoma",
64
- :or => "oregon",
65
- :pa => "pennsylvania",
66
- :ri => "Rhode island",
67
- :sc => "south carolina",
68
- :sd => "south dakota",
69
- :tn => "tennessee",
70
- :tx => "texas",
71
- :ut => "utah",
72
- :vt => "vermont",
73
- :va => "virginia",
74
- :wa => "washington",
75
- :wv => "west virginia",
76
- :wi => "wisconsin",
77
- :wy => "wyoming",
78
- }
79
-
80
- REVERSE = FORWARD.inject({}) {|r, f| r[f[1]] = f[0]; r}
81
- end
@@ -1,178 +0,0 @@
1
# Parses a free-form street address line into its components: house number,
# pre-direction, street name, suffix, post-direction, unit type and unit.
#
#   a = StreetAddress.new('123 N Main St. NW Apt 4')
#   a.number          # => "123"
#   a.pre_direction   # => :north
#   a.name            # => "Main"
#   a.suffix          # => :street
#   a.post_direction  # => :northwest
#   a.unit_type       # => :apartment
#   a.unit            # => "4"
class StreetAddress
  # Forms accepted by StreetAddress.string_for. The position of a form in this
  # list is the index of the matching spelling in each table entry below.
  SUPPORTED_FORMS = [:long, :short].freeze

  # Direction symbol => accepted spellings (long form first, short form second).
  DIRECTIONS = {
    :north => %w(north n n.),
    :northeast => %w(northeast ne ne. n.e.),
    :east => %w(east e e.),
    :southeast => %w(southeast se se. s.e.),
    :south => %w(south s s.),
    :southwest => %w(southwest sw sw. s.w.),
    :west => %w(west w w.),
    :northwest => %w(northwest nw nw. n.w.)
  }.freeze

  # Street suffix symbol => accepted spellings (long form first).
  SUFFIXES = {
    :alley => %w(alley al al.),
    :avenue => %w(avenue ave ave. av av.),
    :beach => %w(beach bch bch.),
    :bend => %w(bend),
    :boulevard => %w(boulevard blvd blvd. blv blv.),
    :center => %w(center ctr ctr.),
    :circle => %w(circle cir cir.),
    :cliff => %w(cliff clf clf.),
    :club => %w(club),
    :condo => %w(condo con con.),
    :court => %w(court ct ct. cor cor.),
    :cove => %w(cove),
    :creek => %w(creek crk crk.),
    :crossing => %w(crossing xing xing. crs crs.),
    :drive => %w(drive dr dr.),
    :extension => %w(extension ext ext.),
    :freeway => %w(freeway fwy fwy.),
    :gardens => %w(gardens gdns gdns.),
    :glen => %w(glen gl gl.),
    :green => %w(green grn grn.),
    :heights => %w(heights hts hts.),
    :highway => %w(highway hwy hwy. hgwy hgwy.),
    :hill => %w(hill),
    :knoll => %w(knoll knl knl.),
    :lake => %w(lake),
    :lane => %w(lane ln ln.),
    :landing => %w(landing lndg lndg.),
    :loop => %w(loop),
    :meadows => %w(meadows mdws mdws.),
    :manor => %w(manor mnr mnr.),
    :mountain => %w(mountain mtn mtn. mnt mnt.),
    :oaks => %w(oaks),
    :oval => %w(oval),
    :park => %w(park pk pk. prk prk.),
    :parkway => %w(parkway pkwy pkwy. pky pky.),
    :pier => %w(pier),
    :place => %w(place pl pl.),
    :plaza => %w(plaza plz plz.),
    :point => %w(point pt pt. pnt pnt.),
    :ridge => %w(ridge ri ri.),
    :road => %w(road rd rd.),
    :row => %w(row),
    :run => %w(run),
    :springs => %w(springs spgs spgs.),
    :square => %w(square sq sq.),
    :street => %w(street st st.),
    :station => %w(station sta sta.),
    :terrace => %w(terrace ter ter. te te.),
    :turnpike => %w(turnpike tpke tpke.),
    :trace => %w(trace trc trc.),
    :trail => %w(trail trl trl. tl tl.),
    :valley => %w(valley vly vly.),
    :walk => %w(walk),
    :way => %w(way)
  }.freeze

  # Unit type symbol => accepted spellings (long form first).
  UNIT_TYPES = {
    :suite => %w(suite ste ste.),
    :number => %w(number # nbr nbr.),
    :apartment => %w(apartment apt apt.)
  }.freeze

  attr_accessor :number, :pre_direction, :name, :suffix, :post_direction, :unit_type, :unit, :raw

  # Parses +str+, splitting it on whitespace and commas and then peeling
  # recognised tokens off the ends. The order of the steps matters: unit and
  # post-direction are removed before the suffix, and the pre-direction last.
  #
  # Raises RuntimeError if a parsed part is not a key of its lookup table.
  def initialize(str)
    self.raw = str
    tokens = str.split(/[\s,]/).reject { |s| s.empty? }
    post_direction_token = nil

    # A first token that starts with a digit is the house number.
    self.number = tokens.shift if tokens.first =~ /^\d/

    # With at least two tokens left, a next-to-last token naming a unit type
    # means the last two tokens are "<unit type> <unit>".
    if tokens.size > 1
      self.unit_type = StreetAddress.find_token(tokens[-2], UNIT_TYPES)
      if unit_type
        self.unit = tokens.pop
        tokens.pop
      end
    end

    # A trailing direction is the post-direction. Keep its original spelling in
    # case it turns out to be the street name (see below).
    unless tokens.empty?
      self.post_direction = StreetAddress.find_token(tokens.last, DIRECTIONS)
      post_direction_token = tokens.pop if post_direction
    end

    # A trailing suffix word (St, Ave, ...) is the street suffix.
    unless tokens.empty?
      self.suffix = StreetAddress.find_token(tokens.last, SUFFIXES)
      tokens.pop if suffix
    end

    # Only treat a leading direction as the pre-direction when something is
    # left over to serve as the street name.
    if tokens.size > 1
      self.pre_direction = StreetAddress.find_token(tokens.first, DIRECTIONS)
      tokens.shift if pre_direction
    end

    # Whatever remains is the street name. If nothing remains, the token taken
    # as post-direction was really the name (e.g. "123 North").
    if tokens.empty?
      self.name = post_direction_token
      self.post_direction = nil
    else
      self.name = tokens.join(' ')
    end

    validate_parts
  end

  # Rebuilds the address as a single space-separated string, using the long
  # spelling of each direction, suffix and unit type.
  def to_s
    parts = []
    parts << number if number
    parts << DIRECTIONS[pre_direction].first if pre_direction
    parts << name if name
    parts << SUFFIXES[suffix].first if suffix
    parts << DIRECTIONS[post_direction].first if post_direction
    parts << UNIT_TYPES[unit_type].first if unit_type
    parts << unit if unit
    parts.join(' ')
  end

  # Returns the +form+ (:long or :short) spelling of a direction, suffix or
  # unit type symbol, or nil when +symbol+ is in none of the tables. Entries
  # that have no short spelling fall back to the long one.
  #
  # Raises RuntimeError when +form+ is not one of SUPPORTED_FORMS.
  def self.string_for(symbol, form)
    unless SUPPORTED_FORMS.include?(form)
      raise "Requested unknown form \"#{form}\" for :#{symbol}"
    end

    spellings = DIRECTIONS[symbol] || SUFFIXES[symbol] || UNIT_TYPES[symbol]
    return nil unless spellings

    spellings[SUPPORTED_FORMS.index(form)] || spellings.first
  end

  # Returns the key of +values+ whose spelling list contains +token+
  # (compared case-insensitively), or nil when none matches.
  #
  # Kept public: it is invoked with an explicit receiver during parsing.
  def self.find_token(token, values)
    values.keys.each do |key|
      return key if values[key].any? { |spelling| spelling.casecmp(token) == 0 }
    end

    nil
  end

  private

  # Ensures every symbolic part that was set is a key of its lookup table.
  def validate_parts
    checks = [
      [:pre_direction, DIRECTIONS],
      [:suffix, SUFFIXES],
      [:post_direction, DIRECTIONS],
      [:unit_type, UNIT_TYPES]
    ]

    checks.each do |part, legal_values|
      value = send(part)
      raise "Invalid #{part} \"#{value}\"" if value && !legal_values.include?(value)
    end
  end
end
@@ -1,15 +0,0 @@
1
# A U.S. ZIP code in either 5-digit ("30306") or ZIP+4 ("30306-1234") form.
#
#   z = ZipCode.new('30306-1234')
#   z.base       # => "30306"
#   z.plus_four  # => "1234"
#   z.to_s       # => "30306-1234"
class ZipCode
  # Matches the whole string: five digits, optionally followed by "-" and four
  # digits. \A and \z are used instead of ^ and $ so that multi-line input
  # cannot pass by having a valid first or last line.
  FORMAT = /\A(\d{5})(?:-(\d{4}))?\z/

  attr_accessor :base, :plus_four, :raw

  # Parses +str+, ignoring leading and trailing whitespace.
  #
  # +raw+ keeps the value exactly as given; +plus_four+ is nil for a plain
  # 5-digit code.
  #
  # Raises RuntimeError ("Unsupported Format") when +str+ is not a string-like
  # value in one of the two supported forms.
  def initialize(str)
    self.raw = str

    match = str.respond_to?(:strip) ? FORMAT.match(str.strip) : nil
    raise "Unsupported Format" unless match

    self.base = match[1]
    self.plus_four = match[2]
  end

  # Renders the code as "NNNNN" or "NNNNN-NNNN".
  def to_s
    [base, plus_four].compact.join('-')
  end
end