nameable 0.5.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/Gemfile +2 -9
- data/Gemfile.lock +24 -23
- data/{LICENSE → LICENSE.txt} +2 -0
- data/README.md +68 -0
- data/Rakefile +1 -38
- data/{examples/nameable_web_service.rb → bin/nameable_web_service} +4 -2
- data/data/app_c.csv +151672 -0
- data/data/yob2013.txt +33072 -0
- data/lib/nameable.rb +6 -216
- data/lib/nameable/error.rb +5 -0
- data/lib/nameable/extensions.rb +5 -0
- data/lib/nameable/latin.rb +247 -0
- data/lib/nameable/latin/patterns.rb +34 -0
- data/lib/nameable/version.rb +3 -0
- data/nameable.gemspec +24 -0
- data/spec/nameable/latin_spec.rb +284 -0
- data/spec/nameable_spec.rb +5 -17
- data/spec/spec_helper.rb +5 -6
- metadata +53 -39
- data/History.txt +0 -11
- data/README.rdoc +0 -29
- data/TODO +0 -3
- data/VERSION +0 -1
- data/examples/test.rb +0 -45
data/lib/nameable.rb
CHANGED
@@ -1,220 +1,10 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
require "nameable/version"
|
2
|
+
require "nameable/error"
|
3
|
+
require "nameable/latin"
|
4
|
+
require "nameable/extensions"
|
5
5
|
|
6
6
|
module Nameable
|
7
|
-
|
8
|
-
|
9
|
-
##
|
10
|
-
# Raised if something other than a valid Name is supplied
|
11
|
-
class InvalidNameError < StandardError
|
12
|
-
end
|
13
|
-
|
14
|
-
##
|
15
|
-
# Regex's to match the detritus that people add to their names
|
16
|
-
module Patterns
|
17
|
-
PREFIX = {
|
18
|
-
"Mr." => /^\(*(mr\.*|mister)\)*$/i,
|
19
|
-
"Mrs." => /^\(*(mrs\.*|misses)\)*$/i,
|
20
|
-
"Ms." => /^\(*(ms\.*|miss)\)*$/i,
|
21
|
-
"Dr." => /^\(*(dr\.*|doctor)\)*$/i,
|
22
|
-
"Rev." => /^\(*(rev\.*|reverand)\)*$/i,
|
23
|
-
"Fr." => /^\(*(fr\.*|friar)\)*$/i,
|
24
|
-
"Master" => /^\(*(master)\)*$/i,
|
25
|
-
"Sir" => /^\(*(sir)\)*$/i
|
26
|
-
}
|
27
|
-
|
28
|
-
SUFFIX = {
|
29
|
-
"Sr." => /^\(*(sr\.?|senior)\)*$/i,
|
30
|
-
"Jr." => /^\(*(jr\.?|junior)\)*$/i,
|
31
|
-
"Esq." => /^\(*(esq\.?|esquire)\)*$/i,
|
32
|
-
"Ph.D." => /^\(*(phd\.?)\)*$/i
|
33
|
-
}
|
34
|
-
|
35
|
-
SUFFIX_GENERATIONAL_ROMAN = /^\(*[IVX.]+\)*$/i
|
36
|
-
SUFFIX_ACADEMIC = /^(APR|RPh|MD|MA|DMD|DDS|PharmD|EngD|DPhil|JD|DD|DO|BA|BS|BSc|BE|BFA|MA|MS|MSc|MFA|MLA|MBA)$/i
|
37
|
-
SUFFIX_PROFESSIONAL = /^(PE|CSA|CPA|CPL|CME|CEng|OFM|CSV|Douchebag)$/i
|
38
|
-
SUFFIX_ABBREVIATION = /^[A-Z.]+[A-Z.]+$/ # It should be at least 2 letters
|
39
|
-
|
40
|
-
LAST_NAME_PRE_DANGLERS = /^(mc|vere|von|van|da|de|del|della|di|da|pietro|vanden|du|st|la|ter|ten)$/i
|
41
|
-
O_LAST_NAME_PRE_CONCATS = /^(o'|o`)$/i
|
42
|
-
# MC_LAST_NAME_PRE_CONCAT = /^(mc|da|de)$/i
|
43
|
-
# ST_LAST_NAME_PRE_CONCAT = /^(st)\.*$/i
|
44
|
-
end
|
45
|
-
|
46
|
-
attr_accessor :prefix, :first, :middle, :last, :suffix
|
47
|
-
|
48
|
-
##
|
49
|
-
#
|
50
|
-
def initialize(parts={})
|
51
|
-
self.prefix = parts[:prefix] ? parts[:prefix] : nil
|
52
|
-
self.first = parts[:first] ? parts[:first] : nil
|
53
|
-
self.middle = parts[:middle] ? parts[:middle] : nil
|
54
|
-
self.last = parts[:last] ? parts[:last] : nil
|
55
|
-
self.suffix = parts[:suffix] ? parts[:suffix] : nil
|
56
|
-
end
|
57
|
-
|
58
|
-
##
|
59
|
-
# name is an Array
|
60
|
-
def extract_prefix(name)
|
61
|
-
return unless name and name.size > 1 and @prefix.nil? and @first.nil?
|
62
|
-
Patterns::PREFIX.each_pair do |pretty, regex|
|
63
|
-
if name.first =~ regex
|
64
|
-
@prefix = pretty
|
65
|
-
name.delete(name.first)
|
66
|
-
return
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
##
|
72
|
-
# name is an Array
|
73
|
-
def extract_suffix(name)
|
74
|
-
return unless name and name.size >= 3
|
75
|
-
|
76
|
-
(name.size - 1).downto(2) do |n|
|
77
|
-
suff = nil
|
78
|
-
|
79
|
-
Patterns::SUFFIX.each_pair do |pretty, regex|
|
80
|
-
suff = pretty if name[n] =~ regex
|
81
|
-
end
|
82
|
-
|
83
|
-
if name[n] =~ Patterns::SUFFIX_ACADEMIC or name[n] =~ Patterns::SUFFIX_PROFESSIONAL or name[n] =~ Patterns::SUFFIX_GENERATIONAL_ROMAN
|
84
|
-
suff = name[n].upcase.gsub(/\./,'')
|
85
|
-
end
|
86
|
-
|
87
|
-
if name.join != name.join.upcase and name[n].length > 1 and name[n] =~ Patterns::SUFFIX_ABBREVIATION
|
88
|
-
suff = name[n].upcase.gsub(/\./,'')
|
89
|
-
end
|
90
|
-
|
91
|
-
if suff
|
92
|
-
@suffix = @suffix ? "#{suff}, #{@suffix}" : suff
|
93
|
-
name.delete_at(n)
|
94
|
-
end
|
95
|
-
|
96
|
-
end
|
97
|
-
end
|
98
|
-
|
99
|
-
##
|
100
|
-
# name is an Array
|
101
|
-
def extract_first(name)
|
102
|
-
return unless name and name.size >= 1
|
103
|
-
|
104
|
-
@first = name.first
|
105
|
-
name.delete_at(0)
|
106
|
-
|
107
|
-
@first.capitalize! unless @first =~ /[a-z]/ and @first =~ /[A-Z]/
|
108
|
-
end
|
109
|
-
|
110
|
-
##
|
111
|
-
# name is an Array
|
112
|
-
def extract_last(name)
|
113
|
-
return unless name and name.size >= 1
|
114
|
-
|
115
|
-
@last = name.last
|
116
|
-
name.delete_at(name.size - 1)
|
117
|
-
|
118
|
-
@last.capitalize! unless @last =~ /[a-z]/ and @last =~ /[A-Z]/
|
119
|
-
end
|
120
|
-
|
121
|
-
##
|
122
|
-
# name is an Array
|
123
|
-
def extract_middle(name)
|
124
|
-
return unless name and name.size >= 1
|
125
|
-
|
126
|
-
(name.size - 1).downto(0) do |n|
|
127
|
-
next unless name[n]
|
128
|
-
|
129
|
-
if name[n] =~ Patterns::LAST_NAME_PRE_DANGLERS
|
130
|
-
@last = "#{name[n].downcase.capitalize} #{@last}"
|
131
|
-
elsif name[n] =~ Patterns::O_LAST_NAME_PRE_CONCATS
|
132
|
-
@last = "O'#{@last}"
|
133
|
-
# elsif name[n] =~ Patterns::MC_LAST_NAME_PRE_CONCAT
|
134
|
-
# @last = "#{name[n].downcase.capitalize} #{@last}"
|
135
|
-
# elsif name[n] =~ Patterns::ST_LAST_NAME_PRE_CONCAT
|
136
|
-
# @last = "St. #{@last}"
|
137
|
-
elsif name[n] =~ /-+/ and n > 0 and name[n-1]
|
138
|
-
@last = "#{name[n-1]}-#{@last}"
|
139
|
-
name[n-1] = nil
|
140
|
-
else
|
141
|
-
@middle = @middle ? "#{name[n]} #{@middle}" : name[n]
|
142
|
-
end
|
143
|
-
|
144
|
-
name.delete_at(n)
|
145
|
-
end
|
146
|
-
|
147
|
-
@middle.capitalize! if @middle and !(@middle =~ /[a-z]/ and @middle =~ /[A-Z]/)
|
148
|
-
@middle = "#{@middle}." if @middle and @middle.size == 1
|
149
|
-
end
|
150
|
-
|
151
|
-
def parse(name)
|
152
|
-
raise InvalidNameError unless name
|
153
|
-
if name.class == String
|
154
|
-
if name.index(',')
|
155
|
-
name = "#{$2} #{$1}" if name =~ /^([a-z]+)\s*,\s*,*(.*)/i
|
156
|
-
end
|
157
|
-
|
158
|
-
name = name.strip.split(/\s+/)
|
159
|
-
end
|
160
|
-
|
161
|
-
name = name.first.split(/[^[:alnum:]]+/) if name.size == 1 and name.first.split(/[^[:alnum:]]+/)
|
162
|
-
|
163
|
-
extract_prefix(name)
|
164
|
-
extract_suffix(name)
|
165
|
-
extract_first(name)
|
166
|
-
extract_last(name)
|
167
|
-
extract_middle(name)
|
168
|
-
|
169
|
-
raise InvalidNameError, "A parseable name was not found. #{name.inspect}" unless @first
|
170
|
-
|
171
|
-
self
|
172
|
-
end
|
173
|
-
|
174
|
-
def to_s
|
175
|
-
[@prefix, @first, @middle, @last].compact.join(' ') + (@suffix ? ", #{@suffix}" : "")
|
176
|
-
end
|
177
|
-
|
178
|
-
def to_name
|
179
|
-
to_nameable
|
180
|
-
end
|
181
|
-
|
182
|
-
def to_fullname
|
183
|
-
to_s
|
184
|
-
end
|
185
|
-
|
186
|
-
def to_prefix
|
187
|
-
@prefix
|
188
|
-
end
|
189
|
-
|
190
|
-
def to_firstname
|
191
|
-
@first
|
192
|
-
end
|
193
|
-
|
194
|
-
def to_lastname
|
195
|
-
@last
|
196
|
-
end
|
197
|
-
|
198
|
-
def to_middlename
|
199
|
-
@middle
|
200
|
-
end
|
201
|
-
|
202
|
-
def to_suffix
|
203
|
-
@suffix
|
204
|
-
end
|
205
|
-
|
206
|
-
def to_nameable
|
207
|
-
[@first, @last].compact.join(' ')
|
208
|
-
end
|
209
|
-
|
210
|
-
def to_hash
|
211
|
-
return {
|
212
|
-
:prefix => @prefix,
|
213
|
-
:first => @first,
|
214
|
-
:middle => @middle,
|
215
|
-
:last => @last,
|
216
|
-
:suffix => @suffix
|
217
|
-
}
|
218
|
-
end
|
7
|
+
def self.parse(name)
|
8
|
+
Nameable::Latin.new.parse(name)
|
219
9
|
end
|
220
10
|
end
|
@@ -0,0 +1,247 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'nameable/error'
|
3
|
+
require 'nameable/latin/patterns'
|
4
|
+
|
5
|
+
module Nameable
  ##
  # Parses a Latin-alphabet personal name into prefix, first, middle, last and
  # suffix parts, and offers best-effort gender / ethnicity lookups backed by
  # the CSV tables shipped in the gem's data directory.
  #
  # The lookup tables are loaded lazily on the first call to #gender or
  # #ethnicity and are kept in class variables, so they are shared by every
  # instance.
  class Latin
    @@first_names = {}
    @@last_names = {}

    attr_accessor :prefix, :first, :middle, :last, :suffix

    ##
    # Builds a name from already separated parts.
    #
    # Accepts either a single Hash (or Hash subclass) keyed by :prefix, :first,
    # :middle, :last and :suffix, or positional arguments: (first),
    # (first, last) or (first, middle, last). With no arguments every part is
    # nil, which is the state #parse expects to start from.
    def initialize(*args)
      @prefix = @first = @middle = @last = @suffix = nil

      if args.size == 1 && args.first.is_a?(Hash)
        parts = args.first
        @prefix = parts[:prefix] || nil
        @first  = parts[:first] || nil
        @middle = parts[:middle] || nil
        @last   = parts[:last] || nil
        @suffix = parts[:suffix] || nil
      else
        @first  = args.shift unless args.empty?
        # Only grab a middle name if a last name is still left afterwards.
        @middle = args.shift if args.size >= 2
        @last   = args.shift unless args.empty?
      end
    end

    ##
    # name is an Array of tokens; a recognised leading honorific is removed
    # from it and stored (in its canonical spelling) in @prefix.
    #
    # Does nothing when fewer than two tokens are present or when a prefix or
    # first name has already been set.
    def extract_prefix(name)
      return unless name && name.size > 1 && @prefix.nil? && @first.nil?

      Nameable::Latin::Patterns::PREFIX.each_pair do |pretty, regex|
        next unless name.first =~ regex

        @prefix = pretty
        # Remove only the leading token; Array#delete would also remove any
        # later token that happens to be equal to it.
        name.shift
        return
      end
    end

    ##
    # name is an Array of tokens; trailing suffix tokens (index 2 and up) are
    # removed from it and accumulated, in their original order, in @suffix as
    # a comma separated String.
    def extract_suffix(name)
      return unless name && name.size >= 3

      patterns = Nameable::Latin::Patterns

      # Walk backwards so deleting a token never shifts an index we still
      # have to visit.
      (name.size - 1).downto(2) do |n|
        token = name[n]
        suff = nil

        patterns::SUFFIX.each_pair do |pretty, regex|
          suff = pretty if token =~ regex
        end

        if token =~ patterns::SUFFIX_ACADEMIC ||
           token =~ patterns::SUFFIX_PROFESSIONAL ||
           token =~ patterns::SUFFIX_GENERATIONAL_ROMAN
          suff = token.upcase.delete('.')
        end

        # Generic abbreviations are only trusted when the name as a whole is
        # not written entirely in upper case.
        if !suff && name.join != name.join.upcase && token.length > 1 &&
           token =~ patterns::SUFFIX_ABBREVIATION
          suff = token.upcase.delete('.')
        end

        next unless suff

        @suffix = @suffix ? "#{suff}, #{@suffix}" : suff
        name.delete_at(n)
      end
    end

    ##
    # name is an Array of tokens; the first token is removed and stored in
    # @first. Its case is normalised unless it already mixes upper and lower
    # case letters.
    def extract_first(name)
      return unless name && name.size >= 1

      # Non-destructive case fix: the token still belongs to the caller and
      # may be frozen.
      @first = normalize_case(name.shift)
    end

    ##
    # name is an Array of tokens; the last token is removed and stored in
    # @last, with runs of quote characters collapsed to a single apostrophe
    # and runs of hyphens collapsed to a single hyphen.
    def extract_last(name)
      return unless name && name.size >= 1

      @last = normalize_case(name.pop.gsub(/['`"]+/, "'").gsub(/-+/, '-'))
    end

    ##
    # name is an Array of the tokens left between first and last name. Each
    # token is consumed from the end of the array:
    #
    # * a surname particle (LAST_NAME_PRE_DANGLERS) is prepended to @last,
    # * an "O'" style token is concatenated onto @last,
    # * a token containing a hyphen joins the token before it onto @last,
    # * anything else is prepended to @middle.
    def extract_middle(name)
      return unless name && name.size >= 1

      patterns = Nameable::Latin::Patterns

      (name.size - 1).downto(0) do |n|
        token = name[n]
        next unless token # slot already consumed by the hyphen branch below

        if token =~ patterns::LAST_NAME_PRE_DANGLERS
          @last = "#{token.capitalize} #{@last}"
        elsif token =~ patterns::O_LAST_NAME_PRE_CONCATS
          @last = "O'#{@last}"
        elsif token =~ /-/ && n > 0 && name[n - 1]
          @last = "#{name[n - 1].delete('-')}-#{@last}"
          # Mark the preceding token as used; the guard above skips it.
          name[n - 1] = nil
        else
          @middle = @middle ? "#{token} #{@middle}" : token
        end

        name.delete_at(n)
      end

      return unless @middle

      @middle = normalize_case(@middle)
      # A lone initial gets a trailing period.
      @middle = "#{@middle}." if @middle.size == 1
    end

    ##
    # Parses +name+ and fills in the name parts on this instance.
    #
    # +name+ is either a String ("John Q. Public", "Public, John") or an Array
    # of tokens. An Array argument is copied first, so the caller's array and
    # strings are left untouched. Any memoised gender / ethnicity is cleared.
    #
    # Returns self. Raises InvalidNameError when +name+ is nil or when no
    # first name could be found.
    def parse(name)
      raise InvalidNameError unless name

      # A fresh parse invalidates lookups cached for the previous name.
      @gender = nil
      @ethnicity = nil

      if name.is_a?(String)
        # "Last, First ..." becomes "First ... Last".
        if name.index(',') && name =~ /^([a-z]+)\s*,\s*,*(.*)/i
          name = "#{Regexp.last_match(2)} #{Regexp.last_match(1)}"
        end

        name = name.strip.split(/\s+/)
      else
        name = name.dup
      end

      # A single token may still hide several parts ("john.smith").
      name = name.first.split(/[^[:alnum:]]+/) if name.size == 1

      extract_prefix(name)
      extract_suffix(name)
      extract_first(name)
      extract_last(name)
      extract_middle(name)

      raise InvalidNameError, "A parseable name was not found. #{name.inspect}" unless @first

      self
    end

    ##
    # Fills the shared first-name table from data/yob2013.txt.
    # http://www.ssa.gov/oact/babynames/limits.html
    #
    # Each row is read as (name, sex, number). A name listed for only one sex
    # gets that sex; a name listed for both gets the sex with the larger
    # number, ties going to :female.
    # NOTE(review): "larger number wins" is right if the third column is an
    # occurrence count, as it presumably is in the SSA file - confirm.
    def load_huge_gender_table
      figures = {}

      # Stream the file row by row instead of materialising it first.
      CSV.foreach(data_file('yob2013.txt')) do |first, sex, number|
        next unless first && sex # blank or malformed row

        (figures[first.downcase] ||= {})[sex.downcase] = number.to_i
      end

      figures.each do |first, by_sex|
        male = by_sex['m']
        female = by_sex['f']

        if male && female
          @@first_names[first] = male > female ? :male : :female
        elsif male
          @@first_names[first] = :male
        elsif female
          @@first_names[first] = :female
        end
      end
    end

    ##
    # Fills the shared last-name table from data/app_c.csv.
    # http://www.census.gov/genealogy/www/data/2000surnames/index.html
    #
    # The header row (first column equal to "name") is skipped. Non-numeric
    # cells end up as 0 / 0.0 because of to_i / to_f.
    def load_huge_ethnicity_table
      CSV.foreach(data_file('app_c.csv')) do |name, rank, count, _prop100k, _cum_prop100k, pctwhite, pctblack, pctapi, pctaian, pct2prace, pcthispanic|
        next if name.nil? || name == 'name'

        @@last_names[name.downcase] = {
          rank: rank.to_i,
          count: count.to_i,
          percent_white: pctwhite.to_f,
          percent_black: pctblack.to_f,
          percent_asian_pacific_islander: pctapi.to_f,
          percent_american_indian_alaska_native: pctaian.to_f,
          percent_two_or_more_races: pct2prace.to_f,
          percent_hispanic: pcthispanic.to_f
        }
      end
    end

    ##
    # Returns :male, :female or :unknown for the current first name. The
    # result is memoised until the next #parse.
    def gender
      return @gender if @gender

      load_huge_gender_table if @@first_names.empty?
      @gender = @@first_names.fetch(@first.to_s.downcase, :unknown)
    end

    ##
    # Returns the statistics Hash stored for the current last name (see
    # #load_huge_ethnicity_table for its keys) or :unknown. The result is
    # memoised until the next #parse.
    def ethnicity
      return @ethnicity if @ethnicity

      load_huge_ethnicity_table if @@last_names.empty?
      @ethnicity = (@last && @@last_names[@last.downcase]) || :unknown
    end

    # True when #gender is :male.
    def male?
      gender == :male
    end

    # True when #gender is :female.
    def female?
      gender == :female
    end

    # "Prefix First Middle Last, Suffix", leaving out whatever is missing.
    def to_s
      [@prefix, @first, @middle, @last].compact.join(' ') + (@suffix ? ", #{@suffix}" : "")
    end

    # Alias of #to_nameable.
    def to_name
      to_nameable
    end

    # Alias of #to_s.
    def to_fullname
      to_s
    end

    def to_prefix
      @prefix
    end

    def to_firstname
      @first
    end

    def to_lastname
      @last
    end

    def to_middlename
      @middle
    end

    def to_suffix
      @suffix
    end

    # "First Last", leaving out whichever part is missing.
    def to_nameable
      [@first, @last].compact.join(' ')
    end

    # The five name parts as a Hash with Symbol keys.
    def to_hash
      {
        :prefix => @prefix,
        :first  => @first,
        :middle => @middle,
        :last   => @last,
        :suffix => @suffix
      }
    end

    private

    # Returns +text+ unchanged when it already mixes upper and lower case
    # letters, otherwise a capitalised copy. Never modifies +text+ itself.
    def normalize_case(text)
      return text if text =~ /[a-z]/ && text =~ /[A-Z]/

      text.capitalize
    end

    # Absolute path of +filename+ inside the data directory three levels
    # above this source file.
    def data_file(filename)
      File.expand_path(File.join('..', '..', '..', 'data', filename), __FILE__)
    end
  end
end
|