openjournals-nameable 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.codeclimate.yml +7 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +17 -0
- data/.travis.yml +14 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +48 -0
- data/LICENSE.txt +22 -0
- data/README.md +84 -0
- data/Rakefile +12 -0
- data/bin/nameable_web_service +24 -0
- data/certs/chorn.pem +55 -0
- data/checksums/nameable-1.1.0.gem.sha512 +1 -0
- data/checksums/nameable-1.1.1.gem.sha512 +1 -0
- data/checksums/nameable-1.1.3.gem.sha512 +1 -0
- data/checksums/nameable-1.1.4.gem.sha512 +1 -0
- data/data/app_c.csv +151672 -0
- data/data/yob2016.txt +32868 -0
- data/lib/nameable/assets.rb +6 -0
- data/lib/nameable/error.rb +4 -0
- data/lib/nameable/extensions.rb +5 -0
- data/lib/nameable/latin/patterns.rb +39 -0
- data/lib/nameable/latin.rb +251 -0
- data/lib/nameable/version.rb +3 -0
- data/lib/nameable.rb +11 -0
- data/nameable.gemspec +33 -0
- data/spec/nameable/extensions_spec.rb +11 -0
- data/spec/nameable/latin_spec.rb +192 -0
- data/spec/nameable_spec.rb +7 -0
- data/spec/spec_helper.rb +11 -0
- metadata +149 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
module Nameable
  class Latin
    # Regular expressions that match the detritus people add to their names:
    # honorific prefixes, generational / academic / professional suffixes, and
    # the small particles that belong to a multi-word last name.
    #
    # Every pattern is meant to be tested against ONE whitespace-delimited
    # token. They are anchored with \A and \z (not ^ and $, which match at
    # every line boundary) so a token containing an embedded newline cannot
    # match on just one of its lines.
    module Patterns
      # Normalized prefix => pattern matching its spellings. Tokens may be
      # wrapped in any number of parentheses, e.g. "(Dr.)".
      PREFIX = {
        'Capt.'  => /\A\(*(capt\.*|captain)\)*\z/i,
        'Dame'   => /\A\(*(dame)\)*\z/i,
        'Dr.'    => /\A\(*(dr\.*|doctor)\)*\z/i,
        'Fr.'    => /\A\(*(fr\.*|friar|father)\)*\z/i,
        'Hon.'   => /\A\(*(hon\.*|honorable)\)*\z/i,
        'Imam'   => /\A\(*(imam)\)*\z/i,
        'Ofc.'   => /\A\(*(ofc\.*|officer)\)*\z/i,
        'Mr.'    => /\A\(*(mr\.*|mister)\)*\z/i,
        'Mrs.'   => /\A\(*(mrs\.*|misses)\)*\z/i,
        'Ms.'    => /\A\(*(ms\.*|miss)\)*\z/i,
        'Rev.'   => /\A\(*(rev\.*|reverend)\)*\z/i,
        'Master' => /\A\(*(master)\)*\z/i,
        'Rabbi'  => /\A\(*(rabbi)\)*\z/i,
        'Sir'    => /\A\(*(sir)\)*\z/i
      }.freeze

      # Normalized suffix => pattern matching its spellings.
      SUFFIX = {
        'Sr.'   => /\A\(*(sr\.?|senior)\)*\z/i,
        'Jr.'   => /\A\(*(jr\.?|junior)\)*\z/i,
        'Esq.'  => /\A\(*(esq\.?|esquire)\)*\z/i,
        'Ph.D.' => /\A\(*(p\.?h\.?d\.?)\)*\z/i
      }.freeze

      # Roman-numeral style generational suffixes (II, iv, XIII, ...).
      SUFFIX_GENERATIONAL_ROMAN = /\A\(*[IVX.]+\)*\z/i
      # Academic degrees.
      SUFFIX_ACADEMIC = /\A(APR|RPh|MD|MA|DMD|DDS|PharmD|EngD|DPhil|JD|DD|DO|BA|BS|BSc|BE|BFA|MS|MSc|MFA|MLA|MBA)\z/i
      # Professional designations.
      SUFFIX_PROFESSIONAL = /\A(PE|CSA|CPA|CPL|CME|CEng|OFM|CSV)\z/i
      # 2-3 upper-case characters, possibly separated with '.'
      SUFFIX_ABBREVIATION = /\A[A-Z]\.?[A-Z]\.?[A-Z]?\.?\z/

      # http://www.onlineaspect.com/2009/08/17/splitting-names/
      # Particles that precede, and belong to, a last name ("van", "de", ...).
      LAST_NAME_PRE_DANGLERS = /\A(mc|vere|von|van|der|da|de|del|della|di|pietro|vanden|du|st|la|ter|ten)\z/i
      # A detached "O'" (with any of the usual quote characters) that should be
      # glued onto the last name that follows it.
      O_LAST_NAME_PRE_CONCATS = /\A(o'|o`|o")\z/i
    end
  end
end
|
@@ -0,0 +1,251 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'nameable/error'
|
3
|
+
require 'nameable/latin/patterns'
|
4
|
+
|
5
|
+
module Nameable
  # Parses a Latin-alphabet personal name into prefix, first, middle, last and
  # suffix components, and offers gender / ethnicity lookups backed by CSV
  # tables that are loaded on first use.
  #
  #   name = Nameable::Latin.new.parse('Mr. Chris Derp Horn Jr.')
  #   name.to_hash # => { prefix: 'Mr.', first: 'Chris', middle: 'Derp',
  #                #      last: 'Horn', suffix: 'Jr.' }
  class Latin
    # Lookup tables shared by every instance; filled by the load_huge_* methods.
    @@first_names = {}
    @@last_names = {}

    attr_accessor :prefix, :first, :middle, :last, :suffix

    ##
    # Accepts either a single Hash (or Hash subclass) with any of the keys
    # :prefix, :first, :middle, :last, :suffix, or positional arguments
    # (first), (first, last) or (first, middle, last).
    def initialize(*args)
      # is_a? rather than an exact class comparison so Hash subclasses are
      # treated as a hash of parts instead of being stored as the first name.
      if args.size == 1 && args.first.is_a?(Hash)
        parts = args.first
        @prefix = parts[:prefix] || nil
        @first  = parts[:first] || nil
        @middle = parts[:middle] || nil
        @last   = parts[:last] || nil
        @suffix = parts[:suffix] || nil
      else
        @first = args.shift unless args.empty?
        @middle = args.shift if args.size >= 2 # Only grab a middle name if we've got a last name left
        @last = args.shift unless args.empty?
      end
    end

    ##
    # name is an Array of tokens. When its first token is a known prefix,
    # stores the normalized prefix and removes that token from the Array.
    # Does nothing when a prefix or first name has already been set.
    def extract_prefix(name)
      return unless name && name.size > 1 && @prefix.nil? && @first.nil?

      Nameable::Latin::Patterns::PREFIX.each do |pretty, regex|
        next unless name.first =~ regex

        @prefix = pretty
        # Remove only the leading token; Array#delete would also strip every
        # later token that happens to be equal to it.
        name.shift
        break
      end
    end

    ##
    # name is an Array of tokens. Walks backwards from the last token down to
    # index 2 (so at least two tokens are left over), collecting every
    # recognised suffix into @suffix and removing it from the Array.
    def extract_suffix(name)
      return unless name && name.size >= 3

      (name.size - 1).downto(2) do |n|
        suff = nil

        Nameable::Latin::Patterns::SUFFIX.each_pair do |pretty, regex|
          suff = pretty if name[n] =~ regex
        end

        if name[n] =~ Nameable::Latin::Patterns::SUFFIX_ACADEMIC ||
           name[n] =~ Nameable::Latin::Patterns::SUFFIX_PROFESSIONAL ||
           name[n] =~ Nameable::Latin::Patterns::SUFFIX_GENERATIONAL_ROMAN
          suff = name[n].upcase.delete('.')
        end

        # An unknown 2-3 letter upper-case abbreviation only counts as a
        # suffix when the rest of the name is not written in all caps.
        if !suff &&
           name.join != name.join.upcase &&
           name[n].length > 1 &&
           name[n] =~ Nameable::Latin::Patterns::SUFFIX_ABBREVIATION
          suff = name[n].upcase.delete('.')
        end

        if suff
          # Walking right to left, so prepend to keep the written order.
          @suffix = @suffix ? "#{suff}, #{@suffix}" : suff
          name.delete_at(n)
        end
      end
    end

    ##
    # name is an Array of tokens. Takes the first token as the first name and
    # removes it from the Array.
    def extract_first(name)
      return unless name && name.size >= 1

      @first = name.shift
      # Non-destructive capitalize: the token may be the caller's own (possibly
      # frozen) String.
      @first = @first.capitalize unless mixed_case?(@first)
    end

    ##
    # name is an Array of tokens. Takes the final token as the last name,
    # normalizing runs of quote characters to a single apostrophe and runs of
    # hyphens to a single hyphen, and removes it from the Array.
    def extract_last(name)
      return unless name && name.size >= 1

      @last = name.pop.gsub(/['`"]+/, "'").gsub(/-+/, '-')
      @last = @last.capitalize unless mixed_case?(@last)
    end

    ##
    # name is an Array of the tokens left between the first and last name.
    # Particles ("van", "de", "O'") and hyphen-joined parts are folded into
    # @last; everything else becomes the middle name. Consumes the Array.
    def extract_middle(name)
      return unless name && name.size >= 1

      (name.size - 1).downto(0) do |n|
        next unless name[n] # blanked below when merged into a hyphenated last name

        if name[n] =~ Nameable::Latin::Patterns::LAST_NAME_PRE_DANGLERS
          @last = "#{name[n].downcase.capitalize} #{@last}"
          @last.gsub!('Van Der', 'van der')
          @last.gsub!('Van De', 'van de')
        elsif name[n] =~ Nameable::Latin::Patterns::O_LAST_NAME_PRE_CONCATS
          @last = "O'#{@last}"
        elsif name[n] =~ /\-/ && n > 0 && name[n - 1]
          # "Horn - Derp": join the token before the hyphen onto the last name.
          @last = "#{name[n - 1].delete('-')}-#{@last}"
          name[n - 1] = nil
        else
          @middle = @middle ? "#{name[n]} #{@middle}" : name[n]
        end

        name.delete_at(n)
      end

      return unless @middle

      # Non-destructive capitalize: a single middle token is the caller's own
      # (possibly frozen) String.
      @middle = @middle.capitalize unless mixed_case?(@middle)
      @middle = "#{@middle}." if @middle.size == 1 # a bare initial gets a period
    end

    ##
    # Parses +name+ (a String, or an Array of tokens) into this object's
    # components and returns self. A String of the form "Last, First" is
    # reordered first. An Array argument is copied, not consumed.
    #
    # Raises InvalidNameError when +name+ is nil or no first name is found.
    def parse(name)
      raise InvalidNameError unless name

      name = name.is_a?(String) ? tokenize(name) : name.dup

      # A single token may still hide several words joined by punctuation.
      name = name.first.split(/[^[:alnum:]]+/) if name.size == 1

      extract_prefix(name)
      extract_suffix(name)
      extract_first(name)
      extract_last(name)
      extract_middle(name)

      raise InvalidNameError, "A parseable name was not found. #{name.inspect}" unless @first

      self
    end

    # http://www.ssa.gov/oact/babynames/limits.html
    #
    # Reads "name,gender,count" rows from +gender_table+ and records, for each
    # lower-cased first name, whichever of :male / :female has the larger
    # count (:female on a tie). Rows are streamed and blank rows are skipped.
    def load_huge_gender_table(gender_table = Nameable::Assets::GENDER_TABLE)
      ranked = {}

      CSV.foreach(gender_table) do |first, gender, rank|
        next unless first && gender # blank or truncated row

        (ranked[first.downcase] ||= {})[gender.downcase] = rank.to_i
      end

      ranked.each do |first, ranks|
        if ranks['m'] && !ranks['f']
          @@first_names[first] = :male
        elsif !ranks['m'] && ranks['f']
          @@first_names[first] = :female
        elsif ranks['m'] && ranks['f']
          @@first_names[first] = ranks['m'] > ranks['f'] ? :male : :female
        end
      end
    end

    # http://www.census.gov/genealogy/www/data/2000surnames/index.html
    #
    # Reads data/app_c.csv and records the census figures for each lower-cased
    # surname. Rows are streamed; the header row and blank rows are skipped.
    def load_huge_ethnicity_table
      table = File.expand_path(File.join('..', '..', '..', 'data', 'app_c.csv'), __FILE__)

      CSV.foreach(table) do |name, rank, count, _prop100k, _cum_prop100k, pctwhite, pctblack, pctapi, pctaian, pct2prace, pcthispanic|
        next if name.nil? || name == 'name'

        @@last_names[name.downcase] = {
          rank: rank.to_i,
          count: count.to_i,
          percent_white: pctwhite.to_f,
          percent_black: pctblack.to_f,
          percent_asian_pacific_islander: pctapi.to_f,
          percent_american_indian_alaska_native: pctaian.to_f,
          percent_two_or_more_races: pct2prace.to_f,
          percent_hispanic: pcthispanic.to_f
        }
      end
    end

    # :male, :female or :unknown for the first name. The result is memoized;
    # the gender table is loaded on first use.
    def gender
      return @gender if @gender

      load_huge_gender_table unless @@first_names && !@@first_names.empty?
      @gender = @@first_names[@first.to_s.downcase] || :unknown
    end

    # Hash of census figures for the last name, or :unknown. The result is
    # memoized; the surname table is loaded on first use.
    def ethnicity
      return @ethnicity if @ethnicity

      load_huge_ethnicity_table unless @@last_names && !@@last_names.empty?
      @ethnicity = (@last && @@last_names[@last.downcase]) || :unknown
    end

    def male?
      gender == :male
    end

    def female?
      gender == :female
    end

    # "Prefix First Middle Last, Suffix" with absent parts omitted.
    def to_s
      [@prefix, @first, @middle, @last].compact.join(' ') + (@suffix ? ", #{@suffix}" : '')
    end

    def to_name
      to_nameable
    end

    def to_fullname
      to_s
    end

    def to_prefix
      @prefix
    end

    def to_firstname
      @first
    end

    def to_lastname
      @last
    end

    def to_middlename
      @middle
    end

    def to_suffix
      @suffix
    end

    # "First Last" with absent parts omitted.
    def to_nameable
      [@first, @last].compact.join(' ')
    end

    def to_hash
      {
        prefix: @prefix,
        first: @first,
        middle: @middle,
        last: @last,
        suffix: @suffix
      }
    end

    private

    # Truthy when +word+ contains both a lower-case and an upper-case ASCII
    # letter, i.e. its capitalization looks deliberate and is left alone.
    def mixed_case?(word)
      word =~ /[a-z]/ && word =~ /[A-Z]/
    end

    # Splits a name String into whitespace-delimited tokens, first turning
    # "Last, First" into "First Last".
    def tokenize(text)
      if text.index(',') && text =~ /^([a-z]+)\s*,\s*,*([^,]*)/i
        text = "#{Regexp.last_match(2)} #{Regexp.last_match(1)}"
      end

      text.strip.split(/\s+/)
    end
  end
end
|
data/lib/nameable.rb
ADDED
data/nameable.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# Make lib/ loadable so the version constant can be read from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'nameable/version'

Gem::Specification.new do |gem|
  # Identity and metadata
  gem.name                  = 'openjournals-nameable'
  gem.version               = Nameable::VERSION
  gem.authors               = ['Chris Horn']
  gem.email                 = ['chorn@chorn.com']
  gem.summary               = 'Parse names into components.'
  gem.description           = 'A library that provides parsing and output of person names, as well as Gender & Ethnicity matching.'
  gem.homepage              = 'https://github.com/chorn/nameable'
  gem.license               = 'MIT'
  gem.required_ruby_version = '>= 1.9'

  # Package contents: everything tracked by git.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Sign the gem only on a machine that actually holds the private key.
  key_path = File.expand_path '~/.certs/chorn@chorn.com-rubygems.key'
  if File.file?(key_path)
    gem.signing_key = key_path
    gem.cert_chain  = ['certs/chorn.pem']
  end

  # Development-only dependencies
  %w[bundler rake].each { |dep| gem.add_development_dependency dep }
  gem.add_development_dependency 'rspec', '~> 3.6'
  %w[simplecov codeclimate-test-reporter].each { |dep| gem.add_development_dependency dep }
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
shared_examples :generalized_parsing do |input, outputs|
|
4
|
+
let(:nameable) { Nameable::Latin.new.parse(input) }
|
5
|
+
|
6
|
+
it "#parse ``#{input}''" do
|
7
|
+
expect(nameable.prefix).to eq outputs[0]
|
8
|
+
expect(nameable.first).to eq outputs[1]
|
9
|
+
expect(nameable.middle).to eq outputs[2]
|
10
|
+
expect(nameable.last).to eq outputs[3]
|
11
|
+
expect(nameable.suffix).to eq outputs[4]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
describe Nameable::Latin do
|
16
|
+
describe '.new' do
|
17
|
+
it "doesn't raise" do
|
18
|
+
expect { Nameable::Latin.new }.to_not raise_error
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
describe '#parse' do
|
23
|
+
context 'with a single word name' do
|
24
|
+
it_behaves_like :generalized_parsing, 'Chris', [nil, 'Chris', nil, nil, nil]
|
25
|
+
end
|
26
|
+
|
27
|
+
context 'with a simple first and last name' do
|
28
|
+
it_behaves_like :generalized_parsing, 'Chris Horn', [nil, 'Chris', nil, 'Horn', nil]
|
29
|
+
end
|
30
|
+
|
31
|
+
context 'with an uppercase first and last name' do
|
32
|
+
it_behaves_like :generalized_parsing, 'CHRIS HORN', [nil, 'Chris', nil, 'Horn', nil]
|
33
|
+
end
|
34
|
+
|
35
|
+
context 'with a lowercase first and last name' do
|
36
|
+
it_behaves_like :generalized_parsing, 'chris horn', [nil, 'Chris', nil, 'Horn', nil]
|
37
|
+
end
|
38
|
+
|
39
|
+
context 'with a mixed case first and last name' do
|
40
|
+
it_behaves_like :generalized_parsing, 'DeChris Horn', [nil, 'DeChris', nil, 'Horn', nil]
|
41
|
+
end
|
42
|
+
|
43
|
+
context 'with last name, first name' do
|
44
|
+
it_behaves_like :generalized_parsing, 'Horn, Chris', [nil, 'Chris', nil, 'Horn', nil]
|
45
|
+
end
|
46
|
+
|
47
|
+
context 'with last name, first name,' do
|
48
|
+
it_behaves_like :generalized_parsing, 'Horn, Chris,', [nil, 'Chris', nil, 'Horn', nil]
|
49
|
+
end
|
50
|
+
|
51
|
+
context 'with first middle last name' do
|
52
|
+
it_behaves_like :generalized_parsing, 'Chris Derp Horn', [nil, 'Chris', 'Derp', 'Horn', nil]
|
53
|
+
end
|
54
|
+
|
55
|
+
context 'with all uppercase first middle last name' do
|
56
|
+
it_behaves_like :generalized_parsing, 'CHRIS DERP HORN', [nil, 'Chris', 'Derp', 'Horn', nil]
|
57
|
+
end
|
58
|
+
|
59
|
+
context 'with all lowercase first middle last name' do
|
60
|
+
it_behaves_like :generalized_parsing, 'chris derp horn', [nil, 'Chris', 'Derp', 'Horn', nil]
|
61
|
+
end
|
62
|
+
|
63
|
+
context 'with all mixed case first middle last name' do
|
64
|
+
it_behaves_like :generalized_parsing, 'DeChris LeDerp ZeHorn', [nil, 'DeChris', 'LeDerp', 'ZeHorn', nil]
|
65
|
+
end
|
66
|
+
|
67
|
+
context 'with first last suffix' do
|
68
|
+
it_behaves_like :generalized_parsing, 'Chris Horn DRP', [nil, 'Chris', nil, 'Horn', 'DRP']
|
69
|
+
end
|
70
|
+
|
71
|
+
context 'with prefix first last suffix' do
|
72
|
+
it_behaves_like :generalized_parsing, 'Mr. Chris Horn DRP', ['Mr.', 'Chris', nil, 'Horn', 'DRP']
|
73
|
+
end
|
74
|
+
|
75
|
+
%w[Dame Rabbi Imam Master Sir].each do |prefix|
|
76
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", [prefix, 'Chris', nil, 'Horn', nil]
|
77
|
+
end
|
78
|
+
|
79
|
+
context 'with a normalizing prefix' do
|
80
|
+
%w[Mr Mr. Mister].each do |prefix|
|
81
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Mr.', 'Chris', nil, 'Horn', nil]
|
82
|
+
end
|
83
|
+
|
84
|
+
%w[Mrs Mrs. Misses].each do |prefix|
|
85
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Mrs.', 'Chris', nil, 'Horn', nil]
|
86
|
+
end
|
87
|
+
|
88
|
+
%w[Ms Ms. Miss].each do |prefix|
|
89
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Ms.', 'Chris', nil, 'Horn', nil]
|
90
|
+
end
|
91
|
+
|
92
|
+
%w[Dr Dr. Doctor].each do |prefix|
|
93
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Dr.', 'Chris', nil, 'Horn', nil]
|
94
|
+
end
|
95
|
+
|
96
|
+
%w[Rev Rev. Reverend].each do |prefix|
|
97
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Rev.', 'Chris', nil, 'Horn', nil]
|
98
|
+
end
|
99
|
+
|
100
|
+
%w[Fr Fr. Friar Father].each do |prefix|
|
101
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Fr.', 'Chris', nil, 'Horn', nil]
|
102
|
+
end
|
103
|
+
|
104
|
+
%w[Hon Hon. Honorable].each do |prefix|
|
105
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Hon.', 'Chris', nil, 'Horn', nil]
|
106
|
+
end
|
107
|
+
|
108
|
+
%w[Capt Capt. Captain].each do |prefix|
|
109
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Capt.', 'Chris', nil, 'Horn', nil]
|
110
|
+
end
|
111
|
+
|
112
|
+
%w[Ofc Ofc. Officer].each do |prefix|
|
113
|
+
it_behaves_like :generalized_parsing, "#{prefix} Chris Horn", ['Ofc.', 'Chris', nil, 'Horn', nil]
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
context 'with a normalizing suffix' do
|
118
|
+
%w[Sr Sr. Senior].each do |suffix|
|
119
|
+
it_behaves_like :generalized_parsing, "Chris Horn #{suffix}", [nil, 'Chris', nil, 'Horn', 'Sr.']
|
120
|
+
end
|
121
|
+
|
122
|
+
%w[Jr Jr. Junior].each do |suffix|
|
123
|
+
it_behaves_like :generalized_parsing, "Chris Horn #{suffix}", [nil, 'Chris', nil, 'Horn', 'Jr.']
|
124
|
+
end
|
125
|
+
|
126
|
+
%w[Esq Esq. Esquire].each do |suffix|
|
127
|
+
it_behaves_like :generalized_parsing, "Chris Horn #{suffix}", [nil, 'Chris', nil, 'Horn', 'Esq.']
|
128
|
+
end
|
129
|
+
|
130
|
+
%w[PHD PhD Ph.D Ph.D. P.H.D.].each do |suffix|
|
131
|
+
it_behaves_like :generalized_parsing, "Chris Horn #{suffix}", [nil, 'Chris', nil, 'Horn', 'Ph.D.']
|
132
|
+
end
|
133
|
+
|
134
|
+
%w[
|
135
|
+
ii III iv V VI ix Xiii APR RPh MD MA DMD DDS PharmD EngD DPhil
|
136
|
+
JD DD DO BA BS BSc BE BFA MA MS MSc MFA MLA MBA PE CSA CPA CPL CME CEng OFM CSV
|
137
|
+
].each do |suffix|
|
138
|
+
it_behaves_like :generalized_parsing, "Chris Horn #{suffix}", [nil, 'Chris', nil, 'Horn', suffix.upcase]
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
context 'with a multi word last name' do
|
143
|
+
%w[mc vere von van da de del della di da pietro vanden du st la ter ten].each do |prefix|
|
144
|
+
it_behaves_like :generalized_parsing, "Chris #{prefix} Horn", [nil, 'Chris', nil, "#{prefix.downcase.capitalize} Horn", nil]
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
context 'with van der Something last name' do
|
149
|
+
it_behaves_like :generalized_parsing, 'Dr. Ellert van der Velden DRP', ['Dr.', 'Ellert', nil, 'van der Velden', 'DRP']
|
150
|
+
end
|
151
|
+
|
152
|
+
context 'with van de Something last name' do
|
153
|
+
it_behaves_like :generalized_parsing, 'Dr. Sil C. van de Leemput DRP', ['Dr.', 'Sil', 'C.', 'van de Leemput', 'DRP']
|
154
|
+
end
|
155
|
+
|
156
|
+
context "with an o'last-name" do
|
157
|
+
["O'Horn", 'O`Horn', "O' Horn"].each do |last|
|
158
|
+
it_behaves_like :generalized_parsing, "Chris #{last}", [nil, 'Chris', nil, "O'Horn", nil]
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
context 'with a hyphenated last name' do
|
163
|
+
['Horn - Derp', 'Horn-Derp', 'Horn--Derp', 'Horn -- Derp'].each do |last|
|
164
|
+
it_behaves_like :generalized_parsing, "Chris #{last}", [nil, 'Chris', nil, 'Horn-Derp', nil]
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
context 'gender' do
|
170
|
+
it 'Chris is more likely to be male' do
|
171
|
+
expect(Nameable::Latin.new.parse('Chris Horn').male?).to be true
|
172
|
+
end
|
173
|
+
|
174
|
+
it 'Janine is more likely to be female' do
|
175
|
+
expect(Nameable::Latin.new.parse('Janine Horn').female?).to be true
|
176
|
+
end
|
177
|
+
|
178
|
+
it 'Derp has :unknown gender' do
|
179
|
+
expect(Nameable::Latin.new.parse('Derp Horn').gender).to eq(:unknown)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
context 'ethnicity' do
|
184
|
+
it 'Horn has a hash of ethnicity results' do
|
185
|
+
expect(Nameable::Latin.new.parse('Chris Horn').ethnicity).to be_a Hash
|
186
|
+
end
|
187
|
+
|
188
|
+
it "Horn's census :percent_white > 80% " do
|
189
|
+
expect(Nameable::Latin.new.parse('Chris Horn').ethnicity[:percent_white]).to be >= 80
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
|
+
require 'bundler/setup'
|
4
|
+
Bundler.require(:test)
|
5
|
+
require 'simplecov'
|
6
|
+
SimpleCov.start
|
7
|
+
|
8
|
+
require 'nameable'
|
9
|
+
|
10
|
+
RSpec.configure do |config|
|
11
|
+
end
|