name_parser 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/.rspec +1 -0
- data/.rvmrc +48 -0
- data/Gemfile +4 -0
- data/README.md +40 -0
- data/Rakefile +8 -0
- data/lib/name_parser/parser.rb +74 -0
- data/lib/name_parser/patterns.rb +29 -0
- data/lib/name_parser/version.rb +3 -0
- data/lib/name_parser.rb +9 -0
- data/name_parser.gemspec +23 -0
- data/spec/name_parser/parser_spec.rb +359 -0
- data/spec/name_parser_spec.rb +25 -0
- data/spec/spec_helper.rb +6 -0
- metadata +89 -0
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/.rvmrc
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
# This is an RVM Project .rvmrc file, used to automatically load the ruby
|
4
|
+
# development environment upon cd'ing into the directory
|
5
|
+
|
6
|
+
# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
|
7
|
+
# Only full ruby name is supported here, for short names use:
|
8
|
+
# echo "rvm use 1.9.2" > .rvmrc
|
9
|
+
environment_id="ruby-1.9.2-p318@name_parser"
|
10
|
+
|
11
|
+
# Uncomment the following lines if you want to verify rvm version per project
|
12
|
+
# rvmrc_rvm_version="1.10.3" # 1.10.1 seems like a safe start
|
13
|
+
# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
|
14
|
+
# echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
|
15
|
+
# return 1
|
16
|
+
# }
|
17
|
+
|
18
|
+
# First we attempt to load the desired environment directly from the environment
|
19
|
+
# file. This is very fast and efficient compared to running through the entire
|
20
|
+
# CLI and selector. If you want feedback on which environment was used then
|
21
|
+
# insert the word 'use' after --create as this triggers verbose mode.
|
22
|
+
if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
|
23
|
+
&& -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
|
24
|
+
then
|
25
|
+
\. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
|
26
|
+
[[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
|
27
|
+
\. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
|
28
|
+
else
|
29
|
+
# If the environment file has not yet been created, use the RVM CLI to select.
|
30
|
+
rvm --create "$environment_id" || {
|
31
|
+
echo "Failed to create RVM environment '${environment_id}'."
|
32
|
+
return 1
|
33
|
+
}
|
34
|
+
fi
|
35
|
+
|
36
|
+
# If you use bundler, this might be useful to you:
|
37
|
+
# if [[ -s Gemfile ]] && {
|
38
|
+
# ! builtin command -v bundle >/dev/null ||
|
39
|
+
# builtin command -v bundle | grep $rvm_path/bin/bundle >/dev/null
|
40
|
+
# }
|
41
|
+
# then
|
42
|
+
# printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
|
43
|
+
# gem install bundler
|
44
|
+
# fi
|
45
|
+
# if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
|
46
|
+
# then
|
47
|
+
# bundle install | grep -vE '^Using|Your bundle is complete'
|
48
|
+
# fi
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
NameParser
|
2
|
+
=========
|
3
|
+
|
4
|
+
Does what it says. Based on Matthew Ericson's people gem: https://github.com/mericson/people which, in turn, is loosely based on
|
5
|
+
the Lingua-EN-NameParser Perl module.
|
6
|
+
|
7
|
+
To set up a development environment, clone the repo and run `bundle` to install all of the dependencies.
|
8
|
+
|
9
|
+
Usage
|
10
|
+
-----
|
11
|
+
```ruby
|
12
|
+
require "name_parser"
|
13
|
+
|
14
|
+
include NameParser
|
15
|
+
|
16
|
+
name = "Captain Arthur Two Sheds Jackson Jr."
|
17
|
+
|
18
|
+
parser = Parser.new(name)
|
19
|
+
|
20
|
+
parser.first # => "Arthur"
|
21
|
+
parser.middle # => "Two Sheds"
|
22
|
+
parser.last # => "Jackson"
|
23
|
+
parser.title # => "Captain"
|
24
|
+
parser.suffix # => "Jr."
|
25
|
+
```
|
26
|
+
|
27
|
+
or using the mixin
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
require "name_parser"
|
31
|
+
|
32
|
+
include NameParser
|
33
|
+
|
34
|
+
name = "Captain Arthur Two Sheds Jackson Jr."
|
35
|
+
|
36
|
+
parser = name_parser(name) # => NameParser::Parser
|
37
|
+
|
38
|
+
parser.first # => "Arthur"
|
39
|
+
# ...
|
40
|
+
```
|
data/Rakefile
ADDED
data/lib/name_parser/parser.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module NameParser
  # Splits a free-form personal name into title, first, middle, last and
  # suffix parts.
  #
  # All parsing happens once, in the constructor. Each reader returns nil when
  # the corresponding part was not recognised.
  #
  #   parser = NameParser::Parser.new("Captain Arthur Two Sheds Jackson Jr.")
  #   parser.title  # => "Captain"
  #   parser.first  # => "Arthur"
  #   parser.middle # => "Two Sheds"
  #   parser.last   # => "Jackson"
  #   parser.suffix # => "Jr."
  class Parser
    include Patterns

    # The patterns are constant, so the regexps are compiled once at load time
    # rather than on every parse. All of them are case-insensitive.
    TRAILING_SUFFIX_REGEXP = Regexp.new("(.+), (%s)$" % SUFFIX_PATTERN, Regexp::IGNORECASE)
    TITLE_REGEXP = Regexp.new("^(%s) (.+)" % TITLE_PATTERN, Regexp::IGNORECASE)
    SUFFIX_REGEXP = Regexp.new("(.+) (%s)$" % SUFFIX_PATTERN, Regexp::IGNORECASE)
    TWO_PART_NAME_REGEXP = Regexp.new(
      '^%s%s$' % [NAME_PATTERN, LAST_NAME_PATTERN], Regexp::IGNORECASE
    )
    THREE_PART_NAME_REGEXP = Regexp.new(
      '^%s%s%s$' % [NAME_PATTERN, NAME_PATTERN, LAST_NAME_PATTERN], Regexp::IGNORECASE
    )
    FOUR_PART_NAME_REGEXP = Regexp.new(
      '^%s%s%s%s$' % [NAME_PATTERN, NAME_PATTERN, NAME_PATTERN, LAST_NAME_PATTERN],
      Regexp::IGNORECASE
    )

    attr_reader :first, :middle, :last, :title, :suffix

    # @param name [#to_s] the raw name. It is copied, so the caller's string is
    #   never mutated; nil is treated as an empty name instead of raising.
    def initialize(name)
      @name = name.to_s.dup
      run
    end

    protected

    # Runs the whole pipeline. Order matters: trailing ", Suffix" is folded
    # into the name before the comma-based "Last, First" reversal, and title
    # and suffix are stripped before the remaining words are split.
    def run
      remove_non_name_characters
      remove_extra_spaces
      clean_trailing_suffixes
      reverse_last_and_first_names
      remove_commas
      parse_title
      parse_suffix
      parse_name
    end

    # Drops every character that is not an ASCII letter, digit, hyphen,
    # apostrophe, period, ampersand, slash, comma or whitespace.
    #
    # Whitespace other than a plain space is kept here on purpose: this step
    # runs before remove_extra_spaces, and deleting a tab or newline would glue
    # the neighbouring words together ("John\tSmith" -> "JohnSmith").
    def remove_non_name_characters
      @name.gsub!(/[^A-Za-z0-9\-'.&\/\s,]/, '')
    end

    # Collapses each run of whitespace into one space and trims both ends.
    def remove_extra_spaces
      @name.gsub!(/\s+/, ' ')
      @name.strip!
    end

    # Turns a final ", Suffix" into " Suffix" so the comma is not mistaken for
    # a "Last, First" separator by reverse_last_and_first_names.
    def clean_trailing_suffixes
      @name.gsub!(TRAILING_SUFFIX_REGEXP, "\\1 \\2")
    end

    # Rewrites "Last, First" as "First ;Last". The semicolon marks where the
    # last name starts; LAST_NAME_PATTERN accepts it as an optional prefix.
    def reverse_last_and_first_names
      @name.gsub!(/;/, '') # make sure the marker is unambiguous
      @name.gsub!(/(.+),(.+)/, "\\2 ;\\1")
      @name.strip!
    end

    # Removes any commas that are still present.
    def remove_commas
      @name.gsub!(/,/, '')
    end

    # Moves a leading title (followed by a space) from the name into @title.
    def parse_title
      match = TITLE_REGEXP.match(@name)
      return unless match

      # TITLE_PATTERN contains groups of its own, so the remainder of the name
      # is addressed as the last group rather than by a fixed index.
      @name = match[-1]
      @title = match[1].strip
    end

    # Moves a trailing suffix (preceded by a space) from the name into @suffix.
    def parse_suffix
      match = SUFFIX_REGEXP.match(@name)
      return unless match

      @name = match[1].strip
      @suffix = match[2]
    end

    # Splits what is left into first, middle and last names. Two-, four- and
    # three-word shapes are tried in that order; a name that fits none of them
    # leaves all three attributes nil.
    def parse_name
      if (match = TWO_PART_NAME_REGEXP.match(@name))
        @first, @last = match.captures
      elsif (match = FOUR_PART_NAME_REGEXP.match(@name))
        # Only the first four captures are name parts; the rest are the inner
        # groups of LAST_NAME_PATTERN.
        @first, *middles, @last = match.captures[0..3]
        @middle = middles.join(' ')
      elsif (match = THREE_PART_NAME_REGEXP.match(@name))
        @first, @middle, @last = match.captures
      end
    end
  end
end
|
data/lib/name_parser/patterns.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module NameParser
  # Regular-expression source fragments. These are plain strings, not Regexp
  # objects: callers interpolate them into a larger pattern and compile it
  # case-insensitively.
  #
  # Capture groups are part of the contract. NAME_PATTERN has exactly one
  # group and LAST_NAME_PATTERN has its outer group first, because the parser
  # reads name parts positionally from MatchData#captures.
  #
  # Every regexp backslash is written as "\\" here. A single "\." inside a
  # double-quoted Ruby string collapses to ".", which matches any character.
  module Patterns
    # One name token followed by its separator: periods and/or whitespace.
    NAME_PATTERN = "([\\w\\-']+)[.\\s]+".freeze

    last_name_prefixes = [
      "Mc", "Mac", "Des", "Dell[ae]", "Del", "De La", "De Los", "Da", "Di", "Du",
      "La", "Le", "Lo", "St\\.", "Den", "Von", "Van", "Von Der", "Van De[nr]"
    ].join("|")

    # A last name, optionally introduced by ";" (the marker the parser inserts
    # when it reverses "Last, First"): either a single token, or an optional
    # particle, whitespace and a word.
    LAST_NAME_PATTERN = ";?([\\w\\-']+|(#{last_name_prefixes})?\\s+([\\w]+))".freeze

    # Generational, professional and academic suffixes. Compound forms come
    # first so that "Jr., Esq." is taken as a whole.
    SUFFIX_PATTERN = [
      "Jn?r\\.?,? Esq\\.?", "Sn?r\\.?,? Esq\\.?", "I{1,3},? Esq\\.?",
      "Jn?r\\.?,? M\\.?D\\.?", "Sn?r\\.?,? M\\.?D\\.?", "I{1,3},? M\\.?D\\.?",
      "Sn?r\\.?", "Jn?r\\.?", "Esq(\\.|uire)?", "Esquire\\.?",
      "Attorney at Law\\.?", "Attorney-at-Law\\.?", "Ph\\.?d\\.?", "C\\.?P\\.?A\\.?",
      "XI{1,3}", "X", "IV", "VI{1,3}", "V", "IX", "I{1,3}\\.?",
      "M\\.?D\\.?", "D\\.?M\\.?D\\.?"
    ].join("|").freeze

    STANDARD = "M(ister|aster|issus|iss|r\\.?|rs\\.?|s\\.?|mme\\.?|essr\\.?)".freeze
    ROYALTY = "Sir|Lord|Lady|Madam(e)?|Dame|Duke|Duchess|King|Queen|Prince|Princess".freeze
    MEDICINE = "D(r\\.?|octor)|Sister|Matron".freeze
    LEGAL = "Judge|Justice|Att(\\.|orney) Gen(\\.|eral)".freeze
    POLICE = "Det(\\.|ective) Insp(\\.|ector)|Det(\\.|ective)|Insp(\\.|ector)|Chief|Constable|Officer".freeze

    # "Marshall?" and "argent" keep the historical misspellings working
    # alongside the correct "Marshal" and "Sergeant".
    MILITARY = [
      "Brig(adier)?", "Capt(\\.?|ain)", "C(dr\\.?|ommander|ommodore)", "Col(\\.?|onel)",
      "Gen(\\.?|eral)", "Field Marshall?", "Fl(\\.?|ight) Off(\\.?|icer)",
      "Fl(t\\.?|ight) L(t\\.?|ieutenant)", "P(te\\.?|rivate)", "S(gt\\.?|ergeant|argent)",
      "Air (Commander|Commodore|Marshall?)",
      "L(t\\.?|ieutenant) (Col(\\.?|onel)|Gen(\\.?|eral)|C(dr\\.?|ommander))",
      "L(t\\.?|eut\\.?|ieutenant|eutenant)", "Maj(\\.?|or) Gen(\\.?|eral)", "Maj(\\.?|or)"
    ].join("|").freeze

    RELIGIOUS = [
      "Rabbi", "Brother", "Father", "Chaplain", "Pastor", "(Archb|B)ishop", "Cardinal", "Pope",
      "Mother( Superior)?",
      "(Most|Mt\\.|Very|V\\.) Re(v\\.?|vd\\.?|ver[ea]nd)",
      "Re(v\\.?|vd\\.?|ver[ea]nd)"
    ].join("|").freeze

    # The "wo", "Lieutenant " and " General" parts are optional so that the
    # plain forms (Councilman, Governor, Postmaster) are recognised too.
    POLITICIAN = [
      "Mayor", "Sen(\\.|ator)?", "Rep(\\.|resentative)?", "Ald(\\.|erman)?", "Pres(\\.|ident)?",
      "Ambassador", "Assembly(woman|man)", "Chair(woman|man)", "Commissioner",
      "Congress(woman|man)", "Council(wo)?man", "Counselor", "Delegate",
      "(Lieutenant )?Governor", "Postmaster( General)?"
    ].join("|").freeze

    EDUCATOR = "Dean|President|Ass(\\.|oc\\.|ociate|t\\.|istant) Prof(\\.|essor)|Prof(\\.|essor)".freeze

    # POLITICIAN precedes MILITARY so that "Lieutenant Governor" is taken as
    # one title before the bare military "Lieutenant" alternative can claim
    # the first word.
    TITLE_PATTERN = [
      STANDARD, ROYALTY, MEDICINE, LEGAL, POLICE, POLITICIAN, MILITARY, RELIGIOUS, EDUCATOR
    ].join("|").freeze
  end
end
|
data/lib/name_parser.rb
ADDED
data/name_parser.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "name_parser/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |s|
|
6
|
+
s.name = "name_parser"
|
7
|
+
s.version = NameParser::VERSION
|
8
|
+
s.authors = ["Chris Pallotta", "Scott Pullen", "Tom Leonard"]
|
9
|
+
s.email = ["ChristopherF_Pallotta@dfci.harvard.edu", "ScottT_Pullen@dfci.harvard.edu", "Thomas_Leonard@dfci.harvard.edu"]
|
10
|
+
s.homepage = ""
|
11
|
+
s.summary = %q{Parses strings.}
|
12
|
+
s.description = %q{Parses particular kinds of strings. For now, it only handles parsing people names.}
|
13
|
+
|
14
|
+
s.rubyforge_project = "name_parser"
|
15
|
+
|
16
|
+
s.files = `git ls-files`.split("\n")
|
17
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
18
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
|
+
s.require_paths = ["lib"]
|
20
|
+
|
21
|
+
s.add_development_dependency 'rspec'
|
22
|
+
s.add_development_dependency 'debugger'
|
23
|
+
end
|
data/spec/name_parser/parser_spec.rb
ADDED
@@ -0,0 +1,359 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
include NameParser
|
4
|
+
|
5
|
+
Parser.send(:public, *Parser.protected_instance_methods)
|
6
|
+
|
7
|
+
describe Parser do
|
8
|
+
let(:name) { 'Horatio Xavier Hornblower' }
|
9
|
+
let!(:parser) { Parser.new(name) }
|
10
|
+
|
11
|
+
# None of these attributes may expose a writer. Method names are bare symbols
# such as :first=, so the name is built without a leading colon; the previous
# ":first=" string produced the symbol :":first=", which no object ever
# defines, and the expectation could never fail.
[:name, :first, :middle, :last, :title, :suffix].each do |attr|
  describe "#{attr} attribute" do
    it 'is read only' do
      parser.methods.should_not include(:"#{attr}=")
    end
  end
end
|
18
|
+
|
19
|
+
describe 'name attribute' do
|
20
|
+
it 'is set on initialize' do
|
21
|
+
get_name.should == name
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '#remove_non_name_characters' do
|
26
|
+
it 'only allows alpha-numerics, dashes, backslashes, apostrophes and ampersands' do
|
27
|
+
set_name("aZ1/&'`!@$#%^*()_+=[]{}|\:;""")
|
28
|
+
parser.remove_non_name_characters
|
29
|
+
|
30
|
+
get_name.should == "aZ1/&'"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#remove_extra_spaces' do
|
35
|
+
it 'removes leading spaces, tabs and line breaks' do
|
36
|
+
set_name(" \t\nFoo")
|
37
|
+
parser.remove_extra_spaces
|
38
|
+
|
39
|
+
get_name.should == 'Foo'
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'removes trailing spaces, tabs and line breaks' do
|
43
|
+
set_name("Foo \t\n")
|
44
|
+
parser.remove_extra_spaces
|
45
|
+
|
46
|
+
get_name.should == 'Foo'
|
47
|
+
end
|
48
|
+
|
49
|
+
it 'replaces repeating spaces, tabs and line breaks with a single space' do
|
50
|
+
set_name(" Foo \t\nBar ")
|
51
|
+
parser.remove_extra_spaces
|
52
|
+
|
53
|
+
get_name.should == 'Foo Bar'
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe '#clean_trailing_suffixes' do
|
58
|
+
it 'removes trailing suffixes' do
|
59
|
+
set_name('Biggie Smalls, Junior, Esquire, Phd., VII')
|
60
|
+
parser.clean_trailing_suffixes
|
61
|
+
|
62
|
+
get_name.should == 'Biggie Smalls, Junior, Esquire, Phd. VII'
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe '#reverse_last_and_first_names' do
|
67
|
+
it 'reorders last and first names if comma is present' do
|
68
|
+
set_name('Smith, Johnny')
|
69
|
+
parser.reverse_last_and_first_names
|
70
|
+
|
71
|
+
get_name.should == 'Johnny ;Smith'
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
describe '#remove_commas' do
|
76
|
+
it 'removes all commas' do
|
77
|
+
set_name('Hounddog ;Taylor,')
|
78
|
+
parser.remove_commas
|
79
|
+
|
80
|
+
get_name.should == 'Hounddog ;Taylor'
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
describe '#parse_title' do
|
85
|
+
context 'when a title is found' do
|
86
|
+
before { set_name('Colonel Henry Potter') }
|
87
|
+
|
88
|
+
it 'sets title attribute' do
|
89
|
+
parser.parse_title
|
90
|
+
parser.title.should == 'Colonel'
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'removes the title from name' do
|
94
|
+
parser.parse_title
|
95
|
+
|
96
|
+
get_name.should == 'Henry Potter'
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
context 'when a title is not found' do
|
101
|
+
it 'returns nil' do
|
102
|
+
set_name('Frank Burns')
|
103
|
+
|
104
|
+
parser.parse_title
|
105
|
+
parser.title.should be_nil
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
describe '#parse_suffix' do
|
111
|
+
context 'when a suffix is found' do
|
112
|
+
before { set_name('Bubba Watson Jr.') }
|
113
|
+
|
114
|
+
it 'returns the suffix' do
|
115
|
+
parser.parse_suffix
|
116
|
+
parser.suffix.should == 'Jr.'
|
117
|
+
|
118
|
+
end
|
119
|
+
|
120
|
+
it 'removes the suffix from name' do
|
121
|
+
parser.parse_suffix
|
122
|
+
|
123
|
+
get_name.should == 'Bubba Watson'
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
context 'when a suffix is not found' do
|
128
|
+
it 'returns nil' do
|
129
|
+
set_name('Bubba Watson')
|
130
|
+
|
131
|
+
parser.parse_suffix
|
132
|
+
parser.suffix.should be_nil
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
describe '#parse_name' do
|
138
|
+
context 'when first initial and last name' do
|
139
|
+
before do
|
140
|
+
set_name('J Tolkien')
|
141
|
+
parser.parse_name
|
142
|
+
end
|
143
|
+
|
144
|
+
it 'returns first initial' do
|
145
|
+
parser.first.should == 'J'
|
146
|
+
end
|
147
|
+
|
148
|
+
it 'returns nil middle name' do
|
149
|
+
parser.middle.should be_nil
|
150
|
+
end
|
151
|
+
|
152
|
+
it 'returns last name' do
|
153
|
+
parser.last.should == 'Tolkien'
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
context 'when first initial, middle initial and last name' do
|
158
|
+
before do
|
159
|
+
set_name('J R Tolkien')
|
160
|
+
parser.parse_name
|
161
|
+
end
|
162
|
+
|
163
|
+
it 'returns first initial' do
|
164
|
+
parser.first.should == 'J'
|
165
|
+
end
|
166
|
+
|
167
|
+
it 'returns middle initial' do
|
168
|
+
parser.middle.should == 'R'
|
169
|
+
end
|
170
|
+
|
171
|
+
it 'returns last name' do
|
172
|
+
parser.last.should == 'Tolkien'
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
context 'when first initial dot middle initial dot last name' do
|
177
|
+
before do
|
178
|
+
set_name('J. R. Tolkien')
|
179
|
+
parser.parse_name
|
180
|
+
end
|
181
|
+
|
182
|
+
it 'returns first initial' do
|
183
|
+
parser.first.should == 'J'
|
184
|
+
end
|
185
|
+
|
186
|
+
it 'returns middle initial' do
|
187
|
+
parser.middle.should == 'R'
|
188
|
+
end
|
189
|
+
|
190
|
+
it 'returns last name' do
|
191
|
+
parser.last.should == 'Tolkien'
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
context 'when first initial, two middle initials and last name' do
|
196
|
+
before do
|
197
|
+
set_name('J R R Tolkien')
|
198
|
+
parser.parse_name
|
199
|
+
end
|
200
|
+
|
201
|
+
it 'returns first initial' do
|
202
|
+
parser.first.should == 'J'
|
203
|
+
end
|
204
|
+
|
205
|
+
it 'returns both middle initials' do
|
206
|
+
parser.middle.should == 'R R'
|
207
|
+
end
|
208
|
+
|
209
|
+
it 'returns last name' do
|
210
|
+
parser.last.should == 'Tolkien'
|
211
|
+
end
|
212
|
+
|
213
|
+
end
|
214
|
+
|
215
|
+
context 'when first initial, middle name and last name' do
|
216
|
+
before do
|
217
|
+
set_name('J Ronald Tolkien')
|
218
|
+
parser.parse_name
|
219
|
+
end
|
220
|
+
|
221
|
+
it 'returns first initial' do
|
222
|
+
parser.first.should == 'J'
|
223
|
+
end
|
224
|
+
|
225
|
+
it 'returns middle name' do
|
226
|
+
parser.middle.should == 'Ronald'
|
227
|
+
end
|
228
|
+
|
229
|
+
it 'returns last name' do
|
230
|
+
parser.last.should == 'Tolkien'
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
context 'when first name, middle initial and last name' do
|
235
|
+
before do
|
236
|
+
set_name('John R Tolkien')
|
237
|
+
parser.parse_name
|
238
|
+
end
|
239
|
+
|
240
|
+
it 'returns first name' do
|
241
|
+
parser.first.should == 'John'
|
242
|
+
end
|
243
|
+
|
244
|
+
it 'returns middle initial' do
|
245
|
+
parser.middle.should == 'R'
|
246
|
+
end
|
247
|
+
|
248
|
+
it 'returns last name' do
|
249
|
+
parser.last.should == 'Tolkien'
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
253
|
+
context 'when first name, two middle initials and last name' do
|
254
|
+
before do
|
255
|
+
set_name('John R R Tolkien')
|
256
|
+
parser.parse_name
|
257
|
+
end
|
258
|
+
|
259
|
+
it 'returns first name' do
|
260
|
+
parser.first.should == 'John'
|
261
|
+
end
|
262
|
+
|
263
|
+
it 'returns middle name' do
|
264
|
+
parser.middle.should == 'R R'
|
265
|
+
end
|
266
|
+
|
267
|
+
it 'returns last name' do
|
268
|
+
parser.last.should == 'Tolkien'
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
context 'when first name, two middle initials with dots and last name' do
|
273
|
+
before do
|
274
|
+
set_name('John R. R. Tolkien')
|
275
|
+
parser.parse_name
|
276
|
+
end
|
277
|
+
|
278
|
+
it 'returns first name' do
|
279
|
+
parser.first.should == 'John'
|
280
|
+
end
|
281
|
+
|
282
|
+
it 'returns middle name' do
|
283
|
+
parser.middle.should == 'R R'
|
284
|
+
end
|
285
|
+
|
286
|
+
it 'returns last name' do
|
287
|
+
parser.last.should == 'Tolkien'
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
context 'when first name and last name' do
|
292
|
+
before do
|
293
|
+
set_name('John Tolkien')
|
294
|
+
parser.parse_name
|
295
|
+
end
|
296
|
+
|
297
|
+
it 'returns first name' do
|
298
|
+
parser.first.should == 'John'
|
299
|
+
end
|
300
|
+
|
301
|
+
it 'returns nil middle name' do
|
302
|
+
parser.middle.should be_nil
|
303
|
+
end
|
304
|
+
|
305
|
+
it 'returns last name' do
|
306
|
+
parser.last.should == 'Tolkien'
|
307
|
+
end
|
308
|
+
end
|
309
|
+
|
310
|
+
context 'when first name, middle name and last name' do
|
311
|
+
before do
|
312
|
+
set_name('John Ronald Tolkien')
|
313
|
+
parser.parse_name
|
314
|
+
end
|
315
|
+
|
316
|
+
it 'returns first name' do
|
317
|
+
parser.first.should == 'John'
|
318
|
+
end
|
319
|
+
|
320
|
+
it 'returns middle name' do
|
321
|
+
parser.middle.should == 'Ronald'
|
322
|
+
end
|
323
|
+
|
324
|
+
it 'returns last name' do
|
325
|
+
parser.last.should == 'Tolkien'
|
326
|
+
end
|
327
|
+
end
|
328
|
+
|
329
|
+
context 'when last name is hyphenated' do
|
330
|
+
it 'returns last name' do
|
331
|
+
set_name('John R. Tolkien-Smith')
|
332
|
+
parser.parse_name
|
333
|
+
parser.last.should == 'Tolkien-Smith'
|
334
|
+
end
|
335
|
+
|
336
|
+
end
|
337
|
+
|
338
|
+
context 'when last name is preceded by a semicolon' do
|
339
|
+
it 'returns last name' do
|
340
|
+
set_name('J R R ;Tolkien')
|
341
|
+
parser.parse_name
|
342
|
+
parser.last.should == 'Tolkien'
|
343
|
+
end
|
344
|
+
end
|
345
|
+
end
|
346
|
+
|
347
|
+
def set_name(name)
|
348
|
+
parser.instance_variable_set(:@first, nil)
|
349
|
+
parser.instance_variable_set(:@middle, nil)
|
350
|
+
parser.instance_variable_set(:@last, nil)
|
351
|
+
parser.instance_variable_set(:@title, nil)
|
352
|
+
parser.instance_variable_set(:@suffix, nil)
|
353
|
+
parser.instance_variable_set(:@name, name)
|
354
|
+
end
|
355
|
+
|
356
|
+
def get_name
|
357
|
+
parser.instance_variable_get(:@name)
|
358
|
+
end
|
359
|
+
end
|
data/spec/name_parser_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
class TestClass
|
4
|
+
include NameParser
|
5
|
+
end
|
6
|
+
|
7
|
+
describe NameParser do
|
8
|
+
let!(:name) { "Adams Jr., Mr. John Quincy" }
|
9
|
+
let!(:test_class) { TestClass.new }
|
10
|
+
|
11
|
+
describe '#name_parser' do
|
12
|
+
it 'returns a new NameParser::Parser object' do
|
13
|
+
test_class.name_parser(name).class.should == NameParser::Parser
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should run the parser' do
|
17
|
+
parser = test_class.name_parser(name)
|
18
|
+
parser.title.should == 'Mr.'
|
19
|
+
parser.first.should == 'John'
|
20
|
+
parser.middle.should == 'Quincy'
|
21
|
+
parser.last.should == 'Adams'
|
22
|
+
parser.suffix.should == 'Jr.'
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: name_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.5
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Chris Pallotta
|
9
|
+
- Scott Pullen
|
10
|
+
- Tom Leonard
|
11
|
+
autorequire:
|
12
|
+
bindir: bin
|
13
|
+
cert_chain: []
|
14
|
+
date: 2013-02-07 00:00:00.000000000Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rspec
|
18
|
+
requirement: &2152901460 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ! '>='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: '0'
|
24
|
+
type: :development
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: *2152901460
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: debugger
|
29
|
+
requirement: &2152901040 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ! '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :development
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: *2152901040
|
38
|
+
description: Parses particular kinds of strings. For now, it only handles parsing
|
39
|
+
people names.
|
40
|
+
email:
|
41
|
+
- ChristopherF_Pallotta@dfci.harvard.edu
|
42
|
+
- ScottT_Pullen@dfci.harvard.edu
|
43
|
+
- Thomas_Leonard@dfci.harvard.edu
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- .rspec
|
50
|
+
- .rvmrc
|
51
|
+
- Gemfile
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- lib/name_parser.rb
|
55
|
+
- lib/name_parser/parser.rb
|
56
|
+
- lib/name_parser/patterns.rb
|
57
|
+
- lib/name_parser/version.rb
|
58
|
+
- name_parser.gemspec
|
59
|
+
- spec/name_parser/parser_spec.rb
|
60
|
+
- spec/name_parser_spec.rb
|
61
|
+
- spec/spec_helper.rb
|
62
|
+
homepage: ''
|
63
|
+
licenses: []
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ! '>='
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
requirements: []
|
81
|
+
rubyforge_project: name_parser
|
82
|
+
rubygems_version: 1.8.17
|
83
|
+
signing_key:
|
84
|
+
specification_version: 3
|
85
|
+
summary: Parses strings.
|
86
|
+
test_files:
|
87
|
+
- spec/name_parser/parser_spec.rb
|
88
|
+
- spec/name_parser_spec.rb
|
89
|
+
- spec/spec_helper.rb
|