human_name_parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +39 -0
- data/Rakefile +10 -0
- data/fixtures/test_names.txt +31 -0
- data/human_name_parser.gemspec +20 -0
- data/lib/human_name_parser/name.rb +149 -0
- data/lib/human_name_parser/version.rb +3 -0
- data/lib/human_name_parser.rb +9 -0
- data/spec/human_name_parser_spec.rb +15 -0
- data/spec/name_spec.rb +154 -0
- data/spec/spec_helper.rb +8 -0
- metadata +95 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2011 Adam Bachman
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
Attempt to parse and categorize the parts of names.
|
2
|
+
|
3
|
+
With code borrowed from:
|
4
|
+
|
5
|
+
* https://github.com/bricooke/name_parser
|
6
|
+
* https://github.com/jasonpriem/HumanNameParser.php
|
7
|
+
* https://github.com/jconley88/NameParser
|
8
|
+
|
9
|
+
## Install
|
10
|
+
|
11
|
+
`gem install human_name_parser`
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
require 'rubygems'
|
16
|
+
require 'human_name_parser'
|
17
|
+
|
18
|
+
name = HumanNameParser.parse 'George W. Bush Jr.'
|
19
|
+
name.first # => 'George'
|
20
|
+
name.last # => 'Bush'
|
21
|
+
name.initials # => 'GWB'
|
22
|
+
name.suffix # => 'Jr.'
|
23
|
+
name.to_s # => 'George W. Bush Jr.'
|
24
|
+
|
25
|
+
## Development
|
26
|
+
|
27
|
+
~/ $ git clone ... && cd human_name_parser
|
28
|
+
~/ $ bundle install
|
29
|
+
~/ $ rake
|
30
|
+
|
31
|
+
## TODO
|
32
|
+
|
33
|
+
Handle nicknames
|
34
|
+
|
35
|
+
Handle strangely placed commas
|
36
|
+
|
37
|
+
Handle multiple last names. e.g., "Björn Charles O'Malley y Muñoz"
|
38
|
+
|
39
|
+
Handle multiple first names. e.g., "Mary Joe Francis Smith"
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
Björn O'Malley||Björn|||O'Malley|
|
2
|
+
Bin Lin||Bin|||Lin|
|
3
|
+
Linda Jones||Linda|||Jones|
|
4
|
+
Jason H. Priem||Jason||H.|Priem|
|
5
|
+
Björn O'Malley-Muñoz||Björn|||O'Malley-Muñoz|
|
6
|
+
Björn C. O'Malley||Björn||C.|O'Malley|
|
7
|
+
Björn "Bill" O'Malley||Björn|Bill||O'Malley|
|
8
|
+
Björn ("Bill") O'Malley||Björn|Bill||O'Malley|
|
9
|
+
Björn ("Wild Bill") O'Malley||Björn|Wild Bill||O'Malley|
|
10
|
+
Björn (Bill) O'Malley||Björn|Bill||O'Malley|
|
11
|
+
Björn 'Bill' O'Malley||Björn|Bill||O'Malley|
|
12
|
+
Björn C O'Malley||Björn||C|O'Malley|
|
13
|
+
Björn C. R. O'Malley||Björn||C. R.|O'Malley|
|
14
|
+
Björn Charles O'Malley||Björn||Charles|O'Malley|
|
15
|
+
Björn Charles R. O'Malley||Björn||Charles R.|O'Malley|
|
16
|
+
Björn van O'Malley||Björn|||van O'Malley|
|
17
|
+
Björn Charles van der O'Malley||Björn||Charles|van der O'Malley|
|
18
|
+
Björn Charles O'Malley y Muñoz||Björn||Charles|O'Malley y Muñoz|
|
19
|
+
Björn O'Malley, Jr.||Björn|||O'Malley|Jr.
|
20
|
+
Björn O'Malley Jr||Björn|||O'Malley|Jr
|
21
|
+
B O'Malley||B|||O'Malley|
|
22
|
+
William Carlos Williams||William||Carlos|Williams|
|
23
|
+
C. Björn Roger O'Malley|C.|Björn||Roger|O'Malley|
|
24
|
+
B. C. O'Malley||B.||C.|O'Malley|
|
25
|
+
B C O'Malley||B||C|O'Malley|
|
26
|
+
B.J. Thomas||B.J.|||Thomas|
|
27
|
+
O'Malley, Björn||Björn|||O'Malley|
|
28
|
+
O'Malley, Björn Jr||Björn|||O'Malley|Jr
|
29
|
+
O'Malley, C. Björn|C.|Björn|||O'Malley|
|
30
|
+
O'Malley, C. Björn III|C.|Björn|||O'Malley|III
|
31
|
+
O'Malley y Muñoz, C. Björn Roger III|C.|Björn||Roger|O'Malley y Muñoz|III
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "human_name_parser/version"
|
4
|
+
|
5
|
+
# Gem packaging metadata for human_name_parser.
Gem::Specification.new do |s|
  s.name        = "human_name_parser"
  s.version     = HumanNameParser::VERSION
  s.authors     = ["Adam Bachman"]
  s.email       = ["adam.bachman@gmail.com"]
  s.homepage    = "https://github.com/abachman/human_name_parser"
  s.summary     = %q{Split most American names into their component parts.}
  s.description = %q{human_name_parser is intended to split names into their component parts.}
  # The gem ships an MIT LICENSE file; declare it so the packaged metadata
  # no longer reports an empty license list.
  s.licenses    = ["MIT"]

  # The packaged file lists come from whatever git currently tracks, so the
  # gem has to be built from inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rspec"
end
|
@@ -0,0 +1,149 @@
|
|
1
|
+
module HumanNameParser
  # Splits a personal name into prefix, first, middle, last and suffix parts.
  #
  # Recognised layouts:
  # * "First Middle Last"            (no comma)
  # * "Last, First Middle [Suffix]"  (one comma, family name first)
  # * "First Last, Suffix"           (one comma, only suffixes after it)
  #
  # Input with more than one comma is treated as unparseable and leaves every
  # part as an empty string. All parts are always Strings once parsing is done.
  class Name
    # Honorifics recognised at the start of a name (lower case, no trailing dot).
    PREFIXES = ['mr', 'ms', 'miss', 'mrs', 'sir', 'prof', 'professor', 'md', 'dr']
    # Generational / professional suffixes recognised at the end of a name.
    SUFFIXES = ['esq','esquire','jr','sr','2','ii','iii','iv']
    # Particles that belong to the family name when they directly precede it.
    # Tokens are compared one at a time, so the multi-word entries never match
    # on their own; "van der" is still covered because "van" and "der" each match.
    LAST_PREFIXES = ['al', 'bar','ben','bin','da','dal','de la', 'de', 'del', 'der', 'di', 'el', 'ibn', 'la', 'le', 'mc', 'san', 'st', 'ste', 'van', 'van der', 'van den', 'vel','von']

    attr_accessor :first, :middle, :last, :prefix, :suffix

    # Parses +name+ immediately.
    #
    # name - the full name to split; nil is treated as an empty string.
    def initialize(name)
      self.first = ''
      self.middle = ''
      self.last = ''
      self.prefix = ''
      self.suffix = ''

      # to_s keeps nil (or any non-String) from raising inside the splitter.
      @input_string = name.to_s

      parse
    end

    # Tokenises the input and fills in the name parts.
    #
    # One token becomes the first name and two tokens become first + last, with
    # no prefix/suffix detection in either case. Anything else goes through the
    # full prefix / suffix / last / first / middle pipeline.
    #
    # Returns self.
    def parse
      normalize_and_split

      case @split_name.length
      when 1
        self.first = @split_name.first
        self.last = ''
      when 2
        self.first, self.last = @split_name
      else
        parse_prefix.
          parse_suffix.
          parse_last_name.
          parse_first_name.
          parse_middle_name
      end

      self
    end

    # Moves a leading honorific (e.g. "Mr.") from the token list into #prefix.
    # Returns self so the parse steps can be chained.
    def parse_prefix
      self.prefix = @split_name.shift if is_prefix?(@split_name.first)

      self
    end

    # Moves every trailing suffix token into #suffix, keeping their order
    # ("Jr. Esq."). Returns self.
    def parse_suffix
      self.suffix = pop_trailing_suffixes(@split_name).join(' ')

      self
    end

    # Takes the final token as the family name, together with any particles
    # ("van", "der", "di", ...) directly in front of it. Returns self.
    def parse_last_name
      parts = [@split_name.pop]
      parts.unshift(@split_name.pop) while is_last_name_prefix?(@split_name.last)

      # Array#join renders a nil token (empty token list) as an empty string.
      self.last = parts.join(' ')

      self
    end

    # Takes the next remaining token as the first name ('' when none is left).
    # Returns self.
    def parse_first_name
      self.first = @split_name.shift || ''

      self
    end

    # Whatever tokens remain form the middle name. Returns self.
    def parse_middle_name
      self.middle = @split_name.join(' ')

      self
    end

    # Upper-cased first characters of the first, middle and last names, with
    # blank parts skipped (e.g. "JHS").
    def initials
      [first, middle, last].map { |part| part.to_s.slice(0, 1) }.join.upcase
    end

    # The non-blank parts joined with single spaces, in
    # prefix / first / middle / last / suffix order.
    def to_s
      [prefix, first, middle, last, suffix].reject { |part| part.length == 0 }.join(' ')
    end

    private

    # Chooses a tokenising strategy from the number of commas in the input and
    # stores the resulting tokens in @split_name. Returns self.
    def normalize_and_split
      @split_name =
        case @input_string.count(",")
        when 0 then split_first_middle_last
        when 1 then split_last_comma_first_middle
        else [] # more than one comma: not something we can recognise
        end

      self
    end

    # Reorders single-comma input into "first middle last suffix" token order.
    #
    # Suffix tokens are pulled off the end of both halves and appended last, so
    # "Downey Jr., Robert" and "O'Malley, Robert Jr" both end in their suffix.
    # When everything after the comma is a suffix ("Robert Downey, Jr.") the
    # text before the comma is already in first-last order and is left alone.
    def split_last_comma_first_middle
      # limit 2 keeps a trailing empty half ("Smith," => ["Smith", ""]).
      before, after = @input_string.split(",", 2).map { |half| half.split(" ") }

      trailing = pop_trailing_suffixes(after)
      # Never strip the only token before the comma: it has to be a name.
      leading = pop_trailing_suffixes(before, 1)

      after + before + leading + trailing
    end

    # Plain whitespace tokenisation for comma-free input.
    def split_first_middle_last
      @input_string.split(" ")
    end

    # Removes suffix tokens from the end of +tokens+ (mutating it) while more
    # than +min_remaining+ tokens are left; returns them in original order.
    def pop_trailing_suffixes(tokens, min_remaining = 0)
      suffixes = []
      while tokens.length > min_remaining && is_suffix?(tokens.last)
        suffixes.unshift(tokens.pop)
      end
      suffixes
    end

    def is_prefix?(string)
      is_ix?(PREFIXES, string)
    end

    def is_suffix?(string)
      is_ix?(SUFFIXES, string)
    end

    # True when +string+ is, case-insensitively, one of LAST_PREFIXES.
    def is_last_name_prefix?(string)
      return false if string.nil? || string == ""
      LAST_PREFIXES.include?(string.downcase)
    end

    # True when +string+ equals an entry of +kind+, ignoring case and at most
    # one trailing period ("Jr" and "Jr." both match 'jr').
    def is_ix?(kind, string)
      return false if string.nil? || string == ""
      kind.include?(string.downcase.sub(/\.\z/, ''))
    end

  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'human_name_parser'
|
2
|
+
|
3
|
+
# Smoke tests for the top-level HumanNameParser.parse entry point.
describe HumanNameParser do
  # Even an empty string must come back wrapped in a Name.
  it "returns a Name object" do
    HumanNameParser.parse('').class.should == HumanNameParser::Name
  end

  it "parses names" do
    parsed = HumanNameParser.parse("John H. Smith")

    parsed.first.should == 'John'
    parsed.last.should == 'Smith'
    parsed.initials.should == 'JHS'
  end
end
|
data/spec/name_spec.rb
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
require 'human_name_parser/name'
|
2
|
+
|
3
|
+
# Part-by-part expectations for HumanNameParser::Name. Each context supplies
# +full_name+; +name+ is the Name parsed from it.
describe HumanNameParser::Name do
  let(:name) { HumanNameParser::Name.new(full_name) }

  # Non-ASCII characters and multi-word family-name particles.
  context "when full name is Björn Charles van der O'Malley" do
    let(:full_name) { "Björn Charles van der O'Malley" }

    it "should parse the name" do
      name.first.should == 'Björn'
      name.last.should == "van der O'Malley"
      name.middle.should == 'Charles'
    end
  end

  context 'when full name is Mary Lou Smith' do
    let(:full_name) { "Mary Lou Smith" }

    it("gets first name") { name.first.should == 'Mary' }

    it("gets last name") { name.last.should == 'Smith' }

    it("gets prefix") { name.prefix.should == '' }

    it("gets suffix") { name.suffix.should == '' }

    it("gets initials") { name.initials.should == 'MLS' }
  end

  context 'when full name is Mr. Alphonse di Morel Jr. Esq.' do
    let(:full_name) { "Mr. Alphonse di Morel Jr. Esq." }

    it("gets first") { name.first.should == 'Alphonse' }

    it("gets last") { name.last.should == 'di Morel' }

    it("gets prefix") { name.prefix.should == 'Mr.' }

    it("gets suffix") { name.suffix.should == 'Jr. Esq.' }

    it("gets initials") { name.initials.should == 'AD' }
  end

  # A single token is treated as a first name only.
  context 'when full name is ROBOTO' do
    let(:full_name) { "ROBOTO" }

    it("gets first") { name.first.should == 'ROBOTO' }

    it('gets initials') { name.initials.should == 'R' }

    it("doesn't get last") { name.last.should == '' }
  end

  context 'when full name is John Paul Ringo' do
    let(:full_name) { "John Paul Ringo" }

    it("gets first") { name.first.should == 'John' }

    it('gets initials') { name.initials.should == 'JPR' }

    it("gets last") { name.last.should == 'Ringo' }

    it("gets middle") { name.middle.should == 'Paul' }
  end

  # "Last Suffix, First" ordering.
  context 'when full name is Downey Jr., Robert' do
    let(:full_name) { 'Downey Jr., Robert' }

    it("gets first") { name.first.should == 'Robert' }

    it('gets initials') { name.initials.should == 'RD' }

    it("gets last") { name.last.should == 'Downey' }

    it("gets middle") { name.middle.should == '' }

    it("gets suffix") { name.suffix.should == 'Jr.' }
  end

  # More than one comma: nothing should be extracted.
  context 'when full name is garbage' do
    let(:full_name) { '1234 Anywhere St., North Pole, SD 22323' }

    it("gets first") { name.first.should == '' }

    it('gets initials') { name.initials.should == '' }

    it("gets last") { name.last.should == '' }

    it("gets middle") { name.middle.should == '' }

    it("gets suffix") { name.suffix.should == '' }
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: human_name_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Adam Bachman
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-11-10 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: rspec
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
description: human_name_parser is intended to split names into their component parts.
|
36
|
+
email:
|
37
|
+
- adam.bachman@gmail.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files: []
|
43
|
+
|
44
|
+
files:
|
45
|
+
- .gitignore
|
46
|
+
- Gemfile
|
47
|
+
- LICENSE
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- fixtures/test_names.txt
|
51
|
+
- human_name_parser.gemspec
|
52
|
+
- lib/human_name_parser.rb
|
53
|
+
- lib/human_name_parser/name.rb
|
54
|
+
- lib/human_name_parser/version.rb
|
55
|
+
- spec/human_name_parser_spec.rb
|
56
|
+
- spec/name_spec.rb
|
57
|
+
- spec/spec_helper.rb
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: https://github.com/abachman/human_name_parser
|
60
|
+
licenses: []
|
61
|
+
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options: []
|
64
|
+
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
hash: 3
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
version: "0"
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
hash: 3
|
82
|
+
segments:
|
83
|
+
- 0
|
84
|
+
version: "0"
|
85
|
+
requirements: []
|
86
|
+
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 1.6.2
|
89
|
+
signing_key:
|
90
|
+
specification_version: 3
|
91
|
+
summary: Split most American names into their component parts.
|
92
|
+
test_files:
|
93
|
+
- spec/human_name_parser_spec.rb
|
94
|
+
- spec/name_spec.rb
|
95
|
+
- spec/spec_helper.rb
|