human_name_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in human_name_parser.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ (The MIT License)
2
+
3
+ Copyright (c) 2011 Adam Bachman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ 'Software'), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ Attempt to parse and categorize the parts of names.
2
+
3
+ With code borrowed from:
4
+
5
+ * https://github.com/bricooke/name_parser
6
+ * https://github.com/jasonpriem/HumanNameParser.php
7
+ * https://github.com/jconley88/NameParser
8
+
9
+ ## Install
10
+
11
+ `gem install human_name_parser`
12
+
13
+ ## Usage
14
+
15
+ require 'rubygems'
16
+ require 'human_name_parser'
17
+
18
+ name = HumanNameParser.parse 'George W. Bush Jr.'
19
+ name.first # => 'George'
20
+ name.last # => 'Bush'
21
+ name.initials # => 'GWB'
22
+ name.suffix # => 'Jr.'
23
+ name.to_s # => 'George W. Bush Jr.'
24
+
25
+ ## Development
26
+
27
+ ~/ $ git clone ... && cd human_name_parser
28
+ ~/ $ bundle install
29
+ ~/ $ rake
30
+
31
+ ## TODO
32
+
33
+ Handle nicknames
34
+
35
+ Handle strangely placed commas
36
+
37
+ Handle multiple last names. e.g., "Björn Charles O'Malley y Muñoz"
38
+
39
+ Handle multiple first names. e.g., "Mary Joe Francis Smith"
data/Rakefile ADDED
@@ -0,0 +1,10 @@
require "bundler/gem_tasks"
require 'rspec/core/rake_task'

# A bare `rake` runs the spec task defined below.
task :default => :spec

desc "Run all specs"
RSpec::Core::RakeTask.new do |spec_task|
  # Options handed to the rspec command line.
  spec_task.rspec_opts = ['-fs', '--color']
  # Options handed to the ruby interpreter itself.
  spec_task.ruby_opts = ['-w']
end
@@ -0,0 +1,31 @@
1
+ Björn O'Malley||Björn|||O'Malley|
2
+ Bin Lin||Bin|||Lin|
3
+ Linda Jones||Linda|||Jones|
4
+ Jason H. Priem||Jason||H.|Priem|
5
+ Björn O'Malley-Muñoz||Björn|||O'Malley-Muñoz|
6
+ Björn C. O'Malley||Björn||C.|O'Malley|
7
+ Björn "Bill" O'Malley||Björn|Bill||O'Malley|
8
+ Björn ("Bill") O'Malley||Björn|Bill||O'Malley|
9
+ Björn ("Wild Bill") O'Malley||Björn|Wild Bill||O'Malley|
10
+ Björn (Bill) O'Malley||Björn|Bill||O'Malley|
11
+ Björn 'Bill' O'Malley||Björn|Bill||O'Malley|
12
+ Björn C O'Malley||Björn||C|O'Malley|
13
+ Björn C. R. O'Malley||Björn||C. R.|O'Malley|
14
+ Björn Charles O'Malley||Björn||Charles|O'Malley|
15
+ Björn Charles R. O'Malley||Björn||Charles R.|O'Malley|
16
+ Björn van O'Malley||Björn|||van O'Malley|
17
+ Björn Charles van der O'Malley||Björn||Charles|van der O'Malley|
18
+ Björn Charles O'Malley y Muñoz||Björn||Charles|O'Malley y Muñoz|
19
+ Björn O'Malley, Jr.||Björn|||O'Malley|Jr.
20
+ Björn O'Malley Jr||Björn|||O'Malley|Jr
21
+ B O'Malley||B|||O'Malley|
22
+ William Carlos Williams||William||Carlos|Williams|
23
+ C. Björn Roger O'Malley|C.|Björn||Roger|O'Malley|
24
+ B. C. O'Malley||B.||C.|O'Malley|
25
+ B C O'Malley||B||C|O'Malley|
26
+ B.J. Thomas||B.J.|||Thomas|
27
+ O'Malley, Björn||Björn|||O'Malley|
28
+ O'Malley, Björn Jr||Björn|||O'Malley|Jr
29
+ O'Malley, C. Björn|C.|Björn|||O'Malley|
30
+ O'Malley, C. Björn III|C.|Björn|||O'Malley|III
31
+ O'Malley y Muñoz, C. Björn Roger III|C.|Björn||Roger|O'Malley y Muñoz|III
@@ -0,0 +1,20 @@
# -*- encoding: utf-8 -*-
# Gem specification for human_name_parser.
$:.push File.expand_path("../lib", __FILE__)
require "human_name_parser/version"

Gem::Specification.new do |s|
  s.name        = "human_name_parser"
  s.version     = HumanNameParser::VERSION
  s.authors     = ["Adam Bachman"]
  s.email       = ["adam.bachman@gmail.com"]
  s.homepage    = "https://github.com/abachman/human_name_parser"
  s.summary     = %q{Split most American names into their component parts.}
  s.description = %q{human_name_parser is intended to split names into their component parts.}
  # The project ships an MIT LICENSE file; declare it in the gem metadata too.
  s.licenses    = ["MIT"]

  # The file lists come from git, so the gem must be built inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # The documented workflow is `bundle install` followed by `rake`, so rake
  # has to be installable through this gemspec as well.
  s.add_development_dependency "rake"
  s.add_development_dependency "rspec"
end
@@ -0,0 +1,149 @@
module HumanNameParser
  # Splits a personal name into prefix, first, middle, last and suffix parts.
  #
  # Two input layouts are understood:
  #
  # * "First Middle Last" (optionally with a leading title and trailing
  #   suffixes), and
  # * "Last, First Middle" with exactly one comma. A comma followed only by
  #   suffixes ("First Last, Jr.") is treated as a trailing suffix.
  #
  # Input containing more than one comma is not recognised and yields empty
  # strings for every part.
  class Name
    # Titles recognised at the start of a name (lower case, no trailing period).
    PREFIXES = ['mr', 'ms', 'miss', 'mrs', 'sir', 'prof', 'professor', 'md', 'dr'].freeze
    # Generational / professional suffixes recognised at the end of a name.
    SUFFIXES = ['esq','esquire','jr','sr','2','ii','iii','iv'].freeze
    # Particles that belong to the surname when they directly precede it.
    # Entries may span two words (e.g. "van den").
    LAST_PREFIXES = ['al', 'bar','ben','bin','da','dal','de la', 'de', 'del', 'der', 'di', 'el', 'ibn', 'la', 'le', 'mc', 'san', 'st', 'ste', 'van', 'van der', 'van den', 'vel','von'].freeze

    attr_accessor :first, :middle, :last, :prefix, :suffix

    # @param name [#to_s] the raw name; nil is treated as an empty string
    def initialize(name)
      self.first  = ''
      self.middle = ''
      self.last   = ''
      self.prefix = ''
      self.suffix = ''

      # to_s keeps nil (or other non-string input) from raising in #parse.
      @input_string = name.to_s

      parse
    end

    # Tokenises the input and fills in the name parts.
    #
    # One- and two-token names are assigned directly to first / first+last
    # without looking for titles or suffixes; anything else runs the full
    # prefix -> suffix -> last -> first -> middle pipeline.
    #
    # @return [Name] self
    def parse
      normalize_and_split

      case @split_name.length
      when 1
        self.first = @split_name.first
        self.last  = ''
      when 2
        self.first, self.last = @split_name
      else
        parse_prefix.
          parse_suffix.
          parse_last_name.
          parse_first_name.
          parse_middle_name
      end

      self
    end

    # Moves a leading title (see PREFIXES) from the token list into #prefix.
    #
    # @return [Name] self
    def parse_prefix
      self.prefix = @split_name.shift if is_prefix?(@split_name.first)
      self
    end

    # Moves every trailing suffix token (see SUFFIXES) into #suffix, keeping
    # their original order and joining them with single spaces.
    #
    # @return [Name] self
    def parse_suffix
      found = []
      found.unshift(@split_name.pop) while is_suffix?(@split_name.last)
      self.suffix = found.join(' ')
      self
    end

    # Takes the final token as the surname, then keeps absorbing preceding
    # surname particles (one- or two-word, see LAST_PREFIXES).
    #
    # @return [Name] self
    def parse_last_name
      # compact: the token list may already be empty (unrecognised input).
      parts = [@split_name.pop].compact

      while (width = last_name_prefix_width) > 0
        parts.unshift(*@split_name.pop(width))
      end

      self.last = parts.join(' ')
      self
    end

    # Takes the first remaining token as the given name ('' when none is left).
    #
    # @return [Name] self
    def parse_first_name
      self.first = @split_name.shift || ''
      self
    end

    # Whatever tokens are still left become the middle name.
    #
    # @return [Name] self
    def parse_middle_name
      self.middle = @split_name.join(' ')
      self
    end

    # @return [String] upper-cased first characters of first, middle and last
    #   (empty or nil parts contribute nothing)
    def initials
      [first, middle, last].map { |part| part.to_s.slice(0, 1) }.join.upcase
    end

    # @return [String] all non-empty parts joined by single spaces, in the
    #   order prefix, first, middle, last, suffix
    def to_s
      [prefix, first, middle, last, suffix].reject { |part| part.to_s.empty? }.join(' ')
    end

    private

    # Fills @split_name with the tokens in "first ... last [suffix]" order.
    def normalize_and_split
      @split_name =
        case @input_string.count(",")
        when 0 then split_first_middle_last
        when 1 then split_last_comma_first_middle
        else [] # more than one comma: this is a thing that cannot be recognised
        end

      self
    end

    # Reorders "Last, First Middle" into first/middle/last token order.
    #
    # Suffix tokens at the end of the part after the comma are moved to the
    # very end, so "Last, First Jr" and "First Last, Jr." both end in the
    # suffix instead of having it land in the first or middle name.
    def split_last_comma_first_middle
      before, after = @input_string.split(",", 2).map { |part| part.split(" ") }

      trailing = []
      trailing.unshift(after.pop) while is_suffix?(after.last)

      after + before + trailing
    end

    def split_first_middle_last
      @input_string.split(" ")
    end

    def is_prefix?(string)
      is_ix?(PREFIXES, string)
    end

    def is_suffix?(string)
      is_ix?(SUFFIXES, string)
    end

    # True when +string+ (one or two space-joined tokens) is a surname particle.
    def is_last_name_prefix?(string)
      return false if string.nil? || string.empty?
      LAST_PREFIXES.include?(string.downcase)
    end

    # How many trailing tokens (2, 1 or 0) form a surname particle. Two-word
    # particles are tried first so entries such as "van den" are recognised
    # even though "den" is not listed on its own.
    def last_name_prefix_width
      [2, 1].find do |count|
        @split_name.length >= count &&
          is_last_name_prefix?(@split_name.last(count).join(' '))
      end || 0
    end

    # Case-insensitive membership test that tolerates one trailing period
    # ("Jr." matches 'jr').
    def is_ix?(kind, string)
      return false if string.nil? || string.empty?
      kind.include?(string.downcase.chomp('.'))
    end
  end
end
@@ -0,0 +1,3 @@
# Top-level namespace of the human_name_parser gem.
module HumanNameParser
  # Release number of the gem; human_name_parser.gemspec reads it for s.version.
  VERSION = '0.0.1'
end
@@ -0,0 +1,9 @@
require "human_name_parser/version"

# Public entry point of the gem.
module HumanNameParser
  # Name is loaded on first reference rather than at require time.
  autoload :Name, 'human_name_parser/name'

  class << self
    # Builds a Name from the given string.
    #
    # @param input_string [String] the raw name to parse
    # @return [HumanNameParser::Name]
    def parse(input_string)
      Name.new(input_string)
    end
  end
end
@@ -0,0 +1,15 @@
require 'human_name_parser'

# Smoke tests for the module-level HumanNameParser.parse entry point.
describe HumanNameParser do
  it "returns a Name object" do
    HumanNameParser.parse('').class.should == HumanNameParser::Name
  end

  it "parses names" do
    parsed = HumanNameParser.parse("John H. Smith")

    parsed.first.should == 'John'
    parsed.last.should == 'Smith'
    parsed.initials.should == 'JHS'
  end
end
data/spec/name_spec.rb ADDED
@@ -0,0 +1,154 @@
require 'human_name_parser/name'

# Behavioural examples for HumanNameParser::Name. Every context defines
# `full_name`; `name` is the parsed result for it.
describe HumanNameParser::Name do
  let(:name) { HumanNameParser::Name.new(full_name) }

  # any unicode issues?
  context "when full name is Björn Charles van der O'Malley" do
    let(:full_name) { "Björn Charles van der O'Malley" }

    it "should parse the name" do
      name.first.should == 'Björn'
      name.last.should == "van der O'Malley"
      name.middle.should == 'Charles'
    end
  end

  context 'when full name is Mary Lou Smith' do
    let(:full_name) { "Mary Lou Smith" }

    it "gets first name" do
      name.first.should == 'Mary'
    end

    it "gets last name" do
      name.last.should == 'Smith'
    end

    it "gets prefix" do
      name.prefix.should == ''
    end

    it "gets suffix" do
      name.suffix.should == ''
    end

    it "gets initials" do
      name.initials.should == 'MLS'
    end
  end

  context 'when full name is Mr. Alphonse di Morel Jr. Esq.' do
    let(:full_name) { "Mr. Alphonse di Morel Jr. Esq." }

    it "gets first" do
      name.first.should == 'Alphonse'
    end

    it "gets last" do
      name.last.should == 'di Morel'
    end

    it "gets prefix" do
      name.prefix.should == 'Mr.'
    end

    it "gets suffix" do
      name.suffix.should == 'Jr. Esq.'
    end

    it "gets initials" do
      name.initials.should == 'AD'
    end
  end

  context 'when full name is ROBOTO' do
    let(:full_name) { "ROBOTO" }

    it "gets first" do
      name.first.should == 'ROBOTO'
    end

    it 'gets initials' do
      name.initials.should == 'R'
    end

    it "doesn't get last" do
      name.last.should == ''
    end
  end

  context 'when full name is John Paul Ringo' do
    let(:full_name) { "John Paul Ringo" }

    it "gets first" do
      name.first.should == 'John'
    end

    it 'gets initials' do
      name.initials.should == 'JPR'
    end

    it "gets last" do
      name.last.should == 'Ringo'
    end

    it "gets middle" do
      name.middle.should == 'Paul'
    end
  end

  context 'when full name is Downey Jr., Robert' do
    let(:full_name) { 'Downey Jr., Robert' }

    it "gets first" do
      name.first.should == 'Robert'
    end

    it 'gets initials' do
      name.initials.should == 'RD'
    end

    it "gets last" do
      name.last.should == 'Downey'
    end

    it "gets middle" do
      name.middle.should == ''
    end

    it "gets suffix" do
      name.suffix.should == 'Jr.'
    end
  end

  context 'when full name is garbage' do
    let(:full_name) { '1234 Anywhere St., North Pole, SD 22323' }

    it "gets first" do
      name.first.should == ''
    end

    it 'gets initials' do
      name.initials.should == ''
    end

    it "gets last" do
      name.last.should == ''
    end

    it "gets middle" do
      name.middle.should == ''
    end

    it "gets suffix" do
      name.suffix.should == ''
    end
  end
end
@@ -0,0 +1,8 @@
# Put the spec directory and then lib/ at the front of the load path
# (lib/ ends up first because it is unshifted last).
[
  File.expand_path('..', __FILE__),
  File.expand_path('../../lib', __FILE__)
].each { |dir| $:.unshift dir }

require 'human_name_parser'
require 'rspec'

RSpec.configure { |config| config.mock_with :rspec }
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: human_name_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Adam Bachman
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-11-10 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rspec
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: human_name_parser is intended to split names into their component parts.
36
+ email:
37
+ - adam.bachman@gmail.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - LICENSE
48
+ - README.md
49
+ - Rakefile
50
+ - fixtures/test_names.txt
51
+ - human_name_parser.gemspec
52
+ - lib/human_name_parser.rb
53
+ - lib/human_name_parser/name.rb
54
+ - lib/human_name_parser/version.rb
55
+ - spec/human_name_parser_spec.rb
56
+ - spec/name_spec.rb
57
+ - spec/spec_helper.rb
58
+ has_rdoc: true
59
+ homepage: https://github.com/abachman/human_name_parser
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options: []
64
+
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ hash: 3
82
+ segments:
83
+ - 0
84
+ version: "0"
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.6.2
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: Split most American names into their component parts.
92
+ test_files:
93
+ - spec/human_name_parser_spec.rb
94
+ - spec/name_spec.rb
95
+ - spec/spec_helper.rb