human_name_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in human_name_parser.gemspec
gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ (The MIT License)
2
+
3
+ Copyright (c) 2011 Adam Bachman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ 'Software'), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ Attempts to parse a personal name and categorize its parts: prefix, first, middle, last, and suffix.
2
+
3
+ With code borrowed from:
4
+
5
+ * https://github.com/bricooke/name_parser
6
+ * https://github.com/jasonpriem/HumanNameParser.php
7
+ * https://github.com/jconley88/NameParser
8
+
9
+ ## Install
10
+
11
+ `gem install human_name_parser`
12
+
13
+ ## Usage
14
+
15
+ require 'rubygems'
16
+ require 'human_name_parser'
17
+
18
+ name = HumanNameParser.parse 'George W. Bush Jr.'
19
+ name.first # => 'George'
20
+ name.last # => 'Bush'
21
+ name.initials # => 'GWB'
22
+ name.suffix # => 'Jr.'
23
+ name.to_s # => 'George W. Bush Jr.'
24
+
25
+ ## Development
26
+
27
+ ~/ $ git clone ... && cd human_name_parser
28
+ ~/ $ bundle install
29
+ ~/ $ rake
30
+
31
+ ## TODO
32
+
33
+ Handle nicknames
34
+
35
+ Handle strangely placed commas
36
+
37
+ Handle multiple last names. e.g., "Björn Charles O'Malley y Muñoz"
38
+
39
+ Handle multiple first names. e.g., "Mary Joe Francis Smith"
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
require "bundler/gem_tasks"
require 'rspec/core/rake_task'

# Running plain `rake` runs the spec suite.
task :default => [:spec]

desc "Run all specs"
RSpec::Core::RakeTask.new do |t|
  # Use the formatter's full name: the short "-fs" alias is not accepted by
  # every RSpec release, and the gemspec does not pin an RSpec version.
  t.rspec_opts = %w(--format documentation --color)
  # Run the specs with Ruby warnings enabled.
  t.ruby_opts = %w(-w)
end
@@ -0,0 +1,31 @@
1
+ Björn O'Malley||Björn|||O'Malley|
2
+ Bin Lin||Bin|||Lin|
3
+ Linda Jones||Linda|||Jones|
4
+ Jason H. Priem||Jason||H.|Priem|
5
+ Björn O'Malley-Muñoz||Björn|||O'Malley-Muñoz|
6
+ Björn C. O'Malley||Björn||C.|O'Malley|
7
+ Björn "Bill" O'Malley||Björn|Bill||O'Malley|
8
+ Björn ("Bill") O'Malley||Björn|Bill||O'Malley|
9
+ Björn ("Wild Bill") O'Malley||Björn|Wild Bill||O'Malley|
10
+ Björn (Bill) O'Malley||Björn|Bill||O'Malley|
11
+ Björn 'Bill' O'Malley||Björn|Bill||O'Malley|
12
+ Björn C O'Malley||Björn||C|O'Malley|
13
+ Björn C. R. O'Malley||Björn||C. R.|O'Malley|
14
+ Björn Charles O'Malley||Björn||Charles|O'Malley|
15
+ Björn Charles R. O'Malley||Björn||Charles R.|O'Malley|
16
+ Björn van O'Malley||Björn|||van O'Malley|
17
+ Björn Charles van der O'Malley||Björn||Charles|van der O'Malley|
18
+ Björn Charles O'Malley y Muñoz||Björn||Charles|O'Malley y Muñoz|
19
+ Björn O'Malley, Jr.||Björn|||O'Malley|Jr.
20
+ Björn O'Malley Jr||Björn|||O'Malley|Jr
21
+ B O'Malley||B|||O'Malley|
22
+ William Carlos Williams||William||Carlos|Williams|
23
+ C. Björn Roger O'Malley|C.|Björn||Roger|O'Malley|
24
+ B. C. O'Malley||B.||C.|O'Malley|
25
+ B C O'Malley||B||C|O'Malley|
26
+ B.J. Thomas||B.J.|||Thomas|
27
+ O'Malley, Björn||Björn|||O'Malley|
28
+ O'Malley, Björn Jr||Björn|||O'Malley|Jr
29
+ O'Malley, C. Björn|C.|Björn|||O'Malley|
30
+ O'Malley, C. Björn III|C.|Björn|||O'Malley|III
31
+ O'Malley y Muñoz, C. Björn Roger III|C.|Björn||Roger|O'Malley y Muñoz|III
@@ -0,0 +1,20 @@
1
# -*- encoding: utf-8 -*-
# Put this gem's lib/ directory on the load path so the version file below
# can be required before the gem is installed.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "human_name_parser/version"

Gem::Specification.new do |s|
  s.name        = "human_name_parser"
  s.version     = HumanNameParser::VERSION
  s.authors     = ["Adam Bachman"]
  s.email       = ["adam.bachman@gmail.com"]
  s.homepage    = "https://github.com/abachman/human_name_parser"
  s.summary     = %q{Split most American names into their component parts.}
  s.description = %q{human_name_parser is intended to split names into their component parts.}
  # The package ships an MIT LICENSE file; declare it in the gem metadata too.
  s.license     = "MIT"

  # The file lists come from git, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rspec"
end
@@ -0,0 +1,149 @@
1
module HumanNameParser
  # Splits a personal name into prefix, first, middle, last and suffix parts.
  #
  # Two layouts are recognised:
  #
  # * "First Middle Last"  (no comma)
  # * "Last, First Middle" (exactly one comma)
  #
  # A suffix may also follow the comma ("John Smith, Jr.") or close the part
  # after the comma ("Smith, John Jr"). Input containing more than one comma
  # is treated as unparseable and leaves every part empty.
  #
  # Every part is always a String; parts that were not found are "".
  class Name
    # Honorifics recognised at the start of a name. Compared
    # case-insensitively, with an optional trailing period.
    PREFIXES = ['mr', 'ms', 'miss', 'mrs', 'sir', 'prof', 'professor', 'md', 'dr'].freeze

    # Suffixes recognised at the end of a name. Compared case-insensitively,
    # with an optional trailing period.
    SUFFIXES = ['esq', 'esquire', 'jr', 'sr', '2', 'ii', 'iii', 'iv'].freeze

    # Particles that belong to the surname that follows them. Names are split
    # on whitespace before comparison, so the multi-word entries never match
    # on their own; "van der" is recognised as "van" followed by "der".
    LAST_PREFIXES = ['al', 'bar', 'ben', 'bin', 'da', 'dal', 'de la', 'de', 'del', 'der', 'di', 'el', 'ibn', 'la', 'le', 'mc', 'san', 'st', 'ste', 'van', 'van der', 'van den', 'vel', 'von'].freeze

    attr_accessor :first, :middle, :last, :prefix, :suffix

    # @param name [String, nil] the full name to parse; nil is treated as ""
    def initialize(name)
      @input_string = name.to_s

      parse
    end

    # Parses the input string and fills in every part.
    #
    # * one word    -> first name
    # * two words   -> see #parse_pair
    # * three or more -> prefix, suffix, last, first, then middle
    #
    # @return [Name] self
    def parse
      reset_parts
      normalize_and_split

      case @split_name.length
      when 0
        # Nothing recognisable: every part stays empty.
      when 1
        self.first = @split_name.first
      when 2
        parse_pair
      else
        parse_prefix.
          parse_suffix.
          parse_last_name.
          parse_first_name.
          parse_middle_name
      end

      self
    end

    # Moves a leading honorific, if any, from the remaining words to #prefix.
    #
    # @return [Name] self
    def parse_prefix
      self.prefix = @split_name.shift if is_prefix?(@split_name.first)

      self
    end

    # Moves every trailing suffix word from the remaining words to #suffix,
    # keeping their original order ("Jr. Esq.").
    #
    # @return [Name] self
    def parse_suffix
      found = []
      found.unshift(@split_name.pop) while is_suffix?(@split_name.last)
      self.suffix = found.join(' ')

      self
    end

    # Takes the final remaining word, plus any surname particles directly
    # before it, as #last.
    #
    # When no honorific was parsed, the first word is never taken as a
    # particle, so "Ben Van Morrison" keeps "Ben" as the first name. With an
    # honorific ("Mr. van Gogh") every particle is taken, because the words
    # after the honorific may be a surname on its own.
    #
    # @return [Name] self
    def parse_last_name
      parts = [@split_name.pop].compact

      while is_last_name_prefix?(@split_name.last) &&
            (@split_name.length > 1 || !prefix.to_s.empty?)
        parts.unshift(@split_name.pop)
      end

      self.last = parts.join(' ')

      self
    end

    # Takes the first remaining word as #first ("" when none is left).
    #
    # @return [Name] self
    def parse_first_name
      self.first = @split_name.shift || ''

      self
    end

    # Joins whatever words are left into #middle.
    #
    # @return [Name] self
    def parse_middle_name
      self.middle = @split_name.join(' ')

      self
    end

    # @return [String] upper-cased first letters of first, middle and last,
    #   skipping empty parts (only the first letter of a multi-word part)
    def initials
      [first, middle, last].map { |part| part.to_s[0, 1] }.join.upcase
    end

    # @return [String] the non-empty parts joined by single spaces, in the
    #   order prefix, first, middle, last, suffix
    def to_s
      [prefix, first, middle, last, suffix].reject { |part| part.to_s.empty? }.join(' ')
    end

    private

    # Clears every part so that #parse can be run again from scratch.
    # Each part gets its own String so mutating one never affects another.
    def reset_parts
      self.prefix = ''
      self.first = ''
      self.middle = ''
      self.last = ''
      self.suffix = ''
    end

    # Handles a two-word name: a leading honorific goes to #prefix (otherwise
    # the word is the first name) and a trailing suffix goes to #suffix
    # (otherwise the word is the last name).
    def parse_pair
      lead, tail = @split_name

      if is_prefix?(lead)
        self.prefix = lead
      else
        self.first = lead
      end

      if is_suffix?(tail)
        self.suffix = tail
      else
        self.last = tail
      end
    end

    # Splits the input into words in "first ... last" order and stores them
    # in @split_name. More than one comma yields no words at all.
    def normalize_and_split
      @split_name =
        case @input_string.count(',')
        when 0 then split_first_middle_last
        when 1 then split_last_comma_first_middle
        else [] # this is a thing that I cannot recognize
        end

      self
    end

    # Reorders "Last, First Middle" into first/middle/last word order.
    #
    # Suffix words that close the part after the comma are moved to the very
    # end, so "Smith, John Jr" and "John Smith, Jr." both end with the suffix
    # instead of having it mistaken for a first or middle name.
    def split_last_comma_first_middle
      before, _comma, after = @input_string.partition(',')
      family = before.split(' ')
      given = after.split(' ')

      trailing = []
      trailing.unshift(given.pop) while is_suffix?(given.last)

      given + family + trailing
    end

    # Splits "First Middle Last" on whitespace.
    def split_first_middle_last
      @input_string.split(' ')
    end

    def is_prefix?(string)
      is_ix?(PREFIXES, string)
    end

    def is_suffix?(string)
      is_ix?(SUFFIXES, string)
    end

    # True when +string+ is a surname particle (case-insensitive, exact match).
    def is_last_name_prefix?(string)
      return false if string.nil? || string.empty?

      LAST_PREFIXES.include?(string.downcase)
    end

    # True when +string+, lower-cased and without one optional trailing
    # period, is one of the words in +kind+.
    def is_ix?(kind, string)
      return false if string.nil? || string.empty?

      kind.include?(string.downcase.chomp('.'))
    end
  end
end
@@ -0,0 +1,3 @@
1
module HumanNameParser
  # Release version of the gem; read by the gemspec. Frozen so it cannot be
  # modified in place at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,9 @@
1
+ require "human_name_parser/version"
2
+
3
module HumanNameParser
  # Name is loaded from human_name_parser/name the first time it is referenced.
  autoload :Name, 'human_name_parser/name'

  class << self
    # Convenience entry point: builds a Name from the given string.
    #
    # @param input_string the full name to parse
    # @return [HumanNameParser::Name]
    def parse(input_string)
      Name.new(input_string)
    end
  end
end
@@ -0,0 +1,15 @@
1
require 'human_name_parser'

# Smoke tests for the module-level HumanNameParser.parse entry point.
describe HumanNameParser do
  it "returns a Name object" do
    # Even an empty string must produce a Name.
    parsed = HumanNameParser.parse('')
    parsed.class.should == HumanNameParser::Name
  end

  it "parses names" do
    parsed = HumanNameParser.parse("John H. Smith")

    parsed.first.should == 'John'
    parsed.last.should == 'Smith'
    parsed.initials.should == 'JHS'
  end
end
data/spec/name_spec.rb ADDED
@@ -0,0 +1,154 @@
1
# -*- encoding: utf-8 -*-
# The magic comment above is required: this file contains non-ASCII string
# literals, which Ruby 1.9 rejects in a source file without a declared encoding.
require 'human_name_parser/name'

describe HumanNameParser::Name do
  # Each context defines +full_name+; the Name is built from it on first use.
  let(:parsed) { HumanNameParser::Name.new full_name }

  # any unicode issues?
  context "when full name is Björn Charles van der O'Malley" do
    let(:full_name) { "Björn Charles van der O'Malley" }

    it "should parse the name" do
      parsed.first.should == 'Björn'
      parsed.last.should == "van der O'Malley"
      parsed.middle.should == 'Charles'
    end
  end

  context 'when full name is Mary Lou Smith' do
    let(:full_name) { "Mary Lou Smith" }

    it "gets first name" do
      parsed.first.should == 'Mary'
    end

    it "gets last name" do
      parsed.last.should == 'Smith'
    end

    it "gets prefix" do
      parsed.prefix.should == ''
    end

    it "gets suffix" do
      parsed.suffix.should == ''
    end

    it "gets initials" do
      parsed.initials.should == 'MLS'
    end
  end

  context 'when full name is Mr. Alphonse di Morel Jr. Esq.' do
    let(:full_name) { "Mr. Alphonse di Morel Jr. Esq." }

    it "gets first" do
      parsed.first.should == 'Alphonse'
    end

    it "gets last" do
      parsed.last.should == 'di Morel'
    end

    it "gets prefix" do
      parsed.prefix.should == 'Mr.'
    end

    it "gets suffix" do
      parsed.suffix.should == 'Jr. Esq.'
    end

    it "gets initials" do
      parsed.initials.should == 'AD'
    end
  end

  context 'when full name is ROBOTO' do
    let(:full_name) { "ROBOTO" }

    it "gets first" do
      parsed.first.should == 'ROBOTO'
    end

    it 'gets initials' do
      parsed.initials.should == 'R'
    end

    it "doesn't get last" do
      parsed.last.should == ''
    end
  end

  context 'when full name is John Paul Ringo' do
    let(:full_name) { "John Paul Ringo" }

    it "gets first" do
      parsed.first.should == 'John'
    end

    it 'gets initials' do
      parsed.initials.should == 'JPR'
    end

    it "gets last" do
      parsed.last.should == 'Ringo'
    end

    it "gets middle" do
      parsed.middle.should == 'Paul'
    end
  end

  context 'when full name is Downey Jr., Robert' do
    let(:full_name) { 'Downey Jr., Robert' }

    it "gets first" do
      parsed.first.should == 'Robert'
    end

    it 'gets initials' do
      parsed.initials.should == 'RD'
    end

    it "gets last" do
      parsed.last.should == 'Downey'
    end

    it "gets middle" do
      parsed.middle.should == ''
    end

    it "gets suffix" do
      parsed.suffix.should == 'Jr.'
    end
  end

  # More than one comma: nothing should be recognised.
  context 'when full name is garbage' do
    let(:full_name) { '1234 Anywhere St., North Pole, SD 22323' }

    it "gets first" do
      parsed.first.should == ''
    end

    it 'gets initials' do
      parsed.initials.should == ''
    end

    it "gets last" do
      parsed.last.should == ''
    end

    it "gets middle" do
      parsed.middle.should == ''
    end

    it "gets suffix" do
      parsed.suffix.should == ''
    end
  end
end
@@ -0,0 +1,8 @@
1
# Make the spec directory and the gem's lib directory requirable.
# lib is unshifted last, so it ends up first on the load path.
%w(.. ../../lib).each do |relative|
  $LOAD_PATH.unshift File.expand_path(relative, __FILE__)
end
require 'human_name_parser'
require 'rspec'

# Use RSpec's built-in mocking framework.
RSpec.configure { |config| config.mock_with :rspec }
metadata ADDED
@@ -0,0 +1,95 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: human_name_parser
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Adam Bachman
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-11-10 00:00:00 -05:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rspec
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: human_name_parser is intended to split names into their component parts.
36
+ email:
37
+ - adam.bachman@gmail.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - LICENSE
48
+ - README.md
49
+ - Rakefile
50
+ - fixtures/test_names.txt
51
+ - human_name_parser.gemspec
52
+ - lib/human_name_parser.rb
53
+ - lib/human_name_parser/name.rb
54
+ - lib/human_name_parser/version.rb
55
+ - spec/human_name_parser_spec.rb
56
+ - spec/name_spec.rb
57
+ - spec/spec_helper.rb
58
+ has_rdoc: true
59
+ homepage: https://github.com/abachman/human_name_parser
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options: []
64
+
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ hash: 3
82
+ segments:
83
+ - 0
84
+ version: "0"
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.6.2
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: Split most American names into their component parts.
92
+ test_files:
93
+ - spec/human_name_parser_spec.rb
94
+ - spec/name_spec.rb
95
+ - spec/spec_helper.rb