fullname-parser 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
@@ -0,0 +1,18 @@
1
+ fullname_parser
2
+ ===============
3
+
4
+ There are two ways to call `parse_fullname`:
5
+
6
+ require 'fullname/parser'
7
+ Fullname::Parser.parse_fullname("Xiaohui Zhang")
8
+
9
+ => {:last=>"Zhang", :middle=>nil, :first=>"Xiaohui", :prefix=>nil, :suffix=>nil}
10
+
11
+ or
12
+
13
+ require 'fullname/parser'
14
+ include Fullname::Parser
15
+ parse_fullname("Xiaohui Zhang")
16
+
17
+ => {:last=>"Zhang", :middle=>nil, :first=>"Xiaohui", :prefix=>nil, :suffix=>nil}
18
+
@@ -0,0 +1,17 @@
1
# Gem specification for fullname-parser.
#
# lib/ is put on the load path first so the version constant can be read
# straight from the source tree while the gem is being built.
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
require 'fullname/parser/version'

Gem::Specification.new do |s|
  s.name        = "fullname-parser"
  s.version     = Fullname::Parser::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['xiaohui']
  s.email       = ['xiaohui@zhangxh.net']
  s.homepage    = 'http://github.com/xiaohui-zhangxh/fullname-parser'
  s.summary     = "Split fullname into pieces(prefix/first/middle/last/suffix)"
  s.description = "For parsing people's fullname into pieces(prefix/first/middle/last/suffix)"

  # Package every file tracked by git. The listing is taken once and reused
  # instead of shelling out to git a second time for the executables.
  tracked_files = `git ls-files`.split("\n")

  s.files        = tracked_files
  # Anything under bin/ is an executable; keep the path relative to bin/.
  s.executables  = tracked_files.map { |path| path[%r{\Abin/(.*)}, 1] }.compact
  s.require_path = 'lib'
end
@@ -0,0 +1,185 @@
1
+ # encoding: utf-8
2
+ require File.expand_path('../parser/version', __FILE__)
3
+
4
module Fullname
  # Splits a person's full name into prefix / first / middle / last / suffix.
  #
  # The parser can be called on the module itself
  # (Fullname::Parser.parse_fullname("...")) or mixed into another object
  # with `include Fullname::Parser`.
  #
  # Every lookup table below is assigned only when the constant is not already
  # defined, so an existing definition is left untouched. All tables hold
  # lower-case entries; tokens are down-cased before they are looked up.
  module Parser

    # Generation suffixes that are recognised as suffixes.
    #
    # "II", "III" and "IV" after a name can safely be read as suffixes (John
    # Smith has a son John Smith II, who has a son John Smith III, ...).
    # Nobody except a king writes "I" to say they are "the first" -- they use
    # "Sr." instead -- so a lone "I" is far more likely to be a middle
    # initial and is deliberately left out of this list.
    #
    # Likewise almost nobody except a king gets past "V", so the later
    # numerals are not used either.
    GENERATION_LIST = [
      #'i',
      'ii', 'iii', 'iv', 'v',
      # 'vi', 'vii', 'viii', 'ix', 'x', 'xi', 'xii', 'xiii', 'xiv', 'xv', 'xvi', 'xvii', 'xviii', 'xix', 'xx',
    ] unless const_defined?(:GENERATION_LIST)

    # Generation suffixes plus junior/senior markers.
    GLOBAL_SUFFIX_LIST = GENERATION_LIST + [
      'jr', 'jr.',
      'sr', 'sr.',
    ] unless const_defined?(:GLOBAL_SUFFIX_LIST)

    # Other trailing tokens that are kept and reported as the suffix.
    SUFFIX_LIST = [
      'b.a.',
      'capt.', 'col.', 'cfa', 'c.f.a', 'c.f.a.', 'cpa', 'c.p.a', 'c.p.a.',
      'edd', 'ed.d',
      'mph',
      'pc', 'p.c.', 'psyd', 'psyd.', 'psy.d', 'phd', 'phd.', 'ph.d', 'ph.d.',
      'r.s.m.',
      'usn',
    ] unless const_defined?(:SUFFIX_LIST)

    # Trailing tokens that are stripped from the name but never reported.
    IGNORABLE_SUFFIXES = [
      'do', 'd.o.', 'd.o', 'dds', 'd.d.s.',
      'esq', 'esq.',
      'md', 'm.d.', 'm.d',
      'mr.', 'ms.', 'mrs.',
      'jd', 'jd.', 'j.d.',
      'retd', 'ret.', 'retd.',
      'usmc',
    ] unless const_defined?(:IGNORABLE_SUFFIXES)

    # Suffix tokens that are put back as the last name when stripping them
    # leaves the name without a usable last name (e.g. "Anh Do").
    SUFFIX_CAN_BE_LASTNAME = [
      'do',
    ] unless const_defined?(:SUFFIX_CAN_BE_LASTNAME)

    # Leading titles that are collected into the prefix.
    PREFIX_LIST = [
      'asst.',
      'attorney', 'atty.',
      'bg', 'brig', 'gen',
      'colonel', 'cardinal', 'capt', 'capt.', 'captain', 'cdr', 'col' , 'col.', 'congressman', 'cpt',
      'dir.', 'dr', 'dr.',
      'exec.',
      'general', 'gen', 'gen.',
      'honorable', 'hon', 'hon.',
      'judge', 'justice', 'chiefjustice',
      'lieutenant', 'lcdr', 'lt', 'lt.', 'ltc', 'ltcol.', 'ltcol', 'ltjg',
      'mr', 'mr.', 'ms', 'ms.', 'mrs', 'mrs.', 'maj', 'maj.', 'major', 'miss',
      'president', 'prof', 'prof.', 'professor',
      'reverend', 'rev', 'rev.',
      'sheriff',
      'sr', 'sr.'

    ] unless const_defined?(:PREFIX_LIST)

    # Leading words that are dropped without becoming part of the prefix.
    IGNORABLE_PREFIXS = [
      'the',
    ] unless const_defined?(:IGNORABLE_PREFIXS)


    # Particles that are treated as part of the last name when they directly
    # precede it (e.g. "van Beethoven").
    LAST_NAME_EXTENSIONS = [
      'bar', 'ben',
      'da', 'dal', 'dan', 'de', 'del', 'den', 'der', 'des', 'dela', 'della', 'di', 'do', 'du',
      'el',
      'la', 'le', 'lo',
      'mac', 'mc',
      'san',
      'st', 'st.', 'sta', 'sta.',
      'van','von', 'ver', 'vanden', 'vander'
    ] unless const_defined?(:LAST_NAME_EXTENSIONS)

    # Ordinal spellings that are rewritten to roman numerals before parsing,
    # so that "3rd" is handled exactly like "III".
    CONVERSION = {
      '1st' => 'I',
      '2nd' => 'II',
      '3rd' => 'III',
      '4th' => 'IV',
      '5th' => 'V',
      '6th' => 'VI',
      '7th' => 'VII',
      '8th' => 'VIII',
      '9th' => 'IX',
    } unless const_defined?(:CONVERSION)

    # Parse a full name into its parts.
    #
    # @param name [String, nil] the full name, written in
    #   "prefix first middle last suffix" order; nil is treated as "".
    #   Commas are treated as whitespace, so "Last, First" input is NOT
    #   reordered.
    # @return [Hash] for names of two or more words a hash with the keys
    #   :last, :middle, :first, :prefix and :suffix (missing parts are nil,
    #   and only the first kept suffix is reported). For a name of at most
    #   one word the hash contains only :last, holding the cleaned-up input.
    def parse_fullname(name)
      # Accept nil instead of failing with NoMethodError on the first gsub.
      name = name.to_s

      # Normalise the typographic apostrophe to a plain one.
      name = name.gsub('’', "'")

      # Remove parenthesised text, then any unbalanced parenthesis left over.
      # ex. 'Susan M. (Scully) Schultz' => 'Susan M. Schultz'
      #     'Jay (Jung) Heum Kim'       => 'Jay Heum Kim'
      name = name.gsub(/\(.*?\)/, ' ').gsub(/\(|\)/, '')

      # Remove quoted nicknames.
      # ex. 'Darin "Derry" Ronald Anderson' => 'Darin Ronald Anderson'
      #     'Nancy M. "Shelli" Egger'       => 'Nancy M. Egger'
      #     "Nicole 'nikki' Adame"          => 'Nicole Adame'
      name = name.gsub(/".*?"/, ' ')
      # A single quote only opens a nickname when it does not follow a letter
      # or digit; otherwise it is an apostrophe inside a name ("O'Neal",
      # "D'Arcy") and must be left alone. The character in front of the
      # opening quote is captured and put back.
      name = name.gsub(/(\A|[^[:alnum:]])'.*?'/, '\1 ')

      # Remove text in curly brackets.
      # ex. 'Henry C.{Harry} Wilson'       => 'Henry C. Wilson'
      #     'Cellestine {Steen} Armstrong' => 'Cellestine Armstrong'
      name = name.gsub(/\{.*?\}/, ' ')

      # Remove stray tokens that contain no letter at all.
      # ex. "William . D. Beard" => "William D. Beard"
      # [[:alpha:]] (rather than a-zA-Z) keeps tokens that consist only of
      # non-ASCII letters, such as the middle initial in "Ana É Silva".
      name = name.gsub(/\s+[^[:alpha:]]+\s+/, ' ')

      # Commas are only expected in front of suffixes ("Marvene A Gordon, JD"),
      # so every comma becomes whitespace and the suffix ends up as a token of
      # its own.
      flattened = name.gsub(',', ' ').strip
      # Standardise ordinals: '2nd' => 'II', '3rd' => 'III', ...
      tokens = flattened.split(/\s+/).map { |token| CONVERSION[token.downcase] || token }

      # Nothing to split: hand back the cleaned-up text as the last name.
      return { :last => flattened } if tokens.length <= 1

      # Peel suffixes off the end, always leaving at least one token behind.
      # Each entry is [token, keep]; ignorable suffixes are remembered with
      # keep == false because they may still be needed as a last name below.
      suffix_arr = []
      while tokens.length > 1
        candidate = tokens.last.downcase
        if IGNORABLE_SUFFIXES.include?(candidate)
          suffix_arr.unshift([tokens.pop, false])
        elsif SUFFIX_LIST.include?(candidate) || GLOBAL_SUFFIX_LIST.include?(candidate)
          suffix_arr.unshift([tokens.pop, true])
        else
          break
        end
      end

      # Peel titles off the front until a token is not a known prefix.
      # ex. 'Lt Col Marvene A Gordon', 'The Honorable Dexter F George'
      prefix_arr = []
      while tokens.length > 1
        candidate = tokens.first.downcase
        if IGNORABLE_PREFIXS.include?(candidate)
          tokens.shift
        elsif PREFIX_LIST.include?(candidate)
          prefix_arr.push(tokens.shift)
        else
          break
        end
      end
      prefix = prefix_arr.empty? ? nil : prefix_arr.join(' ')

      # The final token is the last name; particles directly in front of it
      # ("van", "de", ...) belong to it, as long as one token remains.
      last_name_arr = [tokens.pop]
      last_name_arr.push(tokens.pop) while tokens.length > 1 && LAST_NAME_EXTENSIONS.include?(tokens.last.downcase)
      last_name = last_name_arr.reverse.join(' ')

      # Whatever is left is the first name followed by the middle name(s).
      first_name  = tokens.shift
      middle_name = tokens.empty? ? nil : tokens.join(' ')

      # No token left for the first name: report the prefix as the first name.
      if first_name.nil? && prefix
        first_name = prefix
        prefix = nil
      end

      # "Anh Do": the supposed suffix was really the last name.
      if first_name.nil? && suffix_arr.any? && SUFFIX_CAN_BE_LASTNAME.include?(suffix_arr.first.first.downcase)
        first_name = last_name
        last_name = suffix_arr.shift.first
      end
      # "Anh T. Do": a bare initial cannot be the last name, so it joins the
      # middle name and the supposed suffix becomes the last name.
      if last_name =~ /\A[A-Z]\.?\z/i && suffix_arr.any? && SUFFIX_CAN_BE_LASTNAME.include?(suffix_arr.first.first.downcase)
        middle_name = [middle_name, last_name].compact.join(' ')
        last_name = suffix_arr.shift.first
      end

      # Only suffixes flagged as "keep" are reported, and only the first one.
      kept_suffixes = suffix_arr.select { |_token, keep| keep }
      suffix = kept_suffixes.empty? ? nil : kept_suffixes.first.first

      { :last => last_name, :middle => middle_name, :first => first_name, :prefix => prefix, :suffix => suffix }
    end # << parse_fullname

    # Make parse_fullname callable as Fullname::Parser.parse_fullname too.
    extend self
  end
end
@@ -0,0 +1,6 @@
1
+
2
module Fullname
  module Parser
    # Release version of the fullname-parser gem; the gemspec reads it from
    # here. Frozen so the shared string cannot be modified in place.
    VERSION = '1.0.3'.freeze
  end
end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fullname-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.3
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - xiaohui
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-15 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: For parsing people's fullname into pieces(prefix/first/middle/last/suffix)
15
+ email:
16
+ - xiaohui@zhangxh.net
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - README.md
23
+ - fullname-parser.gemspec
24
+ - lib/fullname/parser.rb
25
+ - lib/fullname/parser/version.rb
26
+ homepage: http://github.com/xiaohui-zhangxh/fullname-parser
27
+ licenses: []
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ required_rubygems_version: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubyforge_project:
46
+ rubygems_version: 1.8.25
47
+ signing_key:
48
+ specification_version: 3
49
+ summary: Split fullname into pieces(prefix/first/middle/last/suffix)
50
+ test_files: []
51
+ has_rdoc: