fullname-parser 1.0.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8701ad30b034d522d4116053672296dad64fe2b8
4
+ data.tar.gz: 00b0315fbefe4988a2aadde0c14a0f128baf7271
5
+ SHA512:
6
+ metadata.gz: f3498c76c53588b73363fa76652fd1d268282e8742116931793330eab1cc9becbbc8b5f65fe20647be02e0b5755d7c729e6cf4f309b436eda1d82adba56d5e74
7
+ data.tar.gz: 4472a7dc0e50d9129c0ab96b7b0006b588f1f5db1c96dccf79f21d34870ad09f2a498c1481311e94ce69e9c75c13a36019b69a632c34017f1d840f6dc250973d
@@ -101,91 +101,174 @@ module Fullname
101
101
  '9th' => 'IX',
102
102
  } unless const_defined?(:CONVERSION)
103
103
 
104
- def parse_fullname(name)
105
- first_name = nil
106
- middle_name = nil
107
- last_name = nil
108
- prefix = nil
109
- suffix = nil
110
-
111
- # replace "’" to "'"
112
- name = name.gsub(/’/, "'")
113
- # remove strings which contain and include in parentheses
114
- # ex. 'Susan M. (Scully) Schultz' => 'Susan M. Schultz'
115
- # 'Jay (Jung) Heum Kim' => 'Jay Heum Kim'
116
- name = name.gsub(/\(.*?\)/, ' ').gsub(/\(|\)/, '')
117
- # remove quoted strings
118
- # Darin "Derry" Ronald Anderson => 'Darin Ronald Anderson'
119
- # Nancy M. "Shelli" Egger => 'Nancy M. Egger'
120
- # Nicole 'nikki' Adame => 'Nicole Adame'
121
- name = name.gsub(/".*?"/, ' ').gsub(/'.*?'/i, ' ')
122
-
123
- # remove curly brackets
124
- # Henry C.{Harry} Wilson => 'Henry C. Wilson'
125
- # Cellestine {Steen} Armstrong => 'Cellestine Armstrong'
126
- name = name.gsub(/\{.*?\}/, ' ')
127
- # remove exceptional names
128
- # ex. "William . D. 'Bill' Beard" => "William D. 'Bill' Beard"
129
- # also this regexp can remove
130
- name = name.gsub(/\s+[^a-zA-Z]+\s+/, ' ')
131
- # Why we use substitute(sub) comma to whitespace, not global substitute(gsub).
132
- # the reason is the substitution applies for suffix splitting, not for replacing
133
- # bad data. As we want, convert "Marvene A Gordon, JD" to "Marvene A Gordon JD",
134
- # so that the suffix will get into the split array.
135
- # and, standardize suffix as '2nd' => 'II', '3rd' => 'III'
136
- nameSplit = name.gsub(',', ' ').strip.split(/\s+/).map{ |n| CONVERSION[n.downcase] || n }
137
-
138
- return { :last=>name } if nameSplit.length <= 1
139
-
140
- suffix_arr = []
141
- while (nameSplit.length > 1)
142
- if IGNORABLE_SUFFIXES.include?(nameSplit.last.downcase)
143
- suffix_arr.unshift([nameSplit.pop, false])
144
- elsif SUFFIX_LIST.include?(nameSplit.last.downcase) || GLOBAL_SUFFIX_LIST.include?(nameSplit.last.downcase)
145
- suffix_arr.unshift([nameSplit.pop, true])
146
- else
147
- break
104
+ class Error < StandardError; end
105
+ class Identifier
106
+ attr_reader :name, :original_name, :prefix, :firstname, :middlename, :lastname, :suffix
107
# Builds an identifier for +name+ and immediately runs the full parsing
# pipeline on it.
#
# Two independent copies of the input are kept: @original_name is never
# touched again by this method, while @name is the working string that the
# pipeline stages rewrite. The caller's own string is therefore not mutated.
#
# @param name [String] the raw full name to parse
def initialize(name)
  @prefix_list, @suffix_list = [], []
  @original_name, @name = name.dup, name.dup

  # Stage order matters: clean the text, reorder "Last, First" input,
  # then split the result into the individual name fields.
  [:sanitize!, :flip_parts!, :breakup!].each { |stage| send(stage) }
end
116
+
117
+ private
118
+
119
# Cleans the working name (the string returned by +name+) in place before
# any structural parsing happens.
#
# Steps, in order:
# 1. normalise the typographic apostrophe "’" to "'"
# 2. drop text given in parentheses, double quotes, single quotes or curly
#    brackets (nicknames, maiden names, ...)
# 3. drop tokens that contain no letters at all
# 4. rewrite every CONVERSION key (e.g. '9th') to its mapped value
#    (e.g. 'IX'), matching whole words case-insensitively
#
# Commas are deliberately left untouched here: flip_parts! still needs them
# to recognise "Last, First" input.
def sanitize!
  # replace "’" with "'"
  name.gsub!(/’/, "'")

  # remove parenthesised text, then any unbalanced parenthesis left over
  # ex. 'Susan M. (Scully) Schultz' => 'Susan M. Schultz'
  #     'Jay (Jung) Heum Kim'       => 'Jay Heum Kim'
  name.gsub!(/\(.*?\)/, ' ')
  name.gsub!(/\(|\)/, '')

  # remove quoted nicknames
  # ex. 'Darin "Derry" Ronald Anderson' => 'Darin Ronald Anderson'
  #     'Nancy M. "Shelli" Egger'       => 'Nancy M. Egger'
  #     "Nicole 'nikki' Adame"          => 'Nicole Adame'
  name.gsub!(/".*?"/, ' ')
  # A single quote only opens/closes a nickname when it is not glued to a
  # letter or digit on its outer side. Without that restriction the
  # apostrophes inside "O'Brien O'Neil" would be read as a quoted span and
  # "'Brien O'" would be deleted.
  name.gsub!(/(?<![[:alnum:]])'.*?'(?![[:alnum:]])/, ' ')

  # remove text in curly brackets
  # ex. 'Henry C.{Harry} Wilson'       => 'Henry C. Wilson'
  #     'Cellestine {Steen} Armstrong' => 'Cellestine Armstrong'
  name.gsub!(/\{.*?\}/, ' ')

  # remove whitespace-delimited tokens that contain no letters; this also
  # collapses the runs of blanks left behind by the removals above
  # ex. "William . D. 'Bill' Beard" => 'William D. Beard'
  name.gsub!(/\s+[^a-zA-Z]+\s+/, ' ')

  # standardise ordinal suffixes, e.g. '2nd' => 'II', '3rd' => 'III'
  CONVERSION.each_pair do |finder, replacer|
    pattern = Regexp.new("\\b#{Regexp.escape(finder)}\\b", Regexp::IGNORECASE)
    name.gsub!(pattern, replacer)
  end
end
150
-
151
- # Loop around until we run into a name that is not contained in the PREFIX_LIST
152
- # ex(FL): 'Lt Col Marvene A Gordon', 'The Honorable Dexter F George'
153
- prefix_arr = []
154
- while (nameSplit.length > 1)
155
- if IGNORABLE_PREFIXS.include?(nameSplit.first.downcase)
156
- nameSplit.shift
157
- elsif PREFIX_LIST.include?(nameSplit.first.downcase)
158
- prefix_arr.push(nameSplit.shift)
159
- else
160
- break
151
+
152
# Peels recognised suffixes off the end of +str+, one token at a time.
#
# The last comma- or space-separated token is looked up (lower-cased) in
# IGNORABLE_SUFFIXES, SUFFIX_LIST and GLOBAL_SUFFIX_LIST; peeling stops at
# the first token found in none of them. A string consisting of a single
# token is never consumed.
#
# @param str [String] the text to examine
# @return [Array(String, Array)] the remaining text and the suffixes found,
#   in their original left-to-right order. Each suffix is a pair
#   [text, flag] where flag is false for IGNORABLE_SUFFIXES entries and
#   true for SUFFIX_LIST / GLOBAL_SUFFIX_LIST entries. When nothing is
#   found, +str+ is returned unchanged.
def extract_suffix(str)
  list = []
  loop do
    # Ignore dangling separators ("John Smith, Jr,") so they cannot hide the
    # final token. Matching is anchored to the whole string and lets "."
    # cross newlines, so text on earlier lines is kept rather than dropped.
    trimmed = str.sub(/[, ]+\z/, '')
    m = /\A(.*)[, ](.+)\z/m.match(trimmed)
    break unless m

    last_part = m[2].strip
    lookup_key = last_part.downcase
    if IGNORABLE_SUFFIXES.include?(lookup_key)
      list.unshift([last_part, false])
    elsif SUFFIX_LIST.include?(lookup_key) || GLOBAL_SUFFIX_LIST.include?(lookup_key)
      list.unshift([last_part, true])
    else
      break
    end

    # Continue with what precedes the suffix, minus trailing separators.
    str = m[1].sub(/[, ]+\z/, '').strip
  end
  [str, list]
end
163
- prefix = prefix_arr.join(' ') if prefix_arr.size > 0
164
-
165
- # Loop around until we run into a name that is not contained in the LAST_NAME_EXTENSIONS
166
- last_name_arr = []
167
- last_name_arr.push(nameSplit.pop)
168
- last_name_arr.push(nameSplit.pop) while nameSplit.length > 1 && LAST_NAME_EXTENSIONS.include?(nameSplit.last.downcase)
169
- last_name = last_name_arr.reverse.join(' ') if last_name_arr.size > 0
170
-
171
- first_name = nameSplit.shift if nameSplit.length >= 1
172
- middle_name = nameSplit.join(' ') if nameSplit.length > 0
173
- if first_name.nil? && prefix
174
- first_name = prefix
175
- prefix = nil
171
+
172
# Peels recognised prefixes (titles) off the front of +str+, one token at a
# time.
#
# The first comma- or space-separated token is looked up (lower-cased):
# tokens in IGNORABLE_PREFIXS are dropped silently, tokens in PREFIX_LIST
# are collected, and the first token found in neither list stops the scan.
# The final token of the string is never consumed.
# ex. 'Lt Col Marvene A Gordon', 'The Honorable Dexter F George'
#
# @param str [String] the text to examine
# @return [Array(String, Array<String>)] the remaining text and the
#   collected prefixes in their original order. When nothing is consumed,
#   +str+ is returned unchanged.
def extract_prefix(str)
  list = []
  loop do
    # Skip leading separators so " Dr John" is handled like "Dr John".
    # The match is anchored to the whole string and lets "." cross newlines,
    # so text on other lines cannot be silently dropped.
    m = /\A[, ]*([^, ]+)[, ]+(.+)\z/m.match(str)
    break unless m

    first_part = m[1]
    lookup_key = first_part.downcase
    ignorable = IGNORABLE_PREFIXS.include?(lookup_key)
    break unless ignorable || PREFIX_LIST.include?(lookup_key)

    # Ignorable words are consumed but not reported.
    list.push(first_part) unless ignorable
    str = m[2].sub(/\A[, ]+/, '').strip
  end
  [str, list]
end
177
-
178
- if first_name.nil? && suffix_arr.any? && SUFFIX_CAN_BE_LASTNAME.include?(suffix_arr.first.first.downcase)
179
- first_name = last_name
180
- last_name = suffix_arr.shift.first
191
+
192
# Strips suffixes from the end of the working name before any comma-based
# reordering takes place. The shortened text becomes the new working name
# and the suffixes found are appended to @suffix_list.
def extract_suffix_before_flipping_parts
  @name, found = extract_suffix(name)
  @suffix_list += found
end
182
- if last_name =~ /^[A-Z]\.?$/i && suffix_arr.any? && SUFFIX_CAN_BE_LASTNAME.include?(suffix_arr.first.first.downcase)
183
- middle_name = [middle_name, last_name].compact.join(' ')
184
- last_name = suffix_arr.shift.first
197
+
198
# Rewrites "Last, First" style input into "First Last" order.
#
# Trailing suffixes are extracted first. The working name is then split on
# commas:
# * no comma (or nothing left at all): the name is left as it is
# * one or two commas: the last segment is moved to the front; the
#   segment(s) before it are joined with a blank, checked once more for
#   trailing suffixes, and placed behind it
# * more commas: the layout is considered ambiguous
#
# Finally, leading prefixes are extracted from the reordered name.
#
# @raise [Error] when more than three comma-separated segments remain
def flip_parts!
  extract_suffix_before_flipping_parts

  parts = name.split(/,/)
  case parts.size
  when 0, 1
    # Nothing to reorder. Size 0 happens for an empty or comma-only name,
    # which must not be reported as "too many commas".
  when 2, 3
    # parts[0..-2] is the single leading segment for two parts and the
    # first two segments for three parts.
    leading, found = extract_suffix(parts[0..-2].join(' '))
    @name = [parts.last, leading].join(' ').strip.gsub(/ +/, ' ')
    @suffix_list += found
  else
    raise Error, "name [ #{name} ] has >2 commas, don't know how to parse"
  end

  extract_prefix_after_flipping_parts
end
186
- suffix_arr.delete_if{|a, b| !b}
187
- suffix = suffix_arr.size == 0 ? nil : suffix_arr.first.first # only return first suffix
188
- return { :last => last_name, :middle => middle_name, :first => first_name, :prefix => prefix, :suffix => suffix }
217
+
218
# Strips prefixes from the front of the working name once it is in
# "First Last" order. The shortened text becomes the new working name and
# the prefixes found are appended to @prefix_list.
def extract_prefix_after_flipping_parts
  @name, found = extract_prefix(name)
  @prefix_list += found
end
223
+
224
# Splits the working name into its fields and stores them in @prefix,
# @firstname, @middlename, @lastname and @suffix.
#
# The name is tokenised on commas and blanks. The last token becomes the
# last name, extended to the left while the preceding token is listed in
# LAST_NAME_EXTENSIONS and at least one other token would remain. The first
# remaining token is the first name and everything in between is the middle
# name. Prefixes and suffixes come from @prefix_list / @suffix_list.
def breakup!
  # Drop empty tokens: a name starting with a separator would otherwise
  # yield "" as its first token and end up with an empty first name.
  parts = name.split(/[, ]+/).reject(&:empty?)

  # process prefix
  @prefix = @prefix_list.join(' ') if @prefix_list.any?

  # process lastname
  # Collect tokens from the right until one is not in LAST_NAME_EXTENSIONS.
  last_name_arr = [parts.pop]
  last_name_arr.push(parts.pop) while parts.length > 1 && LAST_NAME_EXTENSIONS.include?(parts.last.downcase)
  @lastname = last_name_arr.reverse.join(' ')

  # process firstname and middlename
  @firstname = parts.shift unless parts.empty?
  @middlename = parts.join(' ') unless parts.empty?
  if firstname.nil? && prefix
    # Only a prefix and a last name were given: treat the prefix as the
    # first name.
    @firstname = prefix
    @prefix = nil
  end

  # Re-evaluated on every call because @suffix_list shrinks when a suffix is
  # promoted to last name.
  promotable = lambda do
    @suffix_list.any? && SUFFIX_CAN_BE_LASTNAME.include?(@suffix_list.first.first.downcase)
  end

  # move lastname to firstname, move first suffix to lastname
  if firstname.nil? && promotable.call
    @firstname = lastname
    @lastname = @suffix_list.shift.first
  end

  # The "last name" is a lone initial: move it to middlename and move the
  # first suffix to lastname.
  if lastname =~ /\A[A-Z]\.?\z/i && promotable.call
    @middlename = [middlename, lastname].compact.join(' ')
    @lastname = @suffix_list.shift.first
  end

  # process suffix
  # Each entry is [text, flag]; entries with a false flag are ignorable and
  # are not reported. Only the first remaining suffix is returned.
  @suffix_list.keep_if { |_, reportable| reportable }
  @suffix = @suffix_list.any? ? @suffix_list.first.first : nil
end
261
+ end
262
+
263
# Parses +name+ and returns its pieces.
#
# @param name [String] the full name to parse
# @return [Hash] with the keys :prefix, :first, :middle, :last and :suffix,
#   each holding the corresponding value read from an Identifier built for
#   +name+
def parse_fullname(name)
  parsed = Identifier.new(name)
  # result key => Identifier reader
  readers = {
    prefix: :prefix,
    first: :firstname,
    middle: :middlename,
    last: :lastname,
    suffix: :suffix
  }
  readers.each_with_object({}) do |(key, reader), result|
    result[key] = parsed.public_send(reader)
  end
end # << parse_fullname
190
273
  extend self
191
274
  end
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Fullname
3
3
  module Parser
4
- VERSION = '1.0.5'
4
+ VERSION = '1.1.0'
5
5
  end
6
6
  end
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fullname-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
5
- prerelease:
4
+ version: 1.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - xiaohui
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-06-24 00:00:00.000000000 Z
11
+ date: 2016-02-16 00:00:00.000000000 Z
13
12
  dependencies: []
14
13
  description: For parsing people's fullname into pieces(prefix/first/middle/last/suffix)
15
14
  email:
@@ -25,27 +24,25 @@ files:
25
24
  - lib/fullname/parser/version.rb
26
25
  homepage: http://github.com/xiaohui-zhangxh/fullname-parser
27
26
  licenses: []
27
+ metadata: {}
28
28
  post_install_message:
29
29
  rdoc_options: []
30
30
  require_paths:
31
31
  - lib
32
32
  required_ruby_version: !ruby/object:Gem::Requirement
33
- none: false
34
33
  requirements:
35
- - - ! '>='
34
+ - - '>='
36
35
  - !ruby/object:Gem::Version
37
36
  version: '0'
38
37
  required_rubygems_version: !ruby/object:Gem::Requirement
39
- none: false
40
38
  requirements:
41
- - - ! '>='
39
+ - - '>='
42
40
  - !ruby/object:Gem::Version
43
41
  version: '0'
44
42
  requirements: []
45
43
  rubyforge_project:
46
- rubygems_version: 1.8.25
44
+ rubygems_version: 2.4.8
47
45
  signing_key:
48
- specification_version: 3
46
+ specification_version: 4
49
47
  summary: Split fullname into pieces(prefix/first/middle/last/suffix)
50
48
  test_files: []
51
- has_rdoc: