fullname-parser 1.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8701ad30b034d522d4116053672296dad64fe2b8
4
+ data.tar.gz: 00b0315fbefe4988a2aadde0c14a0f128baf7271
5
+ SHA512:
6
+ metadata.gz: f3498c76c53588b73363fa76652fd1d268282e8742116931793330eab1cc9becbbc8b5f65fe20647be02e0b5755d7c729e6cf4f309b436eda1d82adba56d5e74
7
+ data.tar.gz: 4472a7dc0e50d9129c0ab96b7b0006b588f1f5db1c96dccf79f21d34870ad09f2a498c1481311e94ce69e9c75c13a36019b69a632c34017f1d840f6dc250973d
@@ -101,91 +101,174 @@ module Fullname
101
101
  '9th' => 'IX',
102
102
  } unless const_defined?(:CONVERSION)
103
103
 
104
- def parse_fullname(name)
105
- first_name = nil
106
- middle_name = nil
107
- last_name = nil
108
- prefix = nil
109
- suffix = nil
110
-
111
- # replace "’" to "'"
112
- name = name.gsub(/’/, "'")
113
- # remove strings which contain and include in parentheses
114
- # ex. 'Susan M. (Scully) Schultz' => 'Susan M. Schultz'
115
- # 'Jay (Jung) Heum Kim' => 'Jay Heum Kim'
116
- name = name.gsub(/\(.*?\)/, ' ').gsub(/\(|\)/, '')
117
- # remove quoted strings
118
- # Darin "Derry" Ronald Anderson => 'Darin Ronald Anderson'
119
- # Nancy M. "Shelli" Egger => 'Nancy M. Egger'
120
- # Nicole 'nikki' Adame => 'Nicole Adame'
121
- name = name.gsub(/".*?"/, ' ').gsub(/'.*?'/i, ' ')
122
-
123
- # remove curly brackets
124
- # Henry C.{Harry} Wilson => 'Henry C. Wilson'
125
- # Cellestine {Steen} Armstrong => 'Cellestine Armstrong'
126
- name = name.gsub(/\{.*?\}/, ' ')
127
- # remove exceptional names
128
- # ex. "William . D. 'Bill' Beard" => "William D. 'Bill' Beard"
129
- # also this regexp can remove
130
- name = name.gsub(/\s+[^a-zA-Z]+\s+/, ' ')
131
- # Why we use substitute(sub) comma to whitespace, not global substitute(gsub).
132
- # the reason is the substitution applies for suffix splitting, not for replacing
133
- # bad data. As we want, convert "Marvene A Gordon, JD" to "Marvene A Gordon JD",
134
- # so that the suffix will get into the split array.
135
- # and, standardize suffix as '2nd' => 'II', '3rd' => 'III'
136
- nameSplit = name.gsub(',', ' ').strip.split(/\s+/).map{ |n| CONVERSION[n.downcase] || n }
137
-
138
- return { :last=>name } if nameSplit.length <= 1
139
-
140
- suffix_arr = []
141
- while (nameSplit.length > 1)
142
- if IGNORABLE_SUFFIXES.include?(nameSplit.last.downcase)
143
- suffix_arr.unshift([nameSplit.pop, false])
144
- elsif SUFFIX_LIST.include?(nameSplit.last.downcase) || GLOBAL_SUFFIX_LIST.include?(nameSplit.last.downcase)
145
- suffix_arr.unshift([nameSplit.pop, true])
146
- else
147
- break
104
+ class Error < StandardError; end
105
+ class Identifier
106
+ attr_reader :name, :original_name, :prefix, :firstname, :middlename, :lastname, :suffix
107
+ def initialize(name)
108
+ @original_name = name.dup
109
+ @name = name.dup
110
+ @prefix_list = []
111
+ @suffix_list = []
112
+ sanitize!
113
+ flip_parts!
114
+ breakup!
115
+ end
116
+
117
+ private
118
+
119
+ def sanitize!
120
+ # replace "’" to "'"
121
+ name.gsub!(/’/, "'")
122
+ # remove strings which contain and include in parentheses
123
+ # ex. 'Susan M. (Scully) Schultz' => 'Susan M. Schultz'
124
+ # 'Jay (Jung) Heum Kim' => 'Jay Heum Kim'
125
+ name.gsub!(/\(.*?\)/, ' ')
126
+ name.gsub!(/\(|\)/, '')
127
+ # remove quoted strings
128
+ # Darin "Derry" Ronald Anderson => 'Darin Ronald Anderson'
129
+ # Nancy M. "Shelli" Egger => 'Nancy M. Egger'
130
+ # Nicole 'nikki' Adame => 'Nicole Adame'
131
+ name.gsub!(/".*?"/, ' ')
132
+ name.gsub!(/'.*?'/i, ' ')
133
+
134
+ # remove curly brackets
135
+ # Henry C.{Harry} Wilson => 'Henry C. Wilson'
136
+ # Cellestine {Steen} Armstrong => 'Cellestine Armstrong'
137
+ name.gsub!(/\{.*?\}/, ' ')
138
+ # remove exceptional names
139
+ # ex. "William . D. 'Bill' Beard" => "William D. 'Bill' Beard"
140
+ # also this regexp can remove
141
+ name.gsub!(/\s+[^a-zA-Z]+\s+/, ' ')
142
+ # Why we use substitute(sub) comma to whitespace, not global substitute(gsub).
143
+ # the reason is the substitution applies for suffix splitting, not for replacing
144
+ # bad data. As we want, convert "Marvene A Gordon, JD" to "Marvene A Gordon JD",
145
+ # so that the suffix will get into the split array.
146
+ # and, standardize suffix as '2nd' => 'II', '3rd' => 'III'
147
+ CONVERSION.each_pair do |finder, replacer|
148
+ name.gsub!(Regexp.new("\\b#{Regexp.escape(finder)}\\b", true), replacer)
148
149
  end
149
150
  end
150
-
151
- # Loop around until we run into a name that is not contained in the PREFIX_LIST
152
- # ex(FL): 'Lt Col Marvene A Gordon', 'The Honorable Dexter F George'
153
- prefix_arr = []
154
- while (nameSplit.length > 1)
155
- if IGNORABLE_PREFIXS.include?(nameSplit.first.downcase)
156
- nameSplit.shift
157
- elsif PREFIX_LIST.include?(nameSplit.first.downcase)
158
- prefix_arr.push(nameSplit.shift)
159
- else
160
- break
151
+
152
+ def extract_suffix(str)
153
+ list = []
154
+ loop do
155
+ m = /(.*)[, ](.+)/.match(str)
156
+ break unless m
157
+ remaining = m[1]
158
+ last_part = m[2].strip
159
+ last_part_downcase = last_part.downcase
160
+ if IGNORABLE_SUFFIXES.include?(last_part_downcase)
161
+ list.unshift([last_part, false])
162
+ elsif SUFFIX_LIST.include?(last_part_downcase) || GLOBAL_SUFFIX_LIST.include?(last_part_downcase)
163
+ list.unshift([last_part, true])
164
+ else
165
+ break
166
+ end
167
+ str = remaining.gsub(/[, ]+$/, '').strip
161
168
  end
169
+ [str, list]
162
170
  end
163
- prefix = prefix_arr.join(' ') if prefix_arr.size > 0
164
-
165
- # Loop around until we run into a name that is not contained in the LAST_NAME_EXTENSIONS
166
- last_name_arr = []
167
- last_name_arr.push(nameSplit.pop)
168
- last_name_arr.push(nameSplit.pop) while nameSplit.length > 1 && LAST_NAME_EXTENSIONS.include?(nameSplit.last.downcase)
169
- last_name = last_name_arr.reverse.join(' ') if last_name_arr.size > 0
170
-
171
- first_name = nameSplit.shift if nameSplit.length >= 1
172
- middle_name = nameSplit.join(' ') if nameSplit.length > 0
173
- if first_name.nil? && prefix
174
- first_name = prefix
175
- prefix = nil
171
+
172
+ def extract_prefix(str)
173
+ list = []
174
+ loop do
175
+ m = /(.+?)[, ](.+)/.match(str)
176
+ break unless m
177
+ remining = m[2]
178
+ first_part = m[1]
179
+ first_part_downcase = first_part.downcase
180
+ if IGNORABLE_PREFIXS.include?(first_part_downcase)
181
+ # skip words
182
+ elsif PREFIX_LIST.include?(first_part_downcase)
183
+ list.push(first_part)
184
+ else
185
+ break
186
+ end
187
+ str = remining.gsub(/^[, ]+/, '').strip
188
+ end
189
+ [str, list]
176
190
  end
177
-
178
- if first_name.nil? && suffix_arr.any? && SUFFIX_CAN_BE_LASTNAME.include?(suffix_arr.first.first.downcase)
179
- first_name = last_name
180
- last_name = suffix_arr.shift.first
191
+
192
+ def extract_suffix_before_flipping_parts
193
+ remaining, list = extract_suffix(name)
194
+ @name = remaining
195
+ @suffix_list += list
181
196
  end
182
- if last_name =~ /^[A-Z]\.?$/i && suffix_arr.any? && SUFFIX_CAN_BE_LASTNAME.include?(suffix_arr.first.first.downcase)
183
- middle_name = [middle_name, last_name].compact.join(' ')
184
- last_name = suffix_arr.shift.first
197
+
198
+ def flip_parts!
199
+ extract_suffix_before_flipping_parts
200
+ parts = name.split(/,/)
201
+ case parts.size
202
+ when 1
203
+ when 2
204
+ remining, list = extract_suffix(parts[0])
205
+ @name = [parts[1], remining].join(' ').strip.gsub(/ +/, ' ')
206
+ @suffix_list += list
207
+ when 3
208
+ remining, list = extract_suffix(parts[0..1].join(' '))
209
+ @name = [parts[2], remining].join(' ').strip.gsub(/ +/, ' ')
210
+ @suffix_list += list
211
+ else
212
+ fail Error.new("name [ #{name} ] has >2 commas, don't know how to parse")
213
+ end
214
+
215
+ extract_prefix_after_flipping_parts
185
216
  end
186
- suffix_arr.delete_if{|a, b| !b}
187
- suffix = suffix_arr.size == 0 ? nil : suffix_arr.first.first # only return first suffix
188
- return { :last => last_name, :middle => middle_name, :first => first_name, :prefix => prefix, :suffix => suffix }
217
+
218
+ def extract_prefix_after_flipping_parts
219
+ remaining, list = extract_prefix(name)
220
+ @name = remaining
221
+ @prefix_list += list
222
+ end
223
+
224
+ def breakup!
225
+ parts = name.split(/[, ]+/)
226
+
227
+ # process prefix
228
+ @prefix = @prefix_list.join(' ') if @prefix_list.any?
229
+
230
+ # process lastname
231
+ # Loop around until we run into a name that is not contained in the LAST_NAME_EXTENSIONS
232
+ last_name_arr = []
233
+ last_name_arr.push(parts.pop)
234
+ last_name_arr.push(parts.pop) while parts.length > 1 && LAST_NAME_EXTENSIONS.include?(parts.last.downcase)
235
+ @lastname = last_name_arr.reverse.join(' ')
236
+
237
+ # process firstname and middlename
238
+ @firstname = parts.shift if parts.length >= 1
239
+ @middlename = parts.join(' ') if parts.length > 0
240
+ if firstname.nil? && prefix
241
+ @firstname = prefix
242
+ @prefix = nil
243
+ end
244
+
245
+ # move lastname to firstname, move first suffix to lastname
246
+ if firstname.nil? && @suffix_list.any? && SUFFIX_CAN_BE_LASTNAME.include?(@suffix_list.first.first.downcase)
247
+ @firstname = lastname
248
+ @lastname = @suffix_list.shift.first
249
+ end
250
+
251
+ # move lastname to middlename, move first suffix to lastname
252
+ if lastname =~ /^[A-Z]\.?$/i && @suffix_list.any? && SUFFIX_CAN_BE_LASTNAME.include?(@suffix_list.first.first.downcase)
253
+ @middlename = [middlename, lastname].compact.join(' ')
254
+ @lastname = @suffix_list.shift.first
255
+ end
256
+
257
+ # process suffix
258
+ @suffix_list.delete_if { |_, ignore_able| !ignore_able }
259
+ @suffix = @suffix_list.any? ? @suffix_list.first.first : nil
260
+ end
261
+ end
262
+
263
+ def parse_fullname(name)
264
+ i = Identifier.new(name)
265
+ return {
266
+ prefix: i.prefix,
267
+ first: i.firstname,
268
+ middle: i.middlename,
269
+ last: i.lastname,
270
+ suffix: i.suffix
271
+ }
189
272
  end # << parse_fullname
190
273
  extend self
191
274
  end
@@ -1,6 +1,6 @@
1
1
 
2
2
  module Fullname
3
3
  module Parser
4
- VERSION = '1.0.5'
4
+ VERSION = '1.1.0'
5
5
  end
6
6
  end
metadata CHANGED
@@ -1,15 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fullname-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
5
- prerelease:
4
+ version: 1.1.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - xiaohui
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-06-24 00:00:00.000000000 Z
11
+ date: 2016-02-16 00:00:00.000000000 Z
13
12
  dependencies: []
14
13
  description: For parsing people's fullname into pieces(prefix/first/middle/last/suffix)
15
14
  email:
@@ -25,27 +24,25 @@ files:
25
24
  - lib/fullname/parser/version.rb
26
25
  homepage: http://github.com/xiaohui-zhangxh/fullname-parser
27
26
  licenses: []
27
+ metadata: {}
28
28
  post_install_message:
29
29
  rdoc_options: []
30
30
  require_paths:
31
31
  - lib
32
32
  required_ruby_version: !ruby/object:Gem::Requirement
33
- none: false
34
33
  requirements:
35
- - - ! '>='
34
+ - - '>='
36
35
  - !ruby/object:Gem::Version
37
36
  version: '0'
38
37
  required_rubygems_version: !ruby/object:Gem::Requirement
39
- none: false
40
38
  requirements:
41
- - - ! '>='
39
+ - - '>='
42
40
  - !ruby/object:Gem::Version
43
41
  version: '0'
44
42
  requirements: []
45
43
  rubyforge_project:
46
- rubygems_version: 1.8.25
44
+ rubygems_version: 2.4.8
47
45
  signing_key:
48
- specification_version: 3
46
+ specification_version: 4
49
47
  summary: Split fullname into pieces(prefix/first/middle/last/suffix)
50
48
  test_files: []
51
- has_rdoc: