namor 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/namor/namor.rb CHANGED
@@ -19,7 +19,28 @@ class Namor::Namor
19
19
  suppression_list = @config[:suppress] || []
20
20
  suppression_re = Regexp.new('\b?' + (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|') + '\b?')
21
21
 
22
- name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(suppression_re, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
22
+ name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(suppression_re, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
23
+ end
24
+
25
+ def fullscrub(name)
26
+ final_cleaning(scrub(name))
27
+ end
28
+
29
+ def demaiden(lastname)
30
+ return [nil,nil] unless lastname && !lastname.empty?
31
+ if lastname =~ /\-/
32
+ [lastname.gsub(/ /, ''), lastname.split(/\-/).last.gsub(/ /, '')]
33
+ else
34
+ [lastname.gsub(/ /, ''), lastname.split(/ /).last]
35
+ end
36
+ end
37
+
38
+ def final_cleaning(name)
39
+ if name && !name.empty?
40
+ name.gsub(/\-/, '')
41
+ else
42
+ nil
43
+ end
23
44
  end
24
45
 
25
46
  def extract(name, opts = {})
@@ -30,7 +51,7 @@ class Namor::Namor
30
51
  if detitled_name =~ /,/
31
52
  # "last, first[ middle]"
32
53
  lastname, firstname = detitled_name.split(/\s*,\s*/)
33
- lastname.gsub!(/ /, '')
54
+ lastname, de_maidened_last = demaiden(lastname)
34
55
  middlename = nil
35
56
  if firstname && firstname =~ / /
36
57
  pieces = firstname.split(/ +/)
@@ -38,26 +59,29 @@ class Namor::Namor
38
59
  middlename = pieces.join if pieces.any?
39
60
  end
40
61
  else
41
- # "first [middle ]last"
62
+ # "first [middle-initial ]last" or "first everything-else-is-the-lastname"
42
63
  pieces = detitled_name.split(' ')
43
64
  firstname = pieces.shift
44
- middlename = nil
45
65
  if pieces.count > 1 && pieces.first.length == 1
46
66
  # assume this is a middle initial
47
67
  middlename = pieces.shift
68
+ else
69
+ middlename = nil
48
70
  end
49
71
 
50
- lastname = pieces.join
72
+ lastname, de_maidened_last = demaiden(pieces.join(' '))
51
73
  end
52
74
 
53
- firstname = nil if firstname.empty?
54
- middlename = nil if middlename && middlename.empty?
55
- lastname = nil if lastname.empty?
75
+ firstname = final_cleaning(firstname)
76
+ middlename = final_cleaning(middlename)
77
+ lastname = final_cleaning(lastname)
78
+ de_maidened_last = final_cleaning(de_maidened_last)
56
79
 
57
80
  fm = [firstname, middlename].compact.join(' ')
58
81
  fullname = [lastname, fm].compact.join(',')
82
+ nee_fullname = [de_maidened_last, fm].compact.join(',')
59
83
 
60
- [firstname, middlename, lastname, fullname]
84
+ [firstname, middlename, lastname, fullname, nee_fullname]
61
85
  end
62
86
 
63
87
  def extract_with_cluster(name, opts = {})
data/lib/namor/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Namor
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
@@ -9,71 +9,75 @@ describe "name extract" do
9
9
  end
10
10
 
11
11
  it "should handle 2-part names without commas" do
12
- @namor.extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
12
+ @namor.extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN', 'SMITH,JOHN']
13
13
  end
14
14
 
15
15
  it "should handle 2-part names with commas" do
16
- @namor.extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
16
+ @namor.extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN', 'SMITH,JOHN']
17
17
  end
18
18
 
19
19
  it "should handle 2-part names with commas and middle initials" do
20
- @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
20
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R', 'SMITH,JOHN R']
21
21
  end
22
22
 
23
23
  it "should handle 2-part names with commas and middle initials" do
24
- @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
24
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R', 'SMITH,JOHN R']
25
25
  end
26
26
 
27
27
  it "should strip elements within parentheses" do
28
- @namor.extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
28
+ @namor.extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R', 'SMITH,JOHN R']
29
29
  end
30
30
 
31
31
  it "should drop periods" do
32
- @namor.extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
32
+ @namor.extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R', 'SMITH,JOHN R']
33
33
  end
34
34
 
35
35
  it "should drop spaces in last name (only when input has a comma)" do
36
- @namor.extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
36
+ @namor.extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY', 'JONES,MARY']
37
37
  end
38
38
 
39
39
  it "should drop dashes, apostrophes, ampersands" do
40
- @namor.extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
41
- @namor.extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
42
- @namor.extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
43
- @namor.extract("Smith, Bob & Sue").should == ['BOB', 'SUE', 'SMITH', 'SMITH,BOB SUE']
44
- @namor.extract("Research & Development").should == ['RESEARCH', nil, 'DEVELOPMENT', 'DEVELOPMENT,RESEARCH']
40
+ @namor.extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY', 'JONES,MARY']
41
+ @namor.extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S', 'OKEEFE,MARY S']
42
+ @namor.extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL', 'CLAUDE,JEANMICHEL']
43
+ @namor.extract("Smith, Bob & Sue").should == ['BOB', 'SUE', 'SMITH', 'SMITH,BOB SUE', 'SMITH,BOB SUE']
44
+ @namor.extract("Research & Development").should == ['RESEARCH', nil, 'DEVELOPMENT', 'DEVELOPMENT,RESEARCH', 'DEVELOPMENT,RESEARCH']
45
45
  end
46
46
 
47
47
  it "should concatenate extract name pieces" do
48
- @namor.extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
49
- @namor.extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
48
+ @namor.extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH', 'GARUDA,RAJESH']
49
+ @namor.extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA', 'KUMAR,RAJESH VISHNUGARUDA']
50
50
  end
51
51
 
52
52
  it "should excise suffixes like 'Jr.' from lastnames" do
53
- @namor.extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
53
+ @namor.extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M', 'SMITH,EDWARD M']
54
54
  end
55
55
 
56
56
  it "should excise terms from optional suppression list" do
57
- @namor.extract("Smith Jr, Edward M MD DDS").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
58
- @namor.extract("Smith Jr, Edward M M.D.").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
59
- @namor.extract("Smith Jr, Edward III MD PHD").should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
57
+ @namor.extract("Smith Jr, Edward M MD DDS").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M', 'SMITH,EDWARD M']
58
+ @namor.extract("Smith Jr, Edward M M.D.").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M', 'SMITH,EDWARD M']
59
+ @namor.extract("Smith Jr, Edward III MD PHD").should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD', 'SMITH,EDWARD PHD']
60
60
  end
61
61
 
62
62
  it "should handle pathological cases" do
63
- @namor.extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
63
+ @namor.extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH', 'MARY SMITH']
64
64
  end
65
65
 
66
66
  it "should squash multi-part last names" do
67
- @namor.extract("Al Hassan, Bashar").should == ['BASHAR', nil, 'ALHASSAN', 'ALHASSAN,BASHAR']
68
- @namor.extract("Bashar Al-Hassan").should == ['BASHAR', nil, 'ALHASSAN', 'ALHASSAN,BASHAR']
67
+ @namor.extract("Al Hassan, Bashar").should == ['BASHAR', nil, 'ALHASSAN', 'ALHASSAN,BASHAR', 'HASSAN,BASHAR']
68
+ @namor.extract("Bashar Al-Hassan").should == ['BASHAR', nil, 'ALHASSAN', 'ALHASSAN,BASHAR', 'HASSAN,BASHAR']
69
69
  end
70
70
 
71
71
  it "should squash hyphenated first names" do
72
- @namor.extract("Smith,Anne-Marie").should == ['ANNEMARIE', nil, 'SMITH', 'SMITH,ANNEMARIE']
72
+ @namor.extract("Smith,Anne-Marie").should == ['ANNEMARIE', nil, 'SMITH', 'SMITH,ANNEMARIE', 'SMITH,ANNEMARIE']
73
73
  end
74
74
 
75
75
  it "should treat some cases with periods as first.last" do
76
- @namor.extract("john.smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
76
+ @namor.extract("john.smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN', 'SMITH,JOHN']
77
+ end
78
+
79
+ it "should generate estimated maiden names" do
80
+ @namor.extract("Jones-De Quento, Maria").should == ['MARIA', nil, 'JONESDEQUENTO', 'JONESDEQUENTO,MARIA', 'DEQUENTO,MARIA']
77
81
  end
78
82
  end
79
83
 
@@ -109,7 +113,7 @@ describe "name componentization" do
109
113
  end
110
114
 
111
115
  it "should scrub individual name components of punctuation and titles" do
112
- @namor.scrub('Foxworthy-Smythe, ESQ.').should == 'FOXWORTHYSMYTHE'
116
+ @namor.fullscrub('Foxworthy-Smythe, ESQ.').should == 'FOXWORTHYSMYTHE'
113
117
  end
114
118
 
115
119
  it "should delete strings inside parens" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  segments:
110
110
  - 0
111
- hash: 236992470015496114
111
+ hash: 3235644830650956760
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  none: false
114
114
  requirements:
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  version: '0'
118
118
  segments:
119
119
  - 0
120
- hash: 236992470015496114
120
+ hash: 3235644830650956760
121
121
  requirements: []
122
122
  rubyforge_project:
123
123
  rubygems_version: 1.8.24