namor 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/namor/namor.rb CHANGED
@@ -19,7 +19,28 @@ class Namor::Namor
19
19
  suppression_list = @config[:suppress] || []
20
20
  suppression_re = Regexp.new('\b?' + (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|') + '\b?')
21
21
 
22
- name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(suppression_re, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
22
+ name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(suppression_re, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
23
+ end
24
+
25
# Runs +name+ through scrub and then hands the scrubbed value to
# final_cleaning, returning whatever final_cleaning returns.
def fullscrub(name)
  scrubbed = scrub(name)
  final_cleaning(scrubbed)
end
28
+
29
# Splits a (possibly compound) last name into two candidate forms.
#
# lastname - String last name, or nil.
#
# Returns a two-element Array:
#   [0] the last name with every space removed (hyphens are kept), or nil
#   [1] the final component of the last name with spaces removed: the part
#       after the last hyphen when the name contains a hyphen, otherwise
#       the part after the last space. nil when no such component exists
#       (for example, a name made up only of hyphens).
#
# Returns [nil, nil] when lastname is nil or empty.
def demaiden(lastname)
  return [nil, nil] unless lastname && !lastname.empty?

  squashed = lastname.delete(' ')

  # A hyphen, when present, takes precedence over spaces as the separator.
  separator = lastname =~ /-/ ? /-/ : / /

  # String#split drops trailing empty fields, so a name consisting solely of
  # separators yields an empty array and +last+ is nil; guard against that
  # instead of calling a String method on nil.
  tail = lastname.split(separator).last

  [squashed, tail && tail.delete(' ')]
end
37
+
38
# Removes every hyphen from +name+.
#
# name - String name component, or nil.
#
# Returns the name without hyphens, or nil when name is nil, empty, or
# consists only of hyphens. Emptiness is checked after the hyphens are
# removed so that an input such as "-" does not come back as "".
def final_cleaning(name)
  return nil unless name

  cleaned = name.gsub('-', '')
  cleaned.empty? ? nil : cleaned
end
24
45
 
25
46
  def extract(name, opts = {})
@@ -30,7 +51,7 @@ class Namor::Namor
30
51
  if detitled_name =~ /,/
31
52
  # "last, first[ middle]"
32
53
  lastname, firstname = detitled_name.split(/\s*,\s*/)
33
- lastname.gsub!(/ /, '')
54
+ lastname, de_maidened_last = demaiden(lastname)
34
55
  middlename = nil
35
56
  if firstname && firstname =~ / /
36
57
  pieces = firstname.split(/ +/)
@@ -38,26 +59,29 @@ class Namor::Namor
38
59
  middlename = pieces.join if pieces.any?
39
60
  end
40
61
  else
41
- # "first [middle ]last"
62
+ # "first [middle-initial ]last" or "first everything-else-is-the-lastname"
42
63
  pieces = detitled_name.split(' ')
43
64
  firstname = pieces.shift
44
- middlename = nil
45
65
  if pieces.count > 1 && pieces.first.length == 1
46
66
  # assume this is a middle initial
47
67
  middlename = pieces.shift
68
+ else
69
+ middlename = nil
48
70
  end
49
71
 
50
- lastname = pieces.join
72
+ lastname, de_maidened_last = demaiden(pieces.join(' '))
51
73
  end
52
74
 
53
- firstname = nil if firstname.empty?
54
- middlename = nil if middlename && middlename.empty?
55
- lastname = nil if lastname.empty?
75
+ firstname = final_cleaning(firstname)
76
+ middlename = final_cleaning(middlename)
77
+ lastname = final_cleaning(lastname)
78
+ de_maidened_last = final_cleaning(de_maidened_last)
56
79
 
57
80
  fm = [firstname, middlename].compact.join(' ')
58
81
  fullname = [lastname, fm].compact.join(',')
82
+ nee_fullname = [de_maidened_last, fm].compact.join(',')
59
83
 
60
- [firstname, middlename, lastname, fullname]
84
+ [firstname, middlename, lastname, fullname, nee_fullname]
61
85
  end
62
86
 
63
87
  def extract_with_cluster(name, opts = {})
data/lib/namor/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Namor
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.1"
3
3
  end
@@ -9,71 +9,75 @@ describe "name extract" do
9
9
  end
10
10
 
11
11
  it "should handle 2-part names without commas" do
12
- @namor.extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
12
+ @namor.extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN', 'SMITH,JOHN']
13
13
  end
14
14
 
15
15
  it "should handle 2-part names with commas" do
16
- @namor.extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
16
+ @namor.extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN', 'SMITH,JOHN']
17
17
  end
18
18
 
19
19
  it "should handle 2-part names with commas and middle initials" do
20
- @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
20
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R', 'SMITH,JOHN R']
21
21
  end
22
22
 
23
23
  it "should handle 2-part names with commas and middle initials" do
24
- @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
24
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R', 'SMITH,JOHN R']
25
25
  end
26
26
 
27
27
  it "should strip elements within parentheses" do
28
- @namor.extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
28
+ @namor.extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R', 'SMITH,JOHN R']
29
29
  end
30
30
 
31
31
  it "should drop periods" do
32
- @namor.extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
32
+ @namor.extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R', 'SMITH,JOHN R']
33
33
  end
34
34
 
35
35
  it "should drop spaces in last name (only when input has a comma)" do
36
- @namor.extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
36
+ @namor.extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY', 'JONES,MARY']
37
37
  end
38
38
 
39
39
  it "should drop dashes, apostrophes, ampersands" do
40
- @namor.extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
41
- @namor.extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
42
- @namor.extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
43
- @namor.extract("Smith, Bob & Sue").should == ['BOB', 'SUE', 'SMITH', 'SMITH,BOB SUE']
44
- @namor.extract("Research & Development").should == ['RESEARCH', nil, 'DEVELOPMENT', 'DEVELOPMENT,RESEARCH']
40
+ @namor.extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY', 'JONES,MARY']
41
+ @namor.extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S', 'OKEEFE,MARY S']
42
+ @namor.extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL', 'CLAUDE,JEANMICHEL']
43
+ @namor.extract("Smith, Bob & Sue").should == ['BOB', 'SUE', 'SMITH', 'SMITH,BOB SUE', 'SMITH,BOB SUE']
44
+ @namor.extract("Research & Development").should == ['RESEARCH', nil, 'DEVELOPMENT', 'DEVELOPMENT,RESEARCH', 'DEVELOPMENT,RESEARCH']
45
45
  end
46
46
 
47
47
  it "should concatenate extract name pieces" do
48
- @namor.extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
49
- @namor.extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
48
+ @namor.extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH', 'GARUDA,RAJESH']
49
+ @namor.extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA', 'KUMAR,RAJESH VISHNUGARUDA']
50
50
  end
51
51
 
52
52
  it "should excise suffixes like 'Jr.' from lastnames" do
53
- @namor.extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
53
+ @namor.extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M', 'SMITH,EDWARD M']
54
54
  end
55
55
 
56
56
  it "should excise terms from optional suppression list" do
57
- @namor.extract("Smith Jr, Edward M MD DDS").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
58
- @namor.extract("Smith Jr, Edward M M.D.").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
59
- @namor.extract("Smith Jr, Edward III MD PHD").should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
57
+ @namor.extract("Smith Jr, Edward M MD DDS").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M', 'SMITH,EDWARD M']
58
+ @namor.extract("Smith Jr, Edward M M.D.").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M', 'SMITH,EDWARD M']
59
+ @namor.extract("Smith Jr, Edward III MD PHD").should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD', 'SMITH,EDWARD PHD']
60
60
  end
61
61
 
62
62
  it "should handle pathological cases" do
63
- @namor.extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
63
+ @namor.extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH', 'MARY SMITH']
64
64
  end
65
65
 
66
66
  it "should squash multi-part last names" do
67
- @namor.extract("Al Hassan, Bashar").should == ['BASHAR', nil, 'ALHASSAN', 'ALHASSAN,BASHAR']
68
- @namor.extract("Bashar Al-Hassan").should == ['BASHAR', nil, 'ALHASSAN', 'ALHASSAN,BASHAR']
67
+ @namor.extract("Al Hassan, Bashar").should == ['BASHAR', nil, 'ALHASSAN', 'ALHASSAN,BASHAR', 'HASSAN,BASHAR']
68
+ @namor.extract("Bashar Al-Hassan").should == ['BASHAR', nil, 'ALHASSAN', 'ALHASSAN,BASHAR', 'HASSAN,BASHAR']
69
69
  end
70
70
 
71
71
  it "should squash hyphenated first names" do
72
- @namor.extract("Smith,Anne-Marie").should == ['ANNEMARIE', nil, 'SMITH', 'SMITH,ANNEMARIE']
72
+ @namor.extract("Smith,Anne-Marie").should == ['ANNEMARIE', nil, 'SMITH', 'SMITH,ANNEMARIE', 'SMITH,ANNEMARIE']
73
73
  end
74
74
 
75
75
  it "should treat some cases with periods as first.last" do
76
- @namor.extract("john.smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
76
+ @namor.extract("john.smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN', 'SMITH,JOHN']
77
+ end
78
+
79
+ it "should generate estimated maiden names" do
80
+ @namor.extract("Jones-De Quento, Maria").should == ['MARIA', nil, 'JONESDEQUENTO', 'JONESDEQUENTO,MARIA', 'DEQUENTO,MARIA']
77
81
  end
78
82
  end
79
83
 
@@ -109,7 +113,7 @@ describe "name componentization" do
109
113
  end
110
114
 
111
115
  it "should scrub individual name components of punctuation and titles" do
112
- @namor.scrub('Foxworthy-Smythe, ESQ.').should == 'FOXWORTHYSMYTHE'
116
+ @namor.fullscrub('Foxworthy-Smythe, ESQ.').should == 'FOXWORTHYSMYTHE'
113
117
  end
114
118
 
115
119
  it "should delete strings inside parens" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  segments:
110
110
  - 0
111
- hash: 236992470015496114
111
+ hash: 3235644830650956760
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  none: false
114
114
  requirements:
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  version: '0'
118
118
  segments:
119
119
  - 0
120
- hash: 236992470015496114
120
+ hash: 3235644830650956760
121
121
  requirements: []
122
122
  rubyforge_project:
123
123
  rubygems_version: 1.8.24