namor 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ .DS_Store
data/lib/namor/namor.rb ADDED
@@ -0,0 +1,77 @@
1
+ class Namor::Namor
2
+ def initialize(opts = {})
3
+ config(opts)
4
+ end
5
+
6
+ def config(opts)
7
+ @config = opts
8
+ end
9
+
10
+ def extract(name)
11
+ return [] if name.nil?
12
+
13
+ suppression_list = @config[:suppress] || []
14
+ suppression_re = suppression_list.join('|')
15
+
16
+ detitled_name = name.upcase.gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(MD|JR|SR|I+|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/[_.'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
17
+
18
+ if detitled_name =~ /,/
19
+ # "last, first[ middle]"
20
+ lastname, firstname = detitled_name.split(/\s*,\s*/)
21
+ lastname.gsub!(/ /, '')
22
+ middlename = nil
23
+ if firstname && firstname =~ / /
24
+ pieces = firstname.split(/ +/)
25
+ firstname = pieces.shift
26
+ middlename = pieces.join if pieces.any?
27
+ end
28
+ else
29
+ # "first [middle ]last"
30
+ pieces = detitled_name.split(' ')
31
+ firstname = pieces.shift
32
+ middlename = nil
33
+ if pieces.count > 1 && pieces.first.length == 1
34
+ # assume this is a middle initial
35
+ middlename = pieces.shift
36
+ end
37
+
38
+ lastname = pieces.join
39
+ end
40
+
41
+ firstname = nil if firstname.empty?
42
+ middlename = nil if middlename && middlename.empty?
43
+ lastname = nil if lastname.empty?
44
+
45
+ fm = [firstname, middlename].compact.join(' ')
46
+ fullname = [lastname, fm].compact.join(',')
47
+
48
+ [firstname, middlename, lastname, fullname]
49
+ end
50
+
51
+ def extract_with_cluster(name)
52
+ ary = extract(name)
53
+ return [] if ary.empty?
54
+ ary << ary.last.gsub(/\W/, '_')
55
+ end
56
+
57
+
58
+ def components(*args)
59
+ suppression_list = @config[:suppress] ? @config[:suppress].map(&:upcase) : []
60
+
61
+ names = args
62
+ bits = []
63
+ names.compact.each do |name|
64
+ name = name.dup
65
+ name.gsub!(/\([^\(]*\)/, '')
66
+ name.gsub!(/\[[^\[]*\]/, '')
67
+ name.gsub!(/[\(\)\[\]\']/, '')
68
+ name.gsub!(/[,._-]/, ' ')
69
+ bits += name.split(/\s+/).map(&:upcase)
70
+ end
71
+
72
+ suppress_re = %w{MD JR SR I+ IV}.join('|')
73
+ bits.delete_if {|bit| suppression_list.include?(bit) || bit =~ /^(#{suppress_re})$/}
74
+ bits.delete_if(&:empty?)
75
+ bits.uniq.sort
76
+ end
77
+ end
data/lib/namor/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Namor
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/namor.rb CHANGED
@@ -1,50 +1,5 @@
1
- require "namor/version"
1
+ require_relative "namor/version"
2
+ require_relative "namor/namor"
2
3
 
3
4
  module Namor
4
- def self.extract(name, args = {})
5
- return [] if name.nil?
6
-
7
- suppression_list = args[:suppress] || []
8
- suppression_re = suppression_list.join('|')
9
-
10
- detitled_name = name.upcase.gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(MD|JR|SR|I+|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/[_.'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
11
-
12
- if detitled_name =~ /,/
13
- # "last, first[ middle]"
14
- lastname, firstname = detitled_name.split(/\s*,\s*/)
15
- lastname.gsub!(/ /, '')
16
- middlename = nil
17
- if firstname && firstname =~ / /
18
- pieces = firstname.split(/ +/)
19
- firstname = pieces.shift
20
- middlename = pieces.join if pieces.any?
21
- end
22
- else
23
- # "first [middle ]last"
24
- pieces = detitled_name.split(' ')
25
- firstname = pieces.shift
26
- middlename = nil
27
- if pieces.count > 1 && pieces.first.length == 1
28
- # assume this is a middle initial
29
- middlename = pieces.shift
30
- end
31
-
32
- lastname = pieces.join
33
- end
34
-
35
- firstname = nil if firstname.empty?
36
- middlename = nil if middlename && middlename.empty?
37
- lastname = nil if lastname.empty?
38
-
39
- fm = [firstname, middlename].compact.join(' ')
40
- fullname = [lastname, fm].compact.join(',')
41
-
42
- [firstname, middlename, lastname, fullname]
43
- end
44
-
45
- def self.extract_with_cluster(name, args = {})
46
- ary = extract(name, args)
47
- return [] if ary.empty?
48
- ary << ary.last.gsub(/\W/, '_')
49
- end
50
5
  end
data/spec/lib/namor_spec.rb CHANGED
@@ -3,61 +3,92 @@
3
3
  require "spec_helper"
4
4
 
5
5
  describe "name extract" do
6
+ before(:all) do
7
+ @namor = Namor::Namor.new
8
+ @namor.config(:suppress => ['MD', 'DDS'])
9
+ end
10
+
6
11
  it "should handle 2-part names without commas" do
7
- Namor::extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
12
+ @namor.extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
8
13
  end
9
14
 
10
15
  it "should handle 2-part names with commas" do
11
- Namor::extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
16
+ @namor.extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
12
17
  end
13
18
 
14
19
  it "should handle 2-part names with commas and middle initials" do
15
- Namor::extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
20
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
16
21
  end
17
22
 
18
23
  it "should handle 2-part names with commas and middle initials" do
19
- Namor::extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
24
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
20
25
  end
21
26
 
22
27
  it "should strip elements within parentheses" do
23
- Namor::extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
28
+ @namor.extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
24
29
  end
25
30
 
26
31
  it "should drop periods" do
27
- Namor::extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
32
+ @namor.extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
28
33
  end
29
34
 
30
35
  it "should drop spaces in last name (only when input has a comma)" do
31
- Namor::extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
36
+ @namor.extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
32
37
  end
33
38
 
34
39
  it "should drop dashes & apostrophes" do
35
- Namor::extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
36
- Namor::extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
37
- Namor::extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
40
+ @namor.extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
41
+ @namor.extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
42
+ @namor.extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
38
43
  end
39
44
 
40
45
  it "should concatenate extract name pieces" do
41
- Namor::extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
42
- Namor::extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
46
+ @namor.extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
47
+ @namor.extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
43
48
  end
44
49
 
45
50
  it "should excise suffixes like 'Jr.' from lastnames" do
46
- Namor::extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
51
+ @namor.extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
47
52
  end
48
53
 
49
54
  it "should excise terms from optional suppression list" do
50
- Namor::extract("Smith Jr, Edward M MD DDS", :suppress => ['MD', 'DDS']).should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
51
- Namor::extract("Smith Jr, Edward III MD PHD", :suppress => ['MD', 'DDS']).should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
55
+ @namor.extract("Smith Jr, Edward M MD DDS").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
56
+ @namor.extract("Smith Jr, Edward III MD PHD").should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
52
57
  end
53
58
 
54
59
  it "should handle pathological cases" do
55
- Namor::extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
60
+ @namor.extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
56
61
  end
57
62
  end
58
63
 
59
64
  describe "with cluster coding" do
65
+ before(:all) do
66
+ @namor = Namor::Namor.new
67
+ @namor.config(:suppress => ['MD', 'DDS'])
68
+ end
69
+
60
70
  it "should generate cluster labels" do
61
- Namor::extract_with_cluster("Smith Jr, Edward III MD PHD", :suppress => ['MD', 'DDS']).last.should == 'SMITH_EDWARD_PHD'
71
+ @namor.extract_with_cluster("Smith Jr, Edward III MD PHD").last.should == 'SMITH_EDWARD_PHD'
72
+ end
73
+ end
74
+
75
+ describe "name componentization" do
76
+ before(:all) do
77
+ @namor = Namor::Namor.new
78
+ @namor.config(:suppress => ['esq'])
79
+ end
80
+
81
+ it "should include initials" do
82
+ @namor.components("john q. smith").should == ['JOHN', 'Q', 'SMITH']
83
+ end
84
+
85
+ it "should excise common suffixes" do
86
+ @namor.components("john smith III").should == ['JOHN', 'SMITH']
87
+ @namor.components("john smith jr").should == ['JOHN', 'SMITH']
88
+ end
89
+
90
+ it "should excise from suppression list" do
91
+ @namor.components("john smith esk.").should == ['ESK', 'JOHN', 'SMITH']
92
+ @namor.components("john smith esq.").should == ['JOHN', 'SMITH']
62
93
  end
63
94
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-08 00:00:00.000000000 Z
12
+ date: 2012-08-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2152651100 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: 0.9.2
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2152651100
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.9.2
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: rspec
27
- requirement: &2152661440 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ~>
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: 2.9.0
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2152661440
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.9.0
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: guard-rspec
38
- requirement: &2152659940 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ~>
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: 0.7.0
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2152659940
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.7.0
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: ruby_gntp
49
- requirement: &2152658740 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ~>
@@ -54,7 +69,12 @@ dependencies:
54
69
  version: 0.3.4
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *2152658740
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.3.4
58
78
  description: Munging English names
59
79
  email:
60
80
  - jmay@pobox.com
@@ -68,6 +88,7 @@ files:
68
88
  - README.md
69
89
  - Rakefile
70
90
  - lib/namor.rb
91
+ - lib/namor/namor.rb
71
92
  - lib/namor/version.rb
72
93
  - namor.gemspec
73
94
  - spec/lib/namor_spec.rb
@@ -84,19 +105,24 @@ required_ruby_version: !ruby/object:Gem::Requirement
84
105
  - - ! '>='
85
106
  - !ruby/object:Gem::Version
86
107
  version: '0'
108
+ segments:
109
+ - 0
110
+ hash: 3634688006053498514
87
111
  required_rubygems_version: !ruby/object:Gem::Requirement
88
112
  none: false
89
113
  requirements:
90
114
  - - ! '>='
91
115
  - !ruby/object:Gem::Version
92
116
  version: '0'
117
+ segments:
118
+ - 0
119
+ hash: 3634688006053498514
93
120
  requirements: []
94
121
  rubyforge_project:
95
- rubygems_version: 1.8.10
122
+ rubygems_version: 1.8.24
96
123
  signing_key:
97
124
  specification_version: 3
98
125
  summary: Parse & extract pieces of names
99
126
  test_files:
100
127
  - spec/lib/namor_spec.rb
101
128
  - spec/spec_helper.rb
102
- has_rdoc: