namor 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ .DS_Store
data/lib/namor/namor.rb ADDED
@@ -0,0 +1,77 @@
module Namor
  # Parses free-form personal names into upper-cased components.
  #
  # The configuration is a Hash. The only key read by this class is
  # :suppress, a list of extra terms (for example credentials such as
  # "DDS") that are removed from names in addition to the built-in suffixes.
  #
  # Declared as a nested module/class so the file also loads on its own,
  # without the Namor module having been defined beforehand.
  class Namor
    # Built-in suffixes that are always removed: MD, JR, SR, IV and any run
    # of the letter I (I, II, III, ...). Note that a stand-alone "I" is
    # therefore treated as a suffix, not as an initial.
    SUFFIX_ALTERNATION = 'MD|JR|SR|I+|IV'.freeze

    # Matches a built-in suffix as a whole word inside a longer string.
    SUFFIX_WORD_RE = /\b(#{SUFFIX_ALTERNATION})\b/i

    # Matches a token that consists of nothing but a built-in suffix.
    SUFFIX_TOKEN_RE = /\A(?:#{SUFFIX_ALTERNATION})\z/

    # opts - configuration Hash (see #config).
    def initialize(opts = {})
      config(opts)
    end

    # Replaces the current configuration.
    #
    # opts - Hash; :suppress may hold a list of terms to remove from names.
    #        nil is treated as an empty configuration.
    def config(opts)
      @config = opts || {}
    end

    # Splits a name into its parts.
    #
    # Accepts either "last, first[ middle]" (when the cleaned-up name
    # contains a comma) or "first [middle-initial ]last".
    #
    # name - String name, or nil.
    #
    # Returns [firstname, middlename, lastname, fullname], all upper-cased;
    # missing parts are nil and fullname is "LAST,FIRST MIDDLE" built from
    # the parts that are present. Returns [] when name is nil or when
    # nothing is left after titles and punctuation have been removed.
    def extract(name)
      return [] if name.nil?

      cleaned = detitle(name)
      # Blank or title-only input ("", "MD") leaves nothing to parse.
      return [] if cleaned.empty?

      firstname, middlename, lastname =
        cleaned.include?(',') ? split_last_first(cleaned) : split_first_last(cleaned)

      firstname = blank_to_nil(firstname)
      middlename = blank_to_nil(middlename)
      lastname = blank_to_nil(lastname)
      # Comma-only input (",,") yields no usable part at all.
      return [] unless firstname || middlename || lastname

      fm = [firstname, middlename].compact.join(' ')
      fullname = [lastname, fm].compact.join(',')

      [firstname, middlename, lastname, fullname]
    end

    # Same as #extract, with a cluster label appended as a fifth element.
    # The label is the fullname with every non-word character replaced by
    # an underscore. Returns [] when #extract returns [].
    def extract_with_cluster(name)
      parts = extract(name)
      return [] if parts.empty?

      parts << parts.last.gsub(/\W/, '_')
    end

    # Breaks one or more names into a sorted list of unique upper-cased
    # tokens.
    #
    # Parenthesised and bracketed sections are dropped, apostrophes are
    # removed, and commas, periods, underscores and dashes act as token
    # separators. Tokens equal to a suppressed term (compared upper-cased)
    # or to a built-in suffix are discarded.
    #
    # args - any number of String names; nil entries are ignored.
    #
    # Returns an Array of Strings.
    def components(*args)
      suppressed = suppression_terms.map(&:upcase)

      tokens = args.compact.flat_map do |name|
        name.gsub(/\([^\(]*\)/, '').
          gsub(/\[[^\[]*\]/, '').
          gsub(/[\(\)\[\]\']/, '').
          gsub(/[,._-]/, ' ').
          split(/\s+/).
          map(&:upcase)
      end

      tokens.reject { |token|
        token.empty? || suppressed.include?(token) || token =~ SUFFIX_TOKEN_RE
      }.uniq.sort
    end

    private

    # Configured suppression terms, or [] when none are configured.
    def suppression_terms
      @config[:suppress] || []
    end

    # Upper-cases the name and removes suppressed terms, built-in suffixes,
    # parenthesised sections, the characters _ . ' -, a trailing comma and
    # redundant spaces.
    def detitle(name)
      text = name.upcase
      terms = suppression_terms
      # Terms are interpolated into the pattern as written (not escaped).
      # An empty list is skipped; the empty pattern would change nothing.
      text = text.gsub(/\b(#{terms.join('|')})\b/i, '') unless terms.empty?
      text = text.gsub(SUFFIX_WORD_RE, '')
      text = text.gsub(/\([^\(]*\)/, '')
      text = text.gsub(/[_.'-]/, '')
      text = text.gsub(/,\s*$/, '')
      text.gsub(/ +/, ' ').strip
    end

    # Parses "last, first[ middle...]". Spaces inside the last name are
    # dropped and all middle pieces are concatenated. Anything after a
    # second comma is ignored.
    #
    # Returns [firstname, middlename, lastname]; entries may be nil or "".
    def split_last_first(cleaned)
      lastname, given = cleaned.split(/\s*,\s*/)
      # Either side may be missing (nil) for input such as ",," or "X,,".
      given_parts = given.to_s.split(/ +/)
      firstname = given_parts.shift
      [firstname, given_parts.join, lastname.to_s.delete(' ')]
    end

    # Parses "first [middle-initial ]last...". A single-letter second word
    # is taken as a middle initial only when more words follow it; the
    # remaining words are concatenated into the last name.
    #
    # Returns [firstname, middlename, lastname]; entries may be nil or "".
    def split_first_last(cleaned)
      parts = cleaned.split(' ')
      firstname = parts.shift
      middlename = parts.shift if parts.size > 1 && parts.first.length == 1
      [firstname, middlename, parts.join]
    end

    # Returns nil for nil or empty strings, otherwise the string itself.
    def blank_to_nil(str)
      str unless str.nil? || str.empty?
    end
  end
end
data/lib/namor/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Namor
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/namor.rb CHANGED
@@ -1,50 +1,5 @@
1
- require "namor/version"
1
+ require_relative "namor/version"
2
+ require_relative "namor/namor"
2
3
 
3
4
  module Namor
4
- def self.extract(name, args = {})
5
- return [] if name.nil?
6
-
7
- suppression_list = args[:suppress] || []
8
- suppression_re = suppression_list.join('|')
9
-
10
- detitled_name = name.upcase.gsub(/\b(#{suppression_re})\b/i, '').gsub(/\b(MD|JR|SR|I+|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/[_.'-]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
11
-
12
- if detitled_name =~ /,/
13
- # "last, first[ middle]"
14
- lastname, firstname = detitled_name.split(/\s*,\s*/)
15
- lastname.gsub!(/ /, '')
16
- middlename = nil
17
- if firstname && firstname =~ / /
18
- pieces = firstname.split(/ +/)
19
- firstname = pieces.shift
20
- middlename = pieces.join if pieces.any?
21
- end
22
- else
23
- # "first [middle ]last"
24
- pieces = detitled_name.split(' ')
25
- firstname = pieces.shift
26
- middlename = nil
27
- if pieces.count > 1 && pieces.first.length == 1
28
- # assume this is a middle initial
29
- middlename = pieces.shift
30
- end
31
-
32
- lastname = pieces.join
33
- end
34
-
35
- firstname = nil if firstname.empty?
36
- middlename = nil if middlename && middlename.empty?
37
- lastname = nil if lastname.empty?
38
-
39
- fm = [firstname, middlename].compact.join(' ')
40
- fullname = [lastname, fm].compact.join(',')
41
-
42
- [firstname, middlename, lastname, fullname]
43
- end
44
-
45
- def self.extract_with_cluster(name, args = {})
46
- ary = extract(name, args)
47
- return [] if ary.empty?
48
- ary << ary.last.gsub(/\W/, '_')
49
- end
50
5
  end
data/spec/lib/namor_spec.rb CHANGED
@@ -3,61 +3,92 @@
3
3
  require "spec_helper"
4
4
 
5
5
  describe "name extract" do
6
+ before(:all) do
7
+ @namor = Namor::Namor.new
8
+ @namor.config(:suppress => ['MD', 'DDS'])
9
+ end
10
+
6
11
  it "should handle 2-part names without commas" do
7
- Namor::extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
12
+ @namor.extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
8
13
  end
9
14
 
10
15
  it "should handle 2-part names with commas" do
11
- Namor::extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
16
+ @namor.extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
12
17
  end
13
18
 
14
19
  it "should handle 2-part names with commas and middle initials" do
15
- Namor::extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
20
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
16
21
  end
17
22
 
18
23
  it "should handle 2-part names with commas and middle initials" do
19
- Namor::extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
24
+ @namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
20
25
  end
21
26
 
22
27
  it "should strip elements within parentheses" do
23
- Namor::extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
28
+ @namor.extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
24
29
  end
25
30
 
26
31
  it "should drop periods" do
27
- Namor::extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
32
+ @namor.extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
28
33
  end
29
34
 
30
35
  it "should drop spaces in last name (only when input has a comma)" do
31
- Namor::extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
36
+ @namor.extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
32
37
  end
33
38
 
34
39
  it "should drop dashes & apostrophes" do
35
- Namor::extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
36
- Namor::extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
37
- Namor::extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
40
+ @namor.extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
41
+ @namor.extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
42
+ @namor.extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
38
43
  end
39
44
 
40
45
  it "should concatenate extract name pieces" do
41
- Namor::extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
42
- Namor::extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
46
+ @namor.extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
47
+ @namor.extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
43
48
  end
44
49
 
45
50
  it "should excise suffixes like 'Jr.' from lastnames" do
46
- Namor::extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
51
+ @namor.extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
47
52
  end
48
53
 
49
54
  it "should excise terms from optional suppression list" do
50
- Namor::extract("Smith Jr, Edward M MD DDS", :suppress => ['MD', 'DDS']).should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
51
- Namor::extract("Smith Jr, Edward III MD PHD", :suppress => ['MD', 'DDS']).should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
55
+ @namor.extract("Smith Jr, Edward M MD DDS").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
56
+ @namor.extract("Smith Jr, Edward III MD PHD").should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
52
57
  end
53
58
 
54
59
  it "should handle pathological cases" do
55
- Namor::extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
60
+ @namor.extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
56
61
  end
57
62
  end
58
63
 
59
64
  describe "with cluster coding" do
65
+ before(:all) do
66
+ @namor = Namor::Namor.new
67
+ @namor.config(:suppress => ['MD', 'DDS'])
68
+ end
69
+
60
70
  it "should generate cluster labels" do
61
- Namor::extract_with_cluster("Smith Jr, Edward III MD PHD", :suppress => ['MD', 'DDS']).last.should == 'SMITH_EDWARD_PHD'
71
+ @namor.extract_with_cluster("Smith Jr, Edward III MD PHD").last.should == 'SMITH_EDWARD_PHD'
72
+ end
73
+ end
74
+
75
+ describe "name componentization" do
76
+ before(:all) do
77
+ @namor = Namor::Namor.new
78
+ @namor.config(:suppress => ['esq'])
79
+ end
80
+
81
+ it "should include initials" do
82
+ @namor.components("john q. smith").should == ['JOHN', 'Q', 'SMITH']
83
+ end
84
+
85
+ it "should excise common suffixes" do
86
+ @namor.components("john smith III").should == ['JOHN', 'SMITH']
87
+ @namor.components("john smith jr").should == ['JOHN', 'SMITH']
88
+ end
89
+
90
+ it "should excise from suppression list" do
91
+ @namor.components("john smith esk.").should == ['ESK', 'JOHN', 'SMITH']
92
+ @namor.components("john smith esq.").should == ['JOHN', 'SMITH']
62
93
  end
63
94
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-08 00:00:00.000000000 Z
12
+ date: 2012-08-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2152651100 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,15 @@ dependencies:
21
21
  version: 0.9.2
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2152651100
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 0.9.2
25
30
  - !ruby/object:Gem::Dependency
26
31
  name: rspec
27
- requirement: &2152661440 !ruby/object:Gem::Requirement
32
+ requirement: !ruby/object:Gem::Requirement
28
33
  none: false
29
34
  requirements:
30
35
  - - ~>
@@ -32,10 +37,15 @@ dependencies:
32
37
  version: 2.9.0
33
38
  type: :development
34
39
  prerelease: false
35
- version_requirements: *2152661440
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.9.0
36
46
  - !ruby/object:Gem::Dependency
37
47
  name: guard-rspec
38
- requirement: &2152659940 !ruby/object:Gem::Requirement
48
+ requirement: !ruby/object:Gem::Requirement
39
49
  none: false
40
50
  requirements:
41
51
  - - ~>
@@ -43,10 +53,15 @@ dependencies:
43
53
  version: 0.7.0
44
54
  type: :development
45
55
  prerelease: false
46
- version_requirements: *2152659940
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.7.0
47
62
  - !ruby/object:Gem::Dependency
48
63
  name: ruby_gntp
49
- requirement: &2152658740 !ruby/object:Gem::Requirement
64
+ requirement: !ruby/object:Gem::Requirement
50
65
  none: false
51
66
  requirements:
52
67
  - - ~>
@@ -54,7 +69,12 @@ dependencies:
54
69
  version: 0.3.4
55
70
  type: :development
56
71
  prerelease: false
57
- version_requirements: *2152658740
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 0.3.4
58
78
  description: Munging English names
59
79
  email:
60
80
  - jmay@pobox.com
@@ -68,6 +88,7 @@ files:
68
88
  - README.md
69
89
  - Rakefile
70
90
  - lib/namor.rb
91
+ - lib/namor/namor.rb
71
92
  - lib/namor/version.rb
72
93
  - namor.gemspec
73
94
  - spec/lib/namor_spec.rb
@@ -84,19 +105,24 @@ required_ruby_version: !ruby/object:Gem::Requirement
84
105
  - - ! '>='
85
106
  - !ruby/object:Gem::Version
86
107
  version: '0'
108
+ segments:
109
+ - 0
110
+ hash: 3634688006053498514
87
111
  required_rubygems_version: !ruby/object:Gem::Requirement
88
112
  none: false
89
113
  requirements:
90
114
  - - ! '>='
91
115
  - !ruby/object:Gem::Version
92
116
  version: '0'
117
+ segments:
118
+ - 0
119
+ hash: 3634688006053498514
93
120
  requirements: []
94
121
  rubyforge_project:
95
- rubygems_version: 1.8.10
122
+ rubygems_version: 1.8.24
96
123
  signing_key:
97
124
  specification_version: 3
98
125
  summary: Parse & extract pieces of names
99
126
  test_files:
100
127
  - spec/lib/namor_spec.rb
101
128
  - spec/spec_helper.rb
102
- has_rdoc: