namor 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/namor/namor.rb +15 -3
- data/lib/namor/version.rb +1 -1
- data/spec/lib/namor_spec.rb +5 -2
- metadata +3 -3
data/lib/namor/namor.rb
CHANGED
@@ -1,12 +1,25 @@
|
|
1
1
|
class Namor::Namor
|
2
2
|
def initialize(opts = {})
|
3
3
|
config(opts)
|
4
|
+
@re_cache = {}
|
4
5
|
end
|
5
6
|
|
6
7
|
def config(opts)
|
7
8
|
@config = opts
|
8
9
|
end
|
9
10
|
|
11
|
+
|
12
|
+
def suppression_re(supp_list)
|
13
|
+
suppression_list = (@config[:suppress] || []) + (supp_list || [])
|
14
|
+
|
15
|
+
re = '\b(' + suppression_list.compact.map{|s| s.chomp('.')}.map(&:upcase).join('|') + ')\b'
|
16
|
+
Regexp.new(re)
|
17
|
+
# bits = suppression_list.compact.map do |s|
|
18
|
+
# '\b' + s.upcase.chomp('.') + '\b'
|
19
|
+
# end
|
20
|
+
# Regexp.new(bits.join('|'))
|
21
|
+
end
|
22
|
+
|
10
23
|
# clean up a single name component
|
11
24
|
# * output all converted to uppercase
|
12
25
|
# * strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)
|
@@ -16,10 +29,9 @@ class Namor::Namor
|
|
16
29
|
# * remove punctuation
|
17
30
|
# * squeeze whitespace & trim spaces from ends
|
18
31
|
def scrub(name, opts = {})
|
19
|
-
|
20
|
-
suppression_re = Regexp.new('(\s|^)' + (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|') + '(\s|\.|$)')
|
32
|
+
@re_cache[opts[:suppress]] ||= suppression_re(opts[:suppress])
|
21
33
|
|
22
|
-
name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(
|
34
|
+
name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(@re_cache[opts[:suppress]], '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
|
23
35
|
end
|
24
36
|
|
25
37
|
def fullscrub(name, opts = {})
|
data/lib/namor/version.rb
CHANGED
data/spec/lib/namor_spec.rb
CHANGED
@@ -5,7 +5,7 @@ require "spec_helper"
|
|
5
5
|
describe "name extract" do
|
6
6
|
before(:all) do
|
7
7
|
@namor = Namor::Namor.new
|
8
|
-
@namor.config(:suppress => ['MD', 'dds', 'M\.D
|
8
|
+
@namor.config(:suppress => ['MD', 'dds', 'M\.D'])
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should handle 2-part names without commas" do
|
@@ -145,10 +145,13 @@ describe "title suppression" do
|
|
145
145
|
it "should only suppress isolated terms" do
|
146
146
|
@namor.scrub("Smith, Mary RN", :suppress => ['RN']).should == 'SMITH, MARY'
|
147
147
|
@namor.scrub("Smith, Marnie", :suppress => ['RN']).should == 'SMITH, MARNIE'
|
148
|
+
|
149
|
+
@namor.scrub("Gonzalez, Lourdes", :suppress => ['RN', 'RD', 'DO']).should == 'GONZALEZ, LOURDES'
|
148
150
|
end
|
149
151
|
|
150
152
|
it "should scrub words with periods" do
|
151
|
-
@namor.scrub("Smith, Mary M.D.", :suppress => ['M.D
|
153
|
+
@namor.scrub("Smith, Mary M.D.", :suppress => ['M.D']).should == 'SMITH, MARY'
|
154
|
+
@namor.scrub("Smith, Mary M.D.", :suppress => ['RN', 'M.D.', 'DDS']).should == 'SMITH, MARY'
|
152
155
|
end
|
153
156
|
|
154
157
|
it "should scrub individual name components of punctuation and titles" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
108
|
version: '0'
|
109
109
|
segments:
|
110
110
|
- 0
|
111
|
-
hash: -
|
111
|
+
hash: -2504802179826978420
|
112
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
113
|
none: false
|
114
114
|
requirements:
|
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
117
117
|
version: '0'
|
118
118
|
segments:
|
119
119
|
- 0
|
120
|
-
hash: -
|
120
|
+
hash: -2504802179826978420
|
121
121
|
requirements: []
|
122
122
|
rubyforge_project:
|
123
123
|
rubygems_version: 1.8.24
|