namor 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/namor/namor.rb +15 -3
- data/lib/namor/version.rb +1 -1
- data/spec/lib/namor_spec.rb +5 -2
- metadata +3 -3
data/lib/namor/namor.rb
CHANGED
@@ -1,12 +1,25 @@
|
|
1
1
|
class Namor::Namor
|
2
2
|
# Create a scrubber.
#
# opts - configuration Hash, handed straight to #config (defaults to {}).
def initialize(opts = {})
  config(opts)
  # Per-instance memo of compiled suppression regexes; #scrub fills it in,
  # keyed by the :suppress list given on each call.
  @re_cache = Hash.new
end
|
5
6
|
|
6
7
|
# Replace the scrubber's configuration.
#
# opts - Hash of settings. The :suppress key (an Array of terms) is read when
#        suppression regexes are built. nil is treated as an empty Hash.
#
# Returns the stored configuration Hash.
def config(opts)
  # Cached suppression regexes embed the previous :suppress list, so they
  # must not outlive a configuration change.
  @re_cache = {}
  @config = opts || {}
end
|
9
10
|
|
11
|
+
|
12
|
+
# Build the regex that strips suppressed terms from an upcased name.
#
# supp_list - extra terms for this call (Array or nil); they are appended to
#             the :suppress list from the instance configuration.
#
# Each term is upcased, loses one trailing '.', and is spliced into the
# pattern as-is, so a term may itself contain regex syntax (e.g. 'M\.D').
# nil and empty terms are dropped: an empty alternative would match the empty
# string at every word boundary and, being tried first, would stop the terms
# listed after it from ever matching.
#
# Returns a Regexp matching any term as a whole word, or a Regexp that never
# matches when no usable terms remain.
def suppression_re(supp_list)
  configured = (@config && @config[:suppress]) || []
  raw_terms = (configured + (supp_list || [])).compact
  terms = raw_terms.map { |term| term.to_s.chomp('.').upcase }
  terms = terms.reject { |term| term.empty? }.uniq

  # (?!) is an empty negative lookahead: it can never succeed.
  return /(?!)/ if terms.empty?

  Regexp.new('\b(' + terms.join('|') + ')\b')
end
|
22
|
+
|
10
23
|
# clean up a single name component
|
11
24
|
# * output all converted to uppercase
|
12
25
|
# * strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)
|
@@ -16,10 +29,9 @@ class Namor::Namor
|
|
16
29
|
# * remove punctuation
|
17
30
|
# * squeeze whitespace & trim spaces from ends
|
18
31
|
# name - the raw String to clean; a nil (or false) name is returned untouched.
# opts - per-call options Hash (nil is tolerated); :suppress is an Array of
#        extra terms to strip on top of the configured ones.
#
# Returns the cleaned, upcased String.
def scrub(name, opts = {})
  # Nothing to clean, so do not build or cache a regex for it.
  return name unless name

  extra_terms = (opts || {})[:suppress]
  @re_cache ||= {}
  # Compiling the suppression regex is the expensive part; memoize it per
  # distinct :suppress list.
  suppress = (@re_cache[extra_terms] ||= suppression_re(extra_terms))

  cleaned = name.upcase
  cleaned = cleaned.gsub(/^[ZX]{2,}/, '')               # leading ZZ+/XX+ prefix
  cleaned = cleaned.gsub(suppress, '')                  # suppressed terms
  cleaned = cleaned.gsub(/\b(JR|SR|II|III|IV)\b/i, '')  # generational suffixes
  cleaned = cleaned.gsub(/\([^\(]*\)/, '')              # parenthesized text
  cleaned = cleaned.gsub(/\./, ' ')                     # periods become spaces
  cleaned = cleaned.gsub(/[_'\&]/, '')                  # remaining punctuation
  cleaned = cleaned.gsub(/,\s*$/, '')                   # dangling trailing comma
  cleaned.gsub(/ +/, ' ').strip
end
|
24
36
|
|
25
37
|
def fullscrub(name, opts = {})
|
data/lib/namor/version.rb
CHANGED
data/spec/lib/namor_spec.rb
CHANGED
@@ -5,7 +5,7 @@ require "spec_helper"
|
|
5
5
|
describe "name extract" do
|
6
6
|
before(:all) do
|
7
7
|
@namor = Namor::Namor.new
|
8
|
-
@namor.config(:suppress => ['MD', 'dds', 'M\.D
|
8
|
+
@namor.config(:suppress => ['MD', 'dds', 'M\.D'])
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should handle 2-part names without commas" do
|
@@ -145,10 +145,13 @@ describe "title suppression" do
|
|
145
145
|
it "should only suppress isolated terms" do
|
146
146
|
@namor.scrub("Smith, Mary RN", :suppress => ['RN']).should == 'SMITH, MARY'
|
147
147
|
@namor.scrub("Smith, Marnie", :suppress => ['RN']).should == 'SMITH, MARNIE'
|
148
|
+
|
149
|
+
@namor.scrub("Gonzalez, Lourdes", :suppress => ['RN', 'RD', 'DO']).should == 'GONZALEZ, LOURDES'
|
148
150
|
end
|
149
151
|
|
150
152
|
it "should scrub words with periods" do
|
151
|
-
@namor.scrub("Smith, Mary M.D.", :suppress => ['M.D
|
153
|
+
@namor.scrub("Smith, Mary M.D.", :suppress => ['M.D']).should == 'SMITH, MARY'
|
154
|
+
@namor.scrub("Smith, Mary M.D.", :suppress => ['RN', 'M.D.', 'DDS']).should == 'SMITH, MARY'
|
152
155
|
end
|
153
156
|
|
154
157
|
it "should scrub individual name components of punctuation and titles" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
108
108
|
version: '0'
|
109
109
|
segments:
|
110
110
|
- 0
|
111
|
-
hash: -
|
111
|
+
hash: -2504802179826978420
|
112
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
113
|
none: false
|
114
114
|
requirements:
|
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
117
117
|
version: '0'
|
118
118
|
segments:
|
119
119
|
- 0
|
120
|
-
hash: -
|
120
|
+
hash: -2504802179826978420
|
121
121
|
requirements: []
|
122
122
|
rubyforge_project:
|
123
123
|
rubygems_version: 1.8.24
|