namor 0.4.0 → 0.4.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
data/lib/namor/namor.rb CHANGED
@@ -1,12 +1,25 @@
1
1
  class Namor::Namor
2
2
  def initialize(opts = {})
3
3
  config(opts)
4
+ @re_cache = {}
4
5
  end
5
6
 
6
7
  def config(opts)
7
8
  @config = opts
8
9
  end
9
10
 
11
+
12
+ def suppression_re(supp_list)
13
+ suppression_list = (@config[:suppress] || []) + (supp_list || [])
14
+
15
+ re = '\b(' + suppression_list.compact.map{|s| s.chomp('.')}.map(&:upcase).join('|') + ')\b'
16
+ Regexp.new(re)
17
+ # bits = suppression_list.compact.map do |s|
18
+ # '\b' + s.upcase.chomp('.') + '\b'
19
+ # end
20
+ # Regexp.new(bits.join('|'))
21
+ end
22
+
10
23
  # clean up a single name component
11
24
  # * output all converted to uppercase
12
25
  # * strip leading ZZ+ or XX+ (frequently used as invalid-account prefixes)
@@ -16,10 +29,9 @@ class Namor::Namor
16
29
  # * remove punctuation
17
30
  # * squeeze whitespace & trim spaces from ends
18
31
  def scrub(name, opts = {})
19
- suppression_list = @config[:suppress] || []
20
- suppression_re = Regexp.new('(\s|^)' + (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|') + '(\s|\.|$)')
32
+ @re_cache[opts[:suppress]] ||= suppression_re(opts[:suppress])
21
33
 
22
- name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(suppression_re, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
34
+ name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(@re_cache[opts[:suppress]], '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
23
35
  end
24
36
 
25
37
  def fullscrub(name, opts = {})
data/lib/namor/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Namor
2
- VERSION = "0.4.0"
2
+ VERSION = "0.4.1"
3
3
  end
@@ -5,7 +5,7 @@ require "spec_helper"
5
5
  describe "name extract" do
6
6
  before(:all) do
7
7
  @namor = Namor::Namor.new
8
- @namor.config(:suppress => ['MD', 'dds', 'M\.D\.'])
8
+ @namor.config(:suppress => ['MD', 'dds', 'M\.D'])
9
9
  end
10
10
 
11
11
  it "should handle 2-part names without commas" do
@@ -145,10 +145,13 @@ describe "title suppression" do
145
145
  it "should only suppress isolated terms" do
146
146
  @namor.scrub("Smith, Mary RN", :suppress => ['RN']).should == 'SMITH, MARY'
147
147
  @namor.scrub("Smith, Marnie", :suppress => ['RN']).should == 'SMITH, MARNIE'
148
+
149
+ @namor.scrub("Gonzalez, Lourdes", :suppress => ['RN', 'RD', 'DO']).should == 'GONZALEZ, LOURDES'
148
150
  end
149
151
 
150
152
  it "should scrub words with periods" do
151
- @namor.scrub("Smith, Mary M.D.", :suppress => ['M.D.']).should == 'SMITH, MARY'
153
+ @namor.scrub("Smith, Mary M.D.", :suppress => ['M.D']).should == 'SMITH, MARY'
154
+ @namor.scrub("Smith, Mary M.D.", :suppress => ['RN', 'M.D.', 'DDS']).should == 'SMITH, MARY'
152
155
  end
153
156
 
154
157
  it "should scrub individual name components of punctuation and titles" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  segments:
110
110
  - 0
111
- hash: -2390200260815377507
111
+ hash: -2504802179826978420
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  none: false
114
114
  requirements:
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  version: '0'
118
118
  segments:
119
119
  - 0
120
- hash: -2390200260815377507
120
+ hash: -2504802179826978420
121
121
  requirements: []
122
122
  rubyforge_project:
123
123
  rubygems_version: 1.8.24