namor 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/namor/namor.rb CHANGED
@@ -1,12 +1,25 @@
1
1
  class Namor::Namor
2
2
  def initialize(opts = {})
3
3
  config(opts)
4
+ @re_cache = {}
4
5
  end
5
6
 
6
7
  def config(opts)
7
8
  @config = opts
8
9
  end
9
10
 
11
+
12
+ def suppression_re(supp_list)
13
+ suppression_list = (@config[:suppress] || []) + (supp_list || [])
14
+
15
+ re = '\b(' + suppression_list.compact.map{|s| s.chomp('.')}.map(&:upcase).join('|') + ')\b'
16
+ Regexp.new(re)
17
+ # bits = suppression_list.compact.map do |s|
18
+ # '\b' + s.upcase.chomp('.') + '\b'
19
+ # end
20
+ # Regexp.new(bits.join('|'))
21
+ end
22
+
10
23
  # clean up a single name component
11
24
  # * convert the output to uppercase
12
25
  # * strip a leading run of two or more Z/X characters, e.g. ZZ or XXX (frequently used as invalid-account prefixes)
@@ -16,10 +29,9 @@ class Namor::Namor
16
29
  # * remove punctuation
17
30
  # * squeeze whitespace & trim spaces from ends
18
31
  def scrub(name, opts = {})
19
- suppression_list = @config[:suppress] || []
20
- suppression_re = Regexp.new('(\s|^)' + (suppression_list + (opts[:suppress]||[])).compact.map(&:upcase).join('|') + '(\s|\.|$)')
32
+ @re_cache[opts[:suppress]] ||= suppression_re(opts[:suppress])
21
33
 
22
- name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(suppression_re, '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
34
+ name && name.upcase.gsub(/^[ZX]{2,}/, '').gsub(@re_cache[opts[:suppress]], '').gsub(/\b(JR|SR|II|III|IV)\b/i, '').gsub(/\([^\(]*\)/, '').gsub(/\./, ' ').gsub(/[_'\&]/, '').gsub(/,\s*$/, '').gsub(/ +/, ' ').strip
23
35
  end
24
36
 
25
37
  def fullscrub(name, opts = {})
data/lib/namor/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Namor
2
- VERSION = "0.4.0"
2
+ VERSION = "0.4.1"
3
3
  end
@@ -5,7 +5,7 @@ require "spec_helper"
5
5
  describe "name extract" do
6
6
  before(:all) do
7
7
  @namor = Namor::Namor.new
8
- @namor.config(:suppress => ['MD', 'dds', 'M\.D\.'])
8
+ @namor.config(:suppress => ['MD', 'dds', 'M\.D'])
9
9
  end
10
10
 
11
11
  it "should handle 2-part names without commas" do
@@ -145,10 +145,13 @@ describe "title suppression" do
145
145
  it "should only suppress isolated terms" do
146
146
  @namor.scrub("Smith, Mary RN", :suppress => ['RN']).should == 'SMITH, MARY'
147
147
  @namor.scrub("Smith, Marnie", :suppress => ['RN']).should == 'SMITH, MARNIE'
148
+
149
+ @namor.scrub("Gonzalez, Lourdes", :suppress => ['RN', 'RD', 'DO']).should == 'GONZALEZ, LOURDES'
148
150
  end
149
151
 
150
152
  it "should scrub words with periods" do
151
- @namor.scrub("Smith, Mary M.D.", :suppress => ['M.D.']).should == 'SMITH, MARY'
153
+ @namor.scrub("Smith, Mary M.D.", :suppress => ['M.D']).should == 'SMITH, MARY'
154
+ @namor.scrub("Smith, Mary M.D.", :suppress => ['RN', 'M.D.', 'DDS']).should == 'SMITH, MARY'
152
155
  end
153
156
 
154
157
  it "should scrub individual name components of punctuation and titles" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -108,7 +108,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  segments:
110
110
  - 0
111
- hash: -2390200260815377507
111
+ hash: -2504802179826978420
112
112
  required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  none: false
114
114
  requirements:
@@ -117,7 +117,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
117
117
  version: '0'
118
118
  segments:
119
119
  - 0
120
- hash: -2390200260815377507
120
+ hash: -2504802179826978420
121
121
  requirements: []
122
122
  rubyforge_project:
123
123
  rubygems_version: 1.8.24