namor 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Gems are fetched from the public RubyGems index.
source "https://rubygems.org"

# The dependency list lives in namor.gemspec; `gemspec` pulls it in from there.
gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Jason May
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # Namor
2
+
3
+ Namor munges English personal names: it parses a name and extracts its normalized first, middle, and last name pieces.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'namor'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install namor
18
+
19
+ ## Usage
20
+
21
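+ Call `Namor.extract(name)` to get `[first, middle, last, "LAST,FIRST MIDDLE"]`; for example, `Namor.extract("John R. Smith")` returns `["JOHN", "R", "SMITH", "SMITH,JOHN R"]`.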
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,50 @@
1
+ require "namor/version"
2
+
3
# Helpers for splitting personal names into normalized components.
module Namor
  # Breaks a personal name into upper-cased components.
  #
  # The input is upper-cased and then cleaned, in this order:
  #
  # 1. caller-supplied suppression terms (whole words),
  # 2. the built-in suffixes MD, JR, SR, IV and any stand-alone run of the
  #    letter I (so "III" goes, but so does a lone initial "I"),
  # 3. parenthesised text,
  # 4. the characters _ . ' and -,
  # 5. one trailing comma,
  # 6. repeated spaces and leading/trailing whitespace.
  #
  # If a comma remains, the text is read as "LAST, FIRST [MIDDLE ...]": spaces
  # inside the last name are dropped, and every word after the first name is
  # concatenated into the middle name.  Otherwise it is read as
  # "FIRST [M] LAST ...": a single-letter second word is taken as a middle
  # initial only when more words follow it, and all remaining words are
  # concatenated into the last name.
  #
  # name - String to parse.
  # args - Hash of options:
  #        :suppress - a term or an Array of terms to delete as whole words.
  #                    Terms are interpolated into a regular expression
  #                    without escaping.
  #
  # Returns [] when name is nil.  Otherwise returns
  # [firstname, middlename, lastname, fullname], where any component that is
  # missing is nil and fullname is the last name and "FIRST MIDDLE" joined by a
  # comma (a missing last name is left out; a missing first/middle part leaves
  # a trailing comma after the last name).  Input that cleans down to nothing
  # yields [nil, nil, nil, ""].
  def self.extract(name, args = {})
    return [] if name.nil?

    cleaned = name.upcase

    # Array() lets callers pass nil, one term, or a list of terms.  With no
    # terms the pattern would be /\b()\b/, which only matches empty strings,
    # so the substitution is skipped.
    suppression_list = Array(args[:suppress])
    unless suppression_list.empty?
      suppression_re = suppression_list.join('|')
      cleaned = cleaned.gsub(/\b(#{suppression_re})\b/i, '')
    end

    cleaned = cleaned.gsub(/\b(MD|JR|SR|I+|IV)\b/i, '') # built-in titles and suffixes
    cleaned = cleaned.gsub(/\([^\(]*\)/, '')            # parenthesised nicknames etc.
    cleaned = cleaned.gsub(/[_.'-]/, '')                # punctuation inside names
    cleaned = cleaned.gsub(/,\s*$/, '')                 # comma left dangling by the removals
    cleaned = cleaned.gsub(/ +/, ' ').strip

    middlename = nil

    if cleaned =~ /,/
      # "last, first[ middle]"
      lastname, firstname = cleaned.split(/\s*,\s*/)
      # split can return fewer than two pieces (e.g. "SMITH," or ","), so
      # either side may be nil here.
      lastname = lastname.to_s.delete(' ')
      if firstname && firstname =~ / /
        pieces = firstname.split(/ +/)
        firstname = pieces.shift
        middlename = pieces.join if pieces.any?
      end
    else
      # "first [middle ]last"
      pieces = cleaned.split(' ')
      firstname = pieces.shift # nil when nothing survived the clean-up
      if pieces.count > 1 && pieces.first.length == 1
        # assume this is a middle initial
        middlename = pieces.shift
      end

      lastname = pieces.join
    end

    # Normalise missing and empty components to nil; to_s guards against nil.
    firstname = nil if firstname.to_s.empty?
    middlename = nil if middlename.to_s.empty?
    lastname = nil if lastname.to_s.empty?

    fm = [firstname, middlename].compact.join(' ')
    fullname = [lastname, fm].compact.join(',')

    [firstname, middlename, lastname, fullname]
  end

  # Same as extract, with a cluster label appended as a fifth element.  The
  # label is the full name with every non-word character replaced by "_".
  #
  # name - String to parse.
  # args - Hash of options, passed through to extract.
  #
  # Returns [] when extract returns [], otherwise the five-element Array.
  def self.extract_with_cluster(name, args = {})
    ary = extract(name, args)
    return [] if ary.empty?
    ary << ary.last.gsub(/\W/, '_')
  end
end
@@ -0,0 +1,3 @@
1
module Namor
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/namor/version', __FILE__)
3
+
4
# Gem specification for namor.  The version comes from Namor::VERSION, which
# the require at the top of this file loads.
Gem::Specification.new do |gem|
  gem.name          = "namor"
  gem.version       = Namor::VERSION
  gem.authors       = ["Jason May"]
  gem.email         = ["jmay@pobox.com"]
  gem.description   = %q{Munging English names}
  gem.summary       = %q{Parse & extract pieces of names}
  gem.homepage      = ""
  # Matches the MIT text in the LICENSE file shipped with the gem.
  gem.license       = "MIT"

  # Split on newlines only.  `$\` (the output record separator) is nil by
  # default, which makes String#split break on any whitespace and so mangles
  # paths that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rake", "~> 0.9.2"
  gem.add_development_dependency "rspec", "~> 2.9.0"
  gem.add_development_dependency "guard-rspec", "~> 0.7.0"
  gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
end
@@ -0,0 +1,63 @@
1
+ # spec for name component extraction
2
+
3
+ require "spec_helper"
4
+
5
# Examples for Namor.extract, which returns
# [firstname, middlename, lastname, fullname].
describe "name extract" do
  it "should handle 2-part names without commas" do
    Namor.extract("john smith").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
  end

  it "should handle 2-part names with commas" do
    Namor.extract("SMITH, JOHN").should == ['JOHN', nil, 'SMITH', 'SMITH,JOHN']
  end

  it "should handle 2-part names with commas and middle initials" do
    Namor.extract("SMITH, JOHN R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
  end

  it "should strip elements within parentheses" do
    Namor.extract("SMITH, JOHN (Jacko) R").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
  end

  it "should drop periods" do
    Namor.extract("John R. Smith").should == ['JOHN', 'R', 'SMITH', 'SMITH,JOHN R']
  end

  it "should drop spaces in last name (only when input has a comma)" do
    Namor.extract("Smith Jones, Mary").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
  end

  it "should drop dashes & apostrophes" do
    Namor.extract("Mary Smith-Jones").should == ['MARY', nil, 'SMITHJONES', 'SMITHJONES,MARY']
    Namor.extract("Mary S. O'Keefe").should == ['MARY', 'S', 'OKEEFE', 'OKEEFE,MARY S']
    Namor.extract("Jean-Michel Claude").should == ['JEANMICHEL', nil, 'CLAUDE', 'CLAUDE,JEANMICHEL']
  end

  it "should concatenate extract name pieces" do
    Namor.extract("rajesh kumar vishnu garuda").should == ['RAJESH', nil, 'KUMARVISHNUGARUDA', 'KUMARVISHNUGARUDA,RAJESH']
    Namor.extract("Kumar, Rajesh Vishnu Garuda").should == ['RAJESH', 'VISHNUGARUDA', 'KUMAR', 'KUMAR,RAJESH VISHNUGARUDA']
  end

  it "should excise suffixes like 'Jr.' from lastnames" do
    Namor.extract("Smith Jr, Edward M").should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
  end

  it "should excise terms from optional suppression list" do
    Namor.extract("Smith Jr, Edward M MD DDS", :suppress => ['MD', 'DDS']).should == ['EDWARD', 'M', 'SMITH', 'SMITH,EDWARD M']
    # PHD is not in the suppression list, so it survives as the middle name.
    Namor.extract("Smith Jr, Edward III MD PHD", :suppress => ['MD', 'DDS']).should == ['EDWARD', 'PHD', 'SMITH', 'SMITH,EDWARD PHD']
  end

  it "should handle pathological cases" do
    # Nothing before the comma: the last name is nil and both words are read
    # as first and middle names.
    Namor.extract(", Mary Smith").should == ['MARY', 'SMITH', nil, 'MARY SMITH']
  end
end
58
+
59
# Example for Namor.extract_with_cluster; the cluster label is the last
# element of the returned array.
describe "with cluster coding" do
  it "should generate cluster labels" do
    result = Namor.extract_with_cluster("Smith Jr, Edward III MD PHD", :suppress => ['MD', 'DDS'])
    result.last.should == 'SMITH_EDWARD_PHD'
  end
end
@@ -0,0 +1,35 @@
1
+ # require File.expand_path("../../config/environment", __FILE__)
2
+ # require 'rspec/rails'
3
+ require 'rspec/autorun'
4
+ # require 'capybara/rspec'
5
+ require "tempfile"
6
+
7
+ # Requires supporting ruby files with custom matchers and macros, etc,
8
+ # in spec/support/ and its subdirectories.
9
+ # Dir[Rails.root.join("spec/support/**/*.rb")].each {|f| require f}
10
+
11
+ require File.expand_path('../../lib/namor', __FILE__)
12
+
13
# Suite-wide RSpec settings.  Nothing is overridden at present, so RSpec's
# defaults apply.
RSpec.configure do |config|
  # To use mocha, flexmock or RR for mocking, enable the matching line:
  #
  # config.mock_with :mocha
  # config.mock_with :flexmock
  # config.mock_with :rr
end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: namor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jason May
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-08 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: &2152651100 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 0.9.2
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *2152651100
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &2152661440 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 2.9.0
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2152661440
36
+ - !ruby/object:Gem::Dependency
37
+ name: guard-rspec
38
+ requirement: &2152659940 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: 0.7.0
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2152659940
47
+ - !ruby/object:Gem::Dependency
48
+ name: ruby_gntp
49
+ requirement: &2152658740 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 0.3.4
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2152658740
58
+ description: Munging English names
59
+ email:
60
+ - jmay@pobox.com
61
+ executables: []
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - .gitignore
66
+ - Gemfile
67
+ - LICENSE
68
+ - README.md
69
+ - Rakefile
70
+ - lib/namor.rb
71
+ - lib/namor/version.rb
72
+ - namor.gemspec
73
+ - spec/lib/namor_spec.rb
74
+ - spec/spec_helper.rb
75
+ homepage: ''
76
+ licenses: []
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ! '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ! '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 1.8.10
96
+ signing_key:
97
+ specification_version: 3
98
+ summary: Parse & extract pieces of names
99
+ test_files:
100
+ - spec/lib/namor_spec.rb
101
+ - spec/spec_helper.rb
102
+ has_rdoc: