RubyGems - name_parser - Versions diffs - 0.0.5 - Mend

name_parser 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

data/.gitignore +5 -0
data/.rspec +1 -0
data/.rvmrc +48 -0
data/Gemfile +4 -0
data/README.md +40 -0
data/Rakefile +8 -0
data/lib/name_parser/parser.rb +74 -0
data/lib/name_parser/patterns.rb +29 -0
data/lib/name_parser/version.rb +3 -0
data/lib/name_parser.rb +9 -0
data/name_parser.gemspec +23 -0
data/spec/name_parser/parser_spec.rb +359 -0
data/spec/name_parser_spec.rb +25 -0
data/spec/spec_helper.rb +6 -0
metadata +89 -0

data/.gitignore ADDED Viewed

@@ -0,0 +1,5 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*
+.project

data/.rspec ADDED Viewed

	@@ -0,0 +1 @@
1	+ --colour

data/.rvmrc ADDED Viewed

@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+# This is an RVM Project .rvmrc file, used to automatically load the ruby
+# development environment upon cd'ing into the directory
+# First we specify our desired <ruby>[@<gemset>], the @gemset name is optional,
+# Only full ruby name is supported here, for short names use:
+#     echo "rvm use 1.9.2" > .rvmrc
+environment_id="ruby-1.9.2-p318@name_parser"
+# Uncomment the following lines if you want to verify rvm version per project
+# rvmrc_rvm_version="1.10.3" # 1.10.1 seems as a safe start
+# eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
+#   echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
+#   return 1
+# }
+# First we attempt to load the desired environment directly from the environment
+# file. This is very fast and efficient compared to running through the entire
+# CLI and selector. If you want feedback on which environment was used then
+# insert the word 'use' after --create as this triggers verbose mode.
+# ${rvm_path:-$HOME/.rvm} falls back to ~/.rvm when rvm_path is unset or empty.
+if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
+  && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
+then
+  \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
+  [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
+    \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
+else
+  # If the environment file has not yet been created, use the RVM CLI to select.
+  rvm --create  "$environment_id" || {
+    echo "Failed to create RVM environment '${environment_id}'."
+    # NOTE(review): `return` rather than `exit` - presumably because RVM sources this file; confirm.
+    return 1
+  }
+fi
+# If you use bundler, this might be useful to you:
+# if [[ -s Gemfile ]] && {
+#   ! builtin command -v bundle >/dev/null ||
+#   builtin command -v bundle | grep $rvm_path/bin/bundle >/dev/null
+# }
+# then
+#   printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
+#   gem install bundler
+# fi
+# if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
+# then
+#   bundle install | grep -vE '^Using|Your bundle is complete'
+# fi

data/Gemfile ADDED Viewed

@@ -0,0 +1,4 @@
+# Fetch gems over TLS; the plain-http endpoint offers no transport integrity.
+source "https://rubygems.org"
+# Specify your gem's dependencies in name_parser.gemspec
+gemspec

data/README.md ADDED Viewed

@@ -0,0 +1,40 @@
+NameParser
+=========
+Does what it says. Based on Matthew Ericson's people gem: https://github.com/mericson/people which, in turn, is loosely based on
+the Lingua-EN-NameParser Perl module.
+To set up a development environment, clone the repo and run `bundle` to install all of the dependencies.
+Usage
+-----
+```ruby
+require "name_parser"
+include NameParser
+name = "Captain Arthur Two Sheds Jackson Jr."
+parser = Parser.new(name)
+parser.first  # => "Arthur"
+parser.middle # => "Two Sheds"
+parser.last   # => "Jackson"
+parser.title  # => "Captain"
+parser.suffix # => "Jr."
+```
+or using the mixin
+```ruby
+require "name_parser"
+include NameParser
+name = "Captain Arthur Two Sheds Jackson Jr."
+parser = name_parser(name) # => NameParser::Parser
+parser.first # => "Arthur"
+# ...
+```

data/Rakefile ADDED Viewed

@@ -0,0 +1,8 @@
+#!/usr/bin/env rake
+require "bundler/gem_tasks"
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new(:spec)
+task :test => :spec
+task :default => :spec

data/lib/name_parser/parser.rb ADDED Viewed

@@ -0,0 +1,74 @@
+module NameParser
+  class Parser
+    include Patterns
+    attr_reader :first, :middle, :last, :title, :suffix
+    def initialize(name)
+      @name = name.dup
+      run
+    end
+    protected
+      def run
+        remove_non_name_characters
+        remove_extra_spaces
+        clean_trailing_suffixes
+        reverse_last_and_first_names
+        remove_commas
+        parse_title
+        parse_suffix
+        parse_name
+      end
+      def remove_non_name_characters
+        @name.gsub!(/[^A-Za-z0-9\-\'\.&\/ \,]/, '')
+      end
+      def remove_extra_spaces
+        @name.gsub!(/\s+/, ' ')
+        @name.strip!
+      end
+      def clean_trailing_suffixes
+        @name.gsub!(Regexp.new("(.+), (%s)$" % SUFFIX_PATTERN, true), "\\1 \\2")
+      end
+      def reverse_last_and_first_names
+        @name.gsub!(/;/, '')
+        @name.gsub!(/(.+),(.+)/, "\\2 ;\\1")
+        @name.strip!
+      end
+      def remove_commas
+        @name.gsub!(/,/, '')
+      end
+      def parse_title
+        if match = @name.match(Regexp.new("^(%s) (.+)" % TITLE_PATTERN, true))
+          @name = match[-1]
+          @title = match[1].strip
+        end
+      end
+      def parse_suffix
+        if match = @name.match(Regexp.new("(.+) (%s)$" % SUFFIX_PATTERN, true))
+          @name = match[1].strip
+          @suffix = match[2]
+        end
+      end
+      def parse_name
+        case
+          when match = @name.match(Regexp.new('^%s%s$' % [ NAME_PATTERN, LAST_NAME_PATTERN ], true))
+            @first, @last = match.captures
+          when match = @name.match(Regexp.new('^%s%s%s%s$' % [ NAME_PATTERN, NAME_PATTERN, NAME_PATTERN, LAST_NAME_PATTERN ], true))
+            @first, *middles, @last = match.captures[0..3]
+            @middle = middles.join(' ')
+          when match = @name.match(Regexp.new('^%s%s%s$' % [ NAME_PATTERN, NAME_PATTERN, LAST_NAME_PATTERN ], true))
+            @first, @middle, @last = match.captures
+        end
+      end
+  end
+end

data/lib/name_parser/patterns.rb ADDED Viewed

@@ -0,0 +1,29 @@
+module NameParser
+  module Patterns
+    NAME_PATTERN = "([\\w\\-\\']+)[\.{1,}\\s|\\s]+"
+    LAST_NAME_PATTERN = "\;?([\\w\\-\\']+|(Mc|Mac|Des|Dell[ae]|Del|De La|De Los|Da|Di|Du|La|Le\
+    |Lo|St\\.|Den|Von|Van|Von Der|Van De[nr])?\\s+([\\w]+))"
+    SUFFIX_PATTERN = "Jn?r\.?,? Esq\.?|Sn?r\.?,? Esq\.?|I{1,3},? Esq\.?|Jn?r\.?,? M\.?D\.?|Sn?r\.?,? M\.?D\.?|\
+    I{1,3},? M\.?D\.?|Sn?r\.?|Jn?r\.?|Esq(\.|uire)?|Esquire.|Attorney at Law.|Attorney-at-Law.|Ph\.?d\.?|C\.?P\.?A\.?|\
+    XI{1,3}|X|IV|VI{1,3}|V|IX|I{1,3}\.?|M\.?D\.?|D.?M\.?D\.?"
+    STANDARD = "M(ister|aster|issus|iss|r\\.?|rs\\.?|s\\.?|mme\\.?|essr\\.?)"
+    ROYALTY = "Sir|Lord|Lady|Madam(e)?|Dame|Duke|Duchess|King|Queen|Prince|Princess"
+    MEDICINE = "D(r\\.?|octor)|Sister|Matron"
+    LEGAL = "Judge|Justice|Att(\\.|orney) Gen(\\.|eral)"
+    POLICE = "Det(\\.|ective) Insp(\\.|ector)|Det(\\.|ective)|Insp(\\.|ector)|Chief|Constable|Officer"
+    MILITARY = "Brig(adier)?|Capt(\\.?|ain)|C(dr\\.?|ommander|ommodore)|Col(\\.?|onel)|\
+    Gen(\\.?|eral)|Field Marshall|Fl(\\.?|ight) Off(\\.?|icer)|Fl(t\\.?|ight) L(t\\.?|ieutenant)|\
+    P(te\\.?|rivate)|S(gt\\.?|argent)|Air (Commander|Commodore| Marshall)|L(t\\.?|ieutenant) (Col(\\.?|onel)|\
+    Gen(\\.?|eral)|C(Cdr\\.?|ommander))|L(t\\.?|eut\\.?|ieutenant|eutenant)|Maj(\\.?|or) Gen(\\.?|eral)|Maj(\\.?|or)"
+    RELIGIOUS = "Rabbi|Brother|Father|Chaplain|Pastor|(Archb|B)ishop|Cardinal|Pope|\
+    Mother( Superior)?|(Most|Mt\\.|Very|V.) Re(v\\.?|vd\\.?|ver[e|a]nd)|Re(v\\.?|vd\\.?|er[e|a]nd)"
+    POLITICIAN = "Mayor|Sen(\\.|ator)?|Rep(\\.|resentative)?|Ald(\\.|erman)?|Pres(\\.|ident)?|\
+    Ambassador|Assembly(woman|man)|Chair(woman|man)|Commissioner|Congress(woman|man)|Council(wo)man|\
+    Counselor|Delegate|(Lieutentant )Governor|Postmaster( General)"
+    EDUCATOR = "Dean|President|Ass(\\.|oc\\.|ociate|t\\.|istant) Prof(\\.|essor)|Prof(\\.|essor)"
+    TITLE_PATTERN = [ STANDARD, ROYALTY, MEDICINE, LEGAL, POLICE, MILITARY, RELIGIOUS, POLITICIAN, EDUCATOR ].join("|")
+ end
+end

data/lib/name_parser/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module NameParser
+  VERSION = "0.0.5"
+end

data/lib/name_parser.rb ADDED Viewed

@@ -0,0 +1,9 @@
+module NameParser
+  autoload :Version, 'name_parser/version'
+  autoload :Patterns,'name_parser/patterns'
+  autoload :Parser,  'name_parser/parser'
+  def name_parser(name)
+    Parser.new(name)
+  end
+end

data/name_parser.gemspec ADDED Viewed

@@ -0,0 +1,23 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "name_parser/version"
+Gem::Specification.new do |s|
+  s.name        = "name_parser"
+  s.version     = NameParser::VERSION
+  s.authors     = ["Chris Pallotta", "Scott Pullen", "Tom Leonard"]
+  s.email       = ["ChristopherF_Pallotta@dfci.harvard.edu", "ScottT_Pullen@dfci.harvard.edu", "Thomas_Leonard@dfci.harvard.edu"]
+  s.homepage    = ""
+  s.summary     = %q{Parses strings.}
+  s.description = %q{Parses particular kinds of strings. For now, it only handles parsing people names.}
+  s.rubyforge_project = "name_parser"
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+  s.add_development_dependency 'rspec'
+  s.add_development_dependency 'debugger'
+end

data/spec/name_parser/parser_spec.rb ADDED Viewed

@@ -0,0 +1,359 @@
+require 'spec_helper'
+include NameParser
+Parser.send(:public, *Parser.protected_instance_methods)
+describe Parser do
+  let(:name) { 'Horatio Xavier Hornblower' }
+  let!(:parser) { Parser.new(name) }
+  [:name, :first, :middle, :last, :title, :suffix ].each do |attr|
+    describe "#{attr} attribute" do
+      it 'is read only' do
+        parser.methods.should_not include(":#{attr}=".to_sym)
+      end
+    end
+  end
+  describe 'name attribute' do
+    it 'is set on initialize' do
+      get_name.should == name
+    end
+  end
+  describe '#remove_non_name_characters' do
+    it 'only allows alpha-numerics, dashes, backslashes, apostrophes and ampersands' do
+      set_name("aZ1/&'`!@$#%^*()_+=[]{}|\:;""")
+      parser.remove_non_name_characters
+      get_name.should == "aZ1/&'"
+    end
+  end
+  describe '#remove_extra_spaces' do
+    it 'removes leading spaces, tabs and line breaks' do
+      set_name(" \t\nFoo")
+      parser.remove_extra_spaces
+      get_name.should == 'Foo'
+    end
+    it 'removes trailing spaces, tabs and line breaks' do
+      set_name("Foo \t\n")
+      parser.remove_extra_spaces
+      get_name.should == 'Foo'
+    end
+    it 'replaces repeating spaces, tabs and line breaks with a single space' do
+      set_name("  Foo  \t\nBar  ")
+      parser.remove_extra_spaces
+      get_name.should == 'Foo Bar'
+    end
+  end
+  describe '#clean_trailing_suffixes' do
+    it 'removes trailing suffixes' do
+      set_name('Biggie Smalls, Junior, Esquire, Phd., VII')
+      parser.clean_trailing_suffixes
+      get_name.should == 'Biggie Smalls, Junior, Esquire, Phd. VII'
+    end
+  end
+  describe '#reverse_last_and_first_names' do
+    it 'reorders last and first names if comma is present' do
+      set_name('Smith, Johnny')
+      parser.reverse_last_and_first_names
+      get_name.should == 'Johnny ;Smith'
+    end
+  end
+  describe '#remove_commas' do
+    it 'removes all commas' do
+      set_name('Hounddog ;Taylor,')
+      parser.remove_commas
+      get_name.should == 'Hounddog ;Taylor'
+    end
+  end
+  describe '#parse_title' do
+    context 'when a title is found' do
+      before { set_name('Colonel Henry Potter') }
+      it 'sets title attribute' do
+        parser.parse_title
+        parser.title.should == 'Colonel'
+      end
+      it 'removes the title from name' do
+        parser.parse_title
+        get_name.should == 'Henry Potter'
+      end
+    end
+    context 'when a title is not found' do
+      it 'returns nil' do
+        set_name('Frank Burns')
+        parser.parse_title
+        parser.title.should be_nil
+      end
+    end
+  end
+  describe '#parse_suffix' do
+    context 'when a suffix is found' do
+      before { set_name('Bubba Watson Jr.') }
+      it 'returns the suffix' do
+        parser.parse_suffix
+        parser.suffix.should == 'Jr.'
+      end
+      it 'removes the suffix from name' do
+        parser.parse_suffix
+        get_name.should == 'Bubba Watson'
+      end
+    end
+    context 'when a suffix is not found' do
+       it 'returns nil' do
+         set_name('Bubba Watson')
+         parser.parse_suffix
+         parser.suffix.should be_nil
+       end
+    end
+  end
+  describe '#parse_name' do
+    context 'when first initial and last name' do
+      before do
+        set_name('J Tolkien')
+        parser.parse_name
+      end
+      it 'returns first initial' do
+        parser.first.should == 'J'
+      end
+      it 'returns nil middle name' do
+        parser.middle.should be_nil
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first initial, middle initial and last name' do
+      before do
+        set_name('J R Tolkien')
+        parser.parse_name
+      end
+      it 'returns first initial' do
+        parser.first.should == 'J'
+      end
+      it 'returns middle initial' do
+        parser.middle.should == 'R'
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first initial dot middle initial dot last name' do
+      before do
+        set_name('J. R. Tolkien')
+        parser.parse_name
+      end
+      it 'returns first initial' do
+        parser.first.should == 'J'
+      end
+      it 'returns middle initial' do
+        parser.middle.should == 'R'
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first initial, two middle initials and last name' do
+      before do
+        set_name('J R R Tolkien')
+         parser.parse_name
+      end
+      it 'returns first initial' do
+        parser.first.should == 'J'
+      end
+      it 'returns both middle initials' do
+        parser.middle.should == 'R R'
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first initial, middle name and last name' do
+      before do
+        set_name('J Ronald Tolkien')
+        parser.parse_name
+      end
+      it 'returns first initial' do
+        parser.first.should == 'J'
+      end
+      it 'returns middle name' do
+        parser.middle.should == 'Ronald'
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first name, middle initial and last name' do
+      before do
+        set_name('John R Tolkien')
+        parser.parse_name
+      end
+      it 'returns first name' do
+        parser.first.should == 'John'
+      end
+      it 'returns middle initial' do
+        parser.middle.should == 'R'
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first name, two middle initials and last name' do
+      before do
+        set_name('John R R Tolkien')
+        parser.parse_name
+      end
+      it 'returns first name' do
+        parser.first.should == 'John'
+      end
+      it 'returns middle name' do
+        parser.middle.should == 'R R'
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first name, two middle initials with dots and last name' do
+      before do
+        set_name('John R. R. Tolkien')
+        parser.parse_name
+      end
+      it 'returns first name' do
+        parser.first.should == 'John'
+      end
+      it 'returns middle name' do
+        parser.middle.should == 'R R'
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first name and last name' do
+      before do
+        set_name('John Tolkien')
+        parser.parse_name
+      end
+      it 'returns first name' do
+        parser.first.should == 'John'
+      end
+      it 'returns nil middle name' do
+        parser.middle.should be_nil
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when first name, middle name and last name' do
+      before do
+        set_name('John Ronald Tolkien')
+        parser.parse_name
+      end
+      it 'returns first name' do
+        parser.first.should == 'John'
+      end
+      it 'returns  middle name' do
+        parser.middle.should == 'Ronald'
+      end
+      it 'returns last name' do
+        parser.last.should == 'Tolkien'
+      end
+    end
+    context 'when last name is hyphenated' do
+      it 'returns last name' do
+        set_name('John R. Tolkien-Smith')
+        parser.parse_name
+        parser.last.should == 'Tolkien-Smith'
+      end
+    end
+    context 'when last name is preceded by a semicolon' do
+      it 'returns last name' do
+        set_name('J R R ;Tolkien')
+        parser.parse_name
+        parser.last.should == 'Tolkien'
+      end
+    end
+  end
+  def set_name(name)
+    parser.instance_variable_set(:@first, nil)
+    parser.instance_variable_set(:@middle, nil)
+    parser.instance_variable_set(:@last, nil)
+    parser.instance_variable_set(:@title, nil)
+    parser.instance_variable_set(:@suffix, nil)
+    parser.instance_variable_set(:@name, name)
+  end
+  def get_name
+    parser.instance_variable_get(:@name)
+  end
+end

data/spec/name_parser_spec.rb ADDED Viewed

@@ -0,0 +1,25 @@
+require 'spec_helper'
+class TestClass
+  include NameParser
+end
+describe NameParser do
+  let!(:name) { "Adams Jr., Mr. John Quincy" }
+  let!(:test_class) { TestClass.new }
+  describe '#name_parser' do
+    it 'returns a new NameParser::Parser object' do
+      test_class.name_parser(name).class.should == NameParser::Parser
+    end
+    it 'should run the parser' do
+      parser = test_class.name_parser(name)
+      parser.title.should == 'Mr.'
+      parser.first.should == 'John'
+      parser.middle.should == 'Quincy'
+      parser.last.should == 'Adams'
+      parser.suffix.should == 'Jr.'
+    end
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,6 @@
+require 'rubygems'
+require 'rspec'
+require 'debugger'
+$:.push File.expand_path("../lib", __FILE__)
+require 'name_parser'

metadata ADDED Viewed

@@ -0,0 +1,89 @@
+--- !ruby/object:Gem::Specification
+name: name_parser
+version: !ruby/object:Gem::Version
+  version: 0.0.5
+  prerelease:
+platform: ruby
+authors:
+- Chris Pallotta
+- Scott Pullen
+- Tom Leonard
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-02-07 00:00:00.000000000Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: &2152901460 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *2152901460
+- !ruby/object:Gem::Dependency
+  name: debugger
+  requirement: &2152901040 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *2152901040
+description: Parses particular kinds of strings. For now, it only handles parsing
+  people names.
+email:
+- ChristopherF_Pallotta@dfci.harvard.edu
+- ScottT_Pullen@dfci.harvard.edu
+- Thomas_Leonard@dfci.harvard.edu
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- .rspec
+- .rvmrc
+- Gemfile
+- README.md
+- Rakefile
+- lib/name_parser.rb
+- lib/name_parser/parser.rb
+- lib/name_parser/patterns.rb
+- lib/name_parser/version.rb
+- name_parser.gemspec
+- spec/name_parser/parser_spec.rb
+- spec/name_parser_spec.rb
+- spec/spec_helper.rb
+homepage: ''
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project: name_parser
+rubygems_version: 1.8.17
+signing_key:
+specification_version: 3
+summary: Parses strings.
+test_files:
+- spec/name_parser/parser_spec.rb
+- spec/name_parser_spec.rb
+- spec/spec_helper.rb