RubyGems - name-tamer - Versions diffs - 0.0.3 → 0.0.4 - Mend

name-tamer 0.0.3 → 0.0.4

Files changed (7)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ec6a806fda32f1cde3963a72bb194491d7f34824
-  data.tar.gz: 9adbe554327717b744cd8bb550f4c4c259f7f0a9
+  metadata.gz: 9bb03b1eb2ecf3657424b2eb6d15009143783799
+  data.tar.gz: cb539043cf2bad1f2ce258355fb5b12995078642
 SHA512:
-  metadata.gz: ad2cf9d1f5b8f45234bb36d9e28f31a157ba2d3932ee8d3b503c8ca8f2671f30882bc9af107cfc75fa9c147646e6431f0b9712b398fb931ffa38c178abd3870a
-  data.tar.gz: 2db7830058a83550a0ce4adb252621d4ff2dd3c1f9c65572cbb35ee490099f6bfc9cd58ac682e778c5eedebe126d76038e2bd36483fa42c71031a43dbf62cafc
+  metadata.gz: 5c004130e0b5cd5f6a14de3e061cf448c0c9a40081d0f76544ae9b7e0f7311661fc767f74b742b0f76ffa28c74d067584261ea04f4e50b270e62b6c0df369fc1
+  data.tar.gz: 99b9a308fc495e1c8ee25452d62f264af02ba301243cd9cb80ff9cfeeb2af024dd3da38d1f4cf79e7e6c0cbf306d62208a84cad688a50df4c7ac6f688972f567

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    name-tamer (0.0.2)
+    name-tamer (0.0.3)
 GEM
   remote: https://rubygems.org/

data/README.md CHANGED

@@ -28,14 +28,42 @@ Examples:
 NameTamer['Mr. John Q. Smith III, MD'].simple_name # => John Smith
 ```
+Or you can create an instance if you need several versions of the name
 ```ruby
-name_tamer = NameTamer['Mr. John Q. Smith III, MD']
+name_tamer = NameTamer.new 'Mr. John Q. Smith III, MD'
 name_tamer.slug # => john-smith
 name_tamer.nice_name # => John Q. Smith
+name_tamer.contact_type # => :person
+```
+NameTamer will make an intelligent guess at the type of the name but it's not infallible. NameTamer likes it if you tell it whether the name is a person or an organization:
+```ruby
+name_tamer = NameTamer.new 'Di Doo Doo d.o.o.', contact_type: :organization
+name_tamer.simple_name # => Di Doo Doo
 ```
 ## Contributing
+There must be lots of name suffixes and prefixes that I haven't catered for, so please get in touch if `name-tamer` doesn't recognise one that you've found.
+If there are any other common two-word family names that I've missed then please let me know. `name-tamer` tries to make sure Helena Bonham Carter gets slugified to `helena-bonham-carter` and not `helena-carter`, but I'm sure there are loads of two-word family names I don't know about.
+Please read all the following articles before contributing:
+* [Personal names around the world](http://www.w3.org/International/questions/qa-personal-names)
+* [Namae (名前)](https://github.com/berkmancenter/namae)
+* [Matts Name Parser](https://github.com/mericson/people)
+* [Types of business entity](http://en.wikipedia.org/wiki/Types_of_business_entity)
+* [List of professional designations in the United States](http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA))
+* [List of post-nominal letters (United Kingdom)](http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom))
+* [Nobiliary particle](http://en.wikipedia.org/wiki/Nobiliary_particle)
+* [Spanish naming customs](http://en.wikipedia.org/wiki/Spanish_naming_customs)
+* [Unified style sheet for linguistics](http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf) [PDF]
+### How to contribute
 1.  Fork it
 1.  Create your feature branch (`git checkout -b my-new-feature`)
 1.  Commit your changes (`git commit -am 'Add some feature'`)

data/lib/name-tamer.rb CHANGED

@@ -3,7 +3,7 @@
 # References:
 # http://www.w3.org/International/questions/qa-personal-names
 # https://github.com/berkmancenter/namae
-# https://github.com/mericson
+# https://github.com/mericson/people
 # http://en.wikipedia.org/wiki/Types_of_business_entity
 # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(USA)
 # http://en.wikipedia.org/wiki/List_of_post-nominal_letters_(United_Kingdom)
@@ -12,7 +12,7 @@
 # http://linguistlist.org/pubs/tocs/JournalUnifiedStyleSheet2007.pdf [PDF]
 class NameTamer
-  attr_reader :name, :contact_type
+  attr_reader :name
   class << self
     def [](name, args = {})
@@ -21,8 +21,8 @@ class NameTamer
   end
   def nice_name
-    if @nice_name.nil?
-      @nice_name = @name.dup          # Start with the name we've received
+    unless @nice_name
+      @nice_name = name.dup          # Start with the name we've received
       tidy_spacing                    # " John   Smith " -> "John Smith"
       consolidate_initials            # "I. B. M." -> "I.B.M."
@@ -38,7 +38,7 @@ class NameTamer
   end
   def simple_name
-    if @simple_name.nil?
+    unless @simple_name
       @simple_name = nice_name.dup    # Start with nice name
       remove_initials                 # "John Q. Doe" -> "John Doe"
@@ -53,7 +53,7 @@ class NameTamer
   end
   def slug
-    if @slug.nil?
+    unless @slug
       @slug = simple_name.dup         # Start with search name
       slugify                         # "John Doe" -> "john-doe"
     end
@@ -66,25 +66,31 @@ class NameTamer
     contact_type_best_effort
   end
+  def contact_type= new_contact_type
+    ct_as_sym = new_contact_type.to_sym
+    unless @contact_type.nil? || @contact_type == ct_as_sym
+      puts "Changing contact type of #{@name} from #{@contact_type} to #{new_contact_type}"
+    end
+    @contact_type = ct_as_sym
+  end
 =begin These lines aren't used and aren't covered by specs
   def name=(new_name)
     initialize new_name, :contact_type => @contact_type
   end
-  def contact_type=(new_contact_type)
-    initialize @name, :contact_type => new_contact_type
-  end
   def to_hash
     {
-      name:         @name,
-      nice_name:    @nice_name,
-      simple_name:  @simple_name,
-      slug:         @slug,
-      contact_type: @contact_type,
-      last_name:    @last_name,
-      remainder:    @remainder,
-      adfix_found:  @adfix_found
+      name:         name,
+      nice_name:    nice_name,
+      simple_name:  simple_name,
+      slug:         slug,
+      contact_type: contact_type,
+      last_name:    last_name,
+      remainder:    remainder,
+      adfix_found:  adfix_found
     }
   end
 =end
@@ -98,7 +104,6 @@ class NameTamer
   def tidy_spacing
     @nice_name.gsub!(/,\s*/, ', ') # Ensure commas have exactly one space after them
     @nice_name.strip!              # remove leading & trailing whitespace
     @nice_name = ensure_whitespace_is_ascii_space @nice_name
   end
@@ -176,11 +181,7 @@ class NameTamer
   # Conjoin compound names with non-breaking spaces
   def use_nonbreaking_spaces_in_compound_names
     # Fix known last names that have spaces (not hyphens!)
-    [
-      'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore',
-      'Holmes à Court', 'Holmes a Court', 'Baron Cohen',
-      'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
-    ].each do |compound_name|
+    COMPOUND_NAMES.each do |compound_name|
       @nice_name.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
     end
@@ -197,10 +198,10 @@ class NameTamer
   # i.e. only remove initials if there's also a proper name there
   def remove_initials
     if @contact_type == :person
-      name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
+      temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
       # If the name still has at least one space we're OK
-      @simple_name = name if name.include?(ASCII_SPACE)
+      @simple_name = temp_name if temp_name.include?(ASCII_SPACE)
     end
   end
@@ -237,9 +238,17 @@ class NameTamer
   # Initialization and utilities
   #--------------------------------------------------------
-  def initialize(name, args = {})
-    @name         = name || ''
-    @contact_type = args[:contact_type].to_sym unless args[:contact_type].nil?
+  def initialize(new_name, args = {})
+    @name = new_name || ''
+    if args[:contact_type]
+      ct = args[:contact_type]
+      ct = ct.to_s unless [String, Symbol].include? ct.class
+      ct.downcase! if ct.class == String
+      ct = ct.to_sym
+      ct = nil unless [:person, :organization].include? ct
+      @contact_type = ct
+    end
     @nice_name    = nil
     @simple_name  = nil
@@ -251,12 +260,6 @@ class NameTamer
     @adfix_found  = false
   end
-  def set_contact_type contact_type
-    contact_type_sym = contact_type.to_sym
-    puts "Changing contact type of #{@name} from #{@contact_type} to #{contact_type}".red unless @contact_type.nil? || @contact_type == contact_type_sym
-    @contact_type = contact_type_sym
-  end
   # If we don't know the contact type, what's our best guess?
   def contact_type_best_effort
     if @contact_type
@@ -275,23 +278,23 @@ class NameTamer
   # We pass to this routine either prefixes or suffixes
   def remove_outermost_adfix adfix_type, name_part
     adfixes       = ADFIX_PATTERNS[adfix_type]
-    contact_type  = contact_type_best_effort
-    parts         = name_part.partition adfixes[contact_type]
+    ct            = contact_type_best_effort
+    parts         = name_part.partition adfixes[ct]
     @adfix_found  = !parts[1].empty?
     # If the contact type is indeterminate and we didn't find a diagnostic adfix
     # for a person then try again for an organization
     if @contact_type.nil?
       unless @adfix_found
-        contact_type  = :organization
-        parts         = name_part.partition adfixes[contact_type]
+        ct            = :organization
+        parts         = name_part.partition adfixes[ct]
         @adfix_found  = !parts[1].empty?
       end
     end
     if @adfix_found
       # If we've found a diagnostic adfix then set the contact type
-      set_contact_type contact_type
+      self.contact_type = ct
       # The remainder of the name will be in parts[0] or parts[2] depending
       # on whether this is a prefix or a suffix.
@@ -317,44 +320,44 @@ class NameTamer
   # Improved in several areas, also now adds non-breaking spaces for
   # compound names like "van der Pump"
   def name_case lowercase
-    name = lowercase # We assume the name is passed already downcased
-    name.gsub!(/\b\w/) { |first| first.upcase }
-    name.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
+    n = lowercase # We assume the name is passed already downcased
+    n.gsub!(/\b\w/) { |first| first.upcase }
+    n.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
     # Our list of terminal characters that indicate a non-celtic name used
     # to include o but we removed it because of MacMurdo.
-    if name =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ or name =~ /\bMc/
-      name.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |match| $1 + $2.capitalize }
+    if n =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ or n =~ /\bMc/
+      n.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |match| $1 + $2.capitalize }
       # Fix Mac exceptions
       [
         'MacEdo', 'MacEvicius', 'MacHado', 'MacHar', 'MacHin', 'MacHlin', 'MacIas', 'MacIulis', 'MacKie', 'MacKle',
         'MacKlin', 'MacKmin', 'MacKmurdo', 'MacQuarie', 'MacLise', 'MacKenzie'
-      ].each { |mac_name| name.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
+      ].each { |mac_name| n.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
     end
     # Fix ff wierdybonks
     [
       'Fforbes', 'Fforde', 'Ffinch', 'Ffrench', 'Ffoulkes'
-    ].each { |ff_name| name.gsub!(ff_name,ff_name.downcase) }
+    ].each { |ff_name| n.gsub!(ff_name,ff_name.downcase) }
     # Fixes for name modifiers followed by space
     # Also replaces spaces with non-breaking spaces
     NAME_MODIFIERS.each do |modifier|
-      name.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) { |match| "#{$1.rstrip.downcase}#{$2.tr(ASCII_SPACE, NONBREAKING_SPACE)}" }
+      n.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) { |match| "#{$1.rstrip.downcase}#{$2.tr(ASCII_SPACE, NONBREAKING_SPACE)}" }
     end
     # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
     ['Dell', 'D'].each do |modifier|
-      name.gsub!(/(.#{modifier}')(\w)/) { |match| "#{$1.rstrip.downcase}#{$2}" }
+      n.gsub!(/(.#{modifier}')(\w)/) { |match| "#{$1.rstrip.downcase}#{$2}" }
     end
     # Upcase words with no vowels, e.g JPR Williams
-    name.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |match| $1.upcase }
+    n.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |match| $1.upcase }
     # Except Ng
-    name.gsub!(/\b(NG)\b/i) { |match| $1.capitalize } # http://en.wikipedia.org/wiki/Ng
+    n.gsub!(/\b(NG)\b/i) { |match| $1.capitalize } # http://en.wikipedia.org/wiki/Ng
-    name
+    n
   end
   def parameterize string, args = {}
@@ -432,9 +435,14 @@ class NameTamer
   FILTER_RFC3987  = /[^#{ISEGMENT_NZ_NC}]/
   FILTER_COMPAT   = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
-  NAME_MODIFIERS    = [
-    'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lr]', 'D[ao]s', 'El', 'La', 'L[eo]',
-    'V[ao]n', 'Of', 'St[\.]?'
+  NAME_MODIFIERS  = [
+    'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lr]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'St[\.]?'
+  ]
+  COMPOUND_NAMES  = [
+    'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
+    'Baron Cohen', 'Strang Steel',
+    'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
   ]
   # These are the prefixes and suffixes we want to remove
@@ -497,10 +505,10 @@ class NameTamer
     patterns  = {}
     adfix     = ADFIXES[adfix_type]
-    [:person, :organization].each do |contact_type|
-      with_optional_spaces    = adfix[contact_type].map { |p| p.gsub(ASCII_SPACE,' *') }
+    [:person, :organization].each do |ct|
+      with_optional_spaces    = adfix[ct].map { |p| p.gsub(ASCII_SPACE,' *') }
       pattern_string          = with_optional_spaces.join('|').gsub('.', '\.*')
-      patterns[contact_type]  = /#{adfix[:before]}\(*(?:#{pattern_string})\)*#{adfix[:after]}/i
+      patterns[ct]  = /#{adfix[:before]}\(*(?:#{pattern_string})\)*#{adfix[:after]}/i
     end
     ADFIX_PATTERNS[adfix_type] = patterns

data/lib/name-tamer/version.rb CHANGED

@@ -1,3 +1,3 @@
 class NameTamer
-  VERSION = "0.0.3"
+  VERSION = "0.0.4"
 end

data/spec/name_tamer_spec.rb CHANGED

@@ -119,7 +119,12 @@ describe NameTamer do
       { n:'محمود ياسر', t: :organization, nn:'محمود ياسر', sn:'محمود ياسر', s:'محمود-ياسر' },
       { n:'קובי ביטר', t: :organization, nn:'קובי ביטר', sn:'קובי ביטר', s:'קובי-ביטר' },
       { n:'الملاك الحارس', t: :organization, nn:'الملاك الحارس', sn:'الملاك الحارس', s:'الملاك-الحارس' },
-      { n:'কবির হাসান', t: :organization, nn:'কবির হাসান', sn:'কবির হাসান', s:'কবির-হাসান' }
+      { n:'কবির হাসান', t: :organization, nn:'কবির হাসান', sn:'কবির হাসান', s:'কবির-হাসান' },
+      { nn: '', sn: '', s: '' },
+      { n:'Union Square Ventures', t: 'Organization', nn:'Union Square Ventures', sn:'Union Square Ventures', s:'union-square-ventures' },
+      { n:'John Smith', t: 'Person', nn:'John Smith', sn:'John Smith', s:'john-smith' },
+      { n:'John Smith', t: :nonsense, nn:'John Smith', sn:'John Smith', s:'john-smith' },
+      { n:'John Smith', t: Kernel, nn:'John Smith', sn:'John Smith', s:'john-smith' },
     ]
   end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: name-tamer
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - Xenapto
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-05-31 00:00:00.000000000 Z
+date: 2014-06-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler