RubyGems - name-tamer - Versions diffs - 0.1.3 → 0.1.4 - Mend

name-tamer 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ef4402ebc8c35fce9d01108561f09a5af085c445
-  data.tar.gz: 5ac1d2d43bac66079d8089a508bef15f22aa03bf
+  metadata.gz: efc79a2d297ca97447620a9f2cfa839667108a1d
+  data.tar.gz: 98022db00b0fccf4e7d2090d7e2883b8ae6239bd
 SHA512:
-  metadata.gz: 1ed1bcaeaf186d62442600930ebc6c30baeeb1df065b16abb7c781a41e15ab7fd101591877a1f4ac0562d48e1ed717b8a9330a5cc67b7a30a0f69f7375c03d0e
-  data.tar.gz: 4929c9a7ff6742df2a4e160ddaeb0b0a209af80996d83cef6d66a814cba8751003895354fe80ac98af7cad5f5d4e9a3f99a8a1f260b4b5c0ae9c117d27422d99
+  metadata.gz: b100a7a8944c5ab4beade888f8d17be2d7547c84857301bba3ecf78862df3445844e9228fe1dbbedf15305f47c4c849d880e871f661452b39de2ff94885e2dfe
+  data.tar.gz: aedb38fce8a533cea1c3d5d615c66bf985b57a106c91a30363b1c678d99a8e631aaa8fdae1477bd6ef78fca33b03025ea499c2ee67175e8130554e0f93b71945

data/.hound.yml ADDED Viewed

@@ -0,0 +1,17 @@
+LineLength:
+  Description: 'Limit lines to 120 characters.'
+  Max: 120
+  Enabled: true
+MethodLength:
+  Description: 'Avoid methods longer than 10 lines of code.'
+  Max: 23
+  Enabled: true
+Documentation:
+  Description: 'Document classes and non-namespace modules.'
+  Enabled: false
+FileName:
+  Description: 'Use snake_case for source file names.'
+  Enabled: false

data/.rubocop.yml ADDED Viewed

@@ -0,0 +1,10 @@
+inherit_from: .hound.yml
+CyclomaticComplexity:
+  Description: 'Avoid complex methods.'
+  Max: 8
+ClassLength:
+  Description: 'Avoid classes longer than 100 lines of code.'
+  CountComments: false  # count full line comments?
+  Max: 301

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    name-tamer (0.1.2)
+    name-tamer (0.1.3)
 GEM
   remote: https://rubygems.org/

data/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # NameTamer
-![Gem Version](http://img.shields.io/gem/v/name-tamer.svg?style=flat)&nbsp;[![Coverage Status](https://img.shields.io/coveralls/Xenapto/name-tamer.svg?style=flat)](https://coveralls.io/r/Xenapto/name-tamer?branch=master)
+![Gem Version](http://img.shields.io/gem/v/name-tamer.svg?style=flat)&nbsp;[![Code Climate](http://img.shields.io/codeclimate/github/Xenapto/name-tamer.svg?style=flat)](https://codeclimate.com/github/Xenapto/name-tamer)&nbsp;[![Coverage Status](https://img.shields.io/coveralls/Xenapto/name-tamer.svg?style=flat)](https://coveralls.io/r/Xenapto/name-tamer?branch=master)
 [![Developer status](http://img.shields.io/badge/developer-awesome-brightgreen.svg?style=flat)](http://xenapto.com)
 ![build status](https://circleci.com/gh/Xenapto/name-tamer.png?circle-token=2293f2a1d8463a948c2a2ce4bb3bd99786958c59)

data/Rakefile CHANGED Viewed

@@ -1 +1 @@
-require "bundler/gem_tasks"
+require 'bundler/gem_tasks'

data/lib/name-tamer.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # encoding: utf-8
+require 'string_extras'
 # References:
 # http://www.w3.org/International/questions/qa-personal-names
@@ -46,7 +47,7 @@ class NameTamer
       remove_dots_from_abbreviations  # "J.P.R. Williams" -> "JPR Williams"
       standardize_words               # "B&Q Intl" -> "B and Q International"
-      @simple_name = ensure_whitespace_is_ascii_space @simple_name
+      @simple_name.whitespace_to!(ASCII_SPACE)
     end
     @simple_name
@@ -66,7 +67,7 @@ class NameTamer
     contact_type_best_effort
   end
-  def contact_type= new_contact_type
+  def contact_type=(new_contact_type)
     ct_as_sym = new_contact_type.to_sym
     unless @contact_type.nil? || @contact_type == ct_as_sym
@@ -76,24 +77,23 @@ class NameTamer
     @contact_type = ct_as_sym
   end
-=begin These lines aren't used and aren't covered by specs
-  def name=(new_name)
-    initialize new_name, :contact_type => @contact_type
-  end
-  def to_hash
-    {
-      name:         name,
-      nice_name:    nice_name,
-      simple_name:  simple_name,
-      slug:         slug,
-      contact_type: contact_type,
-      last_name:    last_name,
-      remainder:    remainder,
-      adfix_found:  adfix_found
-    }
-  end
-=end
+  # These lines aren't used and aren't covered by specs
+  #   def name=(new_name)
+  #     initialize new_name, :contact_type => @contact_type
+  #   end
+  #
+  #   def to_hash
+  #     {
+  #       name:         name,
+  #       nice_name:    nice_name,
+  #       simple_name:  simple_name,
+  #       slug:         slug,
+  #       contact_type: contact_type,
+  #       last_name:    last_name,
+  #       remainder:    remainder,
+  #       adfix_found:  adfix_found
+  #     }
+  #   end
   private
@@ -102,50 +102,56 @@ class NameTamer
   #--------------------------------------------------------
   def tidy_spacing
-    @nice_name.gsub!(/,\s*/, ', ') # Ensure commas have exactly one space after them
-    @nice_name.strip!              # remove leading & trailing whitespace
-    @nice_name = ensure_whitespace_is_ascii_space @nice_name
+    @nice_name
+      .space_after_comma!
+      .strip_or_self!
+      .whitespace_to!(ASCII_SPACE)
   end
   # Remove spaces from groups of initials
   def consolidate_initials
-    @nice_name.gsub!(/\b([a-z])\.* (?=[a-z][\. ])/i) { |match| "#{$1}." }   # Remove spaces from initial groups
-    @nice_name.gsub!(/\b([a-z](?:\.[a-z])+)\.?(?= )/i) { |match| "#{$1}." } # Ensure each group ends with a dot
+    @nice_name
+      .remove_spaces_from_initials!
+      .ensure_space_after_initials!
   end
   # An adfix is either a prefix or a suffix
   def remove_adfixes
     if @last_name.nil?
       # Our name is still in one part, not two
-      begin
+      loop do
         @nice_name = remove_outermost_adfix(:suffix, @nice_name)
-      end while @adfix_found
+        break unless @adfix_found
+      end
-      begin
+      loop do
         @nice_name = remove_outermost_adfix(:prefix, @nice_name)
-      end while @adfix_found
+        break unless @adfix_found
+      end
     else
       # Our name is currently in two halves
-      begin
+      loop do
         @last_name = remove_outermost_adfix(:suffix, @last_name)
-      end while @adfix_found
+        break unless @adfix_found
+      end
-      begin
+      loop do
         @remainder = remove_outermost_adfix(:prefix, @remainder)
-      end while @adfix_found
+        break unless @adfix_found
+      end
     end
   end
   # Names in the form "Smith, John" need to be turned around to "John Smith"
   def fixup_last_name_first
-    unless @contact_type == :organization
-      parts = @nice_name.split ', '
+    return if @contact_type == :organization
-      if parts.count == 2
-        @last_name    = parts[0] # Sometimes the last name alone is all caps and we can name-case it
-        @remainder    = parts[1]
-      end
-    end
+    parts = @nice_name.split ', '
+    return unless parts.count == 2
+    @last_name    = parts[0] # Sometimes the last name alone is all caps and we can name-case it
+    @remainder    = parts[1]
   end
   # Sometimes we end up with mismatched braces after adfix stripping
@@ -168,7 +174,8 @@ class NameTamer
       uppercase = @nice_name.upcase
       # Some companies like to be all lowercase so don't mess with them
-      @nice_name  = name_case(lowercase) if @nice_name == uppercase || ( @nice_name == lowercase && @contact_type != :organization )
+      @nice_name  = name_case(lowercase)  if @nice_name == uppercase ||
+                                           ( @nice_name == lowercase && @contact_type != :organization)
     else
       lowercase = @last_name.downcase
       uppercase = @last_name.upcase
@@ -180,14 +187,9 @@ class NameTamer
   # Conjoin compound names with non-breaking spaces
   def use_nonbreaking_spaces_in_compound_names
-    # Fix known last names that have spaces (not hyphens!)
-    COMPOUND_NAMES.each do |compound_name|
-      @nice_name.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
-    end
-    NAME_MODIFIERS.each do |modifier|
-      @nice_name.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |match| "#{$1}#{NONBREAKING_SPACE}" }
-    end
+    @nice_name
+      .nbsp_in_compound_name!
+      .nbsp_in_name_modifier!
   end
   #--------------------------------------------------------
@@ -197,48 +199,45 @@ class NameTamer
   # Remove initials from personal names unless they are the only identifier.
   # i.e. only remove initials if there's also a proper name there
   def remove_initials
-    if @contact_type == :person
-      temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
+    return unless @contact_type == :person
-      # If the name still has at least one space we're OK
-      @simple_name = temp_name if temp_name.include?(ASCII_SPACE)
-    end
+    temp_name = @simple_name.gsub(/\b([a-z](?:\.*\s+|\.))/i, '')
+    # If the name still has at least one space we're OK
+    @simple_name = temp_name if temp_name.include?(ASCII_SPACE)
   end
   def remove_middle_names
-    if @contact_type == :person
-      parts       = @simple_name.split
-      first_name  = nil
-      last_name   = nil
-      # Find first usable name
-      parts.each_index do |i|
-        part = parts[i]
-        unless part.gsub(FILTER_COMPAT, '').empty?
-          first_name  = part
-          parts       = parts.slice(i + 1, parts.length) # don't use "slice!"
-          break
-        end
-      end
-      # Find last usable name
-      parts.reverse_each do |part|
-        unless part.gsub(FILTER_COMPAT, '').empty?
-          last_name = part
-          break
-        end
-      end
+    return unless @contact_type == :person
+    parts       = @simple_name.split
+    first_name  = nil
+    last_name   = nil
+    # Find first usable name
+    parts.each_index do |i|
+      part = parts[i]
+      next if part.gsub(FILTER_COMPAT, '').empty?
+      first_name  = part
+      parts       = parts.slice(i + 1, parts.length) # don't use "slice!"
+      break
+    end
-      if first_name || last_name
-        separator     = first_name && last_name ? ' ' : ''
-        @simple_name  = "#{first_name}#{separator}#{last_name}"
-      end
+    # Find last usable name
+    parts.reverse_each do |part|
+      next if part.gsub(FILTER_COMPAT, '').empty?
+      last_name = part
+      break
     end
+    return unless first_name || last_name
+    separator     = first_name && last_name ? ' ' : ''
+    @simple_name  = "#{first_name}#{separator}#{last_name}"
   end
   def remove_dots_from_abbreviations
-    @simple_name.gsub!(/\b([a-z])\./i) { |match| $1 }
+    @simple_name.gsub!(/\b([a-z])\./i) { |_match| Regexp.last_match[1] }
   end
   def standardize_words
@@ -253,7 +252,7 @@ class NameTamer
   def slugify
     # Inflector::parameterize just gives up with non-latin characters so...
-    #@slug = @slug.parameterize # Can't use this
+    # @slug = @slug.parameterize # Can't use this
     # Instead we'll do it ourselves
     @slug = parameterize @slug
@@ -296,12 +295,8 @@ class NameTamer
     end
   end
-  def ensure_whitespace_is_ascii_space string
-    string.gsub(/[[:space:]]+/, ASCII_SPACE) # /\s/ doesn't match Unicode whitespace in Ruby 1.9.3
-  end
   # We pass to this routine either prefixes or suffixes
-  def remove_outermost_adfix adfix_type, name_part
+  def remove_outermost_adfix(adfix_type, name_part)
     adfixes       = ADFIX_PATTERNS[adfix_type]
     ct            = contact_type_best_effort
     parts         = name_part.partition adfixes[ct]
@@ -344,48 +339,19 @@ class NameTamer
   # Substantially modified for Xendata
   # Improved in several areas, also now adds non-breaking spaces for
   # compound names like "van der Pump"
-  def name_case lowercase
-    n = lowercase # We assume the name is passed already downcased
-    n.gsub!(/\b\w/) { |first| first.upcase }
-    n.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
-    # Our list of terminal characters that indicate a non-celtic name used
-    # to include o but we removed it because of MacMurdo.
-    if n =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ or n =~ /\bMc/
-      n.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |match| $1 + $2.capitalize }
-      # Fix Mac exceptions
-      [
-        'MacEdo', 'MacEvicius', 'MacHado', 'MacHar', 'MacHin', 'MacHlin', 'MacIas', 'MacIulis', 'MacKie', 'MacKle',
-        'MacKlin', 'MacKmin', 'MacKmurdo', 'MacQuarie', 'MacLise', 'MacKenzie'
-      ].each { |mac_name| n.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
-    end
-    # Fix ff wierdybonks
-    [
-      'Fforbes', 'Fforde', 'Ffinch', 'Ffrench', 'Ffoulkes'
-    ].each { |ff_name| n.gsub!(ff_name,ff_name.downcase) }
-    # Fixes for name modifiers followed by space
-    # Also replaces spaces with non-breaking spaces
-    NAME_MODIFIERS.each do |modifier|
-      n.gsub!(/((?:[[:space:]]|^)#{modifier})(\s+|-)/) { |match| "#{$1.rstrip.downcase}#{$2.tr(ASCII_SPACE, NONBREAKING_SPACE)}" }
-    end
-    # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
-    ['Dell', 'D'].each do |modifier|
-      n.gsub!(/(.#{modifier}')(\w)/) { |match| "#{$1.rstrip.downcase}#{$2}" }
-    end
-    # Upcase words with no vowels, e.g JPR Williams
-    n.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |match| $1.upcase }
-    # Except Ng
-    n.gsub!(/\b(NG)\b/i) { |match| $1.capitalize } # http://en.wikipedia.org/wiki/Ng
+  def name_case(lowercase)
+    n = lowercase.dup # We assume the name is passed already downcased
     n
+      .upcase_first_letter!
+      .downcase_after_apostrophe!
+      .fix_mac!
+      .fix_ff!
+      .fix_name_modifiers!
+      .upcase_initials!
   end
-  def parameterize string, args = {}
+  def parameterize(string, args = {})
     sep     = args[:sep]      || SLUG_DELIMITER
     rfc3987 = args[:rfc3987]  || false
     filter  = args[:filter]   || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
@@ -394,29 +360,12 @@ class NameTamer
     # things we want to alter for the slug, like whitespace (e.g. %20)
     new_string = URI.unescape(string)
-    # Then we change any whitespace into our separator character
-    new_string.gsub!(/\s+/, sep)
-    # Change some characters embedded in words to our separator character
-    # e.g. example.com -> example-com
-    new_string.gsub!(/(?<!\s)[\.\/](?!\s)/, sep)
-    # Then we strip any other illegal characters out completely
-    new_string.gsub!(filter, '')
-    # Make sure separators are not where they shouldn't be
-    unless sep.nil? || sep.empty?
-      re_sep = Regexp.escape(sep)
-      # No more than one of the separator in a row.
-      new_string.gsub!(/#{re_sep}{2,}/, sep)
-      # Remove leading/trailing separator.
-      new_string.gsub!(/^#{re_sep}|#{re_sep}$/i, '')
-    end
-    # Any characters that resemble latin characters might usefully be
-    # transliterated into ones that are easy to type on an anglophone
-    # keyboard.
-    new_string.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
+    new_string
+      .whitespace_to!(sep)
+      .invalid_chars_to!(sep)
+      .strip_invalid!(filter)
+      .fix_separators!(sep)
+      .approximate_latin_chars!
     # Have we got anything left?
     new_string = '_' if new_string.empty?
@@ -434,39 +383,6 @@ class NameTamer
   ADFIX_JOINERS     = "[#{ASCII_SPACE}-]"
   SLUG_DELIMITER    =  '-'
-  # Transliterations (like the i18n defaults)
-  # see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
-  APPROXIMATIONS = {
-    "À"=>"A", "Á"=>"A", "Â"=>"A", "Ã"=>"A", "Ä"=>"A", "Å"=>"A", "Æ"=>"AE",
-    "Ç"=>"C", "È"=>"E", "É"=>"E", "Ê"=>"E", "Ë"=>"E", "Ì"=>"I", "Í"=>"I",
-    "Î"=>"I", "Ï"=>"I", "Ð"=>"D", "Ñ"=>"N", "Ò"=>"O", "Ó"=>"O", "Ô"=>"O",
-    "Õ"=>"O", "Ö"=>"O", "×"=>"x", "Ø"=>"O", "Ù"=>"U", "Ú"=>"U", "Û"=>"U",
-    "Ü"=>"U", "Ý"=>"Y", "Þ"=>"Th", "ß"=>"ss", "à"=>"a", "á"=>"a", "â"=>"a",
-    "ã"=>"a", "ä"=>"a", "å"=>"a", "æ"=>"ae", "ç"=>"c", "è"=>"e", "é"=>"e",
-    "ê"=>"e", "ë"=>"e", "ì"=>"i", "í"=>"i", "î"=>"i", "ï"=>"i", "ð"=>"d",
-    "ñ"=>"n", "ò"=>"o", "ó"=>"o", "ô"=>"o", "õ"=>"o", "ö"=>"o", "ø"=>"o",
-    "ù"=>"u", "ú"=>"u", "û"=>"u", "ü"=>"u", "ý"=>"y", "þ"=>"th", "ÿ"=>"y",
-    "Ā"=>"A", "ā"=>"a", "Ă"=>"A", "ă"=>"a", "Ą"=>"A", "ą"=>"a", "Ć"=>"C",
-    "ć"=>"c", "Ĉ"=>"C", "ĉ"=>"c", "Ċ"=>"C", "ċ"=>"c", "Č"=>"C", "č"=>"c",
-    "Ď"=>"D", "ď"=>"d", "Đ"=>"D", "đ"=>"d", "Ē"=>"E", "ē"=>"e", "Ĕ"=>"E",
-    "ĕ"=>"e", "Ė"=>"E", "ė"=>"e", "Ę"=>"E", "ę"=>"e", "Ě"=>"E", "ě"=>"e",
-    "Ĝ"=>"G", "ĝ"=>"g", "Ğ"=>"G", "ğ"=>"g", "Ġ"=>"G", "ġ"=>"g", "Ģ"=>"G",
-    "ģ"=>"g", "Ĥ"=>"H", "ĥ"=>"h", "Ħ"=>"H", "ħ"=>"h", "Ĩ"=>"I", "ĩ"=>"i",
-    "Ī"=>"I", "ī"=>"i", "Ĭ"=>"I", "ĭ"=>"i", "Į"=>"I", "į"=>"i", "İ"=>"I",
-    "ı"=>"i", "Ĳ"=>"IJ", "ĳ"=>"ij", "Ĵ"=>"J", "ĵ"=>"j", "Ķ"=>"K", "ķ"=>"k",
-    "ĸ"=>"k", "Ĺ"=>"L", "ĺ"=>"l", "Ļ"=>"L", "ļ"=>"l", "Ľ"=>"L", "ľ"=>"l",
-    "Ŀ"=>"L", "ŀ"=>"l", "Ł"=>"L", "ł"=>"l", "Ń"=>"N", "ń"=>"n", "Ņ"=>"N",
-    "ņ"=>"n", "Ň"=>"N", "ň"=>"n", "ŉ"=>"'n", "Ŋ"=>"NG", "ŋ"=>"ng",
-    "Ō"=>"O", "ō"=>"o", "Ŏ"=>"O", "ŏ"=>"o", "Ő"=>"O", "ő"=>"o", "Œ"=>"OE",
-    "œ"=>"oe", "Ŕ"=>"R", "ŕ"=>"r", "Ŗ"=>"R", "ŗ"=>"r", "Ř"=>"R", "ř"=>"r",
-    "Ś"=>"S", "ś"=>"s", "Ŝ"=>"S", "ŝ"=>"s", "Ş"=>"S", "ş"=>"s", "Š"=>"S",
-    "š"=>"s", "Ţ"=>"T", "ţ"=>"t", "Ť"=>"T", "ť"=>"t", "Ŧ"=>"T", "ŧ"=>"t",
-    "Ũ"=>"U", "ũ"=>"u", "Ū"=>"U", "ū"=>"u", "Ŭ"=>"U", "ŭ"=>"u", "Ů"=>"U",
-    "ů"=>"u", "Ű"=>"U", "ű"=>"u", "Ų"=>"U", "ų"=>"u", "Ŵ"=>"W", "ŵ"=>"w",
-    "Ŷ"=>"Y", "ŷ"=>"y", "Ÿ"=>"Y", "Ź"=>"Z", "ź"=>"z", "Ż"=>"Z", "ż"=>"z",
-    "Ž"=>"Z", "ž"=>"z"
-  }
   # Constants for parameterizing Unicode strings for IRIs
   #
   # Allowed characters in an IRI segment are defined by RFC 3987
@@ -505,21 +421,10 @@ class NameTamer
   FILTER_RFC3987  = /[^#{ISEGMENT_NZ_NC}]/
   FILTER_COMPAT   = /[^#{ALPHA}#{DIGIT}\-_#{UCSCHAR}]/
-  NAME_MODIFIERS  = [
-    'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San', 'St[\.]?',
-    'Zur'
-  ]
-  COMPOUND_NAMES  = [
-    'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
-    'Baron Cohen', 'Strang Steel',
-    'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
-  ]
   # These are the prefixes and suffixes we want to remove
   # If you add to the list, you can use spaces and dots where appropriate
   # Ensure any single letters are followed by a dot because we'll add one to the string
-  # during processing, e.g. "y Cía." should be "y. Cía."
+  # during processing, e.g. "y Cia." should be "y. Cia."
   ADFIXES = {
     prefix: {
       person: [
@@ -534,7 +439,7 @@ class NameTamer
       organization: [
         'Fa.', 'P.T.', 'P.T. Tbk.', 'U.D.'
       ],
-      before:'\\A', after:ADFIX_JOINERS
+      before: '\\A', after: ADFIX_JOINERS
     },
     suffix: {
       person: [
@@ -543,10 +448,10 @@ class NameTamer
         'M.I.E.T.', 'B.Tech.',
         'Cantab.', 'D.Phil.', 'I.T.I.L. v3', 'B.Eng.', 'C.Eng.', 'M.Jur.', 'C.F.A.', 'D.B.E.',
         'D.D.S.', 'D.V.M.', 'Eng.D.', 'A.C.A.', 'C.T.A.', 'E.R.P.', 'F.C.A', 'F.P.C.', 'F.R.M.', 'M.B.A.', 'M.B.E.',
-        'M.E.P.', 'M.Eng.', 'M.Jur.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'V.M.D.', 'B.Ed.', 'B.Sc.', 'Ed.D.',
-        'Hons.', 'LL.B.',
-        'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.', 'O.K.',
-        'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'I', 'V'
+        'M.E.P.', 'M.Eng.', 'M.Jur.', 'M.S.P.', 'O.B.E.', 'P.M.C.', 'P.M.P.', 'P.S.P.', 'V.M.D.', 'B.Ed.', 'B.Sc.',
+        'Ed.D.', 'Hons.', 'LL.B.',
+        'LL.D.', 'LL.M.', 'M.Ed.', 'M.Sc.', 'Oxon.', 'Ph.D.', 'B.A.', 'Esq.', 'J.D.', 'K.C.', 'M.A.', 'M.D.', 'M.P.',
+        'O.K.', 'P.A.', 'Q.C.', 'III', 'Jr.', 'Sr.', 'II', 'IV', 'I', 'V'
       ],
       organization: [
         'S. de R.L. de C.V.', 'S.A.P.I. de C.V.', 'y. Cía. S. en C.', 'Private Limited', 'S.M. Pte. Ltd.',
@@ -572,7 +477,7 @@ class NameTamer
         'ПУП.', 'С.Д.', 'בע"מ', '任意組合', '匿名組合', '合同会社', '合名会社', '合資会社', '有限会社', '有限公司', '株式会社',
         'A/S', 'G/S', 'I/S', 'K/S', 'P/S', 'S/A'
       ],
-      before:ADFIX_JOINERS, after:'\\z'
+      before: ADFIX_JOINERS, after: '\\z'
     }
   }
@@ -583,7 +488,7 @@ class NameTamer
     adfix     = ADFIXES[adfix_type]
     [:person, :organization].each do |ct|
-      with_optional_spaces    = adfix[ct].map { |p| p.gsub(ASCII_SPACE,' *') }
+      with_optional_spaces    = adfix[ct].map { |p| p.gsub(ASCII_SPACE, ' *') }
       pattern_string          = with_optional_spaces.join('|').gsub('.', '\.*')
       patterns[ct]  = /#{adfix[:before]}\(*(?:#{pattern_string})\)*#{adfix[:after]}/i
     end

data/lib/name-tamer/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class NameTamer
-  VERSION = "0.1.3"
+  VERSION = '0.1.4'
 end

data/lib/string_extras.rb ADDED Viewed

@@ -0,0 +1,188 @@
+# encoding: utf-8
+class String
+  # Strip illegal characters out completely
+  def strip_invalid!(filter)
+    self.gsub!(filter, '')
+    self # Allows chaining
+  end
+  def strip_or_self!
+    self.strip!
+    self # Allows chaining
+  end
+  # Change any whitespace into our separator character
+  def whitespace_to!(separator)
+    self.gsub!(/[[:space:]]+/, separator)
+    self # Allows chaining
+  end
+  # Ensure commas have exactly one space after them
+  def space_after_comma!
+    self.gsub!(/,[[:space:]]*/, ', ')
+    self # Allows chaining
+  end
+  # Change some characters embedded in words to our separator character
+  # e.g. example.com -> example-com
+  def invalid_chars_to!(separator)
+    self.gsub!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
+    self # Allows chaining
+  end
+  # Make sure separators are not where they shouldn't be
+  def fix_separators!(separator)
+    unless separator.nil? || separator.empty?
+      r = Regexp.escape(separator)
+      # No more than one of the separator in a row.
+      self.gsub!(/#{r}{2,}/, separator)
+      # Remove leading/trailing separator.
+      self.gsub!(/^#{r}|#{r}$/i, '')
+    end
+    self # Allows chaining
+  end
+  # Any characters that resemble latin characters might usefully be
+  # transliterated into ones that are easy to type on an anglophone
+  # keyboard.
+  def approximate_latin_chars!
+    self.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
+    self # Allows chaining
+  end
+  def upcase_first_letter!
+    self.gsub!(/\b\w/) { |first| first.upcase }
+    self # Allows chaining
+  end
+  def downcase_after_apostrophe!
+    self.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
+    self # Allows chaining
+  end
+  # Our list of terminal characters that indicate a non-celtic name used
+  # to include o but we removed it because of MacMurdo.
+  def fix_mac!
+    if self =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || self =~ /\bMc/
+      self.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |_| Regexp.last_match[1] + Regexp.last_match[2].capitalize }
+      # Fix Mac exceptions
+      %w(
+        MacEdo MacEvicius MacHado MacHar MacHin MacHlin MacIas MacIulis MacKie
+        MacKle MacKlin MacKmin MacKmurdo MacQuarie MacLise MacKenzie
+      ).each { |mac_name| self.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
+    end
+    self # Allows chaining
+  end
+  # Fix ff wierdybonks
+  def fix_ff!
+    %w(
+      Fforbes Fforde Ffinch Ffrench Ffoulkes
+    ).each { |ff_name| self.gsub!(ff_name, ff_name.downcase) }
+    self # Allows chaining
+  end
+  # Fixes for name modifiers followed by space
+  # Also replaces spaces with non-breaking spaces
+  # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
+  def fix_name_modifiers!
+    NAME_MODIFIERS.each do |modifier|
+      self.gsub!(/((?:[[:space:]]|^)#{modifier})([[:space:]]+|-)/) do |_|
+        "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2].tr(ASCII_SPACE, NONBREAKING_SPACE)}"
+      end
+    end
+    %w(Dell D).each do |modifier|
+      self.gsub!(/(.#{modifier}')(\w)/) { |_| "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2]}" }
+    end
+    self # Allows chaining
+  end
+  # Upcase words with no vowels, e.g JPR Williams
+  # Except Ng
+  def upcase_initials!
+    self.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |_| Regexp.last_match[1].upcase }
+    self.gsub!(/\b(NG)\b/i) { |_| Regexp.last_match[1].capitalize } # http://en.wikipedia.org/wiki/Ng
+    self # Allows chaining
+  end
+  # Fix known last names that have spaces (not hyphens!)
+  def nbsp_in_compound_name!
+    COMPOUND_NAMES.each do |compound_name|
+      self.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
+    end
+    self # Allows chaining
+  end
+  def nbsp_in_name_modifier!
+    NAME_MODIFIERS.each do |modifier|
+      self.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |_| "#{Regexp.last_match[1]}#{NONBREAKING_SPACE}" }
+    end
+    self # Allows chaining
+  end
+  def remove_spaces_from_initials!
+    self.gsub!(/\b([a-z])(\.)* \b(?![a-z0-9']{2,})/i) { |_| "#{Regexp.last_match[1]}#{Regexp.last_match[2]}" }
+    self # Allows chaining
+  end
+  def ensure_space_after_initials!
+    self.gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " }
+    self # Allows chaining
+  end
+  NONBREAKING_SPACE = "\u00a0"
+  ASCII_SPACE       = "\u0020"
+  COMPOUND_NAMES  = [
+    'Lane Fox', 'Bonham Carter', 'Pitt Rivers', 'Lloyd Webber', 'Sebag Montefiore', 'Holmes à Court', 'Holmes a Court',
+    'Baron Cohen', 'Strang Steel',
+    'Service Company', 'Corporation Company', 'Corporation System', 'Incorporations Limited'
+  ]
+  NAME_MODIFIERS  = [
+    'Al', 'Ap', 'Ben', 'Dell[ae]', 'D[aeiou]', 'De[lrn]', 'D[ao]s', 'El', 'La', 'L[eo]', 'V[ao]n', 'Of', 'San',
+    'St[\.]?', 'Zur'
+  ]
+  # Transliterations (like the i18n defaults)
+  # see https://github.com/svenfuchs/i18n/blob/master/lib/i18n/backend/transliterator.rb
+  APPROXIMATIONS = {
+    'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE',
+    'Ç' => 'C', 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I', 'Í' => 'I',
+    'Î' => 'I', 'Ï' => 'I', 'Ð' => 'D', 'Ñ' => 'N', 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O',
+    'Õ' => 'O', 'Ö' => 'O', '×' => 'x', 'Ø' => 'O', 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U',
+    'Ü' => 'U', 'Ý' => 'Y', 'Þ' => 'Th', 'ß' => 'ss', 'à' => 'a', 'á' => 'a', 'â' => 'a',
+    'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
+    'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 'ð' => 'd',
+    'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
+    'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y',
+    'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A', 'ą' => 'a', 'Ć' => 'C',
+    'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c', 'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c',
+    'Ď' => 'D', 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd', 'Ē' => 'E', 'ē' => 'e', 'Ĕ' => 'E',
+    'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E', 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e',
+    'Ĝ' => 'G', 'ĝ' => 'g', 'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G',
+    'ģ' => 'g', 'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h', 'Ĩ' => 'I', 'ĩ' => 'i',
+    'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I', 'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I',
+    'ı' => 'i', 'Ĳ' => 'IJ', 'ĳ' => 'ij', 'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K', 'ķ' => 'k',
+    'ĸ' => 'k', 'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l', 'Ľ' => 'L', 'ľ' => 'l',
+    'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L', 'ł' => 'l', 'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N',
+    'ņ' => 'n', 'Ň' => 'N', 'ň' => 'n', 'ŉ' => "'n", 'Ŋ' => 'NG', 'ŋ' => 'ng',
+    'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O', 'ő' => 'o', 'Œ' => 'OE',
+    'œ' => 'oe', 'Ŕ' => 'R', 'ŕ' => 'r', 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r',
+    'Ś' => 'S', 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's', 'Š' => 'S',
+    'š' => 's', 'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T', 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't',
+    'Ũ' => 'U', 'ũ' => 'u', 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u', 'Ů' => 'U',
+    'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u', 'Ŵ' => 'W', 'ŵ' => 'w',
+    'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y', 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z',
+    'Ž' => 'Z', 'ž' => 'z'
+  }
+end

data/name-tamer.gemspec CHANGED Viewed

@@ -7,14 +7,14 @@ Gem::Specification.new do |spec|
   spec.version       = NameTamer::VERSION
   spec.authors       = ['Xenapto']
   spec.email         = ['developers@xenapto.com']
-  spec.description   = %q{Useful methods for taming names}
-  spec.summary       = %q{Example: NameTamer['Mr. John Q. Smith III, MD'].simple_name # => John Smith}
+  spec.description   = %q(Useful methods for taming names)
+  spec.summary       = %q(Example: NameTamer['Mr. John Q. Smith III, MD'].simple_name # => John Smith)
   spec.homepage      = 'https://github.com/Xenapto/name-tamer'
   spec.license       = 'MIT'
-  spec.files         = `git ls-files`.split($/)
-  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
-  spec.test_files    = spec.files.grep(%r{^(test|spec|features|coverage)/})
+  spec.files         = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
+  spec.executables   = spec.files.grep(/^bin\//) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(/^(test|spec|features|coverage)\//)
   spec.require_paths = ['lib']
   spec.add_development_dependency 'bundler', '~> 1'

data/spec/name_tamer_spec.rb CHANGED Viewed

@@ -5,157 +5,191 @@ require 'name-tamer'
 describe NameTamer do
   let(:names) do
     [
-      { n:'John Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'JOHN SMITH', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'john smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'Smith, John', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'John    Smith', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'Smith, John', nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'John J Smith', t: :person, nn:'John J Smith', sn:'John Smith', s:'john-smith' },
-      { n:'John J. Smith', t: :person, nn:'John J. Smith', sn:'John Smith', s:'john-smith' },
-      { n:'SMITH, Mr John J.R.', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
-      { n:' SMITH,  Mr John J. R.  ', t: :person, nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
-      { n:'SMITH, Mr John J.R.', nn:'John J.R. Smith', sn:'John Smith', s:'john-smith' },
-      { n:'Mr John J.R. SMITH JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
-      { n:'Mr John J.R. SMITH III,JD', t: :person, nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
-      { n:'Mr John J.R. SMITH JD', nn:'John J.R. SMITH', sn:'John SMITH', s:'john-smith' },
-      { n:'Mr Jean-Michel SMITH JD', t: :person, nn:'Jean-Michel SMITH', sn:'Jean-Michel SMITH', s:'jean-michel-smith' },
-      { n:'Mr Jean Michel-SMITH JD', nn:'Jean Michel-SMITH', sn:'Jean Michel-SMITH', s:'jean-michel-smith' },
-      { n:'Dr Martha Lane Fox Ph.D', nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
-      { n:'Lane Fox Ph.D, Dr Martha', t: :person, nn:'Martha Lane Fox', sn:'Martha Lane Fox', s:'martha-lane-fox' },
-      { n:'Baroness Lane-Fox of Lewisham', t: :person, nn:'Lane-Fox of Lewisham', sn:'Lane-Fox of Lewisham', s:'lane-fox-of-lewisham' },
-      { n:'MACDONALDS LLC', nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
-      { n:'MACDONALDS LLC', t: :organization, nn:'MacDonalds', sn:'MacDonalds', s:'macdonalds' },
-      { n:'macdonalds', t: :organization, nn:'macdonalds', sn:'macdonalds', s:'macdonalds' },
-      { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization, nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
-      { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
-      { n:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', nn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub', sn:'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub', s:'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
-      { n:'K.V.A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
-      { n:'K. V. A. Instruments y Cía S. en C.', nn:'K.V.A. Instruments', sn:'KVA Instruments', s:'kva-instruments' },
-      { n:'J.P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
-      { n:'J. P. Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
-      { n:'J P Rangaswami', nn:'J.P. Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
-      { n:'JP Rangaswami', nn:'JP Rangaswami', sn:'JP Rangaswami', s:'jp-rangaswami' },
-      { n:'Audrey fforbes', nn:'Audrey fforbes', sn:'Audrey fforbes', s:'audrey-fforbes' },
-      { n:'J. Arthur Rank', t: :person, nn:'J. Arthur Rank', sn:'Arthur Rank', s:'arthur-rank' },
-      { n:'PHILIP NG', t: :person, nn:'Philip Ng', sn:'Philip Ng', s:'philip-ng' },
-      { n:'Super R&D', nn:'Super R&D', sn:'Super R and D', s:'super-r-and-d' },
-      { n:'Harry Dean Stanton', t: :person, nn:'Harry Dean Stanton', sn:'Harry Stanton', s:'harry-stanton' },
-      { n:'Union Square Ventures', t: :organization, nn:'Union Square Ventures', sn:'Union Square Ventures', s:'union-square-ventures' },
-      { n:'J Arthur Rank Inc.', t: :organization, nn:'J Arthur Rank', sn:'J Arthur Rank', s:'j-arthur-rank' },
-      { n:'Jean VAN DER VELDE', t: :person, nn:'Jean VAN DER VELDE', sn:'Jean VAN DER VELDE', s:'jean-van-der-velde' },
-      { n:'Al Capone', t: :person, nn:'Al Capone', sn:'Al Capone', s:'al-capone' },
-      { n:'Fahd al-Saud', t: :person, nn:'Fahd al-Saud', sn:'Fahd al-Saud', s:'fahd-al-saud' },
-      { n:'Mehmet al Auouiby', t: :person, nn:'Mehmet al Auouiby', sn:'Mehmet al Auouiby', s:'mehmet-al-auouiby' },
-      { n:'Macquarie Bank', t: :organization, nn:'Macquarie Bank', sn:'Macquarie Bank', s:'macquarie-bank' },
-      { n:"COMMEDIA DELL'ARTE", t: :organization, nn:"Commedia dell'Arte", sn:"Commedia dell'Arte", s:'commedia-dellarte' },
-      { n:'Della Smith', t: :person, nn:'Della Smith', sn:'Della Smith', s:'della-smith' },
-      { n:'Antonio DELLA MONTEVERDE', nn:'Antonio DELLA MONTEVERDE', sn:'Antonio DELLA MONTEVERDE', s:'antonio-della-monteverde' },
-      { n:'Tony St Clair', t: :person, nn:'Tony St Clair', sn:'Tony St Clair', s:'tony-st-clair' },
-      { n:'Seamus O\'Malley', t: :person, nn:'Seamus O\'Malley', sn:'Seamus O\'Malley', s:'seamus-omalley' },
-      { n:'SeedCamp', t: :organization, nn:'SeedCamp', sn:'SeedCamp', s:'seedcamp' },
-      { n:'Peter Van Der Auwera', t: :person, nn:'Peter Van Der Auwera', sn:'Peter Van Der Auwera', s:'peter-van-der-auwera' },
-      { n:'VAN DER AUWERA, Peter', t: :person, nn:'Peter van der Auwera', sn:'Peter van der Auwera', s:'peter-van-der-auwera' },
-      { n:'Li Fan', t: :person, nn:'Li Fan', sn:'Li Fan', s:'li-fan' },
-      { n:'Fan Li', t: :person, nn:'Fan Li', sn:'Fan Li', s:'fan-li' },
-      { n:'Levi Strauss & Co.', nn:'Levi Strauss', sn:'Levi Strauss', s:'levi-strauss' },
-      { n:'Standard & Poor\'s', t: :organization, nn:'Standard & Poor\'s', sn:'Standard and Poor\'s', s:'standard-and-poors' },
-      { n:'I B M Services', t: :organization, nn:'I.B.M. Services', sn:'IBM Services', s:'ibm-services' },
-      { n:'Sean Park DDS', t: :person, nn:'Sean Park', sn:'Sean Park', s:'sean-park' },
-      { n:'SEAN MACLISE PARK', t: :person, nn:'Sean Maclise Park', sn:'Sean Park', s:'sean-park' },
-      { n:'AJ Hanna', t: :person, nn:'AJ Hanna', sn:'AJ Hanna', s:'aj-hanna' },
-      { n:'Free & Clear', t: :organization, nn:'Free & Clear', sn:'Free and Clear', s:'free-and-clear' },
-      { n:'Adam D\'ANGELO', t: :person, nn:'Adam D\'ANGELO', sn:'Adam D\'ANGELO', s:'adam-dangelo' },
-      { n:'MACKENZIE, Doug', t: :person, nn:'Doug Mackenzie', sn:'Doug Mackenzie', s:'doug-mackenzie' },
-      { n:'Up + Down', t: :organization, nn:'Up + Down', sn:'Up plus Down', s:'up-plus-down' },
-      { n:'San Francisco Ltd', t: :organization, nn:'San Francisco', sn:'San Francisco', s:'san-francisco' },
-      { n:'AT&T', t: :organization, nn:'At&T', sn:'At and T', s:'at-and-t' },
-      { n:'SMITH, John, Jr.', t: :person, nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'I Heart Movies', t: :organization, nn:'I Heart Movies', sn:'I Heart Movies', s:'i-heart-movies' },
-      { n:'Y Combinator', t: :organization, nn:'Y Combinator', sn:'Y Combinator', s:'y-combinator' },
-      { n:'Ben\'s 10 Hens', t: :organization, nn:'Ben\'s 10 Hens', sn:'Ben\'s 10 Hens', s:'bens-10-hens' },
-      { n:'Elazer Edelman, MD , PhD', t: :person, nn:'Elazer Edelman', sn:'Elazer Edelman', s:'elazer-edelman' },
-      { n:'Judith M. O\'Brien', t: :person, nn:'Judith M. O\'Brien', sn:'Judith O\'Brien', s:'judith-obrien' },
-      { n:'MORRISON, Van', t: :person, nn:'Van Morrison', sn:'Van Morrison', s:'van-morrison' },
-      { n:'i/o Ventures', t: :organization, nn:'i/o Ventures', sn:'i/o Ventures', s:'i-o-ventures' },
-      { n:'C T Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
-      { n:'C.T. Corporation System', t: :person, nn:'C.T. Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
-      { n:'CT Corporation System', t: :person, nn:'CT Corporation System', sn:'CT Corporation System', s:'ct-corporation-system'},
-      { n:'Corporation Service Company', t: :person, nn:'Corporation Service Company', sn:'Corporation Service Company', s:'corporation-service-company'},
-      { n:'Kurshuni,Inc.', t: :organization, nn:'Kurshuni', sn:'Kurshuni', s:'kurshuni' },
-      { n:'Cellular Inc-LLC', t: :organization, nn:'Cellular', sn:'Cellular', s:'cellular' },
-      { n:'Emtec (AZ) Limited', t: :organization, nn:'Emtec (AZ)', sn:'Emtec (AZ)', s:'emtec-az' },
-      { n:'Emtec (LLC) Limited', t: :organization, nn:'Emtec', sn:'Emtec', s:'emtec' },
-      { n:'Emtec (XYZ LLC) Limited', t: :organization, nn:'Emtec (XYZ)', sn:'Emtec (XYZ)', s:'emtec-xyz' },
-      { n:'Tao Ma', t: :person, nn:'Tao', sn:'Tao', s:'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
-      { n:'(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn:'Courtney J. Miller', sn:'Courtney Miller', s:'courtney-miller' },
-      { n:'(Mr Woo) The Window Cleaner', t: :person, nn:'(Woo) The Window Cleaner', sn:'(Woo) Cleaner', s:'woo-cleaner'},
-      { n:'DOMINIC MACMURDO', t: :person, nn:'Dominic MacMurdo', sn:'Dominic MacMurdo', s:'dominic-macmurdo' },
-      { n:'DOMINIC MACEDO', t: :person, nn:'Dominic Macedo', sn:'Dominic Macedo', s:'dominic-macedo' },
-      { n:'DOMINIC MACDONALD', t: :person, nn:'Dominic MacDonald', sn:'Dominic MacDonald', s:'dominic-macdonald' },
-      { n:'AGUSTA DO ROMEIRO', t: :person, nn:'Agusta do Romeiro', sn:'Agusta do Romeiro', s:'agusta-do-romeiro' },
-      { n:'CARLOS DOS SANTOS', t: :person, nn:'Carlos dos Santos', sn:'Carlos dos Santos', s:'carlos-dos-santos' },
-      { n:'유정 신', t: :organization, nn:'유정 신', sn:'유정 신', s:'유정-신' },
-      { n:'xxx%52zzz', t: :organization, nn:'xxx%52zzz', sn:'xxx%52zzz', s:'xxxrzzz' },
-      { n:'Евгений Болотнов', t: :organization, nn:'Евгений Болотнов', sn:'Евгений Болотнов', s:'Евгений-Болотнов' },
-      { n:'김태성', t: :organization, nn:'김태성', sn:'김태성', s:'김태성' },
-      { n:'ゴルフスタジアム', t: :organization, nn:'ゴルフスタジアム', sn:'ゴルフスタジアム', s:'ゴルフスタジアム' },
-      { n:'我摘', t: :organization, nn:'我摘', sn:'我摘', s:'我摘' },
-      { n:'Καρατζάς Στέφανος', t: :organization, nn:'Καρατζάς Στέφανος', sn:'Καρατζάς Στέφανος', s:'Καρατζάς-Στέφανος' },
-      { n:'โชติวัน วัฒนลาภ', t: :organization, nn:'โชติวัน วัฒนลาภ', sn:'โชติวัน วัฒนลาภ', s:'โชติวัน-วัฒนลาภ' },
-      { n:'張 續寶', t: :organization, nn:'張 續寶', sn:'張 續寶', s:'張-續寶' },
-      { n:'Юрий Гайдук', t: :organization, nn:'Юрий Гайдук', sn:'Юрий Гайдук', s:'Юрий-Гайдук' },
-      { n:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn:'☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', s:'☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
-      { n:'♠ KlasikB0i ♠', t: :organization, nn:'♠ KlasikB0i ♠', sn:'♠ KlasikB0i ♠', s:'♠-klasikb0i-♠' },
-      { n:'* Shorusan *', t: :organization, nn:'* Shorusan *', sn:'* Shorusan *', s:'shorusan' },
-      { n:'项目谷', t: :organization, nn:'项目谷', sn:'项目谷', s:'项目谷' },
-      { n:'ООО "Инновационные полимерные адгезивы"', t: :organization, nn:'ООО "Инновационные полимерные адгезивы"', sn:'ООО "Инновационные полимерные адгезивы"', s:'ООО-Инновационные-полимерные-адгезивы' },
-      { n:'عبدالله ...', t: :organization, nn:'عبدالله ...', sn:'عبدالله ...', s:'عبدالله' },
-      { n:'กมลชนก ทิศไธสง', t: :organization, nn:'กมลชนก ทิศไธสง', sn:'กมลชนก ทิศไธสง', s:'กมลชนก-ทิศไธสง' },
-      { n:'יוֹ אָב', t: :organization, nn:'יוֹ אָב', sn:'יוֹ אָב', s:'יוֹ-אָב' },
-      { n:'יגאל נימני', t: :organization, nn:'יגאל נימני', sn:'יגאל נימני', s:'יגאל-נימני' },
-      { n:'ניסים דניאלי', t: :organization, nn:'ניסים דניאלי', sn:'ניסים דניאלי', s:'ניסים-דניאלי' },
-      { n:'مساء الخير', t: :organization, nn:'مساء الخير', sn:'مساء الخير', s:'مساء-الخير' },
-      { n:'محمود ياسر', t: :organization, nn:'محمود ياسر', sn:'محمود ياسر', s:'محمود-ياسر' },
-      { n:'קובי ביטר', t: :organization, nn:'קובי ביטר', sn:'קובי ביטר', s:'קובי-ביטר' },
-      { n:'الملاك الحارس', t: :organization, nn:'الملاك الحارس', sn:'الملاك الحارس', s:'الملاك-الحارس' },
-      { n:'কবির হাসান', t: :organization, nn:'কবির হাসান', sn:'কবির হাসান', s:'কবির-হাসান' },
+      { n: 'John Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'JOHN SMITH', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'john smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'Smith, John', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'John    Smith', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'Smith, John', nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'John J Smith', t: :person, nn: 'John J Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'John J. Smith', t: :person, nn: 'John J. Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'SMITH, Mr John J.R.', t: :person, nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: ' SMITH,  Mr John J. R.  ', t: :person, nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'SMITH, Mr John J.R.', nn: 'John J.R. Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'Mr John J.R. SMITH JD', t: :person, nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
+      { n: 'Mr John J.R. SMITH III,JD', t: :person, nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
+      { n: 'Mr John J.R. SMITH JD', nn: 'John J.R. SMITH', sn: 'John SMITH', s: 'john-smith' },
+      { n: 'Mr Jean-Michel SMITH JD', t: :person, nn: 'Jean-Michel SMITH', sn: 'Jean-Michel SMITH',
+        s: 'jean-michel-smith' },
+      { n: 'Mr Jean Michel-SMITH JD', nn: 'Jean Michel-SMITH', sn: 'Jean Michel-SMITH', s: 'jean-michel-smith' },
+      { n: 'Dr Martha Lane Fox Ph.D', nn: 'Martha Lane Fox', sn: 'Martha Lane Fox', s: 'martha-lane-fox' },
+      { n: 'Lane Fox Ph.D, Dr Martha', t: :person, nn: 'Martha Lane Fox', sn: 'Martha Lane Fox', s: 'martha-lane-fox' },
+      { n: 'Baroness Lane-Fox of Lewisham', t: :person, nn: 'Lane-Fox of Lewisham', sn: 'Lane-Fox of Lewisham',
+        s: 'lane-fox-of-lewisham' },
+      { n: 'MACDONALDS LLC', nn: 'MacDonalds', sn: 'MacDonalds', s: 'macdonalds' },
+      { n: 'MACDONALDS LLC', t: :organization, nn: 'MacDonalds', sn: 'MacDonalds', s: 'macdonalds' },
+      { n: 'macdonalds', t: :organization, nn: 'macdonalds', sn: 'macdonalds', s: 'macdonalds' },
+      { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP', t: :organization,
+        nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
+        sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
+        s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
+      { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP',
+        nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
+        sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
+        s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
+      { n: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub LLP',
+        nn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble & Grub',
+        sn: 'Pugh, Pugh, Barney McGrew, Cuthbert, Dibble and Grub',
+        s: 'pugh-pugh-barney-mcgrew-cuthbert-dibble-and-grub' },
+      { n: 'K.V.A. Instruments y Cía S. en C.', nn: 'K.V.A. Instruments', sn: 'KVA Instruments', s: 'kva-instruments' },
+      { n: 'K. V. A. Instruments y Cía S. en C.', nn: 'K.V.A. Instruments', sn: 'KVA Instruments',
+        s: 'kva-instruments' },
+      { n: 'J.P.R. Williams', nn: 'J.P.R. Williams', sn: 'JPR Williams', s: 'jpr-williams' },
+      { n: 'J. P. R. Williams', nn: 'J.P.R. Williams', sn: 'JPR Williams', s: 'jpr-williams' },
+      { n: 'J P R Williams', nn: 'JPR Williams', sn: 'JPR Williams', s: 'jpr-williams' },
+      { n: 'JPR Williams', nn: 'JPR Williams', sn: 'JPR Williams', s: 'jpr-williams' },
+      { n: 'Audrey fforbes', nn: 'Audrey fforbes', sn: 'Audrey fforbes', s: 'audrey-fforbes' },
+      { n: 'J. Arthur Rank', t: :person, nn: 'J. Arthur Rank', sn: 'Arthur Rank', s: 'arthur-rank' },
+      { n: 'PHILIP NG', t: :person, nn: 'Philip Ng', sn: 'Philip Ng', s: 'philip-ng' },
+      { n: 'Super R&D', nn: 'Super R&D', sn: 'Super R and D', s: 'super-r-and-d' },
+      { n: 'Harry Dean Stanton', t: :person, nn: 'Harry Dean Stanton', sn: 'Harry Stanton', s: 'harry-stanton' },
+      { n: 'Union Square Ventures', t: :organization, nn: 'Union Square Ventures', sn: 'Union Square Ventures',
+        s: 'union-square-ventures' },
+      { n: 'J Arthur Rank Inc.', t: :organization, nn: 'J Arthur Rank', sn: 'J Arthur Rank', s: 'j-arthur-rank' },
+      { n: 'Jean VAN DER VELDE', t: :person, nn: 'Jean VAN DER VELDE', sn: 'Jean VAN DER VELDE',
+        s: 'jean-van-der-velde' },
+      { n: 'Al Capone', t: :person, nn: 'Al Capone', sn: 'Al Capone', s: 'al-capone' },
+      { n: 'Fahd al-Saud', t: :person, nn: 'Fahd al-Saud', sn: 'Fahd al-Saud', s: 'fahd-al-saud' },
+      { n: 'Mehmet al Auouiby', t: :person, nn: 'Mehmet al Auouiby', sn: 'Mehmet al Auouiby', s: 'mehmet-al-auouiby' },
+      { n: 'Macquarie Bank', t: :organization, nn: 'Macquarie Bank', sn: 'Macquarie Bank', s: 'macquarie-bank' },
+      { n: "COMMEDIA DELL'ARTE", t: :organization, nn: "Commedia dell'Arte", sn: "Commedia dell'Arte",
+        s: 'commedia-dellarte' },
+      { n: 'Della Smith', t: :person, nn: 'Della Smith', sn: 'Della Smith', s: 'della-smith' },
+      { n: 'Antonio DELLA MONTEVERDE', nn: 'Antonio DELLA MONTEVERDE', sn: 'Antonio DELLA MONTEVERDE',
+        s: 'antonio-della-monteverde' },
+      { n: 'Tony St Clair', t: :person, nn: 'Tony St Clair', sn: 'Tony St Clair', s: 'tony-st-clair' },
+      { n: 'Seamus O\'Malley', t: :person, nn: 'Seamus O\'Malley', sn: 'Seamus O\'Malley', s: 'seamus-omalley' },
+      { n: 'SeedCamp', t: :organization, nn: 'SeedCamp', sn: 'SeedCamp', s: 'seedcamp' },
+      { n: 'Peter Van Der Auwera', t: :person, nn: 'Peter Van Der Auwera', sn: 'Peter Van Der Auwera',
+        s: 'peter-van-der-auwera' },
+      { n: 'VAN DER AUWERA, Peter', t: :person, nn: 'Peter van der Auwera', sn: 'Peter van der Auwera',
+        s: 'peter-van-der-auwera' },
+      { n: 'Li Fan', t: :person, nn: 'Li Fan', sn: 'Li Fan', s: 'li-fan' },
+      { n: 'Fan Li', t: :person, nn: 'Fan Li', sn: 'Fan Li', s: 'fan-li' },
+      { n: 'Levi Strauss & Co.', nn: 'Levi Strauss', sn: 'Levi Strauss', s: 'levi-strauss' },
+      { n: 'Standard & Poor\'s', t: :organization, nn: 'Standard & Poor\'s', sn: 'Standard and Poor\'s',
+        s: 'standard-and-poors' },
+      { n: 'I B M Services', t: :organization, nn: 'IBM Services', sn: 'IBM Services', s: 'ibm-services' },
+      { n: 'Sean Park DDS', t: :person, nn: 'Sean Park', sn: 'Sean Park', s: 'sean-park' },
+      { n: 'SEAN MACLISE PARK', t: :person, nn: 'Sean Maclise Park', sn: 'Sean Park', s: 'sean-park' },
+      { n: 'AJ Hanna', t: :person, nn: 'AJ Hanna', sn: 'AJ Hanna', s: 'aj-hanna' },
+      { n: 'Free & Clear', t: :organization, nn: 'Free & Clear', sn: 'Free and Clear', s: 'free-and-clear' },
+      { n: 'Adam D\'ANGELO', t: :person, nn: 'Adam D\'ANGELO', sn: 'Adam D\'ANGELO', s: 'adam-dangelo' },
+      { n: 'MACKENZIE, Doug', t: :person, nn: 'Doug Mackenzie', sn: 'Doug Mackenzie', s: 'doug-mackenzie' },
+      { n: 'Up + Down', t: :organization, nn: 'Up + Down', sn: 'Up plus Down', s: 'up-plus-down' },
+      { n: 'San Francisco Ltd', t: :organization, nn: 'San Francisco', sn: 'San Francisco', s: 'san-francisco' },
+      { n: 'AT&T', t: :organization, nn: 'At&T', sn: 'At and T', s: 'at-and-t' },
+      { n: 'SMITH, John, Jr.', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'I Heart Movies', t: :organization, nn: 'I Heart Movies', sn: 'I Heart Movies', s: 'i-heart-movies' },
+      { n: 'Y Combinator', t: :organization, nn: 'Y Combinator', sn: 'Y Combinator', s: 'y-combinator' },
+      { n: 'Ben\'s 10 Hens', t: :organization, nn: 'Ben\'s 10 Hens', sn: 'Ben\'s 10 Hens', s: 'bens-10-hens' },
+      { n: 'Elazer Edelman, MD , PhD', t: :person, nn: 'Elazer Edelman', sn: 'Elazer Edelman', s: 'elazer-edelman' },
+      { n: 'Judith M. O\'Brien', t: :person, nn: 'Judith M. O\'Brien', sn: 'Judith O\'Brien', s: 'judith-obrien' },
+      { n: 'MORRISON, Van', t: :person, nn: 'Van Morrison', sn: 'Van Morrison', s: 'van-morrison' },
+      { n: 'i/o Ventures', t: :organization, nn: 'i/o Ventures', sn: 'i/o Ventures', s: 'i-o-ventures' },
+      { n: 'C T Corporation System', t: :person, nn: 'CT Corporation System', sn: 'CT Corporation System',
+        s: 'ct-corporation-system' },
+      { n: 'C.T. Corporation System', t: :person, nn: 'C.T. Corporation System', sn: 'CT Corporation System',
+        s: 'ct-corporation-system' },
+      { n: 'CT Corporation System', t: :person, nn: 'CT Corporation System', sn: 'CT Corporation System',
+        s: 'ct-corporation-system' },
+      { n: 'Corporation Service Company', t: :person, nn: 'Corporation Service Company',
+        sn: 'Corporation Service Company', s: 'corporation-service-company' },
+      { n: 'Kurshuni,Inc.', t: :organization, nn: 'Kurshuni', sn: 'Kurshuni', s: 'kurshuni' },
+      { n: 'Cellular Inc-LLC', t: :organization, nn: 'Cellular', sn: 'Cellular', s: 'cellular' },
+      { n: 'Emtec (AZ) Limited', t: :organization, nn: 'Emtec (AZ)', sn: 'Emtec (AZ)', s: 'emtec-az' },
+      { n: 'Emtec (LLC) Limited', t: :organization, nn: 'Emtec', sn: 'Emtec', s: 'emtec' },
+      { n: 'Emtec (XYZ LLC) Limited', t: :organization, nn: 'Emtec (XYZ)', sn: 'Emtec (XYZ)', s: 'emtec-xyz' },
+      { n: 'Tao Ma', t: :person, nn: 'Tao', sn: 'Tao',
+        s: 'tao' }, # Unfortunate but we can't distinguish between Ma and M.A.
+      { n: '(Mr.) Courtney J. Miller, J.D., LL.M.', t: :person, nn: 'Courtney J. Miller', sn: 'Courtney Miller',
+        s: 'courtney-miller' },
+      { n: '(Mr Woo) The Window Cleaner', t: :person, nn: '(Woo) The Window Cleaner', sn: '(Woo) Cleaner',
+        s: 'woo-cleaner' },
+      { n: 'DOMINIC MACMURDO', t: :person, nn: 'Dominic MacMurdo', sn: 'Dominic MacMurdo', s: 'dominic-macmurdo' },
+      { n: 'DOMINIC MACEDO', t: :person, nn: 'Dominic Macedo', sn: 'Dominic Macedo', s: 'dominic-macedo' },
+      { n: 'DOMINIC MACDONALD', t: :person, nn: 'Dominic MacDonald', sn: 'Dominic MacDonald', s: 'dominic-macdonald' },
+      { n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
+      { n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
+      { n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
+      { n: 'xxx%52zzz', t: :organization, nn: 'xxx%52zzz', sn: 'xxx%52zzz', s: 'xxxrzzz' },
+      { n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов',
+        s: 'Евгений-Болотнов' },
+      { n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },
+      { n: 'ゴルフスタジアム', t: :organization, nn: 'ゴルフスタジアム', sn: 'ゴルフスタジアム', s: 'ゴルフスタジアム' },
+      { n: '我摘', t: :organization, nn: '我摘', sn: '我摘', s: '我摘' },
+      { n: 'Καρατζάς Στέφανος', t: :organization, nn: 'Καρατζάς Στέφανος', sn: 'Καρατζάς Στέφανος',
+        s: 'Καρατζάς-Στέφανος' },
+      { n: 'โชติวัน วัฒนลาภ', t: :organization, nn: 'โชติวัน วัฒนลาภ', sn: 'โชติวัน วัฒนลาภ', s: 'โชติวัน-วัฒนลาภ' },
+      { n: '張 續寶', t: :organization, nn: '張 續寶', sn: '張 續寶', s: '張-續寶' },
+      { n: 'Юрий Гайдук', t: :organization, nn: 'Юрий Гайдук', sn: 'Юрий Гайдук', s: 'Юрий-Гайдук' },
+      { n: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', t: :organization, nn: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣', sn: '☣ ©Ʀѱ∏†ʘ Σɏ§†℈Ϻ ☣',
+        s: '☣-©Ʀѱ∏†ʘ-Σɏ§†℈Ϻ-☣' },
+      { n: '♠ KlasikB0i ♠', t: :organization, nn: '♠ KlasikB0i ♠', sn: '♠ KlasikB0i ♠', s: '♠-klasikb0i-♠' },
+      { n: '* Shorusan *', t: :organization, nn: '* Shorusan *', sn: '* Shorusan *', s: 'shorusan' },
+      { n: '项目谷', t: :organization, nn: '项目谷', sn: '项目谷', s: '项目谷' },
+      { n: 'ООО "Инновационные полимерные адгезивы"', t: :organization, nn: 'ООО "Инновационные полимерные адгезивы"',
+        sn: 'ООО "Инновационные полимерные адгезивы"', s: 'ООО-Инновационные-полимерные-адгезивы' },
+      { n: 'عبدالله ...', t: :organization, nn: 'عبدالله ...', sn: 'عبدالله ...', s: 'عبدالله' },
+      { n: 'กมลชนก ทิศไธสง', t: :organization, nn: 'กมลชนก ทิศไธสง', sn: 'กมลชนก ทิศไธสง', s: 'กมลชนก-ทิศไธสง' },
+      { n: 'יוֹ אָב', t: :organization, nn: 'יוֹ אָב', sn: 'יוֹ אָב', s: 'יוֹ-אָב' },
+      { n: 'יגאל נימני', t: :organization, nn: 'יגאל נימני', sn: 'יגאל נימני', s: 'יגאל-נימני' },
+      { n: 'ניסים דניאלי', t: :organization, nn: 'ניסים דניאלי', sn: 'ניסים דניאלי', s: 'ניסים-דניאלי' },
+      { n: 'مساء الخير', t: :organization, nn: 'مساء الخير', sn: 'مساء الخير', s: 'مساء-الخير' },
+      { n: 'محمود ياسر', t: :organization, nn: 'محمود ياسر', sn: 'محمود ياسر', s: 'محمود-ياسر' },
+      { n: 'קובי ביטר', t: :organization, nn: 'קובי ביטר', sn: 'קובי ביטר', s: 'קובי-ביטר' },
+      { n: 'الملاك الحارس', t: :organization, nn: 'الملاك الحارس', sn: 'الملاك الحارس', s: 'الملاك-الحارس' },
+      { n: 'কবির হাসান', t: :organization, nn: 'কবির হাসান', sn: 'কবির হাসান', s: 'কবির-হাসান' },
       { nn: '', sn: '', s: '_' },
-      { n:'Union Square Ventures', t: 'Organization', nn:'Union Square Ventures', sn:'Union Square Ventures', s:'union-square-ventures' },
-      { n:'John Smith', t: 'Person', nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'John Smith', t: :nonsense, nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'John Smith', t: Kernel, nn:'John Smith', sn:'John Smith', s:'john-smith' },
-      { n:'Ms Jane Smith', t: :person, nn:'Jane Smith', sn:'Jane Smith', s:'jane-smith' },
-      { n:'example.com', t: :organization, nn:'example.com', sn:'example.com', s:'example-com' },
-      { n:'Hermann Müller', t: :person, nn: 'Hermann Müller', sn: 'Hermann Müller', s:'hermann-muller'},
-      { n:'b-to-v Partners AG', t: :organization, nn:'b-to-v Partners', sn:'b-to-v Partners', s:'b-to-v-partners' },
-      { n:'*', t: :person, nn: '*', sn: '*', s:'_'},
-      { n:'* *', t: :person, nn: '* *', sn: '* *', s:'_'},
-      { n:'* Olga *', t: :person, nn: '* Olga *', sn: 'Olga', s:'olga'},
-      { n:'* Olga Bedia García *', t: :person, nn: '* Olga Bedia García *', sn: 'Olga García', s:'olga-garcia'},
-      { n:'John Smith M.A. (Oxon)', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith'}
+      { n: 'Union Square Ventures', t: 'Organization', nn: 'Union Square Ventures', sn: 'Union Square Ventures',
+        s: 'union-square-ventures' },
+      { n: 'John Smith', t: 'Person', nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'John Smith', t: :nonsense, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'John Smith', t: Kernel, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'Ms Jane Smith', t: :person, nn: 'Jane Smith', sn: 'Jane Smith', s: 'jane-smith' },
+      { n: 'example.com', t: :organization, nn: 'example.com', sn: 'example.com', s: 'example-com' },
+      { n: 'Hermann Müller', t: :person, nn: 'Hermann Müller', sn: 'Hermann Müller', s: 'hermann-muller' },
+      { n: 'b-to-v Partners AG', t: :organization, nn: 'b-to-v Partners', sn: 'b-to-v Partners', s: 'b-to-v-partners' },
+      { n: '*', t: :person, nn: '*', sn: '*', s: '_' },
+      { n: '* *', t: :person, nn: '* *', sn: '* *', s: '_' },
+      { n: '* Olga *', t: :person, nn: '* Olga *', sn: 'Olga', s: 'olga' },
+      { n: '* Olga Bedia García *', t: :person, nn: '* Olga Bedia García *', sn: 'Olga García', s: 'olga-garcia' },
+      { n: 'John Smith M.A. (Oxon)', t: :person, nn: 'John Smith', sn: 'John Smith', s: 'john-smith' },
+      { n: 'I B M', t: :organization, nn: 'Ibm', sn: 'Ibm', s: 'ibm' },
+      { n: 'I-B-M', t: :organization, nn: 'I-B-M', sn: 'I-B-M', s: 'i-b-m' },
+      { n: 'I.B.M.', t: :organization, nn: 'I.B.M.', sn: 'IBM', s: 'ibm' }
     ]
   end
-  it "makes a slug" do
+  it 'makes a slug' do
     names.each do |name_data|
       name = name_data[:n]
-      NameTamer[name, contact_type:name_data[:t]].slug.should == name_data[:s]
+      NameTamer[name, contact_type: name_data[:t]].slug.should == name_data[:s]
     end
   end
-  it "makes a nice name" do
+  it 'makes a nice name' do
     names.each do |name_data|
       name      = name_data[:n]
-      nice_name = NameTamer[name, contact_type:name_data[:t]].nice_name
+      nice_name = NameTamer[name, contact_type: name_data[:t]].nice_name
       nice_name.should == name_data[:nn]
     end
   end
-  it "makes a searchable name" do
+  it 'makes a searchable name' do
     names.each do |name_data|
       name = name_data[:n]
-      NameTamer[name, contact_type:name_data[:t]].simple_name.should == name_data[:sn]
+      NameTamer[name, contact_type: name_data[:t]].simple_name.should == name_data[:sn]
     end
   end
 end

data/spec/spec_helper.rb CHANGED Viewed

@@ -5,11 +5,11 @@ Coveralls.wear!
 SimpleCov.start
 RSpec.configure do |config|
- # Run specs in random order to surface order dependencies. If you find an
+  # Run specs in random order to surface order dependencies. If you find an
   # order dependency and want to debug it, you can fix the order by providing
   # the seed, which is printed after each run.
   #     --seed 1234
-  config.order = "random"
+  config.order = 'random'
   # Manually-added
   config.color_enabled = true

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: name-tamer
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Xenapto
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-02 00:00:00.000000000 Z
+date: 2014-06-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -109,6 +109,8 @@ extra_rdoc_files: []
 files:
 - ".env"
 - ".gitignore"
+- ".hound.yml"
+- ".rubocop.yml"
 - ".ruby-version"
 - Gemfile
 - Gemfile.lock
@@ -120,6 +122,7 @@ files:
 - doc/suffixes.csv
 - lib/name-tamer.rb
 - lib/name-tamer/version.rb
+- lib/string_extras.rb
 - name-tamer.gemspec
 - spec/name_tamer_spec.rb
 - spec/spec_helper.rb