RubyGems - name-tamer - Versions diffs - 0.1.9 → 0.2.0 - Mend

name-tamer 0.1.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b556d5a36fcb89c56e435fd67a0f159987b7f8b9
-  data.tar.gz: 7ac8e5b948e6edb607f367d4d8303bb3c97c0d9d
+  metadata.gz: 3532b7472b3daecb0bb11863268531c229771639
+  data.tar.gz: 0096dd16106d480f6c5e1e043dbf54896f787599
 SHA512:
-  metadata.gz: 215966db363f5630a1b53671c95792057deb29306a07e1cf93c1772700be4b3fc3e4f9cab40a1ba6d39f58813eddead3226d1faa2583aa21e3d73f41eb2c1403
-  data.tar.gz: 4d2bcd7d0f9b8556c548235c4d12ae0c92643399444a2ce70c5e9b8ef984577d54915827a86f4b243d3434775a1ddb37ccd1d0055b8645c56ab770bdba558613
+  metadata.gz: 4ee6d017e93b54acd10791f44a2920c46fe76faaafcb0171ab59c582f7d07c34036bc64610c5298ae363a33fa26fbedd1711800771acc29620beb6967adea10e
+  data.tar.gz: 23741e994c62fc8c746f826e3124824a29e3a5ecff22930a1ec350db0363b5b6d20d67bc5104d546f25c9301850a8944e936cb5e41c6320524d766303a2be69c

data/.rubocop.yml CHANGED Viewed

@@ -7,4 +7,4 @@ CyclomaticComplexity:
 ClassLength:
   Description: 'Avoid classes longer than 100 lines of code.'
   CountComments: false  # count full line comments?
-  Max: 316
+  Max: 321

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    name-tamer (0.1.8)
+    name-tamer (0.1.9)
 GEM
   remote: https://rubygems.org/

data/lib/name-tamer.rb CHANGED Viewed

@@ -23,11 +23,12 @@ class NameTamer
   def tidy_name
     unless @tidy_name
-      @tidy_name = name.dup          # Start with the name we've received
+      @tidy_name = name.dup # Start with the name we've received
-      tidy_spacing                    # " John   Smith " -> "John Smith"
-      fix_encoding_errors             # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
-      consolidate_initials            # "I. B. M." -> "I.B.M."
+      ensure_safe           # Invalid byte sequence in UTF-8, for example
+      tidy_spacing          # " John   Smith " -> "John Smith"
+      fix_encoding_errors   # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
+      consolidate_initials  # "I. B. M." -> "I.B.M."
     end
     @tidy_name
@@ -111,6 +112,10 @@ class NameTamer
   # Tidy up the name we've received
   #--------------------------------------------------------
+  def ensure_safe
+    @tidy_name.ensure_safe
+  end
   def tidy_spacing
     @tidy_name
       .space_after_comma!
@@ -295,6 +300,7 @@ class NameTamer
       @contact_type = ct
     end
+    @tidy_name    = nil
     @nice_name    = nil
     @simple_name  = nil
     @slug         = nil

data/lib/name-tamer/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class NameTamer
-  VERSION = '0.1.9'
+  VERSION = '0.2.0'
 end

data/lib/string_extras.rb CHANGED Viewed

@@ -2,83 +2,74 @@
 class String
   # Strip illegal characters out completely
   def strip_unwanted!(filter)
-    self.gsub!(filter, '')
-    self # Allows chaining
+    substitute!(filter, '')
   end
   def strip_or_self!
-    self.strip!
-    self # Allows chaining
+    strip! || self
   end
   # Change any whitespace into our separator character
   def whitespace_to!(separator)
-    self.gsub!(/[[:space:]]+/, separator)
-    self # Allows chaining
+    substitute!(/[[:space:]]+/, separator)
   end
   # Ensure commas have exactly one space after them
   def space_after_comma!
-    self.gsub!(/,[[:space:]]*/, ', ')
-    self # Allows chaining
+    substitute!(/,[[:space:]]*/, ', ')
   end
   # Change some characters embedded in words to our separator character
   # e.g. example.com -> example-com
   def invalid_chars_to!(separator)
-    self.gsub!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
-    self # Allows chaining
+    substitute!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
   end
   # Make sure separators are not where they shouldn't be
   def fix_separators!(separator)
-    unless separator.nil? || separator.empty?
-      r = Regexp.escape(separator)
-      # No more than one of the separator in a row.
-      self.gsub!(/#{r}{2,}/, separator)
-      # Remove leading/trailing separator.
-      self.gsub!(/^#{r}|#{r}$/i, '')
-    end
+    return self if separator.nil? || separator.empty?
-    self # Allows chaining
+    r = Regexp.escape(separator)
+    # No more than one of the separator in a row.
+    substitute!(/#{r}{2,}/, separator)
+    # Remove leading/trailing separator.
+    substitute!(/^#{r}|#{r}$/i, '')
   end
   # Any characters that resemble latin characters might usefully be
   # transliterated into ones that are easy to type on an anglophone
   # keyboard.
   def approximate_latin_chars!
-    self.gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char }
-    self # Allows chaining
+    gsub!(/[^\x00-\x7f]/u) { |char| APPROXIMATIONS[char] || char } || self
   end
   # Strings that were wrongly encoded with single-byte encodings sometimes have
   # tell-tale substrings that we can put back into the correct UTF-8 character
   def fix_encoding_errors!
-    self.gsub!(BAD_ENCODING_PATTERNS) { |substring| BAD_ENCODING[substring] || substring }
-    self # Allows chaining
+    gsub!(BAD_ENCODING_PATTERNS) { |substring| BAD_ENCODING[substring] || substring } || self
   end
   def upcase_first_letter!
-    self.gsub!(/\b\w/) { |first| first.upcase }
-    self # Allows chaining
+    gsub!(/\b\w/) { |first| first.upcase } || self
   end
   def downcase_after_apostrophe!
-    self.gsub!(/\'\w\b/) { |c| c.downcase } # Lowercase 's
-    self # Allows chaining
+    gsub!(/\'\w\b/) { |c| c.downcase } || self # Lowercase 's
   end
   # Our list of terminal characters that indicate a non-celtic name used
   # to include o but we removed it because of MacMurdo.
   def fix_mac!
     if self =~ /\bMac[A-Za-z]{2,}[^acizj]\b/ || self =~ /\bMc/
-      self.gsub!(/\b(Ma?c)([A-Za-z]+)/) { |_| Regexp.last_match[1] + Regexp.last_match[2].capitalize }
+      gsub!(/\b(Ma?c)([A-Za-z]+)/) { |_| Regexp.last_match[1] + Regexp.last_match[2].capitalize }
       # Fix Mac exceptions
       %w(
         MacEdo MacEvicius MacHado MacHar MacHin MacHlin MacIas MacIulis MacKie
         MacKle MacKlin MacKmin MacKmurdo MacQuarie MacLise MacKenzie
-      ).each { |mac_name| self.gsub!(/\b#{mac_name}/, mac_name.capitalize) }
+      ).each { |mac_name| substitute!(/\b#{mac_name}/, mac_name.capitalize) }
     end
     self # Allows chaining
@@ -88,7 +79,7 @@ class String
   def fix_ff!
     %w(
       Fforbes Fforde Ffinch Ffrench Ffoulkes
-    ).each { |ff_name| self.gsub!(ff_name, ff_name.downcase) }
+    ).each { |ff_name| substitute!(ff_name, ff_name.downcase) }
     self # Allows chaining
   end
@@ -98,13 +89,13 @@ class String
   # Fixes for name modifiers followed by an apostrophe, e.g. d'Artagnan, Commedia dell'Arte
   def fix_name_modifiers!
     NAME_MODIFIERS.each do |modifier|
-      self.gsub!(/((?:[[:space:]]|^)#{modifier})([[:space:]]+|-)/) do |_|
+      gsub!(/((?:[[:space:]]|^)#{modifier})([[:space:]]+|-)/) do |_|
         "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2].tr(ASCII_SPACE, NONBREAKING_SPACE)}"
       end
     end
     %w(Dell D).each do |modifier|
-      self.gsub!(/(.#{modifier}')(\w)/) { |_| "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2]}" }
+      gsub!(/(.#{modifier}')(\w)/) { |_| "#{Regexp.last_match[1].rstrip.downcase}#{Regexp.last_match[2]}" }
     end
     self # Allows chaining
@@ -113,16 +104,14 @@ class String
   # Upcase words with no vowels, e.g JPR Williams
   # Except Ng
   def upcase_initials!
-    self.gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |_| Regexp.last_match[1].upcase }
-    self.gsub!(/\b(NG)\b/i) { |_| Regexp.last_match[1].capitalize } # http://en.wikipedia.org/wiki/Ng
-    self # Allows chaining
+    gsub!(/\b([bcdfghjklmnpqrstvwxz]+)\b/i) { |_| Regexp.last_match[1].upcase }
+    gsub!(/\b(NG)\b/i) { |_| Regexp.last_match[1].capitalize } || self # http://en.wikipedia.org/wiki/Ng
   end
   # Fix known last names that have spaces (not hyphens!)
   def nbsp_in_compound_name!
     COMPOUND_NAMES.each do |compound_name|
-      self.gsub!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
+      substitute!(compound_name, compound_name.tr(ASCII_SPACE, NONBREAKING_SPACE))
     end
     self # Allows chaining
@@ -130,25 +119,33 @@ class String
   def nbsp_in_name_modifier!
     NAME_MODIFIERS.each do |modifier|
-      self.gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |_| "#{Regexp.last_match[1]}#{NONBREAKING_SPACE}" }
+      gsub!(/([[:space:]]#{modifier})([[:space:]])/i) { |_| "#{Regexp.last_match[1]}#{NONBREAKING_SPACE}" }
     end
     self # Allows chaining
   end
   def remove_periods_from_initials!
-    self.gsub!(/\b([a-z])\./i) { |_| Regexp.last_match[1] }
-    self # Allows chaining
+    gsub!(/\b([a-z])\./i) { |_| Regexp.last_match[1] } || self
   end
   def remove_spaces_from_initials!
-    self.gsub!(/\b([a-z])(\.)* \b(?![a-z0-9']{2,})/i) { |_| "#{Regexp.last_match[1]}#{Regexp.last_match[2]}" }
-    self # Allows chaining
+    gsub!(/\b([a-z])(\.)* \b(?![a-z0-9'\u00C0-\u00FF]{2,})/i) do |_|
+      "#{Regexp.last_match[1]}#{Regexp.last_match[2]}"
+    end || self
   end
   def ensure_space_after_initials!
-    self.gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " }
-    self # Allows chaining
+    gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " } || self
+  end
+  def ensure_safe
+    return if valid_encoding?
+    encode!('UTF-8', invalid: :replace, undef: :replace, replace: '')
+  end
+  def substitute!(pattern, replacement)
+    gsub!(pattern, replacement) || self
   end
   NONBREAKING_SPACE = "\u00a0"

data/spec/name_tamer_spec.rb CHANGED Viewed

@@ -183,12 +183,9 @@ describe NameTamer do
         sn: 'Scout Loyalty Optimizer',
         s: 'scout-loyalty-optimizer'
       },
-      { n: 'RenÃ© Descartes',
-        t: :person,
-        nn: 'René Descartes',
-        sn: 'René Descartes',
-        s: 'rene-descartes'
-      }
+      { n: 'RenÃ© Descartes', t: :person, nn: 'René Descartes', sn: 'René Descartes', s: 'rene-descartes' },
+      { n: 'Pablo M Sánchez', t: :person, nn: 'Pablo M Sánchez', sn: 'Pablo Sánchez', s: 'pablo-sanchez' },
+      { n: "\xc3\x28", t: :person, nn: '()', sn: '()', s: '_' } # Invalid byte sequence in UTF-8
     ]
   end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: name-tamer
 version: !ruby/object:Gem::Version
-  version: 0.1.9
+  version: 0.2.0
 platform: ruby
 authors:
 - Xenapto
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-30 00:00:00.000000000 Z
+date: 2014-07-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler