RubyGems - name-tamer - Versions diffs - 0.2.0 → 0.2.1 - Mend

name-tamer 0.2.0 → 0.2.1

Files changed (7) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3532b7472b3daecb0bb11863268531c229771639
-  data.tar.gz: 0096dd16106d480f6c5e1e043dbf54896f787599
+  metadata.gz: 0a8134129faabc9dc73dec3bd107873adf6a8f21
+  data.tar.gz: 6c2c5f68c2e3e9a41c6ccc0ddba78a896952c725
 SHA512:
-  metadata.gz: 4ee6d017e93b54acd10791f44a2920c46fe76faaafcb0171ab59c582f7d07c34036bc64610c5298ae363a33fa26fbedd1711800771acc29620beb6967adea10e
-  data.tar.gz: 23741e994c62fc8c746f826e3124824a29e3a5ecff22930a1ec350db0363b5b6d20d67bc5104d546f25c9301850a8944e936cb5e41c6320524d766303a2be69c
+  metadata.gz: 199a1fb93b68757fd0a9bc6657c1595c655da6ad5ad0ba87ae6cbf43dea88a15e3ecc9cb7317ed3edff415cac3288e26f012653e2432e222a4497aed41d21534
+  data.tar.gz: ae987673dfb5a693706a470765a3d7ac300dd8ff14c76d796659e97c5db79085fa735d1c2b1e218baf2da829fba047690cddbe640b02758b63a6fe49e503d01b

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    name-tamer (0.1.9)
+    name-tamer (0.2.0)
 GEM
   remote: https://rubygems.org/

data/lib/name-tamer.rb CHANGED

@@ -19,13 +19,35 @@ class NameTamer
     def [](name, args = {})
       new name, args
     end
+    # Make a slug from a string
+    def parameterize(string, args = {})
+      sep     = args[:sep]      || SLUG_DELIMITER
+      rfc3987 = args[:rfc3987]  || false
+      filter  = args[:filter]   || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
+      new_string = string.dup
+      new_string
+        .whitespace_to!(sep)
+        .invalid_chars_to!(sep)
+        .strip_unwanted!(filter)
+        .fix_separators!(sep)
+        .approximate_latin_chars!
+      # Have we got anything left?
+      new_string = '_' if new_string.empty?
+      # downcase any latin characters
+      new_string.downcase
+    end
   end
   def tidy_name
     unless @tidy_name
       @tidy_name = name.dup # Start with the name we've received
-      ensure_safe           # Invalid byte sequence in UTF-8, for example
+      unescape              # Unescape percent-encoded characters and fix UTF-8 encoding
       tidy_spacing          # " John   Smith " -> "John Smith"
       fix_encoding_errors   # "Ren\u00c3\u00a9 Descartes" -> "Ren\u00e9 Descartes"
       consolidate_initials  # "I. B. M." -> "I.B.M."
@@ -65,12 +87,7 @@ class NameTamer
   end
   def slug
-    unless @slug
-      @slug = simple_name.dup         # Start with search name
-      slugify                         # "John Doe" -> "john-doe"
-    end
-    @slug
+    @slug ||= NameTamer.parameterize simple_name.dup # "John Doe" -> "john-doe"
   end
   def contact_type
@@ -112,8 +129,8 @@ class NameTamer
   # Tidy up the name we've received
   #--------------------------------------------------------
-  def ensure_safe
-    @tidy_name.ensure_safe
+  def unescape
+    @tidy_name.ensure_safe!.safe_unescape!
   end
   def tidy_spacing
@@ -272,18 +289,6 @@ class NameTamer
     @simple_name.strip_unwanted!(/["“”™℠®©℗]/)           # remove quotes and commercial decoration
   end
-  #--------------------------------------------------------
-  # Make slug from search name
-  #--------------------------------------------------------
-  def slugify
-    # Inflector::parameterize just gives up with non-latin characters so...
-    # @slug = @slug.parameterize # Can't use this
-    # Instead we'll do it ourselves
-    @slug = parameterize @slug
-  end
   #--------------------------------------------------------
   # Initialization and utilities
   #--------------------------------------------------------
@@ -378,29 +383,6 @@ class NameTamer
       .upcase_initials!
   end
-  def parameterize(string, args = {})
-    sep     = args[:sep]      || SLUG_DELIMITER
-    rfc3987 = args[:rfc3987]  || false
-    filter  = args[:filter]   || (rfc3987 ? FILTER_RFC3987 : FILTER_COMPAT)
-    # First we unescape any pct-encoded characters. These might turn into
-    # things we want to alter for the slug, like whitespace (e.g. %20)
-    new_string = URI.unescape(string)
-    new_string
-      .whitespace_to!(sep)
-      .invalid_chars_to!(sep)
-      .strip_unwanted!(filter)
-      .fix_separators!(sep)
-      .approximate_latin_chars!
-    # Have we got anything left?
-    new_string = '_' if new_string.empty?
-    # downcase any latin characters
-    new_string.downcase
-  end
   #--------------------------------------------------------
   # Constants
   #--------------------------------------------------------

data/lib/name-tamer/version.rb CHANGED

@@ -1,3 +1,3 @@
 class NameTamer
-  VERSION = '0.2.0'
+  VERSION = '0.2.1'
 end

data/lib/string_extras.rb CHANGED

@@ -25,6 +25,16 @@ class String
     substitute!(/(?<![[:space:]])[\.\/](?![[:space:]])/, separator)
   end
+  # Unescape percent-encoded characters
+  # This might introduce UTF-8 invalid byte sequence
+  # so we take precautions
+  def safe_unescape!
+    string = URI.unescape(self)
+    return self if self == string
+    replace string
+    ensure_safe!
+  end
   # Make sure separators are not where they shouldn't be
   def fix_separators!(separator)
     return self if separator.nil? || separator.empty?
@@ -139,8 +149,7 @@ class String
     gsub!(/\b([a-z]\.)(?=[a-z0-9]{2,})/i) { |_| "#{Regexp.last_match[1]} " } || self
   end
-  def ensure_safe
-    return if valid_encoding?
+  def ensure_safe!
     encode!('UTF-8', invalid: :replace, undef: :replace, replace: '')
   end

data/spec/name_tamer_spec.rb CHANGED

@@ -122,7 +122,7 @@ describe NameTamer do
       { n: 'AGUSTA DO ROMEIRO', t: :person, nn: 'Agusta do Romeiro', sn: 'Agusta do Romeiro', s: 'agusta-do-romeiro' },
       { n: 'CARLOS DOS SANTOS', t: :person, nn: 'Carlos dos Santos', sn: 'Carlos dos Santos', s: 'carlos-dos-santos' },
       { n: '유정 신', t: :organization, nn: '유정 신', sn: '유정 신', s: '유정-신' },
-      { n: 'xxx%52zzz', t: :organization, nn: 'xxx%52zzz', sn: 'xxx%52zzz', s: 'xxxrzzz' },
+      { n: 'xxx%52zzz', t: :organization, nn: 'xxxRzzz', sn: 'xxxRzzz', s: 'xxxrzzz' },
       { n: 'Евгений Болотнов', t: :organization, nn: 'Евгений Болотнов', sn: 'Евгений Болотнов',
         s: 'Евгений-Болотнов' },
       { n: '김태성', t: :organization, nn: '김태성', sn: '김태성', s: '김태성' },

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: name-tamer
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - Xenapto
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-07-04 00:00:00.000000000 Z
+date: 2014-07-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler