RubyGems - nomener - Versions diffs - 0.2.6 → 0.2.7 - Mend

nomener 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/README.md +1 -0
data/lib/nomener/helper.rb +12 -15
data/lib/nomener/name.rb +25 -30
data/lib/nomener/parser.rb +93 -102
data/lib/nomener/version.rb +1 -1
data/spec/nomener/nomener_name_spec.rb +27 -0
metadata +1 -1

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 2c250686ec8119b88c20fe08ee5e4ecd720411b5
-  data.tar.gz: 1efeb93999ebf6d3ccc9b879c9ac0a0c84696cac
+  metadata.gz: cd56c9231c23b185899cb3f0c85f7bfd050fbd0c
+  data.tar.gz: b37745c76f6263bd7fb771aab566286f44937693
 SHA512:
-  metadata.gz: 1e66b1549074d0ffd816aae1bc4c84cc143a324f09e94e23ddcd0c61b29d04fe497e275a508d7320dba446370e5a528ecd03ea105ab5baa978bf3c30876c300d
-  data.tar.gz: 3b2fbf3c124d97965e8836eed594276041a5135079f162ca2024f213ac08a894e7467f3fcf69101cdc9edadf4d4e124a73dc76c97a54d3f5f74433d028a3f384
+  metadata.gz: c314a3ad037c4a9fdb120ef77fbb9c40c7998574f99c74f8062120dbefa940e7995c4def79637793b21f6d0f41d3efa747c80813fbc4403c09c06197f1e3998d
+  data.tar.gz: dcf8213aeb321016c3e01c798b379c620b178e0456780d1a6a65cd9ce43c55f2a1ed101b032888303cdded55d558b164da14b036ca4089f073dcd0a483bd34c7

data/README.md CHANGED

@@ -1,6 +1,7 @@
 # Nomener
 [![Gem Version](https://badge.fury.io/rb/nomener.svg)](http://badge.fury.io/rb/nomener)
 [![Build Status](https://travis-ci.org/dan-ding/nomener.svg?branch=master)](https://travis-ci.org/dan-ding/nomener)
+[![Code Climate](https://codeclimate.com/github/dan-ding/nomener/badges/gpa.svg)](https://codeclimate.com/github/dan-ding/nomener)
 Nomener assists with parsing peoples names that they give themselves (or other people). Nomener ~~is~~ was a fork of [People](https://github.com/dan-ding/people) as it uses some code contributed there. It's currently geared towards western style name formatting, however other cultural name formatting is (or would like to be supported). Currently it attempts to parse names through pattern matching without using large(r) dictionary/library/data files (except for name decorations and suffixes, see usage). It may not be possible to do without such in all languages.

data/lib/nomener/helper.rb CHANGED

@@ -2,9 +2,7 @@
 # For Ruby 1.9.3, 2.0.0
 rv = RUBY_VERSION.split(".")[(0..1)].join("")
-if rv >= '19' && rv < '21'
-  require "string-scrub"
-end
+require "string-scrub" if(rv >= '19' && rv < '21')
 module Nomener
   module Helper
@@ -20,24 +18,23 @@ module Nomener
     #
     # Returns a string which is (ideally) pretty much the same as it was given.
     def self.reformat(name, leftleft = '"', rightright = '"', left = "'", right = "'")
-      n = name.dup
-      n.scrub! # remove illegal characters
+      nomen = name.dup
+      nomen.scrub! # remove illegal characters
       # translate fullwidth to typewriter
-      n.tr!("\uFF02\uFF07", "\u0022\u0027")
+      nomen.tr!("\uFF02\uFF07", "\u0022\u0027")
-      n.tr!("\u0022\u00AB\u201C\u201E\u2036\u300E\u301D\u301F\uFE43", leftleft) # replace left double quotes
-      n.tr!("\u0022\u00BB\u201D\u201F\u2033\u300F\u301E\uFE44", rightright) # replace right double quotes
+      nomen.tr!("\u0022\u00AB\u201C\u201E\u2036\u300E\u301D\u301F\uFE43", leftleft) # replace left double quotes
+      nomen.tr!("\u0022\u00BB\u201D\u201F\u2033\u300F\u301E\uFE44", rightright) # replace right double quotes
-      n.tr!("\u0027\u2018\u201A\u2035\u2039\u300C\uFE41\uFF62", left) # replace left single quotes
-      n.tr!("\u0027\u2019\u201B\u2032\u203A\u300D\uFE42\uFF62", right) # replace left single quotes
+      nomen.tr!("\u0027\u2018\u201A\u2035\u2039\u300C\uFE41\uFF62", left) # replace left single quotes
+      nomen.tr!("\u0027\u2019\u201B\u2032\u203A\u300D\uFE42\uFF62", right) # replace left single quotes
-      #n.gsub!(/\./, ' ')
-      n.gsub!(/[^\p{Alpha}\-&\/ \.\,\'\"#{leftleft}#{rightright}#{left}#{right}\(\)]/, " ") # what others may be in a name?
-      n.gsub!(/\p{Blank}+/, " ") # compress whitespace
-      n.strip! # trim space
+      nomen.gsub!(/[^\p{Alpha}\-&\/ \.\,\'\"#{leftleft}#{rightright}#{left}#{right}\(\)]/, " ") # what others may be in a name?
+      nomen.squeeze! " "
+      nomen.strip!
-      n
+      nomen
     end
   end

data/lib/nomener/name.rb CHANGED

@@ -50,37 +50,32 @@ module Nomener
       fix = last.dup
-      # if there are multiple last names separated by spaces
-      fix = fix.split(" ").map { |v| v.capitalize }.join " "
       # if there are multiple last names separated by a dash
-      if !fix.index("-").nil?
-        fix = fix.split("-").map { |v|
-          v.split(" ").map { |w| w.capitalize }.join " "
-        }.join "-"
-      end
-      # anything begining with Mac and not ending in [aciozj]
-      if m = fix.match(/Mac([\p{Alpha}]{2,}[^aciozj])/i)
-        unless m[1].match(%r!^
-          hin|
-          hlen|
-          har|
-          kle|
-          klin|
-          kie|
-          hado|     # Portugese
-          evicius|  # Lithuanian
-          iulis|    # Lithuanian
-          ias       # Lithuanian
-        !x)
-          fix.sub!(/Mac#{m[1]}/, "Mac#{m[1].capitalize}")
-        end
-      elsif m = fix.match(/Mc([\p{Alpha}]{2,})/i) # anything beginning with Mc
-        fix.sub!(/Mc#{m[1]}/, "Mc#{m[1].capitalize}")
-      elsif fix.match(/'\p{Alpha}/) # names like D'Angelo or Van 't Hooft
-        fix.gsub!(/('\p{Alpha})/) { |s| (s[-1] != 't') ? s.upcase : s } #no cap 't
-      end
+      fix = fix.split("-").map { |v|
+        v.split(" ").map { |w| w.capitalize }.join " "
+      }.join "-"
+      # anything begining with Mac and not ending in [aciozj], except for a few
+      fix.sub!(/Mac(?!
+        hin|
+        hlen|
+        har|
+        kle|
+        klin|
+        kie|
+        hado|     # Portugese
+        evicius|  # Lithuanian
+        iulis|    # Lithuanian
+        ias       # Lithuanian
+      )([\p{Alpha}]{2,}[^aAcCiIoOzZjJ])\b/x) { |s| "Mac#{$1.capitalize}" }
+      fix.sub! /\bMacmurdo\b/, "MacMurdo" # fix MacMurdo
+      # anything beginning with Mc, Mcdonald == McDonald
+      fix.sub!(/Mc(\p{Alpha}{2,})/) { |s| "Mc#{$1.capitalize}" }
+      # names like D'Angelo or Van 't Hooft, no cap 't
+      fix.gsub!(/('\p{Alpha})(?=\p{Alpha})/) { |s| "'#{$1[(1..-1)].capitalize}" }
       fix
     end

data/lib/nomener/parser.rb CHANGED

@@ -15,7 +15,7 @@ module Nomener
     TRAILER_TRASH = /[,|\s]+$/
     # regex for name characters we aren't going to use
-    DIRTY_STUFF = /[^,'(?:\p{Alpha}(?<\.))\p{Alpha}]{2,}/
+    DIRTY_STUFF = /[^,'(?:\p{Alpha}(?<\.))\p{Alpha}\p{Blank}]{2,}/
     # regex for boundaries we'll use to find leftover nickname boundaries
     NICKNAME_LEFTOVER = /["'\(\)]{2}/
@@ -62,85 +62,58 @@ module Nomener
     # Returns a hash of name parts or nil
     # Raises ArgumentError if 'name' is not a string or is empty
     def self.parse!(name, format = {:order => :auto, :spacelimit => 0})
-      raise ArgumentError, 'Name to parse not provided' unless (name.kind_of?(String) && !name.empty?)
+      raise ArgumentError, "Name to parse not provided" unless (name.kind_of?(String) && !name.empty?)
       name = Nomener::Helper.reformat name
+      newname = { :title => "", :first => "", :nick => "", :middle => "", :last => "", :suffix => "" }
       # grab any identified nickname before working on the rest
-      nick = parse_nick! name
+      newname[:nick] = parse_nick! name
       cleanup! name
       # grab any suffix' we can find
-      suffix = parse_suffix! name
+      newname[:suffix] = parse_suffix! name
       cleanup! name
-      title = parse_title! name
-      cleanup! name
+      newname[:title] = parse_title! name
+      name = dustoff name
+      newname[:last] = name # possibly mononyms
+      case name
+      when /,/ # if there's a comma, it may be a useful hint
+        clues = name.split(",").each { |i| i.strip! }
+        raise ParseError, "Could not decipher commas in \"#{name}\"" if clues.length > 2
-      name.gsub! PERIOD, ' '
-      name.squeeze! " "
-      name.strip!
-      first = last = middle = ""
-      # if there's a comma, it may be a useful hint
-      if !name.index(',').nil? # && (format[:order] == :auto || format[:order] == :lcf)
-        clues = name.split(",")
-        clues.each { |i| i.strip! }
-        # convention is last, first
-        if clues.length == 2
-          last, first = clues
-          # Mies van der Rohe, Ludwig
-          # Snepscheut, Jan L. A. van de
-          # check the last by comparing a re-ordering of the name
-          first_parts = first.split " "
-          unless first_parts.length == 1
-            check = parse_last!("#{first} #{last}", :fl)
-            # let's trust the full name
-            if check != last
-              first = "#{first} #{last}".sub(check, '').strip
-              last = check
-            end
+        # convention is last, first when there's a comma
+        newname[:last], newname[:first] = clues
+        # check the last by comparing a re-ordering of the name
+        # Mies van der Rohe, Ludwig
+        # Snepscheut, Jan L. A. van de
+        unless newname[:first].nil? || newname[:first].split(" ").length == 1
+          check = parse_last!("#{newname[:first]} #{newname[:last]}", :fl)
+          # let's trust the full name
+          if check != newname[:last]
+            newname[:first] = "#{newname[:first]} #{newname[:last]}".sub(check, "").strip
+            newname[:last] = check
           end
-          # titles are part of the first name
-          title = parse_title!(first) if title.nil? || title.empty?
-        elsif clues.length == 1
-          last = clues.shift
-        else
-          raise ParseError, "Could not decipher commas in \"#{name}\""
         end
-      elsif !name.index(" ").nil?
-        last = parse_last!(name, format[:order])
-        first, middle = parse_first!(name, format[:spacelimit])
-      else
-        last = name # possibly mononym
-        first = ""
+        # titles which are part of the first name...
+        newname[:title] = parse_title!(newname[:first]) if newname[:title].empty?
+      when / / # no comma, check for space on first then last
+        newname[:last] = parse_last!(name, format[:order])
+        newname[:first], newname[:middle] = parse_first!(name, format[:spacelimit])
       end
-      {
-        :title => (title || "").strip,
-        :suffix => (suffix || "").strip,
-        :nick => (nick || "").strip,
-        :first => (first || "").strip,
-        :last => (last || "").strip,
-        :middle => (middle || "").strip
-      }
-    end
+      cleanup! newname[:last], newname[:first], newname[:middle]
-    # Internal: Clean up a string where there are numerous consecutive and trailing non-name characters.
-    #   Modifies given string in place.
-    #
-    # dirty - string to clean up
-    #
-    # Returns nothing
-    def self.cleanup!(dirty)
-      dirty.gsub! DIRTY_STUFF, ''
-      dirty.squeeze! " "
-      # remove any trailing commas or whitespace
-      dirty.gsub! TRAILER_TRASH, ''
-      dirty.strip!
+      newname
     end
     # Internal: pull off a title if we can
@@ -153,13 +126,9 @@ module Nomener
       titles = []
       nm.gsub! TITLES do |title|
         titles << title.strip
-        ''
+        ""
       end
-      t = titles.join " "
-      t.gsub! PERIOD, ' '
-      t.squeeze! " "
-      t.strip!
-      t
+      dustoff titles.join(" ")
     end
     # Internal: pull off what suffixes we can
@@ -172,13 +141,9 @@ module Nomener
       suffixes = []
       nm.gsub! SUFFIXES do |suffix|
         suffixes << suffix.strip
-        ''
+        ""
       end
-      s = suffixes.join " "
-      s.gsub! /\./, ' '
-      s.squeeze! " "
-      s.strip!
-      s
+      dustoff suffixes.join(" ")
     end
     # Internal: parse nickname out of string. presuming it's in quotes
@@ -189,14 +154,12 @@ module Nomener
     # Returns string of the nickname found or and empty string
     def self.parse_nick!(nm)
       nick = ""
-      nm.sub! NICKNAME, ''
-      nick = $1.strip unless $1.nil?
-      nm.sub! NICKNAME_LEFTOVER, ''
-      nm.squeeze! " "
-      nick.gsub! /\./, ' '
-      nick.squeeze! " "
-      nick.strip!
-      nick
+      nm.sub! NICKNAME do |z|
+        nick = $1.strip
+        ""
+      end
+      nm.sub! NICKNAME_LEFTOVER, ""
+      dustoff nick
     end
     # Internal: parse last name from string
@@ -207,24 +170,22 @@ module Nomener
     #
     # Returns string of the last name found or an empty string
     def self.parse_last!(nm, format = :fl)
-      last = ''
+      last = ""
-      if format == :auto
-        format = :fl if nm.index(',').nil?
-      #  format = :lcf if !nm.index(',').nil?
-      end
+      format = :fl  if (format == :auto && nm.index(",").nil?)
+      format = :lcf if (format == :auto && nm.index(","))
-      if format == :fl && n = nm.match( FIRSTLAST_MATCHER )
-        last = n[:fam].strip
-        nm.sub!(last, "").strip!
-      elsif format == :lf && n = nm.match( LASTFIRST_MATCHER )
-        last = n[:fam].strip
-        nm.sub!(last, "").strip!
-      elsif format == :lcf && n = nm.match( LASTCOMFIRST_MATCHER )
-        last = n[:fam].strip
-        nm.sub!(last, "").strip!
-        nm.sub!(',', "").strip!
+      # these constants should have the named match :fam
+      n = nm.match( FIRSTLAST_MATCHER ) if format == :fl
+      n = nm.match( LASTFIRST_MATCHER ) if format == :lf
+      n = nm.match( LASTCOMFIRST_MATCHER ) if format == :lcf
+      unless n.nil?
+        last = n[:fam].strip if n[:fam]
+        nm.sub!(last, "")
+        nm.sub!(",", "")
       end
       last
     end
@@ -236,11 +197,41 @@ module Nomener
     #
     # Returns an array containing the first name and middle name if any
     def self.parse_first!(nm, namecount = 0)
-      nm.tr! '.', ' '
-      first, middle = nm.split ' ', namecount
+      nm.tr! ".", " "
+      nm.squeeze! " "
+      first, middle = nm.split " ", namecount
       [first || "", middle || ""]
     end
+    private
+    # Internal: Clean up a string where there are numerous consecutive and trailing non-name characters.
+    #   Modifies given string in place.
+    #
+    # args - strings to clean up
+    #
+    # Returns nothing
+    def self.cleanup!(*args)
+      args.each do |dirty|
+        next if(dirty.nil? || !dirty.kind_of?(String))
+        dirty.gsub! DIRTY_STUFF, ""
+        dirty.squeeze! " "
+        # remove any trailing commas or whitespace
+        dirty.gsub! TRAILER_TRASH, ""
+        dirty.strip!
+      end
+    end
+    # Internal: a softer clean we keep re-using
+    #
+    # str - the string to dust off
+    #
+    # Returns the nice clean
+    def self.dustoff(str)
+      str = str.gsub PERIOD, " "
+      str = str.squeeze " "
+      str = str.strip
+    end
   end
 end

data/lib/nomener/version.rb CHANGED

@@ -1,4 +1,4 @@
 #-- encoding: UTF-8
 module Nomener
-  VERSION = "0.2.6"
+  VERSION = "0.2.7"
 end

data/spec/nomener/nomener_name_spec.rb CHANGED

@@ -81,6 +81,33 @@ RSpec.describe "Nomener::Name" do
     end
   end
+  context "with last name alternates" do
+    name = Nomener::Name.new("Joe Smith")
+    it "returns from surname the last name" do
+      expect(name.surname).to eq "Smith"
+    end
+    it "returns from family the last name" do
+      expect(name.family).to eq "Smith"
+    end
+  end
+  context "with first name alternate" do
+    name = Nomener::Name.new("Joe Smith")
+    it "returns from surname the last name" do
+      expect(name.given).to eq "Joe"
+    end
+  end
+  context "with a name method to_h" do
+    it "responds with a hash" do
+      name = Nomener::Name.new("Joe Smith")
+      expect(name.to_h).to be_a Hash
+    end
+  end
   context "with capit" do
     name = Nomener::Name.new
     [

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: nomener
 version: !ruby/object:Gem::Version
-  version: 0.2.6
+  version: 0.2.7
 platform: ruby
 authors:
 - Dante Piombino