RubyGems - name_splitter - Versions diffs - 0.1.7 → 0.2.0 - Mend

name_splitter 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 1564e0d6f7a81a05f7c77926c9548115b5f49974
-  data.tar.gz: 82c6d53e108581d3e3c071efd83756a4c8924d82
+SHA256:
+  metadata.gz: 1472fb0e5a7b2b81f64c86f2301ddcf20c27a43a19dda65d47ef7e42bd5445e3
+  data.tar.gz: 692421abfd75e65c12e58ad3550d5df2f504311acf35596f38938ba754e27cd4
 SHA512:
-  metadata.gz: b2e08535bac76dc5094cff67036b27e4dd58644ee6c4e1011e91f5dad3707a65bd4057654b9c96f61f0c0d11c074a421a1403db1c30d280c174a5cd7c91be6d7
-  data.tar.gz: 06028d326a852f8f10f8d0eafa40659d90f9965500a6a762478a8c0068b41e50fc0db6c8e539bf43a7caff2e75d4d77b521dc7d6bc5ac03a18e4906b8545f505
+  metadata.gz: '0286c130755f4db82077a07f867f7903e418708948059dcdd1fab2c88b086ef6eacc0e95b35af571be99821e483f34e5e7044a41bb40f27c3729e2967e8c1e47'
+  data.tar.gz: 3b6665c239b96b123dfea53441222abaa64c6e85b36b769180aac3ac16531fecc4c7e1bf6ad16f5bc2631d832bbf66a32c11ed02bc92d6cc860a5b878f1d41f4

data/README.md CHANGED Viewed

@@ -30,6 +30,8 @@ names.last_name # Farmer
 names.salutation # Ms.
 ````
+See the [spec file](spec/name_splitter_spec.rb) for documentation on all of the ways a name can be split.
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

data/lib/name_splitter/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module NameSplitter
-  VERSION = "0.1.7"
+  VERSION = "0.2.0"
 end

data/lib/name_splitter.rb CHANGED Viewed

@@ -2,83 +2,96 @@ require "name_splitter/version"
 module NameSplitter
   class Splitter
-    attr_accessor :suffixes, :first_name, :last_name, :middle_name, :last_name_prefix, :salutation, :suffix
+    LAST_COMMA_FIRST_FORMAT = "last_comma_first"
+    attr_accessor :suffixes, :first_name, :last_name, :middle_name, :last_name_prefix, :salutation, :suffix, :last_name_first_format
     attr_reader :name
     def self.call(fullname)
       new(fullname)
     end
-    def initialize(fullname = "")
-      self.salutation = ""
-      self.first_name = ""
-      self.middle_name = ""
-      self.last_name = ""
-      self.suffix = ""
-      self.name = fullname if fullname and !fullname.to_s.empty?
+    def initialize(fullname = "", options = {})
+      @salutation = ""
+      @first_name = ""
+      @middle_name = ""
+      @last_name = ""
+      @suffix = ""
+      @options = options
+      @last_name_first_format = options[:format] == LAST_COMMA_FIRST_FORMAT
+      @delimeter = /[ ]+/
+      self.name = fullname
     end
     def name
-      first_name + " " + last_name + (suffix.to_s.empty? ? "" : ", " + suffix)
+      return "#{first_name.strip} #{last_name.strip}#{suffix.to_s.empty? ? "" : ", " + suffix}".strip if first_name.strip.length > 0
+      return "#{salutation.strip} #{last_name.strip}#{suffix.to_s.empty? ? "" : ", " + suffix}".strip
     end
     def name=(fullname)
-      name_arr = fullname.to_s.split(" ")
+      return if fullname.nil? || fullname.strip.empty?
+      if last_name_first_format
+        name_arr = split_name_when_last_name_is_first(fullname)
+      else
+        name_arr ||= fullname.to_s.split(@delimeter)
+      end
       return if name_arr.empty?
       if contains_suffix(name_arr)
-        self.suffix = name_arr.pop
+        self.suffix = name_arr.pop.strip
       end
       if name_arr.length == 1
-        self.first_name = name_arr.shift
+        self.first_name = name_arr.shift.strip
         return
       end
       if is_first_element_a_last_name(name_arr)
-        self.last_name = name_arr.shift.gsub(",","")
+        self.last_name = name_arr.shift.gsub(",","").strip
       end
-      self.salutation = name_arr.shift(number_of_salutations(name_arr)).join(" ")
+      self.salutation = name_arr.shift(number_of_salutations(name_arr)).join(" ").strip
       if name_arr.length == 1 && last_name.empty?
-        self.last_name = name_arr.shift
+        self.last_name = name_arr.shift.strip
       else
-        self.first_name = name_arr.shift(number_of_first_names(name_arr)).join(" ")
-        self.middle_name = name_arr.shift(number_of_middle_names(name_arr)).join(" ")
+        self.first_name = name_arr.shift(number_of_first_names(name_arr)).join(" ").strip
+        self.middle_name = name_arr.shift(number_of_middle_names(name_arr)).join(" ").strip
         self.last_name_check(name_arr)
       end
     end
     def last_name_check(last_name_arr)
       #accepts either a string or an array
-      if last_name_arr.class.name == "String"
+      if last_name_arr.is_a?(String)
         last_name_arr = last_name_arr.split(" ")
       end
       return false if last_name_arr.empty?
       self.suffix = last_name_arr.pop if contains_suffix(last_name_arr)
-      self.last_name = last_name_arr.join(" ").gsub(/[.,]+/, "")
+      self.last_name = last_name_arr.join(" ").gsub(/[.,]+/, "").strip
     end
     private
     def number_of_middle_names(name_arr)
-      # if the first and last names have already been assigned, assume the
-      # rest of the name is a middle name
-      if !first_name.empty? && !last_name.empty?
-        return name_arr.length
-      end
+      number_of_non_middle_names = 0
+      number_of_non_middle_names += 1 if self.first_name.empty?
+      number_of_non_middle_names += 1 if self.last_name.empty?
+      number_of_non_middle_names += 1 if contains_last_name_prefix(name_arr)
+      number_of_non_middle_names += 1 if contains_suffix(name_arr)
+      return 0 if name_arr.length <= number_of_non_middle_names
-      #checks whether the array of names passed in contains a likely middle name
-      if (name_arr.length == 2 &&
-        !(contains_suffix(name_arr) || contains_last_name_prefix(name_arr))) ||
-      (name_arr.length == 3 &&
-        !(contains_suffix(name_arr) && contains_last_name_prefix(name_arr))) ||
-      name_arr.length > 3
-        return 1
-      end
+      # p "last name empty: #{self.last_name.empty?}"
+      # p "contains_last_name_prefix: #{contains_last_name_prefix(name_arr)}"
+      # p "first_name: #{first_name}"
+      # p "last_name: #{last_name}"
+      # p "number_of_non_middle_names: #{number_of_non_middle_names}"
+      # p "name_arr: #{name_arr}"
-      return 0
+      # assume all other names that are not last name prefixes or suffixes are middle names
+      name_arr.length - number_of_non_middle_names
     end
     def number_of_salutations(name_arr)
@@ -102,38 +115,82 @@ module NameSplitter
     def is_second_first_name?(_name)
       return false unless _name
       second_first_names.collect { |x| x.upcase }.include?(_name.upcase)
     end
+    # this is a bit of a hack to determine if the first element in the name array is actually a last name.
+    # We assume that if there is a comma in the first element, then it is a last name.
+    # This is not always the case, but it is a common format for names and it allows us to correctly
+    # parse names like "Smith, John" and "Smith, John C." without incorrectly parsing names
+    # like "Smith Johnson Jr., Jim C." as having a last name of "Smith Johnson Jr."
+    # We don't need this if the last_name_first_format option is set to true because we will already be splitting the
+    # name on the comma and assigning the first element as the last name.
+    # But a file could have a mix of formats, or the format is not specified, so we want to be able to handle this case
+    # even if the last_name_first_format option is not set to true.
     def is_first_element_a_last_name(name_arr)
       name_arr[0].strip.match(/,/)
     end
-    def anded_names?(_name)
-      contains_an_and(_name)
+    def anded_names?(name)
+      contains_an_and(name)
     end
     def contains_an_and(*name_arr)
-      name_arr.flatten.select { |_name| ["and", "&"].include?(_name.to_s.strip) }.any?
+      name_arr.flatten.select { |name| ["and", "&"].include?(name.to_s.strip) }.any?
     end
-    def contains_salutation(_name)
-      return false unless _name
-      salutations.collect { |x| x.upcase }.include?(_name.gsub(/[.,;']+/, "").upcase)
+    def contains_salutation(name)
+      return false unless name
+      salutations.collect { |x| x.upcase }.include?(name.gsub(/[.,;']+/, "").upcase)
     end
+    # We check if the name array contains a last name prefix by checking the last two elements of the name array.
+    # This is because last name prefixes are typically found in the last name portion of the name, and they are
+    # typically found before the last name. For example, in the name "John de la Smith", "de la" is a last name prefix
+    # and it is found before the last name "Smith".
     def contains_last_name_prefix(name_arr)
-      last_name_prefix.collect { |x| x.upcase }.include?(name_arr.first.upcase)
+      return false if name_arr.length < 2
+      last_two = name_arr.last(2)
+      last_name_prefix.collect { |x| x.upcase }.include?(last_two.first.upcase)
     end
+    # the name can't contain a suffix unless there are at least two names left in the name array.
     def contains_suffix(name_arr)
-      raise "contains_suffix must receive an array" if !name_arr.class.name == "Array"
-      return false if name_arr.length == 1
+      raise "contains_suffix must receive an array" if !name_arr.is_a?(Array)
+      return false if name_arr.length < 2
       suffixes.collect { |x| x.upcase }.include?(name_arr.last.gsub(/[.,;']+/, "").upcase)
     end
+    # here we assume that everything before the comma is associated with the last name and everything after the comma
+    # makes up the other names (first, middle, salutation). A suffix would most likely be in the last name portion of the name.
+    # examples of this format include "Smith, John", "Smith,John C.", "Smith Johnson Jr., Jim C."
+    def split_name_when_last_name_is_first(fullname)
+      name_arr = fullname.to_s.split(/[,]+/) # we first only want to split out the name(s) that are considered last from the first
+      return name_arr if name_arr.length < 2
+      last_names = name_arr[0].split(" ") # we then want to split the last name(s) into an array to check for last name prefixes and suffixes
+      first_names = name_arr[1].split(" ") # we also want to split the first name(s) into an array to check for salutations and suffixes
+      # lets check if the last name(s) contain a suffix
+      if contains_suffix(last_names)
+        self.suffix = last_names.pop.strip
+      end
+      # now let's check if the first name(s) contain a suffix
+      if contains_suffix(first_names)
+        self.suffix = first_names.pop.strip
+      end
+      # now we'll recombine the last name(s) and first name(s) into one array to be processed as normal
+      # We assume if there were multiple last names (i.e. before the comma) that they are all part of the last name.
+      first_names + [last_names.join(" ").strip]
+    end
     def suffixes
-      %w{Jr Sr II III IV V VI MD PHD Esq DDS}
+      %w{Jr Sr II III IV V VI MD PHD Esq DDS}.freeze
     end
     def last_name_prefix
@@ -141,11 +198,11 @@ module NameSplitter
     end
     def salutations
-      %w{Mr Mrs Ms Miss Dr Prof Rev Capt Sister Honorable Judge Chief}
+      %w{Mr Mrs Ms Miss Dr Prof Rev Capt Sister Honorable Judge Chief}.freeze
     end
     def second_first_names
-      %w{Beth Catherine Louise}
+      %w{Beth Catherine Louise}.freeze
     end
   end
 end

data/name_splitter.gemspec CHANGED Viewed

@@ -27,7 +27,7 @@ Gem::Specification.new do |spec|
   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
-  spec.add_development_dependency "bundler", "~> 1.10"
+  spec.add_development_dependency "bundler", "~> 2.0"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec", "~> 3.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: name_splitter
 version: !ruby/object:Gem::Version
-  version: 0.1.7
+  version: 0.2.0
 platform: ruby
 authors:
 - Tom Hoen
-autorequire:
+autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-11-15 00:00:00.000000000 Z
+date: 2026-02-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -16,14 +16,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.10'
+        version: '2.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.10'
+        version: '2.0'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -79,7 +79,7 @@ licenses:
 - MIT
 metadata:
   allowed_push_host: https://rubygems.org
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -94,9 +94,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.4.8
-signing_key:
+rubygems_version: 3.4.10
+signing_key:
 specification_version: 4
 summary: Gem for splitting full names into the component parts
 test_files: []