name_splitter 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 1564e0d6f7a81a05f7c77926c9548115b5f49974
4
- data.tar.gz: 82c6d53e108581d3e3c071efd83756a4c8924d82
2
+ SHA256:
3
+ metadata.gz: 1472fb0e5a7b2b81f64c86f2301ddcf20c27a43a19dda65d47ef7e42bd5445e3
4
+ data.tar.gz: 692421abfd75e65c12e58ad3550d5df2f504311acf35596f38938ba754e27cd4
5
5
  SHA512:
6
- metadata.gz: b2e08535bac76dc5094cff67036b27e4dd58644ee6c4e1011e91f5dad3707a65bd4057654b9c96f61f0c0d11c074a421a1403db1c30d280c174a5cd7c91be6d7
7
- data.tar.gz: 06028d326a852f8f10f8d0eafa40659d90f9965500a6a762478a8c0068b41e50fc0db6c8e539bf43a7caff2e75d4d77b521dc7d6bc5ac03a18e4906b8545f505
6
+ metadata.gz: '0286c130755f4db82077a07f867f7903e418708948059dcdd1fab2c88b086ef6eacc0e95b35af571be99821e483f34e5e7044a41bb40f27c3729e2967e8c1e47'
7
+ data.tar.gz: 3b6665c239b96b123dfea53441222abaa64c6e85b36b769180aac3ac16531fecc4c7e1bf6ad16f5bc2631d832bbf66a32c11ed02bc92d6cc860a5b878f1d41f4
data/README.md CHANGED
@@ -30,6 +30,8 @@ names.last_name # Farmer
30
30
  names.salutation # Ms.
31
31
  ````
32
32
 
33
+ See the [spec file](spec/name_splitter_spec.rb) for documentation on all of the ways a name can be split.
34
+
33
35
  ## Development
34
36
 
35
37
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,3 +1,3 @@
1
1
  module NameSplitter
2
- VERSION = "0.1.7"
2
+ VERSION = "0.2.0"
3
3
  end
data/lib/name_splitter.rb CHANGED
@@ -2,83 +2,96 @@ require "name_splitter/version"
2
2
 
3
3
  module NameSplitter
4
4
  class Splitter
5
- attr_accessor :suffixes, :first_name, :last_name, :middle_name, :last_name_prefix, :salutation, :suffix
5
+ LAST_COMMA_FIRST_FORMAT = "last_comma_first"
6
+
7
+ attr_accessor :suffixes, :first_name, :last_name, :middle_name, :last_name_prefix, :salutation, :suffix, :last_name_first_format
6
8
  attr_reader :name
7
9
 
8
10
  def self.call(fullname)
9
11
  new(fullname)
10
12
  end
11
13
 
12
- def initialize(fullname = "")
13
- self.salutation = ""
14
- self.first_name = ""
15
- self.middle_name = ""
16
- self.last_name = ""
17
- self.suffix = ""
18
- self.name = fullname if fullname and !fullname.to_s.empty?
14
+ def initialize(fullname = "", options = {})
15
+ @salutation = ""
16
+ @first_name = ""
17
+ @middle_name = ""
18
+ @last_name = ""
19
+ @suffix = ""
20
+ @options = options
21
+ @last_name_first_format = options[:format] == LAST_COMMA_FIRST_FORMAT
22
+ @delimeter = /[ ]+/
23
+ self.name = fullname
19
24
  end
20
25
 
21
26
  def name
22
- first_name + " " + last_name + (suffix.to_s.empty? ? "" : ", " + suffix)
27
+ return "#{first_name.strip} #{last_name.strip}#{suffix.to_s.empty? ? "" : ", " + suffix}".strip if first_name.strip.length > 0
28
+
29
+ return "#{salutation.strip} #{last_name.strip}#{suffix.to_s.empty? ? "" : ", " + suffix}".strip
23
30
  end
24
31
 
25
32
  def name=(fullname)
26
- name_arr = fullname.to_s.split(" ")
33
+ return if fullname.nil? || fullname.strip.empty?
34
+
35
+ if last_name_first_format
36
+ name_arr = split_name_when_last_name_is_first(fullname)
37
+ else
38
+ name_arr ||= fullname.to_s.split(@delimeter)
39
+ end
27
40
  return if name_arr.empty?
28
41
 
29
42
  if contains_suffix(name_arr)
30
- self.suffix = name_arr.pop
43
+ self.suffix = name_arr.pop.strip
31
44
  end
32
45
 
33
46
  if name_arr.length == 1
34
- self.first_name = name_arr.shift
47
+ self.first_name = name_arr.shift.strip
35
48
  return
36
49
  end
37
50
 
38
51
  if is_first_element_a_last_name(name_arr)
39
- self.last_name = name_arr.shift.gsub(",","")
52
+ self.last_name = name_arr.shift.gsub(",","").strip
40
53
  end
41
54
 
42
- self.salutation = name_arr.shift(number_of_salutations(name_arr)).join(" ")
55
+ self.salutation = name_arr.shift(number_of_salutations(name_arr)).join(" ").strip
43
56
 
44
57
  if name_arr.length == 1 && last_name.empty?
45
- self.last_name = name_arr.shift
58
+ self.last_name = name_arr.shift.strip
46
59
  else
47
- self.first_name = name_arr.shift(number_of_first_names(name_arr)).join(" ")
48
- self.middle_name = name_arr.shift(number_of_middle_names(name_arr)).join(" ")
60
+ self.first_name = name_arr.shift(number_of_first_names(name_arr)).join(" ").strip
61
+ self.middle_name = name_arr.shift(number_of_middle_names(name_arr)).join(" ").strip
49
62
  self.last_name_check(name_arr)
50
63
  end
51
64
  end
52
65
 
53
66
  def last_name_check(last_name_arr)
54
67
  #accepts either a string or an array
55
- if last_name_arr.class.name == "String"
68
+ if last_name_arr.is_a?(String)
56
69
  last_name_arr = last_name_arr.split(" ")
57
70
  end
58
71
  return false if last_name_arr.empty?
59
72
  self.suffix = last_name_arr.pop if contains_suffix(last_name_arr)
60
- self.last_name = last_name_arr.join(" ").gsub(/[.,]+/, "")
73
+ self.last_name = last_name_arr.join(" ").gsub(/[.,]+/, "").strip
61
74
  end
62
75
 
63
76
  private
64
77
 
65
78
  def number_of_middle_names(name_arr)
66
- # if the first and last names have already been assigned, assume the
67
- # rest of the name is a middle name
68
- if !first_name.empty? && !last_name.empty?
69
- return name_arr.length
70
- end
79
+ number_of_non_middle_names = 0
80
+ number_of_non_middle_names += 1 if self.first_name.empty?
81
+ number_of_non_middle_names += 1 if self.last_name.empty?
82
+ number_of_non_middle_names += 1 if contains_last_name_prefix(name_arr)
83
+ number_of_non_middle_names += 1 if contains_suffix(name_arr)
84
+ return 0 if name_arr.length <= number_of_non_middle_names
71
85
 
72
- #checks whether the array of names passed in contains a likely middle name
73
- if (name_arr.length == 2 &&
74
- !(contains_suffix(name_arr) || contains_last_name_prefix(name_arr))) ||
75
- (name_arr.length == 3 &&
76
- !(contains_suffix(name_arr) && contains_last_name_prefix(name_arr))) ||
77
- name_arr.length > 3
78
- return 1
79
- end
86
+ # p "last name empty: #{self.last_name.empty?}"
87
+ # p "contains_last_name_prefix: #{contains_last_name_prefix(name_arr)}"
88
+ # p "first_name: #{first_name}"
89
+ # p "last_name: #{last_name}"
90
+ # p "number_of_non_middle_names: #{number_of_non_middle_names}"
91
+ # p "name_arr: #{name_arr}"
80
92
 
81
- return 0
93
+ # assume all other names that are not last name prefixes or suffixes are middle names
94
+ name_arr.length - number_of_non_middle_names
82
95
  end
83
96
 
84
97
  def number_of_salutations(name_arr)
@@ -102,38 +115,82 @@ module NameSplitter
102
115
 
103
116
  def is_second_first_name?(_name)
104
117
  return false unless _name
118
+
105
119
  second_first_names.collect { |x| x.upcase }.include?(_name.upcase)
106
120
  end
107
121
 
122
+ # this is a bit of a hack to determine if the first element in the name array is actually a last name.
123
+ # We assume that if there is a comma in the first element, then it is a last name.
124
+ # This is not always the case, but it is a common format for names and it allows us to correctly
125
+ # parse names like "Smith, John" and "Smith, John C." without incorrectly parsing names
126
+ # like "Smith Johnson Jr., Jim C." as having a last name of "Smith Johnson Jr."
127
+ # We don't need this if the last_name_first_format option is set to true because we will already be splitting the
128
+ # name on the comma and assigning the first element as the last name.
129
+ # But a file could have a mix of formats, or the format might not be specified, so we want to be able to handle this case
130
+ # even if the last_name_first_format option is not set to true.
108
131
  def is_first_element_a_last_name(name_arr)
109
132
  name_arr[0].strip.match(/,/)
110
133
  end
111
134
 
112
- def anded_names?(_name)
113
- contains_an_and(_name)
135
+ def anded_names?(name)
136
+ contains_an_and(name)
114
137
  end
115
138
 
116
139
  def contains_an_and(*name_arr)
117
- name_arr.flatten.select { |_name| ["and", "&"].include?(_name.to_s.strip) }.any?
140
+ name_arr.flatten.select { |name| ["and", "&"].include?(name.to_s.strip) }.any?
118
141
  end
119
142
 
120
- def contains_salutation(_name)
121
- return false unless _name
122
- salutations.collect { |x| x.upcase }.include?(_name.gsub(/[.,;']+/, "").upcase)
143
+ def contains_salutation(name)
144
+ return false unless name
145
+ salutations.collect { |x| x.upcase }.include?(name.gsub(/[.,;']+/, "").upcase)
123
146
  end
124
147
 
148
+ # We check if the name array contains a last name prefix by checking the last two elements of the name array.
149
+ # This is because last name prefixes are typically found in the last name portion of the name, and they are
150
+ # typically found before the last name. For example, in the name "John de la Smith", "de la" is a last name prefix
151
+ # and it is found before the last name "Smith".
125
152
  def contains_last_name_prefix(name_arr)
126
- last_name_prefix.collect { |x| x.upcase }.include?(name_arr.first.upcase)
153
+ return false if name_arr.length < 2
154
+ last_two = name_arr.last(2)
155
+ last_name_prefix.collect { |x| x.upcase }.include?(last_two.first.upcase)
127
156
  end
128
157
 
158
+ # the name can't contain a suffix unless there are at least two names left in the name array.
129
159
  def contains_suffix(name_arr)
130
- raise "contains_suffix must receive an array" if !name_arr.class.name == "Array"
131
- return false if name_arr.length == 1
160
+ raise "contains_suffix must receive an array" if !name_arr.is_a?(Array)
161
+ return false if name_arr.length < 2
162
+
132
163
  suffixes.collect { |x| x.upcase }.include?(name_arr.last.gsub(/[.,;']+/, "").upcase)
133
164
  end
134
165
 
166
+ # here we assume that everything before the comma is associated with the last name and everything after the comma
167
+ # holds the other names (first, middle, salutation). A suffix would most likely be in the last name portion of the name.
168
+ # examples of this format include "Smith, John", "Smith,John C.", "Smith Johnson Jr., Jim C."
169
+ def split_name_when_last_name_is_first(fullname)
170
+ name_arr = fullname.to_s.split(/[,]+/) # we first only want to split out the name(s) that are considered last from the first
171
+ return name_arr if name_arr.length < 2
172
+
173
+ last_names = name_arr[0].split(" ") # we then want to split the last name(s) into an array to check for last name prefixes and suffixes
174
+ first_names = name_arr[1].split(" ") # we also want to split the first name(s) into an array to check for salutations and suffixes
175
+
176
+
177
+ # let's check if the last name(s) contain a suffix
178
+ if contains_suffix(last_names)
179
+ self.suffix = last_names.pop.strip
180
+ end
181
+
182
+ # now let's check if the first name(s) contain a suffix
183
+ if contains_suffix(first_names)
184
+ self.suffix = first_names.pop.strip
185
+ end
186
+
187
+ # now we'll recombine the last name(s) and first name(s) into one array to be processed as normal
188
+ # We assume if there were multiple last names (i.e. before the comma) that they are all part of the last name.
189
+ first_names + [last_names.join(" ").strip]
190
+ end
191
+
135
192
  def suffixes
136
- %w{Jr Sr II III IV V VI MD PHD Esq DDS}
193
+ %w{Jr Sr II III IV V VI MD PHD Esq DDS}.freeze
137
194
  end
138
195
 
139
196
  def last_name_prefix
@@ -141,11 +198,11 @@ module NameSplitter
141
198
  end
142
199
 
143
200
  def salutations
144
- %w{Mr Mrs Ms Miss Dr Prof Rev Capt Sister Honorable Judge Chief}
201
+ %w{Mr Mrs Ms Miss Dr Prof Rev Capt Sister Honorable Judge Chief}.freeze
145
202
  end
146
203
 
147
204
  def second_first_names
148
- %w{Beth Catherine Louise}
205
+ %w{Beth Catherine Louise}.freeze
149
206
  end
150
207
  end
151
208
  end
@@ -27,7 +27,7 @@ Gem::Specification.new do |spec|
27
27
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
28
  spec.require_paths = ["lib"]
29
29
 
30
- spec.add_development_dependency "bundler", "~> 1.10"
30
+ spec.add_development_dependency "bundler", "~> 2.0"
31
31
  spec.add_development_dependency "rake", "~> 10.0"
32
32
  spec.add_development_dependency "rspec", "~> 3.0"
33
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: name_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Hoen
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-11-15 00:00:00.000000000 Z
11
+ date: 2026-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.10'
19
+ version: '2.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.10'
26
+ version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -79,7 +79,7 @@ licenses:
79
79
  - MIT
80
80
  metadata:
81
81
  allowed_push_host: https://rubygems.org
82
- post_install_message:
82
+ post_install_message:
83
83
  rdoc_options: []
84
84
  require_paths:
85
85
  - lib
@@ -94,9 +94,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
94
94
  - !ruby/object:Gem::Version
95
95
  version: '0'
96
96
  requirements: []
97
- rubyforge_project:
98
- rubygems_version: 2.4.8
99
- signing_key:
97
+ rubygems_version: 3.4.10
98
+ signing_key:
100
99
  specification_version: 4
101
100
  summary: Gem for splitting full names into the component parts
102
101
  test_files: []