name_splitter 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +2 -0
- data/lib/name_splitter/version.rb +1 -1
- data/lib/name_splitter.rb +102 -45
- data/name_splitter.gemspec +1 -1
- metadata +8 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 1472fb0e5a7b2b81f64c86f2301ddcf20c27a43a19dda65d47ef7e42bd5445e3
|
|
4
|
+
data.tar.gz: 692421abfd75e65c12e58ad3550d5df2f504311acf35596f38938ba754e27cd4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0286c130755f4db82077a07f867f7903e418708948059dcdd1fab2c88b086ef6eacc0e95b35af571be99821e483f34e5e7044a41bb40f27c3729e2967e8c1e47'
|
|
7
|
+
data.tar.gz: 3b6665c239b96b123dfea53441222abaa64c6e85b36b769180aac3ac16531fecc4c7e1bf6ad16f5bc2631d832bbf66a32c11ed02bc92d6cc860a5b878f1d41f4
|
data/README.md
CHANGED
|
@@ -30,6 +30,8 @@ names.last_name # Farmer
|
|
|
30
30
|
names.salutation # Ms.
|
|
31
31
|
````
|
|
32
32
|
|
|
33
|
+
See the [spec file](spec/name_splitter_spec.rb) for documentation on all of the ways a name can be split.
|
|
34
|
+
|
|
33
35
|
## Development
|
|
34
36
|
|
|
35
37
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/name_splitter.rb
CHANGED
|
@@ -2,83 +2,96 @@ require "name_splitter/version"
|
|
|
2
2
|
|
|
3
3
|
module NameSplitter
|
|
4
4
|
class Splitter
|
|
5
|
-
|
|
5
|
+
LAST_COMMA_FIRST_FORMAT = "last_comma_first"
|
|
6
|
+
|
|
7
|
+
attr_accessor :suffixes, :first_name, :last_name, :middle_name, :last_name_prefix, :salutation, :suffix, :last_name_first_format
|
|
6
8
|
attr_reader :name
|
|
7
9
|
|
|
8
10
|
def self.call(fullname)
|
|
9
11
|
new(fullname)
|
|
10
12
|
end
|
|
11
13
|
|
|
12
|
-
def initialize(fullname = "")
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
def initialize(fullname = "", options = {})
|
|
15
|
+
@salutation = ""
|
|
16
|
+
@first_name = ""
|
|
17
|
+
@middle_name = ""
|
|
18
|
+
@last_name = ""
|
|
19
|
+
@suffix = ""
|
|
20
|
+
@options = options
|
|
21
|
+
@last_name_first_format = options[:format] == LAST_COMMA_FIRST_FORMAT
|
|
22
|
+
@delimeter = /[ ]+/
|
|
23
|
+
self.name = fullname
|
|
19
24
|
end
|
|
20
25
|
|
|
21
26
|
def name
|
|
22
|
-
|
|
27
|
+
return "#{first_name.strip} #{last_name.strip}#{suffix.to_s.empty? ? "" : ", " + suffix}".strip if first_name.strip.length > 0
|
|
28
|
+
|
|
29
|
+
return "#{salutation.strip} #{last_name.strip}#{suffix.to_s.empty? ? "" : ", " + suffix}".strip
|
|
23
30
|
end
|
|
24
31
|
|
|
25
32
|
def name=(fullname)
|
|
26
|
-
|
|
33
|
+
return if fullname.nil? || fullname.strip.empty?
|
|
34
|
+
|
|
35
|
+
if last_name_first_format
|
|
36
|
+
name_arr = split_name_when_last_name_is_first(fullname)
|
|
37
|
+
else
|
|
38
|
+
name_arr ||= fullname.to_s.split(@delimeter)
|
|
39
|
+
end
|
|
27
40
|
return if name_arr.empty?
|
|
28
41
|
|
|
29
42
|
if contains_suffix(name_arr)
|
|
30
|
-
self.suffix = name_arr.pop
|
|
43
|
+
self.suffix = name_arr.pop.strip
|
|
31
44
|
end
|
|
32
45
|
|
|
33
46
|
if name_arr.length == 1
|
|
34
|
-
self.first_name = name_arr.shift
|
|
47
|
+
self.first_name = name_arr.shift.strip
|
|
35
48
|
return
|
|
36
49
|
end
|
|
37
50
|
|
|
38
51
|
if is_first_element_a_last_name(name_arr)
|
|
39
|
-
self.last_name = name_arr.shift.gsub(",","")
|
|
52
|
+
self.last_name = name_arr.shift.gsub(",","").strip
|
|
40
53
|
end
|
|
41
54
|
|
|
42
|
-
self.salutation = name_arr.shift(number_of_salutations(name_arr)).join(" ")
|
|
55
|
+
self.salutation = name_arr.shift(number_of_salutations(name_arr)).join(" ").strip
|
|
43
56
|
|
|
44
57
|
if name_arr.length == 1 && last_name.empty?
|
|
45
|
-
self.last_name = name_arr.shift
|
|
58
|
+
self.last_name = name_arr.shift.strip
|
|
46
59
|
else
|
|
47
|
-
self.first_name = name_arr.shift(number_of_first_names(name_arr)).join(" ")
|
|
48
|
-
self.middle_name = name_arr.shift(number_of_middle_names(name_arr)).join(" ")
|
|
60
|
+
self.first_name = name_arr.shift(number_of_first_names(name_arr)).join(" ").strip
|
|
61
|
+
self.middle_name = name_arr.shift(number_of_middle_names(name_arr)).join(" ").strip
|
|
49
62
|
self.last_name_check(name_arr)
|
|
50
63
|
end
|
|
51
64
|
end
|
|
52
65
|
|
|
53
66
|
def last_name_check(last_name_arr)
|
|
54
67
|
#accepts either a string or an array
|
|
55
|
-
if last_name_arr.
|
|
68
|
+
if last_name_arr.is_a?(String)
|
|
56
69
|
last_name_arr = last_name_arr.split(" ")
|
|
57
70
|
end
|
|
58
71
|
return false if last_name_arr.empty?
|
|
59
72
|
self.suffix = last_name_arr.pop if contains_suffix(last_name_arr)
|
|
60
|
-
self.last_name = last_name_arr.join(" ").gsub(/[.,]+/, "")
|
|
73
|
+
self.last_name = last_name_arr.join(" ").gsub(/[.,]+/, "").strip
|
|
61
74
|
end
|
|
62
75
|
|
|
63
76
|
private
|
|
64
77
|
|
|
65
78
|
def number_of_middle_names(name_arr)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
if
|
|
69
|
-
|
|
70
|
-
|
|
79
|
+
number_of_non_middle_names = 0
|
|
80
|
+
number_of_non_middle_names += 1 if self.first_name.empty?
|
|
81
|
+
number_of_non_middle_names += 1 if self.last_name.empty?
|
|
82
|
+
number_of_non_middle_names += 1 if contains_last_name_prefix(name_arr)
|
|
83
|
+
number_of_non_middle_names += 1 if contains_suffix(name_arr)
|
|
84
|
+
return 0 if name_arr.length <= number_of_non_middle_names
|
|
71
85
|
|
|
72
|
-
#
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
name_arr
|
|
78
|
-
return 1
|
|
79
|
-
end
|
|
86
|
+
# p "last name empty: #{self.last_name.empty?}"
|
|
87
|
+
# p "contains_last_name_prefix: #{contains_last_name_prefix(name_arr)}"
|
|
88
|
+
# p "first_name: #{first_name}"
|
|
89
|
+
# p "last_name: #{last_name}"
|
|
90
|
+
# p "number_of_non_middle_names: #{number_of_non_middle_names}"
|
|
91
|
+
# p "name_arr: #{name_arr}"
|
|
80
92
|
|
|
81
|
-
|
|
93
|
+
# assume all other names that are not last name prefixes or suffixes are middle names
|
|
94
|
+
name_arr.length - number_of_non_middle_names
|
|
82
95
|
end
|
|
83
96
|
|
|
84
97
|
def number_of_salutations(name_arr)
|
|
@@ -102,38 +115,82 @@ module NameSplitter
|
|
|
102
115
|
|
|
103
116
|
def is_second_first_name?(_name)
|
|
104
117
|
return false unless _name
|
|
118
|
+
|
|
105
119
|
second_first_names.collect { |x| x.upcase }.include?(_name.upcase)
|
|
106
120
|
end
|
|
107
121
|
|
|
122
|
+
# this is a bit of a hack to determine if the first element in the name array is actually a last name.
|
|
123
|
+
# We assume that if there is a comma in the first element, then it is a last name.
|
|
124
|
+
# This is not always the case, but it is a common format for names and it allows us to correctly
|
|
125
|
+
# parse names like "Smith, John" and "Smith, John C." without incorrectly parsing names
|
|
126
|
+
# like "Smith Johnson Jr., Jim C." as having a last name of "Smith Johnson Jr."
|
|
127
|
+
# We don't need this if the last_name_first_format option is set to true because we will already be splitting the
|
|
128
|
+
# name on the comma and assigning the first element as the last name.
|
|
129
|
+
# But a file could have a mix of formats, or the format is not specified, so we want to be able to handle this case
|
|
130
|
+
# even if the last_name_first_format option is not set to true.
|
|
108
131
|
def is_first_element_a_last_name(name_arr)
|
|
109
132
|
name_arr[0].strip.match(/,/)
|
|
110
133
|
end
|
|
111
134
|
|
|
112
|
-
def anded_names?(
|
|
113
|
-
contains_an_and(
|
|
135
|
+
def anded_names?(name)
|
|
136
|
+
contains_an_and(name)
|
|
114
137
|
end
|
|
115
138
|
|
|
116
139
|
def contains_an_and(*name_arr)
|
|
117
|
-
name_arr.flatten.select { |
|
|
140
|
+
name_arr.flatten.select { |name| ["and", "&"].include?(name.to_s.strip) }.any?
|
|
118
141
|
end
|
|
119
142
|
|
|
120
|
-
def contains_salutation(
|
|
121
|
-
return false unless
|
|
122
|
-
salutations.collect { |x| x.upcase }.include?(
|
|
143
|
+
def contains_salutation(name)
|
|
144
|
+
return false unless name
|
|
145
|
+
salutations.collect { |x| x.upcase }.include?(name.gsub(/[.,;']+/, "").upcase)
|
|
123
146
|
end
|
|
124
147
|
|
|
148
|
+
# We check if the name array contains a last name prefix by taking the last two elements of the name array and testing whether the second-to-last one is a known prefix.
|
|
149
|
+
# This is because last name prefixes are typically found in the last name portion of the name, and they are
|
|
150
|
+
# typically found before the last name. For example, in the name "John de la Smith", "de la" is a last name prefix
|
|
151
|
+
# and it is found before the last name "Smith".
|
|
125
152
|
def contains_last_name_prefix(name_arr)
|
|
126
|
-
|
|
153
|
+
return false if name_arr.length < 2
|
|
154
|
+
last_two = name_arr.last(2)
|
|
155
|
+
last_name_prefix.collect { |x| x.upcase }.include?(last_two.first.upcase)
|
|
127
156
|
end
|
|
128
157
|
|
|
158
|
+
# the name can't contain a suffix unless there are at least two names left in the name array.
|
|
129
159
|
def contains_suffix(name_arr)
|
|
130
|
-
raise "contains_suffix must receive an array" if !name_arr.
|
|
131
|
-
return false if name_arr.length
|
|
160
|
+
raise "contains_suffix must receive an array" if !name_arr.is_a?(Array)
|
|
161
|
+
return false if name_arr.length < 2
|
|
162
|
+
|
|
132
163
|
suffixes.collect { |x| x.upcase }.include?(name_arr.last.gsub(/[.,;']+/, "").upcase)
|
|
133
164
|
end
|
|
134
165
|
|
|
166
|
+
# here we assume that everything before the comma is associated with the last name and everything after the comma
|
|
167
|
+
# belongs to the other names (first, middle, salutation). A suffix would most likely be in the last name portion of the name.
|
|
168
|
+
# examples of this format include "Smith, John", "Smith,John C.", "Smith Johnson Jr., Jim C."
|
|
169
|
+
def split_name_when_last_name_is_first(fullname)
|
|
170
|
+
name_arr = fullname.to_s.split(/[,]+/) # we first only want to split out the name(s) that are considered last from the first
|
|
171
|
+
return name_arr if name_arr.length < 2
|
|
172
|
+
|
|
173
|
+
last_names = name_arr[0].split(" ") # we then want to split the last name(s) into an array to check for last name prefixes and suffixes
|
|
174
|
+
first_names = name_arr[1].split(" ") # we also want to split the first name(s) into an array to check for salutations and suffixes
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# let's check if the last name(s) contain a suffix
|
|
178
|
+
if contains_suffix(last_names)
|
|
179
|
+
self.suffix = last_names.pop.strip
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# now let's check if the first name(s) contain a suffix
|
|
183
|
+
if contains_suffix(first_names)
|
|
184
|
+
self.suffix = first_names.pop.strip
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# now we'll recombine the last name(s) and first name(s) into one array to be processed as normal
|
|
188
|
+
# We assume if there were multiple last names (i.e. before the comma) that they are all part of the last name.
|
|
189
|
+
first_names + [last_names.join(" ").strip]
|
|
190
|
+
end
|
|
191
|
+
|
|
135
192
|
def suffixes
|
|
136
|
-
%w{Jr Sr II III IV V VI MD PHD Esq DDS}
|
|
193
|
+
%w{Jr Sr II III IV V VI MD PHD Esq DDS}.freeze
|
|
137
194
|
end
|
|
138
195
|
|
|
139
196
|
def last_name_prefix
|
|
@@ -141,11 +198,11 @@ module NameSplitter
|
|
|
141
198
|
end
|
|
142
199
|
|
|
143
200
|
def salutations
|
|
144
|
-
%w{Mr Mrs Ms Miss Dr Prof Rev Capt Sister Honorable Judge Chief}
|
|
201
|
+
%w{Mr Mrs Ms Miss Dr Prof Rev Capt Sister Honorable Judge Chief}.freeze
|
|
145
202
|
end
|
|
146
203
|
|
|
147
204
|
def second_first_names
|
|
148
|
-
%w{Beth Catherine Louise}
|
|
205
|
+
%w{Beth Catherine Louise}.freeze
|
|
149
206
|
end
|
|
150
207
|
end
|
|
151
208
|
end
|
data/name_splitter.gemspec
CHANGED
|
@@ -27,7 +27,7 @@ Gem::Specification.new do |spec|
|
|
|
27
27
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
|
28
28
|
spec.require_paths = ["lib"]
|
|
29
29
|
|
|
30
|
-
spec.add_development_dependency "bundler", "~>
|
|
30
|
+
spec.add_development_dependency "bundler", "~> 2.0"
|
|
31
31
|
spec.add_development_dependency "rake", "~> 10.0"
|
|
32
32
|
spec.add_development_dependency "rspec", "~> 3.0"
|
|
33
33
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: name_splitter
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tom Hoen
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-02-17 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -16,14 +16,14 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - "~>"
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '
|
|
19
|
+
version: '2.0'
|
|
20
20
|
type: :development
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '
|
|
26
|
+
version: '2.0'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
28
|
name: rake
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -79,7 +79,7 @@ licenses:
|
|
|
79
79
|
- MIT
|
|
80
80
|
metadata:
|
|
81
81
|
allowed_push_host: https://rubygems.org
|
|
82
|
-
post_install_message:
|
|
82
|
+
post_install_message:
|
|
83
83
|
rdoc_options: []
|
|
84
84
|
require_paths:
|
|
85
85
|
- lib
|
|
@@ -94,9 +94,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
94
94
|
- !ruby/object:Gem::Version
|
|
95
95
|
version: '0'
|
|
96
96
|
requirements: []
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
signing_key:
|
|
97
|
+
rubygems_version: 3.4.10
|
|
98
|
+
signing_key:
|
|
100
99
|
specification_version: 4
|
|
101
100
|
summary: Gem for splitting full names into the component parts
|
|
102
101
|
test_files: []
|