nomener 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2c250686ec8119b88c20fe08ee5e4ecd720411b5
4
- data.tar.gz: 1efeb93999ebf6d3ccc9b879c9ac0a0c84696cac
3
+ metadata.gz: cd56c9231c23b185899cb3f0c85f7bfd050fbd0c
4
+ data.tar.gz: b37745c76f6263bd7fb771aab566286f44937693
5
5
  SHA512:
6
- metadata.gz: 1e66b1549074d0ffd816aae1bc4c84cc143a324f09e94e23ddcd0c61b29d04fe497e275a508d7320dba446370e5a528ecd03ea105ab5baa978bf3c30876c300d
7
- data.tar.gz: 3b2fbf3c124d97965e8836eed594276041a5135079f162ca2024f213ac08a894e7467f3fcf69101cdc9edadf4d4e124a73dc76c97a54d3f5f74433d028a3f384
6
+ metadata.gz: c314a3ad037c4a9fdb120ef77fbb9c40c7998574f99c74f8062120dbefa940e7995c4def79637793b21f6d0f41d3efa747c80813fbc4403c09c06197f1e3998d
7
+ data.tar.gz: dcf8213aeb321016c3e01c798b379c620b178e0456780d1a6a65cd9ce43c55f2a1ed101b032888303cdded55d558b164da14b036ca4089f073dcd0a483bd34c7
data/README.md CHANGED
@@ -1,6 +1,7 @@
1
1
  # Nomener
2
2
  [![Gem Version](https://badge.fury.io/rb/nomener.svg)](http://badge.fury.io/rb/nomener)
3
3
  [![Build Status](https://travis-ci.org/dan-ding/nomener.svg?branch=master)](https://travis-ci.org/dan-ding/nomener)
4
+ [![Code Climate](https://codeclimate.com/github/dan-ding/nomener/badges/gpa.svg)](https://codeclimate.com/github/dan-ding/nomener)
4
5
 
5
6
  Nomener assists with parsing people's names that they give themselves (or other people). Nomener ~~is~~ was a fork of [People](https://github.com/dan-ding/people) as it uses some code contributed there. It's currently geared towards western style name formatting, however other cultural name formatting is (or would like to be) supported. Currently it attempts to parse names through pattern matching without using large(r) dictionary/library/data files (except for name decorations and suffixes, see usage). It may not be possible to do without such in all languages.
6
7
 
@@ -2,9 +2,7 @@
2
2
 
3
3
  # For Ruby 1.9.3, 2.0.0
4
4
  rv = RUBY_VERSION.split(".")[(0..1)].join("")
5
- if rv >= '19' && rv < '21'
6
- require "string-scrub"
7
- end
5
+ require "string-scrub" if(rv >= '19' && rv < '21')
8
6
 
9
7
  module Nomener
10
8
  module Helper
@@ -20,24 +18,23 @@ module Nomener
20
18
  #
21
19
  # Returns a string which is (ideally) pretty much the same as it was given.
22
20
  def self.reformat(name, leftleft = '"', rightright = '"', left = "'", right = "'")
23
- n = name.dup
24
- n.scrub! # remove illegal characters
21
+ nomen = name.dup
22
+ nomen.scrub! # remove illegal characters
25
23
 
26
24
  # translate fullwidth to typewriter
27
- n.tr!("\uFF02\uFF07", "\u0022\u0027")
25
+ nomen.tr!("\uFF02\uFF07", "\u0022\u0027")
28
26
 
29
- n.tr!("\u0022\u00AB\u201C\u201E\u2036\u300E\u301D\u301F\uFE43", leftleft) # replace left double quotes
30
- n.tr!("\u0022\u00BB\u201D\u201F\u2033\u300F\u301E\uFE44", rightright) # replace right double quotes
27
+ nomen.tr!("\u0022\u00AB\u201C\u201E\u2036\u300E\u301D\u301F\uFE43", leftleft) # replace left double quotes
28
+ nomen.tr!("\u0022\u00BB\u201D\u201F\u2033\u300F\u301E\uFE44", rightright) # replace right double quotes
31
29
 
32
- n.tr!("\u0027\u2018\u201A\u2035\u2039\u300C\uFE41\uFF62", left) # replace left single quotes
33
- n.tr!("\u0027\u2019\u201B\u2032\u203A\u300D\uFE42\uFF62", right) # replace left single quotes
30
+ nomen.tr!("\u0027\u2018\u201A\u2035\u2039\u300C\uFE41\uFF62", left) # replace left single quotes
31
+ nomen.tr!("\u0027\u2019\u201B\u2032\u203A\u300D\uFE42\uFF62", right) # replace right single quotes
34
32
 
35
- #n.gsub!(/\./, ' ')
36
- n.gsub!(/[^\p{Alpha}\-&\/ \.\,\'\"#{leftleft}#{rightright}#{left}#{right}\(\)]/, " ") # what others may be in a name?
37
- n.gsub!(/\p{Blank}+/, " ") # compress whitespace
38
- n.strip! # trim space
33
+ nomen.gsub!(/[^\p{Alpha}\-&\/ \.\,\'\"#{leftleft}#{rightright}#{left}#{right}\(\)]/, " ") # what others may be in a name?
34
+ nomen.squeeze! " "
35
+ nomen.strip!
39
36
 
40
- n
37
+ nomen
41
38
  end
42
39
 
43
40
  end
@@ -50,37 +50,32 @@ module Nomener
50
50
 
51
51
  fix = last.dup
52
52
 
53
- # if there are multiple last names separated by spaces
54
- fix = fix.split(" ").map { |v| v.capitalize }.join " "
55
-
56
53
  # if there are multiple last names separated by a dash
57
- if !fix.index("-").nil?
58
- fix = fix.split("-").map { |v|
59
- v.split(" ").map { |w| w.capitalize }.join " "
60
- }.join "-"
61
- end
62
-
63
- # anything begining with Mac and not ending in [aciozj]
64
- if m = fix.match(/Mac([\p{Alpha}]{2,}[^aciozj])/i)
65
- unless m[1].match(%r!^
66
- hin|
67
- hlen|
68
- har|
69
- kle|
70
- klin|
71
- kie|
72
- hado| # Portugese
73
- evicius| # Lithuanian
74
- iulis| # Lithuanian
75
- ias # Lithuanian
76
- !x)
77
- fix.sub!(/Mac#{m[1]}/, "Mac#{m[1].capitalize}")
78
- end
79
- elsif m = fix.match(/Mc([\p{Alpha}]{2,})/i) # anything beginning with Mc
80
- fix.sub!(/Mc#{m[1]}/, "Mc#{m[1].capitalize}")
81
- elsif fix.match(/'\p{Alpha}/) # names like D'Angelo or Van 't Hooft
82
- fix.gsub!(/('\p{Alpha})/) { |s| (s[-1] != 't') ? s.upcase : s } #no cap 't
83
- end
54
+ fix = fix.split("-").map { |v|
55
+ v.split(" ").map { |w| w.capitalize }.join " "
56
+ }.join "-"
57
+
58
+ # anything beginning with Mac and not ending in [aciozj], except for a few
59
+ fix.sub!(/Mac(?!
60
+ hin|
61
+ hlen|
62
+ har|
63
+ kle|
64
+ klin|
65
+ kie|
66
+ hado| # Portugese
67
+ evicius| # Lithuanian
68
+ iulis| # Lithuanian
69
+ ias # Lithuanian
70
+ )([\p{Alpha}]{2,}[^aAcCiIoOzZjJ])\b/x) { |s| "Mac#{$1.capitalize}" }
71
+
72
+ fix.sub! /\bMacmurdo\b/, "MacMurdo" # fix MacMurdo
73
+
74
+ # anything beginning with Mc, Mcdonald == McDonald
75
+ fix.sub!(/Mc(\p{Alpha}{2,})/) { |s| "Mc#{$1.capitalize}" }
76
+
77
+ # names like D'Angelo or Van 't Hooft, no cap 't
78
+ fix.gsub!(/('\p{Alpha})(?=\p{Alpha})/) { |s| "'#{$1[(1..-1)].capitalize}" }
84
79
 
85
80
  fix
86
81
  end
@@ -15,7 +15,7 @@ module Nomener
15
15
  TRAILER_TRASH = /[,|\s]+$/
16
16
 
17
17
  # regex for name characters we aren't going to use
18
- DIRTY_STUFF = /[^,'(?:\p{Alpha}(?<\.))\p{Alpha}]{2,}/
18
+ DIRTY_STUFF = /[^,'(?:\p{Alpha}(?<\.))\p{Alpha}\p{Blank}]{2,}/
19
19
 
20
20
  # regex for boundaries we'll use to find leftover nickname boundaries
21
21
  NICKNAME_LEFTOVER = /["'\(\)]{2}/
@@ -62,85 +62,58 @@ module Nomener
62
62
  # Returns a hash of name parts or nil
63
63
  # Raises ArgumentError if 'name' is not a string or is empty
64
64
  def self.parse!(name, format = {:order => :auto, :spacelimit => 0})
65
- raise ArgumentError, 'Name to parse not provided' unless (name.kind_of?(String) && !name.empty?)
65
+ raise ArgumentError, "Name to parse not provided" unless (name.kind_of?(String) && !name.empty?)
66
66
 
67
67
  name = Nomener::Helper.reformat name
68
+ newname = { :title => "", :first => "", :nick => "", :middle => "", :last => "", :suffix => "" }
68
69
 
69
70
  # grab any identified nickname before working on the rest
70
- nick = parse_nick! name
71
+ newname[:nick] = parse_nick! name
71
72
  cleanup! name
72
73
 
73
74
  # grab any suffix' we can find
74
- suffix = parse_suffix! name
75
+ newname[:suffix] = parse_suffix! name
75
76
  cleanup! name
76
77
 
77
- title = parse_title! name
78
- cleanup! name
78
+ newname[:title] = parse_title! name
79
+ name = dustoff name
80
+
81
+ newname[:last] = name # possibly mononyms
82
+
83
+
84
+ case name
85
+ when /,/ # if there's a comma, it may be a useful hint
86
+ clues = name.split(",").each { |i| i.strip! }
87
+
88
+ raise ParseError, "Could not decipher commas in \"#{name}\"" if clues.length > 2
79
89
 
80
- name.gsub! PERIOD, ' '
81
- name.squeeze! " "
82
- name.strip!
83
-
84
- first = last = middle = ""
85
-
86
- # if there's a comma, it may be a useful hint
87
- if !name.index(',').nil? # && (format[:order] == :auto || format[:order] == :lcf)
88
- clues = name.split(",")
89
- clues.each { |i| i.strip! }
90
-
91
- # convention is last, first
92
- if clues.length == 2
93
- last, first = clues
94
-
95
- # Mies van der Rohe, Ludwig
96
- # Snepscheut, Jan L. A. van de
97
- # check the last by comparing a re-ordering of the name
98
- first_parts = first.split " "
99
- unless first_parts.length == 1
100
- check = parse_last!("#{first} #{last}", :fl)
101
- # let's trust the full name
102
- if check != last
103
- first = "#{first} #{last}".sub(check, '').strip
104
- last = check
105
- end
90
+ # convention is last, first when there's a comma
91
+ newname[:last], newname[:first] = clues
92
+
93
+ # check the last by comparing a re-ordering of the name
94
+ # Mies van der Rohe, Ludwig
95
+ # Snepscheut, Jan L. A. van de
96
+ unless newname[:first].nil? || newname[:first].split(" ").length == 1
97
+ check = parse_last!("#{newname[:first]} #{newname[:last]}", :fl)
98
+
99
+ # let's trust the full name
100
+ if check != newname[:last]
101
+ newname[:first] = "#{newname[:first]} #{newname[:last]}".sub(check, "").strip
102
+ newname[:last] = check
106
103
  end
107
- # titles are part of the first name
108
- title = parse_title!(first) if title.nil? || title.empty?
109
- elsif clues.length == 1
110
- last = clues.shift
111
- else
112
- raise ParseError, "Could not decipher commas in \"#{name}\""
113
104
  end
114
- elsif !name.index(" ").nil?
115
- last = parse_last!(name, format[:order])
116
- first, middle = parse_first!(name, format[:spacelimit])
117
- else
118
- last = name # possibly mononym
119
- first = ""
105
+
106
+ # titles which are part of the first name...
107
+ newname[:title] = parse_title!(newname[:first]) if newname[:title].empty?
108
+
109
+ when / / # no comma, check for space on first then last
110
+ newname[:last] = parse_last!(name, format[:order])
111
+ newname[:first], newname[:middle] = parse_first!(name, format[:spacelimit])
120
112
  end
121
113
 
122
- {
123
- :title => (title || "").strip,
124
- :suffix => (suffix || "").strip,
125
- :nick => (nick || "").strip,
126
- :first => (first || "").strip,
127
- :last => (last || "").strip,
128
- :middle => (middle || "").strip
129
- }
130
- end
114
+ cleanup! newname[:last], newname[:first], newname[:middle]
131
115
 
132
- # Internal: Clean up a string where there are numerous consecutive and trailing non-name characters.
133
- # Modifies given string in place.
134
- #
135
- # dirty - string to clean up
136
- #
137
- # Returns nothing
138
- def self.cleanup!(dirty)
139
- dirty.gsub! DIRTY_STUFF, ''
140
- dirty.squeeze! " "
141
- # remove any trailing commas or whitespace
142
- dirty.gsub! TRAILER_TRASH, ''
143
- dirty.strip!
116
+ newname
144
117
  end
145
118
 
146
119
  # Internal: pull off a title if we can
@@ -153,13 +126,9 @@ module Nomener
153
126
  titles = []
154
127
  nm.gsub! TITLES do |title|
155
128
  titles << title.strip
156
- ''
129
+ ""
157
130
  end
158
- t = titles.join " "
159
- t.gsub! PERIOD, ' '
160
- t.squeeze! " "
161
- t.strip!
162
- t
131
+ dustoff titles.join(" ")
163
132
  end
164
133
 
165
134
  # Internal: pull off what suffixes we can
@@ -172,13 +141,9 @@ module Nomener
172
141
  suffixes = []
173
142
  nm.gsub! SUFFIXES do |suffix|
174
143
  suffixes << suffix.strip
175
- ''
144
+ ""
176
145
  end
177
- s = suffixes.join " "
178
- s.gsub! /\./, ' '
179
- s.squeeze! " "
180
- s.strip!
181
- s
146
+ dustoff suffixes.join(" ")
182
147
  end
183
148
 
184
149
  # Internal: parse nickname out of string. presuming it's in quotes
@@ -189,14 +154,12 @@ module Nomener
189
154
  # Returns string of the nickname found or an empty string
190
155
  def self.parse_nick!(nm)
191
156
  nick = ""
192
- nm.sub! NICKNAME, ''
193
- nick = $1.strip unless $1.nil?
194
- nm.sub! NICKNAME_LEFTOVER, ''
195
- nm.squeeze! " "
196
- nick.gsub! /\./, ' '
197
- nick.squeeze! " "
198
- nick.strip!
199
- nick
157
+ nm.sub! NICKNAME do |z|
158
+ nick = $1.strip
159
+ ""
160
+ end
161
+ nm.sub! NICKNAME_LEFTOVER, ""
162
+ dustoff nick
200
163
  end
201
164
 
202
165
  # Internal: parse last name from string
@@ -207,24 +170,22 @@ module Nomener
207
170
  #
208
171
  # Returns string of the last name found or an empty string
209
172
  def self.parse_last!(nm, format = :fl)
210
- last = ''
173
+ last = ""
211
174
 
212
- if format == :auto
213
- format = :fl if nm.index(',').nil?
214
- # format = :lcf if !nm.index(',').nil?
215
- end
175
+ format = :fl if (format == :auto && nm.index(",").nil?)
176
+ format = :lcf if (format == :auto && nm.index(","))
216
177
 
217
- if format == :fl && n = nm.match( FIRSTLAST_MATCHER )
218
- last = n[:fam].strip
219
- nm.sub!(last, "").strip!
220
- elsif format == :lf && n = nm.match( LASTFIRST_MATCHER )
221
- last = n[:fam].strip
222
- nm.sub!(last, "").strip!
223
- elsif format == :lcf && n = nm.match( LASTCOMFIRST_MATCHER )
224
- last = n[:fam].strip
225
- nm.sub!(last, "").strip!
226
- nm.sub!(',', "").strip!
178
+ # these constants should have the named match :fam
179
+ n = nm.match( FIRSTLAST_MATCHER ) if format == :fl
180
+ n = nm.match( LASTFIRST_MATCHER ) if format == :lf
181
+ n = nm.match( LASTCOMFIRST_MATCHER ) if format == :lcf
182
+
183
+ unless n.nil?
184
+ last = n[:fam].strip if n[:fam]
185
+ nm.sub!(last, "")
186
+ nm.sub!(",", "")
227
187
  end
188
+
228
189
  last
229
190
  end
230
191
 
@@ -236,11 +197,41 @@ module Nomener
236
197
  #
237
198
  # Returns an array containing the first name and middle name if any
238
199
  def self.parse_first!(nm, namecount = 0)
239
- nm.tr! '.', ' '
240
- first, middle = nm.split ' ', namecount
200
+ nm.tr! ".", " "
201
+ nm.squeeze! " "
202
+ first, middle = nm.split " ", namecount
241
203
 
242
204
  [first || "", middle || ""]
243
205
  end
244
206
 
207
+ private
208
+ # Internal: Clean up a string where there are numerous consecutive and trailing non-name characters.
209
+ # Modifies given string in place.
210
+ #
211
+ # args - strings to clean up
212
+ #
213
+ # Returns nothing
214
+ def self.cleanup!(*args)
215
+ args.each do |dirty|
216
+ next if(dirty.nil? || !dirty.kind_of?(String))
217
+
218
+ dirty.gsub! DIRTY_STUFF, ""
219
+ dirty.squeeze! " "
220
+ # remove any trailing commas or whitespace
221
+ dirty.gsub! TRAILER_TRASH, ""
222
+ dirty.strip!
223
+ end
224
+ end
225
+
226
+ # Internal: a softer clean we keep re-using
227
+ #
228
+ # str - the string to dust off
229
+ #
230
+ # Returns the cleaned-up string (periods replaced, spaces squeezed, ends stripped)
231
+ def self.dustoff(str)
232
+ str = str.gsub PERIOD, " "
233
+ str = str.squeeze " "
234
+ str = str.strip
235
+ end
245
236
  end
246
237
  end
@@ -1,4 +1,4 @@
1
1
  #-- encoding: UTF-8
2
2
  module Nomener
3
- VERSION = "0.2.6"
3
+ VERSION = "0.2.7"
4
4
  end
@@ -81,6 +81,33 @@ RSpec.describe "Nomener::Name" do
81
81
  end
82
82
  end
83
83
 
84
+ context "with last name alternates" do
85
+ name = Nomener::Name.new("Joe Smith")
86
+
87
+ it "returns from surname the last name" do
88
+ expect(name.surname).to eq "Smith"
89
+ end
90
+
91
+ it "returns from family the last name" do
92
+ expect(name.family).to eq "Smith"
93
+ end
94
+ end
95
+
96
+ context "with first name alternate" do
97
+ name = Nomener::Name.new("Joe Smith")
98
+
99
+ it "returns from surname the last name" do
100
+ expect(name.given).to eq "Joe"
101
+ end
102
+ end
103
+
104
+ context "with a name method to_h" do
105
+ it "responds with a hash" do
106
+ name = Nomener::Name.new("Joe Smith")
107
+ expect(name.to_h).to be_a Hash
108
+ end
109
+ end
110
+
84
111
  context "with capit" do
85
112
  name = Nomener::Name.new
86
113
  [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nomener
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dante Piombino