namae 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -0
- data/features/examples.feature +16 -0
- data/features/lists.feature +2 -2
- data/lib/namae.rb +1 -1
- data/lib/namae/name.rb +7 -0
- data/lib/namae/parser.rb +138 -151
- data/lib/namae/parser.y +28 -37
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +3 -3
- data/spec/namae/parser_spec.rb +8 -4
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 348bf4a2385c1aa56c35759cc2219a8163fa7cb76e3c05482cd6db7a207906fb
|
|
4
|
+
data.tar.gz: 4329ea23260aef483460581391fcd43c80bde61ecebef419f89c2d23f0cfeffc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 806964f1611f6931acd6e68e4f5a75069b30abf42795191bf084d89adbfe378d98f6b28f10f96cc5cc446ab9068ddafafd6794e71f2c53fd97d2a0aff5a59914
|
|
7
|
+
data.tar.gz: f235fb82617020393be215fe078bd74bbb947ab682f387f5aad87e7bb6e3b62323ee03da30409034ca747beca7b5319905f1a081a43bc8763a25dca5b40722c4
|
data/README.md
CHANGED
|
@@ -121,6 +121,16 @@ ambiguous. For example, multiple family names are always possible in sort-order:
|
|
|
121
121
|
Whilst in display-order, multiple family names are only supported when the
|
|
122
122
|
name contains a particle or a nickname.
|
|
123
123
|
|
|
124
|
+
Namae tries to detect common particles using the `:uppercase_particle` lexer
|
|
125
|
+
pattern. If you prefer to always include particles with the family name, you
|
|
126
|
+
can set the `:include_particle_in_family` parser option.
|
|
127
|
+
|
|
128
|
+
Namae.parse 'Ludwig von Beethoven'
|
|
129
|
+
#-> [#<Name family="Beethoven" given="Ludwig" particle="von">]
|
|
130
|
+
|
|
131
|
+
Namae.options[:include_particle_in_family] = true
|
|
132
|
+
#-> [#<Name family="von Beethoven" given="Ludwig">]
|
|
133
|
+
|
|
124
134
|
Configuration
|
|
125
135
|
-------------
|
|
126
136
|
You can tweak some of Namae's parse rules by configuring the parser's
|
data/features/examples.feature
CHANGED
|
@@ -34,3 +34,19 @@ Feature: Parse the names in the Readme file
|
|
|
34
34
|
| Mr. Yukihiro "Matz" Matsumoto | Yukihiro | | Matsumoto | | | Mr. | Matz |
|
|
35
35
|
| Yukihiro "Matz" Matsumoto Sr. | Yukihiro | | Matsumoto | Sr. | | | Matz |
|
|
36
36
|
| Mr. Yukihiro "Matz" Matsumoto Sr. | Yukihiro | | Matsumoto | Sr. | | Mr. | Matz |
|
|
37
|
+
|
|
38
|
+
@particle
|
|
39
|
+
Scenarios: Particles
|
|
40
|
+
| name | given | particle | family | suffix | title | appellation | nick |
|
|
41
|
+
| Ludwig von Beethoven | Ludwig | von | Beethoven | | | | |
|
|
42
|
+
| Beethoven, Ludwig von | Ludwig von | | Beethoven | | | | |
|
|
43
|
+
| Vincent Van Gogh | Vincent | Van | Gogh | | | | |
|
|
44
|
+
| Vincent van Gogh | Vincent | van | Gogh | | | | |
|
|
45
|
+
| Van Gogh, Vincent | Vincent | Van | Gogh | | | | |
|
|
46
|
+
| van Gogh, Vincent | Vincent | van | Gogh | | | | |
|
|
47
|
+
| Walther von der Vogelheide | Walther | von der | Vogelheide | | | | |
|
|
48
|
+
| Don De Lillo | Don | De | Lillo | | | | |
|
|
49
|
+
| De Lillo, Don | Don | De | Lillo | | | | |
|
|
50
|
+
| Tom Van de Weghe | Tom | Van de | Weghe | | | | |
|
|
51
|
+
| Tom Van De Weghe | Tom | Van De | Weghe | | | | |
|
|
52
|
+
|
data/features/lists.feature
CHANGED
|
@@ -121,12 +121,12 @@ Feature: Parse a list of names
|
|
|
121
121
|
Then the names should be:
|
|
122
122
|
| given | family |
|
|
123
123
|
| M. | Di Proctor |
|
|
124
|
-
| P. | Cooper
|
|
124
|
+
| P. | von Cooper |
|
|
125
125
|
When I parse the names "Di Proctor, M, von Cooper, P"
|
|
126
126
|
Then the names should be:
|
|
127
127
|
| given | family |
|
|
128
128
|
| M | Di Proctor |
|
|
129
|
-
| P | Cooper
|
|
129
|
+
| P | von Cooper |
|
|
130
130
|
|
|
131
131
|
Scenario: A list of names with two consecutive accented characters
|
|
132
132
|
Given I want to include particles in the family name
|
data/lib/namae.rb
CHANGED
data/lib/namae/name.rb
CHANGED
|
@@ -183,6 +183,13 @@ module Namae
|
|
|
183
183
|
self
|
|
184
184
|
end
|
|
185
185
|
|
|
186
|
+
def merge_particles!
|
|
187
|
+
self.family = [dropping_particle, particle, family].compact.join(' ')
|
|
188
|
+
self.dropping_particle = nil
|
|
189
|
+
self.particle = nil
|
|
190
|
+
self
|
|
191
|
+
end
|
|
192
|
+
|
|
186
193
|
# @return [String] a string representation of the name
|
|
187
194
|
def inspect
|
|
188
195
|
"#<Name #{each_pair.map { |k,v| [k,v.inspect].join('=') if v }.compact.join(' ')}>"
|
data/lib/namae/parser.rb
CHANGED
|
@@ -11,7 +11,7 @@ require 'strscan'
|
|
|
11
11
|
module Namae
|
|
12
12
|
class Parser < Racc::Parser
|
|
13
13
|
|
|
14
|
-
module_eval(<<'...end parser.y/module_eval...', 'parser.y',
|
|
14
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 111)
|
|
15
15
|
|
|
16
16
|
@defaults = {
|
|
17
17
|
:debug => false,
|
|
@@ -23,7 +23,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
|
|
|
23
23
|
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
|
24
24
|
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
|
25
25
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
|
|
26
|
-
:uppercase_particle => /\s*\b(
|
|
26
|
+
:uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
|
|
27
27
|
}
|
|
28
28
|
|
|
29
29
|
class << self
|
|
@@ -90,7 +90,9 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
|
|
|
90
90
|
def parse!(string)
|
|
91
91
|
@input = StringScanner.new(normalize(string))
|
|
92
92
|
reset
|
|
93
|
-
do_parse
|
|
93
|
+
names = do_parse
|
|
94
|
+
names.map(&:merge_particles!) if include_particle_in_family?
|
|
95
|
+
names
|
|
94
96
|
end
|
|
95
97
|
|
|
96
98
|
def normalize(string)
|
|
@@ -207,86 +209,84 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
|
|
|
207
209
|
##### State transition tables begin ###
|
|
208
210
|
|
|
209
211
|
racc_action_table = [
|
|
210
|
-
-
|
|
211
|
-
-41, -
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
212
|
+
-43, 36, 26, 37, -41, 38, 39, -43, -42, -43,
|
|
213
|
+
-43, -41, -40, -41, -41, -42, 45, -42, -42, -40,
|
|
214
|
+
50, -40, -40, 72, 59, 58, 60, 73, 16, 13,
|
|
215
|
+
17, -36, 61, 7, 18, 65, 14, 16, 25, 17,
|
|
216
|
+
16, 25, 17, 28, 18, 14, 65, 45, 14, 36,
|
|
217
|
+
34, 37, 68, 16, 13, 17, 26, 35, 7, 18,
|
|
218
|
+
18, 14, 16, 25, 17, 28, 36, 34, 37, 45,
|
|
219
|
+
14, 36, 34, 37, 35, 36, 34, 37, 45, 35,
|
|
220
|
+
36, 52, 37, 35, -22, -22, -22, 18, 35, 59,
|
|
221
|
+
58, 60, -22, 36, 34, 37, 45, 61, 36, 34,
|
|
222
|
+
37, 35, 59, 58, 60, 65, 35, nil, nil, 45,
|
|
223
|
+
61, 59, 58, 60, 59, 58, 60, nil, 45, 61,
|
|
224
|
+
19, nil, 61, 59, 58, 60, -40, 20, -24, nil,
|
|
225
|
+
nil, 61, nil, -40 ]
|
|
223
226
|
|
|
224
227
|
racc_action_check = [
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
228
|
+
14, 48, 8, 48, 16, 11, 19, 14, 17, 14,
|
|
229
|
+
14, 16, 25, 16, 16, 17, 27, 17, 17, 25,
|
|
230
|
+
31, 25, 25, 55, 55, 55, 55, 56, 0, 0,
|
|
231
|
+
0, 55, 55, 0, 0, 56, 0, 5, 5, 5,
|
|
232
|
+
9, 9, 9, 9, 43, 5, 44, 46, 9, 10,
|
|
233
|
+
10, 10, 49, 20, 20, 20, 64, 10, 20, 20,
|
|
234
|
+
66, 20, 23, 23, 23, 23, 24, 24, 24, 67,
|
|
235
|
+
23, 28, 28, 28, 24, 29, 29, 29, 70, 28,
|
|
236
|
+
33, 33, 33, 29, 34, 34, 34, 75, 33, 38,
|
|
237
|
+
38, 38, 34, 41, 41, 41, 38, 38, 47, 47,
|
|
238
|
+
47, 41, 50, 50, 50, 77, 47, nil, nil, 50,
|
|
239
|
+
50, 68, 68, 68, 73, 73, 73, nil, 68, 68,
|
|
240
|
+
1, nil, 73, 78, 78, 78, 13, 1, 13, nil,
|
|
241
|
+
nil, 78, nil, 13 ]
|
|
238
242
|
|
|
239
243
|
racc_action_pointer = [
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
nil, nil,
|
|
243
|
-
nil,
|
|
244
|
-
nil,
|
|
245
|
-
|
|
246
|
-
nil, nil,
|
|
247
|
-
|
|
248
|
-
114, nil ]
|
|
244
|
+
25, 120, nil, nil, nil, 34, nil, nil, -7, 37,
|
|
245
|
+
46, 3, nil, 126, 0, nil, 4, 8, nil, 6,
|
|
246
|
+
50, nil, nil, 59, 63, 12, nil, 6, 68, 72,
|
|
247
|
+
nil, 18, nil, 77, 81, nil, nil, nil, 86, nil,
|
|
248
|
+
nil, 90, nil, 35, 36, nil, 37, 95, -2, 50,
|
|
249
|
+
99, nil, nil, nil, nil, 21, 25, nil, nil, nil,
|
|
250
|
+
nil, nil, nil, nil, 47, nil, 51, 59, 108, nil,
|
|
251
|
+
68, nil, nil, 111, nil, 78, nil, 95, 120, nil ]
|
|
249
252
|
|
|
250
253
|
racc_action_default = [
|
|
251
|
-
-1, -
|
|
252
|
-
-
|
|
253
|
-
-6, -7, -
|
|
254
|
-
-31, -
|
|
255
|
-
|
|
256
|
-
-
|
|
257
|
-
-
|
|
258
|
-
-
|
|
259
|
-
-29, -13 ]
|
|
254
|
+
-1, -52, -2, -4, -5, -52, -8, -9, -10, -25,
|
|
255
|
+
-52, -52, -19, -22, -23, -30, -32, -33, -50, -52,
|
|
256
|
+
-52, -6, -7, -52, -52, -22, -51, -44, -52, -52,
|
|
257
|
+
-31, -15, -20, -25, -24, -23, -32, -33, -38, 80,
|
|
258
|
+
-3, -52, -15, -48, -45, -46, -44, -52, -25, -14,
|
|
259
|
+
-38, -21, -22, -16, -26, -39, -28, -34, -40, -41,
|
|
260
|
+
-42, -43, -14, -11, -49, -47, -48, -44, -38, -17,
|
|
261
|
+
-52, -35, -37, -52, -12, -48, -18, -27, -29, -13 ]
|
|
260
262
|
|
|
261
263
|
racc_goto_table = [
|
|
262
|
-
3,
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
nil, nil, nil, nil, nil,
|
|
269
|
-
nil, nil, nil, nil, nil, nil, nil, nil,
|
|
270
|
-
72 ]
|
|
264
|
+
3, 30, 43, 1, 22, 21, 56, 53, 31, 27,
|
|
265
|
+
32, 63, 78, 70, nil, 30, nil, nil, 56, 69,
|
|
266
|
+
3, 66, 42, 27, 32, 30, 46, 49, 24, 32,
|
|
267
|
+
9, nil, 29, 51, 74, 23, 56, 76, 77, 62,
|
|
268
|
+
30, 32, 75, 79, 2, 67, 41, 32, 8, nil,
|
|
269
|
+
9, 47, nil, nil, nil, 71, nil, nil, 48, nil,
|
|
270
|
+
nil, nil, nil, nil, 40, nil, nil, nil, 8, nil,
|
|
271
|
+
nil, nil, nil, nil, nil, nil, nil, nil, 71 ]
|
|
271
272
|
|
|
272
273
|
racc_goto_check = [
|
|
273
|
-
3,
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
8, nil, nil, nil, nil, nil,
|
|
279
|
-
nil, nil, nil, nil, nil,
|
|
280
|
-
nil, nil, nil, nil, nil, nil, nil, nil,
|
|
281
|
-
3 ]
|
|
274
|
+
3, 19, 9, 1, 4, 3, 18, 13, 11, 3,
|
|
275
|
+
14, 10, 16, 17, nil, 19, nil, nil, 18, 13,
|
|
276
|
+
3, 9, 11, 3, 14, 19, 11, 11, 12, 14,
|
|
277
|
+
8, nil, 12, 14, 10, 8, 18, 13, 18, 11,
|
|
278
|
+
19, 14, 9, 10, 2, 11, 12, 14, 7, nil,
|
|
279
|
+
8, 12, nil, nil, nil, 3, nil, nil, 8, nil,
|
|
280
|
+
nil, nil, nil, nil, 2, nil, nil, nil, 7, nil,
|
|
281
|
+
nil, nil, nil, nil, nil, nil, nil, nil, 3 ]
|
|
282
282
|
|
|
283
283
|
racc_goto_pointer = [
|
|
284
|
-
nil,
|
|
285
|
-
-
|
|
284
|
+
nil, 3, 44, 0, -1, nil, nil, 48, 30, -25,
|
|
285
|
+
-32, -2, 23, -31, 0, nil, -61, -42, -32, -8 ]
|
|
286
286
|
|
|
287
287
|
racc_goto_default = [
|
|
288
|
-
nil, nil, nil,
|
|
289
|
-
nil, 11, 10, nil,
|
|
288
|
+
nil, nil, nil, 57, 4, 5, 6, 64, 33, nil,
|
|
289
|
+
nil, 11, 10, nil, 12, 54, 55, nil, 44, 15 ]
|
|
290
290
|
|
|
291
291
|
racc_reduce_table = [
|
|
292
292
|
0, 0, :racc_error,
|
|
@@ -304,15 +304,15 @@ racc_reduce_table = [
|
|
|
304
304
|
5, 16, :_reduce_12,
|
|
305
305
|
6, 16, :_reduce_13,
|
|
306
306
|
3, 16, :_reduce_14,
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
4, 18, :_reduce_19,
|
|
312
|
-
5, 18, :_reduce_20,
|
|
307
|
+
2, 16, :_reduce_15,
|
|
308
|
+
3, 18, :_reduce_16,
|
|
309
|
+
4, 18, :_reduce_17,
|
|
310
|
+
5, 18, :_reduce_18,
|
|
313
311
|
1, 24, :_reduce_none,
|
|
314
|
-
2, 24, :
|
|
315
|
-
3, 24, :
|
|
312
|
+
2, 24, :_reduce_20,
|
|
313
|
+
3, 24, :_reduce_21,
|
|
314
|
+
1, 26, :_reduce_none,
|
|
315
|
+
1, 26, :_reduce_none,
|
|
316
316
|
1, 23, :_reduce_none,
|
|
317
317
|
1, 23, :_reduce_none,
|
|
318
318
|
1, 25, :_reduce_26,
|
|
@@ -321,29 +321,30 @@ racc_reduce_table = [
|
|
|
321
321
|
3, 25, :_reduce_29,
|
|
322
322
|
1, 20, :_reduce_none,
|
|
323
323
|
2, 20, :_reduce_31,
|
|
324
|
-
1,
|
|
325
|
-
1,
|
|
326
|
-
1, 27, :_reduce_none,
|
|
327
|
-
2, 27, :_reduce_35,
|
|
328
|
-
0, 28, :_reduce_none,
|
|
324
|
+
1, 31, :_reduce_none,
|
|
325
|
+
1, 31, :_reduce_none,
|
|
329
326
|
1, 28, :_reduce_none,
|
|
330
|
-
|
|
331
|
-
|
|
327
|
+
2, 28, :_reduce_35,
|
|
328
|
+
0, 29, :_reduce_none,
|
|
329
|
+
1, 29, :_reduce_none,
|
|
330
|
+
0, 27, :_reduce_none,
|
|
331
|
+
1, 27, :_reduce_none,
|
|
332
|
+
1, 15, :_reduce_none,
|
|
332
333
|
1, 15, :_reduce_none,
|
|
333
334
|
1, 15, :_reduce_none,
|
|
334
335
|
1, 15, :_reduce_none,
|
|
335
336
|
0, 21, :_reduce_none,
|
|
336
337
|
1, 21, :_reduce_none,
|
|
337
|
-
1,
|
|
338
|
-
2,
|
|
338
|
+
1, 30, :_reduce_none,
|
|
339
|
+
2, 30, :_reduce_47,
|
|
339
340
|
0, 22, :_reduce_none,
|
|
340
341
|
1, 22, :_reduce_none,
|
|
341
342
|
1, 19, :_reduce_none,
|
|
342
|
-
2, 19, :
|
|
343
|
+
2, 19, :_reduce_51 ]
|
|
343
344
|
|
|
344
|
-
racc_reduce_n =
|
|
345
|
+
racc_reduce_n = 52
|
|
345
346
|
|
|
346
|
-
racc_shift_n =
|
|
347
|
+
racc_shift_n = 80
|
|
347
348
|
|
|
348
349
|
racc_token_table = {
|
|
349
350
|
false => 0,
|
|
@@ -406,6 +407,7 @@ Racc_token_to_s_table = [
|
|
|
406
407
|
"last",
|
|
407
408
|
"von",
|
|
408
409
|
"first",
|
|
410
|
+
"particle",
|
|
409
411
|
"opt_words",
|
|
410
412
|
"words",
|
|
411
413
|
"opt_comma",
|
|
@@ -480,36 +482,36 @@ module_eval(<<'.,.,', 'parser.y', 22)
|
|
|
480
482
|
|
|
481
483
|
module_eval(<<'.,.,', 'parser.y', 26)
|
|
482
484
|
def _reduce_11(val, _values, result)
|
|
483
|
-
result = Name.new(
|
|
484
|
-
:suffix => val[2], :title => val[3]
|
|
485
|
+
result = Name.new(
|
|
486
|
+
:given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
|
|
487
|
+
)
|
|
485
488
|
|
|
486
489
|
result
|
|
487
490
|
end
|
|
488
491
|
.,.,
|
|
489
492
|
|
|
490
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
493
|
+
module_eval(<<'.,.,', 'parser.y', 32)
|
|
491
494
|
def _reduce_12(val, _values, result)
|
|
492
|
-
result = Name.new(
|
|
493
|
-
:family => val[2], :suffix => val[3], :title => val[4]
|
|
495
|
+
result = Name.new(
|
|
496
|
+
:given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
|
|
497
|
+
)
|
|
494
498
|
|
|
495
499
|
result
|
|
496
500
|
end
|
|
497
501
|
.,.,
|
|
498
502
|
|
|
499
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
503
|
+
module_eval(<<'.,.,', 'parser.y', 38)
|
|
500
504
|
def _reduce_13(val, _values, result)
|
|
501
|
-
result = Name.new(
|
|
502
|
-
:particle => val[2], :family => val[3],
|
|
503
|
-
:suffix => val[4], :title => val[5])
|
|
505
|
+
result = Name.new(
|
|
506
|
+
:given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
|
|
504
507
|
|
|
505
508
|
result
|
|
506
509
|
end
|
|
507
510
|
.,.,
|
|
508
511
|
|
|
509
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
512
|
+
module_eval(<<'.,.,', 'parser.y', 43)
|
|
510
513
|
def _reduce_14(val, _values, result)
|
|
511
|
-
result = Name.new(:given => val[0], :particle => val[1],
|
|
512
|
-
:family => val[2])
|
|
514
|
+
result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
|
513
515
|
|
|
514
516
|
result
|
|
515
517
|
end
|
|
@@ -517,105 +519,88 @@ module_eval(<<'.,.,', 'parser.y', 42)
|
|
|
517
519
|
|
|
518
520
|
module_eval(<<'.,.,', 'parser.y', 47)
|
|
519
521
|
def _reduce_15(val, _values, result)
|
|
520
|
-
|
|
521
|
-
Name.new(:given => val[0], :family => val[1,2].join(' '))
|
|
522
|
-
else
|
|
523
|
-
Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
|
524
|
-
end
|
|
522
|
+
result = Name.new(:particle => val[0], :family => val[1])
|
|
525
523
|
|
|
526
524
|
result
|
|
527
525
|
end
|
|
528
526
|
.,.,
|
|
529
527
|
|
|
530
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
528
|
+
module_eval(<<'.,.,', 'parser.y', 52)
|
|
531
529
|
def _reduce_16(val, _values, result)
|
|
532
|
-
result = Name.new(
|
|
530
|
+
result = Name.new({
|
|
531
|
+
:family => val[0], :suffix => val[2][0], :given => val[2][1]
|
|
532
|
+
}, !!val[2][0])
|
|
533
533
|
|
|
534
534
|
result
|
|
535
535
|
end
|
|
536
536
|
.,.,
|
|
537
537
|
|
|
538
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
538
|
+
module_eval(<<'.,.,', 'parser.y', 58)
|
|
539
539
|
def _reduce_17(val, _values, result)
|
|
540
|
-
result = Name.new({
|
|
541
|
-
:
|
|
540
|
+
result = Name.new({
|
|
541
|
+
:particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
|
|
542
|
+
}, !!val[3][0])
|
|
542
543
|
|
|
543
544
|
result
|
|
544
545
|
end
|
|
545
546
|
.,.,
|
|
546
547
|
|
|
547
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
548
|
+
module_eval(<<'.,.,', 'parser.y', 64)
|
|
548
549
|
def _reduce_18(val, _values, result)
|
|
549
|
-
result =
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
|
553
|
-
end
|
|
550
|
+
result = Name.new({
|
|
551
|
+
:particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
|
|
552
|
+
}, !!val[4][0])
|
|
554
553
|
|
|
555
554
|
result
|
|
556
555
|
end
|
|
557
556
|
.,.,
|
|
558
557
|
|
|
559
|
-
|
|
560
|
-
def _reduce_19(val, _values, result)
|
|
561
|
-
result = Name.new({ :particle => val[0], :family => val[1],
|
|
562
|
-
:suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
|
563
|
-
|
|
564
|
-
result
|
|
565
|
-
end
|
|
566
|
-
.,.,
|
|
558
|
+
# reduce 19 omitted
|
|
567
559
|
|
|
568
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
560
|
+
module_eval(<<'.,.,', 'parser.y', 71)
|
|
569
561
|
def _reduce_20(val, _values, result)
|
|
570
|
-
result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
|
|
571
|
-
:suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
|
|
572
|
-
|
|
573
|
-
result
|
|
574
|
-
end
|
|
575
|
-
.,.,
|
|
576
|
-
|
|
577
|
-
# reduce 21 omitted
|
|
578
|
-
|
|
579
|
-
module_eval(<<'.,.,', 'parser.y', 84)
|
|
580
|
-
def _reduce_22(val, _values, result)
|
|
581
562
|
result = val.join(' ')
|
|
582
563
|
result
|
|
583
564
|
end
|
|
584
565
|
.,.,
|
|
585
566
|
|
|
586
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
587
|
-
def
|
|
567
|
+
module_eval(<<'.,.,', 'parser.y', 72)
|
|
568
|
+
def _reduce_21(val, _values, result)
|
|
588
569
|
result = val.join(' ')
|
|
589
570
|
result
|
|
590
571
|
end
|
|
591
572
|
.,.,
|
|
592
573
|
|
|
574
|
+
# reduce 22 omitted
|
|
575
|
+
|
|
576
|
+
# reduce 23 omitted
|
|
577
|
+
|
|
593
578
|
# reduce 24 omitted
|
|
594
579
|
|
|
595
580
|
# reduce 25 omitted
|
|
596
581
|
|
|
597
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
582
|
+
module_eval(<<'.,.,', 'parser.y', 78)
|
|
598
583
|
def _reduce_26(val, _values, result)
|
|
599
584
|
result = [nil,val[0]]
|
|
600
585
|
result
|
|
601
586
|
end
|
|
602
587
|
.,.,
|
|
603
588
|
|
|
604
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
589
|
+
module_eval(<<'.,.,', 'parser.y', 79)
|
|
605
590
|
def _reduce_27(val, _values, result)
|
|
606
591
|
result = [val[2],val[0]]
|
|
607
592
|
result
|
|
608
593
|
end
|
|
609
594
|
.,.,
|
|
610
595
|
|
|
611
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
596
|
+
module_eval(<<'.,.,', 'parser.y', 80)
|
|
612
597
|
def _reduce_28(val, _values, result)
|
|
613
598
|
result = [val[0],nil]
|
|
614
599
|
result
|
|
615
600
|
end
|
|
616
601
|
.,.,
|
|
617
602
|
|
|
618
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
603
|
+
module_eval(<<'.,.,', 'parser.y', 81)
|
|
619
604
|
def _reduce_29(val, _values, result)
|
|
620
605
|
result = [val[0],val[2]]
|
|
621
606
|
result
|
|
@@ -624,7 +609,7 @@ module_eval(<<'.,.,', 'parser.y', 92)
|
|
|
624
609
|
|
|
625
610
|
# reduce 30 omitted
|
|
626
611
|
|
|
627
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
612
|
+
module_eval(<<'.,.,', 'parser.y', 84)
|
|
628
613
|
def _reduce_31(val, _values, result)
|
|
629
614
|
result = val.join(' ')
|
|
630
615
|
result
|
|
@@ -637,7 +622,7 @@ module_eval(<<'.,.,', 'parser.y', 95)
|
|
|
637
622
|
|
|
638
623
|
# reduce 34 omitted
|
|
639
624
|
|
|
640
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
625
|
+
module_eval(<<'.,.,', 'parser.y', 89)
|
|
641
626
|
def _reduce_35(val, _values, result)
|
|
642
627
|
result = val.join(' ')
|
|
643
628
|
result
|
|
@@ -664,21 +649,23 @@ module_eval(<<'.,.,', 'parser.y', 100)
|
|
|
664
649
|
|
|
665
650
|
# reduce 45 omitted
|
|
666
651
|
|
|
667
|
-
|
|
668
|
-
|
|
652
|
+
# reduce 46 omitted
|
|
653
|
+
|
|
654
|
+
module_eval(<<'.,.,', 'parser.y', 99)
|
|
655
|
+
def _reduce_47(val, _values, result)
|
|
669
656
|
result = val.join(' ')
|
|
670
657
|
result
|
|
671
658
|
end
|
|
672
659
|
.,.,
|
|
673
660
|
|
|
674
|
-
# reduce 47 omitted
|
|
675
|
-
|
|
676
661
|
# reduce 48 omitted
|
|
677
662
|
|
|
678
663
|
# reduce 49 omitted
|
|
679
664
|
|
|
680
|
-
|
|
681
|
-
|
|
665
|
+
# reduce 50 omitted
|
|
666
|
+
|
|
667
|
+
module_eval(<<'.,.,', 'parser.y', 104)
|
|
668
|
+
def _reduce_51(val, _values, result)
|
|
682
669
|
result = val.join(' ')
|
|
683
670
|
result
|
|
684
671
|
end
|
data/lib/namae/parser.y
CHANGED
|
@@ -24,32 +24,24 @@ rule
|
|
|
24
24
|
|
|
25
25
|
display_order : u_words word opt_suffices opt_titles
|
|
26
26
|
{
|
|
27
|
-
result = Name.new(
|
|
28
|
-
:suffix => val[2], :title => val[3]
|
|
27
|
+
result = Name.new(
|
|
28
|
+
:given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
|
|
29
|
+
)
|
|
29
30
|
}
|
|
30
31
|
| u_words NICK last opt_suffices opt_titles
|
|
31
32
|
{
|
|
32
|
-
result = Name.new(
|
|
33
|
-
:family => val[2], :suffix => val[3], :title => val[4]
|
|
33
|
+
result = Name.new(
|
|
34
|
+
:given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
|
|
35
|
+
)
|
|
34
36
|
}
|
|
35
37
|
| u_words NICK von last opt_suffices opt_titles
|
|
36
38
|
{
|
|
37
|
-
result = Name.new(
|
|
38
|
-
:particle => val[2], :family => val[3],
|
|
39
|
-
:suffix => val[4], :title => val[5])
|
|
39
|
+
result = Name.new(
|
|
40
|
+
:given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
|
|
40
41
|
}
|
|
41
42
|
| u_words von last
|
|
42
43
|
{
|
|
43
|
-
result = Name.new(:given => val[0], :particle => val[1],
|
|
44
|
-
:family => val[2])
|
|
45
|
-
}
|
|
46
|
-
| u_words UPARTICLE last
|
|
47
|
-
{
|
|
48
|
-
result = if include_particle_in_family?
|
|
49
|
-
Name.new(:given => val[0], :family => val[1,2].join(' '))
|
|
50
|
-
else
|
|
51
|
-
Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
|
52
|
-
end
|
|
44
|
+
result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
|
53
45
|
}
|
|
54
46
|
| von last
|
|
55
47
|
{
|
|
@@ -58,32 +50,29 @@ rule
|
|
|
58
50
|
|
|
59
51
|
sort_order : last COMMA first
|
|
60
52
|
{
|
|
61
|
-
result = Name.new({
|
|
62
|
-
:
|
|
63
|
-
|
|
64
|
-
| UPARTICLE last COMMA first
|
|
65
|
-
{
|
|
66
|
-
result = if include_particle_in_family?
|
|
67
|
-
Name.new({ :family => val[0,2].join(' '), :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
|
68
|
-
else
|
|
69
|
-
Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
|
70
|
-
end
|
|
53
|
+
result = Name.new({
|
|
54
|
+
:family => val[0], :suffix => val[2][0], :given => val[2][1]
|
|
55
|
+
}, !!val[2][0])
|
|
71
56
|
}
|
|
72
57
|
| von last COMMA first
|
|
73
58
|
{
|
|
74
|
-
result = Name.new({
|
|
75
|
-
:
|
|
59
|
+
result = Name.new({
|
|
60
|
+
:particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
|
|
61
|
+
}, !!val[3][0])
|
|
76
62
|
}
|
|
77
63
|
| u_words von last COMMA first
|
|
78
64
|
{
|
|
79
|
-
result = Name.new({
|
|
80
|
-
:
|
|
65
|
+
result = Name.new({
|
|
66
|
+
:particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
|
|
67
|
+
}, !!val[4][0])
|
|
81
68
|
}
|
|
82
69
|
;
|
|
83
70
|
|
|
84
|
-
von :
|
|
85
|
-
| von
|
|
86
|
-
| von u_words
|
|
71
|
+
von : particle
|
|
72
|
+
| von particle { result = val.join(' ') }
|
|
73
|
+
| von u_words particle { result = val.join(' ') }
|
|
74
|
+
|
|
75
|
+
particle : LWORD | UPARTICLE
|
|
87
76
|
|
|
88
77
|
last : LWORD | u_words
|
|
89
78
|
|
|
@@ -103,7 +92,7 @@ rule
|
|
|
103
92
|
opt_comma : /* empty */ | COMMA
|
|
104
93
|
opt_words : /* empty */ | words
|
|
105
94
|
|
|
106
|
-
word : LWORD | UWORD | PWORD
|
|
95
|
+
word : LWORD | UWORD | PWORD | UPARTICLE
|
|
107
96
|
|
|
108
97
|
opt_suffices : /* empty */ | suffices
|
|
109
98
|
|
|
@@ -130,7 +119,7 @@ require 'strscan'
|
|
|
130
119
|
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
|
131
120
|
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
|
132
121
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
|
|
133
|
-
:uppercase_particle => /\s*\b(
|
|
122
|
+
:uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
|
|
134
123
|
}
|
|
135
124
|
|
|
136
125
|
class << self
|
|
@@ -197,7 +186,9 @@ require 'strscan'
|
|
|
197
186
|
def parse!(string)
|
|
198
187
|
@input = StringScanner.new(normalize(string))
|
|
199
188
|
reset
|
|
200
|
-
do_parse
|
|
189
|
+
names = do_parse
|
|
190
|
+
names.map(&:merge_particles!) if include_particle_in_family?
|
|
191
|
+
names
|
|
201
192
|
end
|
|
202
193
|
|
|
203
194
|
def normalize(string)
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
|
@@ -2,16 +2,16 @@
|
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
|
-
# stub: namae 1.1.
|
|
5
|
+
# stub: namae 1.1.1 ruby lib
|
|
6
6
|
|
|
7
7
|
Gem::Specification.new do |s|
|
|
8
8
|
s.name = "namae".freeze
|
|
9
|
-
s.version = "1.1.
|
|
9
|
+
s.version = "1.1.1"
|
|
10
10
|
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
12
12
|
s.require_paths = ["lib".freeze]
|
|
13
13
|
s.authors = ["Sylvester Keil".freeze, "Dan Collis-Puro".freeze]
|
|
14
|
-
s.date = "2021-03-
|
|
14
|
+
s.date = "2021-03-14"
|
|
15
15
|
s.description = " Namae (\u540D\u524D) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
|
|
16
16
|
s.email = ["sylvester@keil.or.at".freeze, "dan@collispuro.com".freeze]
|
|
17
17
|
s.extra_rdoc_files = [
|
data/spec/namae/parser_spec.rb
CHANGED
|
@@ -207,6 +207,10 @@ module Namae
|
|
|
207
207
|
expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
|
|
208
208
|
end
|
|
209
209
|
|
|
210
|
+
it 'parses multiple common capitalized particles as the family name in display order' do
|
|
211
|
+
expect(parser.parse!('Tom Van De Weghe')[0].values_at(:given, :family, :particle)).to eq(['Tom', 'Weghe', 'Van De'])
|
|
212
|
+
end
|
|
213
|
+
|
|
210
214
|
it 'parses common lowercase particles as a particle, not family name in display order' do
|
|
211
215
|
expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
|
|
212
216
|
end
|
|
@@ -235,16 +239,16 @@ module Namae
|
|
|
235
239
|
expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
|
|
236
240
|
end
|
|
237
241
|
|
|
238
|
-
it 'parses common lowercase particles as
|
|
239
|
-
expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva',
|
|
242
|
+
it 'parses common lowercase particles as family name in display order' do
|
|
243
|
+
expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
|
|
240
244
|
end
|
|
241
245
|
|
|
242
246
|
it 'parses common capitalized particles as the family name in sort order' do
|
|
243
247
|
expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
|
|
244
248
|
end
|
|
245
249
|
|
|
246
|
-
it 'parses common lowercase particles as
|
|
247
|
-
expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva',
|
|
250
|
+
it 'parses common lowercase particles as family name in sort order' do
|
|
251
|
+
expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
|
|
248
252
|
end
|
|
249
253
|
|
|
250
254
|
it 'parses common capitalized particles with punctuation as the family name in display order' do
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: namae
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.
|
|
4
|
+
version: 1.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sylvester Keil
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2021-03-
|
|
12
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: racc
|