namae 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -0
- data/features/examples.feature +16 -0
- data/features/lists.feature +2 -2
- data/lib/namae.rb +1 -1
- data/lib/namae/name.rb +7 -0
- data/lib/namae/parser.rb +138 -151
- data/lib/namae/parser.y +28 -37
- data/lib/namae/version.rb +1 -1
- data/namae.gemspec +3 -3
- data/spec/namae/parser_spec.rb +8 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 348bf4a2385c1aa56c35759cc2219a8163fa7cb76e3c05482cd6db7a207906fb
|
4
|
+
data.tar.gz: 4329ea23260aef483460581391fcd43c80bde61ecebef419f89c2d23f0cfeffc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 806964f1611f6931acd6e68e4f5a75069b30abf42795191bf084d89adbfe378d98f6b28f10f96cc5cc446ab9068ddafafd6794e71f2c53fd97d2a0aff5a59914
|
7
|
+
data.tar.gz: f235fb82617020393be215fe078bd74bbb947ab682f387f5aad87e7bb6e3b62323ee03da30409034ca747beca7b5319905f1a081a43bc8763a25dca5b40722c4
|
data/README.md
CHANGED
@@ -121,6 +121,16 @@ ambiguous. For example, multiple family names are always possible in sort-order:
|
|
121
121
|
Whilst in display-order, multiple family names are only supported when the
|
122
122
|
name contains a particle or a nickname.
|
123
123
|
|
124
|
+
Namae tries to detect common particles using the `:uppercase_particle` lexer
|
125
|
+
pattern. If you prefer to always include particles with the family name, you
|
126
|
+
can set the `:include_particle_in_family` parser option.
|
127
|
+
|
128
|
+
Namae.parse 'Ludwig von Beethoven'
|
129
|
+
#-> [#<Name family="Beethoven" given="Ludwig" particle="von">]
|
130
|
+
|
131
|
+
Namae.options[:include_particle_in_family] = true
|
132
|
+
#-> [#<Name family="von Beethoven" given="Ludwig">]
|
133
|
+
|
124
134
|
Configuration
|
125
135
|
-------------
|
126
136
|
You can tweak some of Namae's parse rules by configuring the parser's
|
data/features/examples.feature
CHANGED
@@ -34,3 +34,19 @@ Feature: Parse the names in the Readme file
|
|
34
34
|
| Mr. Yukihiro "Matz" Matsumoto | Yukihiro | | Matsumoto | | | Mr. | Matz |
|
35
35
|
| Yukihiro "Matz" Matsumoto Sr. | Yukihiro | | Matsumoto | Sr. | | | Matz |
|
36
36
|
| Mr. Yukihiro "Matz" Matsumoto Sr. | Yukihiro | | Matsumoto | Sr. | | Mr. | Matz |
|
37
|
+
|
38
|
+
@particle
|
39
|
+
Scenarios: Particles
|
40
|
+
| name | given | particle | family | suffix | title | appellation | nick |
|
41
|
+
| Ludwig von Beethoven | Ludwig | von | Beethoven | | | | |
|
42
|
+
| Beethoven, Ludwig von | Ludwig von | | Beethoven | | | | |
|
43
|
+
| Vincent Van Gogh | Vincent | Van | Gogh | | | | |
|
44
|
+
| Vincent van Gogh | Vincent | van | Gogh | | | | |
|
45
|
+
| Van Gogh, Vincent | Vincent | Van | Gogh | | | | |
|
46
|
+
| van Gogh, Vincent | Vincent | van | Gogh | | | | |
|
47
|
+
| Walther von der Vogelheide | Walther | von der | Vogelheide | | | | |
|
48
|
+
| Don De Lillo | Don | De | Lillo | | | | |
|
49
|
+
| De Lillo, Don | Don | De | Lillo | | | | |
|
50
|
+
| Tom Van de Weghe | Tom | Van de | Weghe | | | | |
|
51
|
+
| Tom Van De Weghe | Tom | Van De | Weghe | | | | |
|
52
|
+
|
data/features/lists.feature
CHANGED
@@ -121,12 +121,12 @@ Feature: Parse a list of names
|
|
121
121
|
Then the names should be:
|
122
122
|
| given | family |
|
123
123
|
| M. | Di Proctor |
|
124
|
-
| P. | Cooper
|
124
|
+
| P. | von Cooper |
|
125
125
|
When I parse the names "Di Proctor, M, von Cooper, P"
|
126
126
|
Then the names should be:
|
127
127
|
| given | family |
|
128
128
|
| M | Di Proctor |
|
129
|
-
| P | Cooper
|
129
|
+
| P | von Cooper |
|
130
130
|
|
131
131
|
Scenario: A list of names with two consecutive accented characters
|
132
132
|
Given I want to include particles in the family name
|
data/lib/namae.rb
CHANGED
data/lib/namae/name.rb
CHANGED
@@ -183,6 +183,13 @@ module Namae
|
|
183
183
|
self
|
184
184
|
end
|
185
185
|
|
186
|
+
def merge_particles!
|
187
|
+
self.family = [dropping_particle, particle, family].compact.join(' ')
|
188
|
+
self.dropping_particle = nil
|
189
|
+
self.particle = nil
|
190
|
+
self
|
191
|
+
end
|
192
|
+
|
186
193
|
# @return [String] a string representation of the name
|
187
194
|
def inspect
|
188
195
|
"#<Name #{each_pair.map { |k,v| [k,v.inspect].join('=') if v }.compact.join(' ')}>"
|
data/lib/namae/parser.rb
CHANGED
@@ -11,7 +11,7 @@ require 'strscan'
|
|
11
11
|
module Namae
|
12
12
|
class Parser < Racc::Parser
|
13
13
|
|
14
|
-
module_eval(<<'...end parser.y/module_eval...', 'parser.y',
|
14
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 111)
|
15
15
|
|
16
16
|
@defaults = {
|
17
17
|
:debug => false,
|
@@ -23,7 +23,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
|
|
23
23
|
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
24
24
|
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
25
25
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
|
26
|
-
:uppercase_particle => /\s*\b(
|
26
|
+
:uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
|
27
27
|
}
|
28
28
|
|
29
29
|
class << self
|
@@ -90,7 +90,9 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
|
|
90
90
|
def parse!(string)
|
91
91
|
@input = StringScanner.new(normalize(string))
|
92
92
|
reset
|
93
|
-
do_parse
|
93
|
+
names = do_parse
|
94
|
+
names.map(&:merge_particles!) if include_particle_in_family?
|
95
|
+
names
|
94
96
|
end
|
95
97
|
|
96
98
|
def normalize(string)
|
@@ -207,86 +209,84 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
|
|
207
209
|
##### State transition tables begin ###
|
208
210
|
|
209
211
|
racc_action_table = [
|
210
|
-
-
|
211
|
-
-41, -
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
212
|
+
-43, 36, 26, 37, -41, 38, 39, -43, -42, -43,
|
213
|
+
-43, -41, -40, -41, -41, -42, 45, -42, -42, -40,
|
214
|
+
50, -40, -40, 72, 59, 58, 60, 73, 16, 13,
|
215
|
+
17, -36, 61, 7, 18, 65, 14, 16, 25, 17,
|
216
|
+
16, 25, 17, 28, 18, 14, 65, 45, 14, 36,
|
217
|
+
34, 37, 68, 16, 13, 17, 26, 35, 7, 18,
|
218
|
+
18, 14, 16, 25, 17, 28, 36, 34, 37, 45,
|
219
|
+
14, 36, 34, 37, 35, 36, 34, 37, 45, 35,
|
220
|
+
36, 52, 37, 35, -22, -22, -22, 18, 35, 59,
|
221
|
+
58, 60, -22, 36, 34, 37, 45, 61, 36, 34,
|
222
|
+
37, 35, 59, 58, 60, 65, 35, nil, nil, 45,
|
223
|
+
61, 59, 58, 60, 59, 58, 60, nil, 45, 61,
|
224
|
+
19, nil, 61, 59, 58, 60, -40, 20, -24, nil,
|
225
|
+
nil, 61, nil, -40 ]
|
223
226
|
|
224
227
|
racc_action_check = [
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
228
|
+
14, 48, 8, 48, 16, 11, 19, 14, 17, 14,
|
229
|
+
14, 16, 25, 16, 16, 17, 27, 17, 17, 25,
|
230
|
+
31, 25, 25, 55, 55, 55, 55, 56, 0, 0,
|
231
|
+
0, 55, 55, 0, 0, 56, 0, 5, 5, 5,
|
232
|
+
9, 9, 9, 9, 43, 5, 44, 46, 9, 10,
|
233
|
+
10, 10, 49, 20, 20, 20, 64, 10, 20, 20,
|
234
|
+
66, 20, 23, 23, 23, 23, 24, 24, 24, 67,
|
235
|
+
23, 28, 28, 28, 24, 29, 29, 29, 70, 28,
|
236
|
+
33, 33, 33, 29, 34, 34, 34, 75, 33, 38,
|
237
|
+
38, 38, 34, 41, 41, 41, 38, 38, 47, 47,
|
238
|
+
47, 41, 50, 50, 50, 77, 47, nil, nil, 50,
|
239
|
+
50, 68, 68, 68, 73, 73, 73, nil, 68, 68,
|
240
|
+
1, nil, 73, 78, 78, 78, 13, 1, 13, nil,
|
241
|
+
nil, 78, nil, 13 ]
|
238
242
|
|
239
243
|
racc_action_pointer = [
|
240
|
-
|
241
|
-
|
242
|
-
nil, nil,
|
243
|
-
nil,
|
244
|
-
nil,
|
245
|
-
|
246
|
-
nil, nil,
|
247
|
-
|
248
|
-
114, nil ]
|
244
|
+
25, 120, nil, nil, nil, 34, nil, nil, -7, 37,
|
245
|
+
46, 3, nil, 126, 0, nil, 4, 8, nil, 6,
|
246
|
+
50, nil, nil, 59, 63, 12, nil, 6, 68, 72,
|
247
|
+
nil, 18, nil, 77, 81, nil, nil, nil, 86, nil,
|
248
|
+
nil, 90, nil, 35, 36, nil, 37, 95, -2, 50,
|
249
|
+
99, nil, nil, nil, nil, 21, 25, nil, nil, nil,
|
250
|
+
nil, nil, nil, nil, 47, nil, 51, 59, 108, nil,
|
251
|
+
68, nil, nil, 111, nil, 78, nil, 95, 120, nil ]
|
249
252
|
|
250
253
|
racc_action_default = [
|
251
|
-
-1, -
|
252
|
-
-
|
253
|
-
-6, -7, -
|
254
|
-
-31, -
|
255
|
-
|
256
|
-
-
|
257
|
-
-
|
258
|
-
-
|
259
|
-
-29, -13 ]
|
254
|
+
-1, -52, -2, -4, -5, -52, -8, -9, -10, -25,
|
255
|
+
-52, -52, -19, -22, -23, -30, -32, -33, -50, -52,
|
256
|
+
-52, -6, -7, -52, -52, -22, -51, -44, -52, -52,
|
257
|
+
-31, -15, -20, -25, -24, -23, -32, -33, -38, 80,
|
258
|
+
-3, -52, -15, -48, -45, -46, -44, -52, -25, -14,
|
259
|
+
-38, -21, -22, -16, -26, -39, -28, -34, -40, -41,
|
260
|
+
-42, -43, -14, -11, -49, -47, -48, -44, -38, -17,
|
261
|
+
-52, -35, -37, -52, -12, -48, -18, -27, -29, -13 ]
|
260
262
|
|
261
263
|
racc_goto_table = [
|
262
|
-
3,
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
nil, nil, nil, nil, nil,
|
269
|
-
nil, nil, nil, nil, nil, nil, nil, nil,
|
270
|
-
72 ]
|
264
|
+
3, 30, 43, 1, 22, 21, 56, 53, 31, 27,
|
265
|
+
32, 63, 78, 70, nil, 30, nil, nil, 56, 69,
|
266
|
+
3, 66, 42, 27, 32, 30, 46, 49, 24, 32,
|
267
|
+
9, nil, 29, 51, 74, 23, 56, 76, 77, 62,
|
268
|
+
30, 32, 75, 79, 2, 67, 41, 32, 8, nil,
|
269
|
+
9, 47, nil, nil, nil, 71, nil, nil, 48, nil,
|
270
|
+
nil, nil, nil, nil, 40, nil, nil, nil, 8, nil,
|
271
|
+
nil, nil, nil, nil, nil, nil, nil, nil, 71 ]
|
271
272
|
|
272
273
|
racc_goto_check = [
|
273
|
-
3,
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
8, nil, nil, nil, nil, nil,
|
279
|
-
nil, nil, nil, nil, nil,
|
280
|
-
nil, nil, nil, nil, nil, nil, nil, nil,
|
281
|
-
3 ]
|
274
|
+
3, 19, 9, 1, 4, 3, 18, 13, 11, 3,
|
275
|
+
14, 10, 16, 17, nil, 19, nil, nil, 18, 13,
|
276
|
+
3, 9, 11, 3, 14, 19, 11, 11, 12, 14,
|
277
|
+
8, nil, 12, 14, 10, 8, 18, 13, 18, 11,
|
278
|
+
19, 14, 9, 10, 2, 11, 12, 14, 7, nil,
|
279
|
+
8, 12, nil, nil, nil, 3, nil, nil, 8, nil,
|
280
|
+
nil, nil, nil, nil, 2, nil, nil, nil, 7, nil,
|
281
|
+
nil, nil, nil, nil, nil, nil, nil, nil, 3 ]
|
282
282
|
|
283
283
|
racc_goto_pointer = [
|
284
|
-
nil,
|
285
|
-
-
|
284
|
+
nil, 3, 44, 0, -1, nil, nil, 48, 30, -25,
|
285
|
+
-32, -2, 23, -31, 0, nil, -61, -42, -32, -8 ]
|
286
286
|
|
287
287
|
racc_goto_default = [
|
288
|
-
nil, nil, nil,
|
289
|
-
nil, 11, 10, nil,
|
288
|
+
nil, nil, nil, 57, 4, 5, 6, 64, 33, nil,
|
289
|
+
nil, 11, 10, nil, 12, 54, 55, nil, 44, 15 ]
|
290
290
|
|
291
291
|
racc_reduce_table = [
|
292
292
|
0, 0, :racc_error,
|
@@ -304,15 +304,15 @@ racc_reduce_table = [
|
|
304
304
|
5, 16, :_reduce_12,
|
305
305
|
6, 16, :_reduce_13,
|
306
306
|
3, 16, :_reduce_14,
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
4, 18, :_reduce_19,
|
312
|
-
5, 18, :_reduce_20,
|
307
|
+
2, 16, :_reduce_15,
|
308
|
+
3, 18, :_reduce_16,
|
309
|
+
4, 18, :_reduce_17,
|
310
|
+
5, 18, :_reduce_18,
|
313
311
|
1, 24, :_reduce_none,
|
314
|
-
2, 24, :
|
315
|
-
3, 24, :
|
312
|
+
2, 24, :_reduce_20,
|
313
|
+
3, 24, :_reduce_21,
|
314
|
+
1, 26, :_reduce_none,
|
315
|
+
1, 26, :_reduce_none,
|
316
316
|
1, 23, :_reduce_none,
|
317
317
|
1, 23, :_reduce_none,
|
318
318
|
1, 25, :_reduce_26,
|
@@ -321,29 +321,30 @@ racc_reduce_table = [
|
|
321
321
|
3, 25, :_reduce_29,
|
322
322
|
1, 20, :_reduce_none,
|
323
323
|
2, 20, :_reduce_31,
|
324
|
-
1,
|
325
|
-
1,
|
326
|
-
1, 27, :_reduce_none,
|
327
|
-
2, 27, :_reduce_35,
|
328
|
-
0, 28, :_reduce_none,
|
324
|
+
1, 31, :_reduce_none,
|
325
|
+
1, 31, :_reduce_none,
|
329
326
|
1, 28, :_reduce_none,
|
330
|
-
|
331
|
-
|
327
|
+
2, 28, :_reduce_35,
|
328
|
+
0, 29, :_reduce_none,
|
329
|
+
1, 29, :_reduce_none,
|
330
|
+
0, 27, :_reduce_none,
|
331
|
+
1, 27, :_reduce_none,
|
332
|
+
1, 15, :_reduce_none,
|
332
333
|
1, 15, :_reduce_none,
|
333
334
|
1, 15, :_reduce_none,
|
334
335
|
1, 15, :_reduce_none,
|
335
336
|
0, 21, :_reduce_none,
|
336
337
|
1, 21, :_reduce_none,
|
337
|
-
1,
|
338
|
-
2,
|
338
|
+
1, 30, :_reduce_none,
|
339
|
+
2, 30, :_reduce_47,
|
339
340
|
0, 22, :_reduce_none,
|
340
341
|
1, 22, :_reduce_none,
|
341
342
|
1, 19, :_reduce_none,
|
342
|
-
2, 19, :
|
343
|
+
2, 19, :_reduce_51 ]
|
343
344
|
|
344
|
-
racc_reduce_n =
|
345
|
+
racc_reduce_n = 52
|
345
346
|
|
346
|
-
racc_shift_n =
|
347
|
+
racc_shift_n = 80
|
347
348
|
|
348
349
|
racc_token_table = {
|
349
350
|
false => 0,
|
@@ -406,6 +407,7 @@ Racc_token_to_s_table = [
|
|
406
407
|
"last",
|
407
408
|
"von",
|
408
409
|
"first",
|
410
|
+
"particle",
|
409
411
|
"opt_words",
|
410
412
|
"words",
|
411
413
|
"opt_comma",
|
@@ -480,36 +482,36 @@ module_eval(<<'.,.,', 'parser.y', 22)
|
|
480
482
|
|
481
483
|
module_eval(<<'.,.,', 'parser.y', 26)
|
482
484
|
def _reduce_11(val, _values, result)
|
483
|
-
result = Name.new(
|
484
|
-
:suffix => val[2], :title => val[3]
|
485
|
+
result = Name.new(
|
486
|
+
:given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
|
487
|
+
)
|
485
488
|
|
486
489
|
result
|
487
490
|
end
|
488
491
|
.,.,
|
489
492
|
|
490
|
-
module_eval(<<'.,.,', 'parser.y',
|
493
|
+
module_eval(<<'.,.,', 'parser.y', 32)
|
491
494
|
def _reduce_12(val, _values, result)
|
492
|
-
result = Name.new(
|
493
|
-
:family => val[2], :suffix => val[3], :title => val[4]
|
495
|
+
result = Name.new(
|
496
|
+
:given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
|
497
|
+
)
|
494
498
|
|
495
499
|
result
|
496
500
|
end
|
497
501
|
.,.,
|
498
502
|
|
499
|
-
module_eval(<<'.,.,', 'parser.y',
|
503
|
+
module_eval(<<'.,.,', 'parser.y', 38)
|
500
504
|
def _reduce_13(val, _values, result)
|
501
|
-
result = Name.new(
|
502
|
-
:particle => val[2], :family => val[3],
|
503
|
-
:suffix => val[4], :title => val[5])
|
505
|
+
result = Name.new(
|
506
|
+
:given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
|
504
507
|
|
505
508
|
result
|
506
509
|
end
|
507
510
|
.,.,
|
508
511
|
|
509
|
-
module_eval(<<'.,.,', 'parser.y',
|
512
|
+
module_eval(<<'.,.,', 'parser.y', 43)
|
510
513
|
def _reduce_14(val, _values, result)
|
511
|
-
result = Name.new(:given => val[0], :particle => val[1],
|
512
|
-
:family => val[2])
|
514
|
+
result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
513
515
|
|
514
516
|
result
|
515
517
|
end
|
@@ -517,105 +519,88 @@ module_eval(<<'.,.,', 'parser.y', 42)
|
|
517
519
|
|
518
520
|
module_eval(<<'.,.,', 'parser.y', 47)
|
519
521
|
def _reduce_15(val, _values, result)
|
520
|
-
|
521
|
-
Name.new(:given => val[0], :family => val[1,2].join(' '))
|
522
|
-
else
|
523
|
-
Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
524
|
-
end
|
522
|
+
result = Name.new(:particle => val[0], :family => val[1])
|
525
523
|
|
526
524
|
result
|
527
525
|
end
|
528
526
|
.,.,
|
529
527
|
|
530
|
-
module_eval(<<'.,.,', 'parser.y',
|
528
|
+
module_eval(<<'.,.,', 'parser.y', 52)
|
531
529
|
def _reduce_16(val, _values, result)
|
532
|
-
result = Name.new(
|
530
|
+
result = Name.new({
|
531
|
+
:family => val[0], :suffix => val[2][0], :given => val[2][1]
|
532
|
+
}, !!val[2][0])
|
533
533
|
|
534
534
|
result
|
535
535
|
end
|
536
536
|
.,.,
|
537
537
|
|
538
|
-
module_eval(<<'.,.,', 'parser.y',
|
538
|
+
module_eval(<<'.,.,', 'parser.y', 58)
|
539
539
|
def _reduce_17(val, _values, result)
|
540
|
-
result = Name.new({
|
541
|
-
:
|
540
|
+
result = Name.new({
|
541
|
+
:particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
|
542
|
+
}, !!val[3][0])
|
542
543
|
|
543
544
|
result
|
544
545
|
end
|
545
546
|
.,.,
|
546
547
|
|
547
|
-
module_eval(<<'.,.,', 'parser.y',
|
548
|
+
module_eval(<<'.,.,', 'parser.y', 64)
|
548
549
|
def _reduce_18(val, _values, result)
|
549
|
-
result =
|
550
|
-
|
551
|
-
|
552
|
-
Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
553
|
-
end
|
550
|
+
result = Name.new({
|
551
|
+
:particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
|
552
|
+
}, !!val[4][0])
|
554
553
|
|
555
554
|
result
|
556
555
|
end
|
557
556
|
.,.,
|
558
557
|
|
559
|
-
|
560
|
-
def _reduce_19(val, _values, result)
|
561
|
-
result = Name.new({ :particle => val[0], :family => val[1],
|
562
|
-
:suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
563
|
-
|
564
|
-
result
|
565
|
-
end
|
566
|
-
.,.,
|
558
|
+
# reduce 19 omitted
|
567
559
|
|
568
|
-
module_eval(<<'.,.,', 'parser.y',
|
560
|
+
module_eval(<<'.,.,', 'parser.y', 71)
|
569
561
|
def _reduce_20(val, _values, result)
|
570
|
-
result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
|
571
|
-
:suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
|
572
|
-
|
573
|
-
result
|
574
|
-
end
|
575
|
-
.,.,
|
576
|
-
|
577
|
-
# reduce 21 omitted
|
578
|
-
|
579
|
-
module_eval(<<'.,.,', 'parser.y', 84)
|
580
|
-
def _reduce_22(val, _values, result)
|
581
562
|
result = val.join(' ')
|
582
563
|
result
|
583
564
|
end
|
584
565
|
.,.,
|
585
566
|
|
586
|
-
module_eval(<<'.,.,', 'parser.y',
|
587
|
-
def
|
567
|
+
module_eval(<<'.,.,', 'parser.y', 72)
|
568
|
+
def _reduce_21(val, _values, result)
|
588
569
|
result = val.join(' ')
|
589
570
|
result
|
590
571
|
end
|
591
572
|
.,.,
|
592
573
|
|
574
|
+
# reduce 22 omitted
|
575
|
+
|
576
|
+
# reduce 23 omitted
|
577
|
+
|
593
578
|
# reduce 24 omitted
|
594
579
|
|
595
580
|
# reduce 25 omitted
|
596
581
|
|
597
|
-
module_eval(<<'.,.,', 'parser.y',
|
582
|
+
module_eval(<<'.,.,', 'parser.y', 78)
|
598
583
|
def _reduce_26(val, _values, result)
|
599
584
|
result = [nil,val[0]]
|
600
585
|
result
|
601
586
|
end
|
602
587
|
.,.,
|
603
588
|
|
604
|
-
module_eval(<<'.,.,', 'parser.y',
|
589
|
+
module_eval(<<'.,.,', 'parser.y', 79)
|
605
590
|
def _reduce_27(val, _values, result)
|
606
591
|
result = [val[2],val[0]]
|
607
592
|
result
|
608
593
|
end
|
609
594
|
.,.,
|
610
595
|
|
611
|
-
module_eval(<<'.,.,', 'parser.y',
|
596
|
+
module_eval(<<'.,.,', 'parser.y', 80)
|
612
597
|
def _reduce_28(val, _values, result)
|
613
598
|
result = [val[0],nil]
|
614
599
|
result
|
615
600
|
end
|
616
601
|
.,.,
|
617
602
|
|
618
|
-
module_eval(<<'.,.,', 'parser.y',
|
603
|
+
module_eval(<<'.,.,', 'parser.y', 81)
|
619
604
|
def _reduce_29(val, _values, result)
|
620
605
|
result = [val[0],val[2]]
|
621
606
|
result
|
@@ -624,7 +609,7 @@ module_eval(<<'.,.,', 'parser.y', 92)
|
|
624
609
|
|
625
610
|
# reduce 30 omitted
|
626
611
|
|
627
|
-
module_eval(<<'.,.,', 'parser.y',
|
612
|
+
module_eval(<<'.,.,', 'parser.y', 84)
|
628
613
|
def _reduce_31(val, _values, result)
|
629
614
|
result = val.join(' ')
|
630
615
|
result
|
@@ -637,7 +622,7 @@ module_eval(<<'.,.,', 'parser.y', 95)
|
|
637
622
|
|
638
623
|
# reduce 34 omitted
|
639
624
|
|
640
|
-
module_eval(<<'.,.,', 'parser.y',
|
625
|
+
module_eval(<<'.,.,', 'parser.y', 89)
|
641
626
|
def _reduce_35(val, _values, result)
|
642
627
|
result = val.join(' ')
|
643
628
|
result
|
@@ -664,21 +649,23 @@ module_eval(<<'.,.,', 'parser.y', 100)
|
|
664
649
|
|
665
650
|
# reduce 45 omitted
|
666
651
|
|
667
|
-
|
668
|
-
|
652
|
+
# reduce 46 omitted
|
653
|
+
|
654
|
+
module_eval(<<'.,.,', 'parser.y', 99)
|
655
|
+
def _reduce_47(val, _values, result)
|
669
656
|
result = val.join(' ')
|
670
657
|
result
|
671
658
|
end
|
672
659
|
.,.,
|
673
660
|
|
674
|
-
# reduce 47 omitted
|
675
|
-
|
676
661
|
# reduce 48 omitted
|
677
662
|
|
678
663
|
# reduce 49 omitted
|
679
664
|
|
680
|
-
|
681
|
-
|
665
|
+
# reduce 50 omitted
|
666
|
+
|
667
|
+
module_eval(<<'.,.,', 'parser.y', 104)
|
668
|
+
def _reduce_51(val, _values, result)
|
682
669
|
result = val.join(' ')
|
683
670
|
result
|
684
671
|
end
|
data/lib/namae/parser.y
CHANGED
@@ -24,32 +24,24 @@ rule
|
|
24
24
|
|
25
25
|
display_order : u_words word opt_suffices opt_titles
|
26
26
|
{
|
27
|
-
result = Name.new(
|
28
|
-
:suffix => val[2], :title => val[3]
|
27
|
+
result = Name.new(
|
28
|
+
:given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
|
29
|
+
)
|
29
30
|
}
|
30
31
|
| u_words NICK last opt_suffices opt_titles
|
31
32
|
{
|
32
|
-
result = Name.new(
|
33
|
-
:family => val[2], :suffix => val[3], :title => val[4]
|
33
|
+
result = Name.new(
|
34
|
+
:given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
|
35
|
+
)
|
34
36
|
}
|
35
37
|
| u_words NICK von last opt_suffices opt_titles
|
36
38
|
{
|
37
|
-
result = Name.new(
|
38
|
-
:particle => val[2], :family => val[3],
|
39
|
-
:suffix => val[4], :title => val[5])
|
39
|
+
result = Name.new(
|
40
|
+
:given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
|
40
41
|
}
|
41
42
|
| u_words von last
|
42
43
|
{
|
43
|
-
result = Name.new(:given => val[0], :particle => val[1],
|
44
|
-
:family => val[2])
|
45
|
-
}
|
46
|
-
| u_words UPARTICLE last
|
47
|
-
{
|
48
|
-
result = if include_particle_in_family?
|
49
|
-
Name.new(:given => val[0], :family => val[1,2].join(' '))
|
50
|
-
else
|
51
|
-
Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
52
|
-
end
|
44
|
+
result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
53
45
|
}
|
54
46
|
| von last
|
55
47
|
{
|
@@ -58,32 +50,29 @@ rule
|
|
58
50
|
|
59
51
|
sort_order : last COMMA first
|
60
52
|
{
|
61
|
-
result = Name.new({
|
62
|
-
:
|
63
|
-
|
64
|
-
| UPARTICLE last COMMA first
|
65
|
-
{
|
66
|
-
result = if include_particle_in_family?
|
67
|
-
Name.new({ :family => val[0,2].join(' '), :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
68
|
-
else
|
69
|
-
Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
70
|
-
end
|
53
|
+
result = Name.new({
|
54
|
+
:family => val[0], :suffix => val[2][0], :given => val[2][1]
|
55
|
+
}, !!val[2][0])
|
71
56
|
}
|
72
57
|
| von last COMMA first
|
73
58
|
{
|
74
|
-
result = Name.new({
|
75
|
-
:
|
59
|
+
result = Name.new({
|
60
|
+
:particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
|
61
|
+
}, !!val[3][0])
|
76
62
|
}
|
77
63
|
| u_words von last COMMA first
|
78
64
|
{
|
79
|
-
result = Name.new({
|
80
|
-
:
|
65
|
+
result = Name.new({
|
66
|
+
:particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
|
67
|
+
}, !!val[4][0])
|
81
68
|
}
|
82
69
|
;
|
83
70
|
|
84
|
-
von :
|
85
|
-
| von
|
86
|
-
| von u_words
|
71
|
+
von : particle
|
72
|
+
| von particle { result = val.join(' ') }
|
73
|
+
| von u_words particle { result = val.join(' ') }
|
74
|
+
|
75
|
+
particle : LWORD | UPARTICLE
|
87
76
|
|
88
77
|
last : LWORD | u_words
|
89
78
|
|
@@ -103,7 +92,7 @@ rule
|
|
103
92
|
opt_comma : /* empty */ | COMMA
|
104
93
|
opt_words : /* empty */ | words
|
105
94
|
|
106
|
-
word : LWORD | UWORD | PWORD
|
95
|
+
word : LWORD | UWORD | PWORD | UPARTICLE
|
107
96
|
|
108
97
|
opt_suffices : /* empty */ | suffices
|
109
98
|
|
@@ -130,7 +119,7 @@ require 'strscan'
|
|
130
119
|
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
131
120
|
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
132
121
|
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
|
133
|
-
:uppercase_particle => /\s*\b(
|
122
|
+
:uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
|
134
123
|
}
|
135
124
|
|
136
125
|
class << self
|
@@ -197,7 +186,9 @@ require 'strscan'
|
|
197
186
|
def parse!(string)
|
198
187
|
@input = StringScanner.new(normalize(string))
|
199
188
|
reset
|
200
|
-
do_parse
|
189
|
+
names = do_parse
|
190
|
+
names.map(&:merge_particles!) if include_particle_in_family?
|
191
|
+
names
|
201
192
|
end
|
202
193
|
|
203
194
|
def normalize(string)
|
data/lib/namae/version.rb
CHANGED
data/namae.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: namae 1.1.
|
5
|
+
# stub: namae 1.1.1 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "namae".freeze
|
9
|
-
s.version = "1.1.
|
9
|
+
s.version = "1.1.1"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib".freeze]
|
13
13
|
s.authors = ["Sylvester Keil".freeze, "Dan Collis-Puro".freeze]
|
14
|
-
s.date = "2021-03-
|
14
|
+
s.date = "2021-03-14"
|
15
15
|
s.description = " Namae (\u540D\u524D) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
|
16
16
|
s.email = ["sylvester@keil.or.at".freeze, "dan@collispuro.com".freeze]
|
17
17
|
s.extra_rdoc_files = [
|
data/spec/namae/parser_spec.rb
CHANGED
@@ -207,6 +207,10 @@ module Namae
|
|
207
207
|
expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
|
208
208
|
end
|
209
209
|
|
210
|
+
it 'parses multiple common capitalized particles as the family name in display order' do
|
211
|
+
expect(parser.parse!('Tom Van De Weghe')[0].values_at(:given, :family, :particle)).to eq(['Tom', 'Weghe', 'Van De'])
|
212
|
+
end
|
213
|
+
|
210
214
|
it 'parses common lowercase particles as a particle, not family name in display order' do
|
211
215
|
expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
|
212
216
|
end
|
@@ -235,16 +239,16 @@ module Namae
|
|
235
239
|
expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
|
236
240
|
end
|
237
241
|
|
238
|
-
it 'parses common lowercase particles as
|
239
|
-
expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva',
|
242
|
+
it 'parses common lowercase particles as family name in display order' do
|
243
|
+
expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
|
240
244
|
end
|
241
245
|
|
242
246
|
it 'parses common capitalized particles as the family name in sort order' do
|
243
247
|
expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
|
244
248
|
end
|
245
249
|
|
246
|
-
it 'parses common lowercase particles as
|
247
|
-
expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva',
|
250
|
+
it 'parses common lowercase particles as family name in sort order' do
|
251
|
+
expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
|
248
252
|
end
|
249
253
|
|
250
254
|
it 'parses common capitalized particles with punctuation as the family name in display order' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: namae
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-03-
|
12
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: racc
|