namae 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 971aa03b31c2c4b0c853f777fbb103a8e15eb7c39a5be6c3e8c2bfbeb1825946
4
- data.tar.gz: ffc6a59e69d7654f346c3b5e568b48bcb1bd537a83d1ee543eefc0f32db65119
3
+ metadata.gz: 82fa8955c4f650ccbcb6bf67db18f005eafd2f1f09252b8d98203a6f04949ed2
4
+ data.tar.gz: c96965c52193db381f8fceb0e8bfc34453f62127c846b6cb593e804c25394908
5
5
  SHA512:
6
- metadata.gz: 31e652932c388fc6c80429a9206682d1478b4470f5d338cd0564a19c31e785ce57bf9c4b333386d8803cb8f9e0235b624e9dca71aa3260b1f55444f25911b9af
7
- data.tar.gz: c03466d1e028fa84e07ae994fe53a6c7c3ea66ea8344018e4ae5795f1cc9206a54f7df67b73e3cf8ae67043eece2929a7c27be7d194966f93cac86fd73867ada
6
+ metadata.gz: 14644528eb8d587a2fd0064fbddfa6cfe8925d7a1b9b3f6a75b33c599f3bbb9a43b53e951e4ec8fab9506c5edc2ef62abff77fc1f43cca1ce3489c5ab4c36f8f
7
+ data.tar.gz: 921b0c60e964b8e5f6154008ab8923d59a2df5523484e38618f3ddddbd89eb2d0ad99ffdbbc9d90629cced3051e38fffb05617657338018afd8913cefc6e891c
@@ -115,7 +115,8 @@ Feature: Parse a list of names
115
115
  | B | Malcom |
116
116
 
117
117
  Scenario: A list of names with particles separated by commas
118
- Given a parser that prefers commas as separators
118
+ Given I want to include particles in the family name
119
+ And a parser that prefers commas as separators
119
120
  When I parse the names "Di Proctor, M., von Cooper, P."
120
121
  Then the names should be:
121
122
  | given | family |
@@ -128,7 +129,8 @@ Feature: Parse a list of names
128
129
  | P | Cooper |
129
130
 
130
131
  Scenario: A list of names with two consecutive accented characters
131
- Given a parser that prefers commas as separators
132
+ Given I want to include particles in the family name
133
+ And a parser that prefers commas as separators
132
134
  When I parse the names "Çakıroğlu, Ü., Başıbüyük, B."
133
135
  Then the names should be:
134
136
  | given | family |
@@ -2,6 +2,11 @@ Given /^a parser that prefers commas as separators$/ do
2
2
  Namae::Parser.instance.options[:prefer_comma_as_separator] = true
3
3
  end
4
4
 
5
+ Given /^I want to include particles in the family name$/ do
6
+ Namae::Parser.instance.options[:include_particle_in_family] = true
7
+ end
8
+
9
+
5
10
  When /^I parse the name "(.*)"$/ do |string|
6
11
  @name = Namae.parse!(string)[0]
7
12
  end
data/lib/namae/parser.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  #
2
2
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by Racc 1.4.16
3
+ # This file is automatically generated by Racc 1.5.2
4
4
  # from Racc grammar file "".
5
5
  #
6
6
 
@@ -11,17 +11,19 @@ require 'strscan'
11
11
  module Namae
12
12
  class Parser < Racc::Parser
13
13
 
14
- module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
14
+ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
15
15
 
16
16
  @defaults = {
17
17
  :debug => false,
18
18
  :prefer_comma_as_separator => false,
19
+ :include_particle_in_family => false,
19
20
  :comma => ',',
20
21
  :stops => ',;',
21
22
  :separator => /\s*(\band\b|\&|;)\s*/i,
22
23
  :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
23
24
  :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
24
- :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
25
+ :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
26
+ :uppercase_particle => /\s*\b((Da|De|Di|De\sLa|Du|Der|Des|Da|St|Saint|Les|Van)\.?)(\s+|$)/
25
27
  }
26
28
 
27
29
  class << self
@@ -50,6 +52,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
50
52
  options[:comma]
51
53
  end
52
54
 
55
+ def include_particle_in_family?
56
+ options[:include_particle_in_family]
57
+ end
58
+
53
59
  def stops
54
60
  options[:stops]
55
61
  end
@@ -66,6 +72,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
66
72
  options[:appellation]
67
73
  end
68
74
 
75
+ def uppercase_particle
76
+ options[:uppercase_particle]
77
+ end
78
+
69
79
  def prefer_comma_as_separator?
70
80
  options[:prefer_comma_as_separator]
71
81
  end
@@ -171,6 +181,8 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
171
181
  else
172
182
  consume_word(:UWORD, input.matched)
173
183
  end
184
+ when input.scan(uppercase_particle)
185
+ consume_word(:UPARTICLE, input.matched.strip)
174
186
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
175
187
  consume_word(:UWORD, input.matched)
176
188
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
@@ -195,133 +207,143 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
195
207
  ##### State transition tables begin ###
196
208
 
197
209
  racc_action_table = [
198
- -39, 16, 32, 30, -40, 31, 33, -39, 17, -39,
199
- -39, -40, 67, -40, -40, 66, 53, 52, 54, -38,
200
- 59, -22, 39, -34, 45, 58, -38, 53, 52, 54,
201
- 53, 52, 54, 59, 39, 39, 62, 39, 53, 52,
202
- 54, 14, 12, 15, 68, 39, 7, 8, 14, 12,
203
- 15, 58, 39, 7, 8, 14, 22, 15, 24, 14,
204
- 22, 15, 24, 14, 22, 15, 30, 28, 31, 30,
205
- 28, 31, -19, -19, -19, 30, 42, 31, 30, 28,
206
- 31, -20, -20, -20, 30, 46, 31, 30, 28, 31,
207
- 30, 28, 31, -19, -19, -19, 53, 52, 54, 53,
208
- 52, 54, 39, 58, 59 ]
210
+ -41, 18, 25, 34, -42, 35, 36, -41, 19, -41,
211
+ -41, -42, 40, -42, -42, 15, 13, 16, 46, 52,
212
+ 7, 17, 62, 12, 15, 24, 16, 27, 15, 13,
213
+ 16, 17, 29, 7, 17, 66, 12, 15, 24, 16,
214
+ 27, 73, 60, 59, 61, 29, 74, 46, -40, -36,
215
+ -24, 60, 59, 61, 66, -40, 69, 25, 46, 60,
216
+ 59, 61, 60, 59, 61, 17, 46, 46, 46, 46,
217
+ 60, 59, 61, 15, 24, 16, 17, 46, 34, 32,
218
+ 35, 34, 38, 35, 34, 32, 35, -21, -21, -21,
219
+ 34, 49, 35, 34, 32, 35, 34, 38, 35, -22,
220
+ -22, -22, 34, 53, 35, 34, 32, 35, 34, 32,
221
+ 35, -21, -21, -21, 60, 59, 61, 60, 59, 61,
222
+ 66 ]
209
223
 
210
224
  racc_action_check = [
211
- 14, 1, 11, 43, 15, 43, 16, 14, 1, 14,
212
- 14, 15, 50, 15, 15, 49, 49, 49, 49, 12,
213
- 50, 12, 23, 49, 27, 37, 12, 32, 32, 32,
214
- 45, 45, 45, 38, 32, 40, 44, 45, 62, 62,
215
- 62, 0, 0, 0, 57, 62, 0, 0, 17, 17,
216
- 17, 60, 61, 17, 17, 9, 9, 9, 9, 20,
217
- 20, 20, 20, 5, 5, 5, 10, 10, 10, 21,
218
- 21, 21, 22, 22, 22, 24, 24, 24, 25, 25,
219
- 25, 28, 28, 28, 29, 29, 29, 35, 35, 35,
220
- 41, 41, 41, 42, 42, 42, 67, 67, 67, 73,
221
- 73, 73, 64, 70, 72 ]
225
+ 15, 1, 8, 39, 16, 39, 11, 15, 1, 15,
226
+ 15, 16, 18, 16, 16, 0, 0, 0, 26, 31,
227
+ 0, 0, 37, 0, 9, 9, 9, 9, 19, 19,
228
+ 19, 44, 9, 19, 19, 45, 19, 22, 22, 22,
229
+ 22, 56, 56, 56, 56, 22, 57, 47, 13, 56,
230
+ 13, 36, 36, 36, 57, 13, 50, 65, 36, 52,
231
+ 52, 52, 62, 62, 62, 67, 52, 68, 71, 62,
232
+ 69, 69, 69, 5, 5, 5, 77, 69, 10, 10,
233
+ 10, 12, 12, 12, 23, 23, 23, 24, 24, 24,
234
+ 27, 27, 27, 28, 28, 28, 29, 29, 29, 32,
235
+ 32, 32, 33, 33, 33, 42, 42, 42, 48, 48,
236
+ 48, 49, 49, 49, 74, 74, 74, 80, 80, 80,
237
+ 79 ]
222
238
 
223
239
  racc_action_pointer = [
224
- 38, 1, nil, nil, nil, 60, nil, nil, nil, 52,
225
- 63, 0, 19, nil, 0, 4, 6, 45, nil, nil,
226
- 56, 66, 69, 12, 72, 75, nil, 22, 78, 81,
227
- nil, nil, 24, nil, nil, 84, nil, 16, 23, nil,
228
- 25, 87, 90, 0, 34, 27, nil, nil, nil, 13,
229
- 10, nil, nil, nil, nil, nil, nil, 35, nil, nil,
230
- 42, 42, 35, nil, 92, nil, nil, 93, nil, nil,
231
- 94, nil, 94, 96, nil ]
240
+ 12, 1, nil, nil, nil, 70, nil, nil, -7, 21,
241
+ 75, 4, 78, 48, nil, 0, 4, nil, 12, 25,
242
+ nil, nil, 34, 81, 84, nil, 8, 87, 90, 93,
243
+ nil, 17, 96, 99, nil, nil, 48, 20, nil, 0,
244
+ nil, nil, 102, nil, 22, 25, nil, 37, 105, 108,
245
+ 54, nil, 56, nil, nil, nil, 39, 44, nil, nil,
246
+ nil, nil, 59, nil, nil, 48, nil, 56, 57, 67,
247
+ nil, 58, nil, nil, 111, nil, nil, 67, nil, 110,
248
+ 114, nil ]
232
249
 
233
250
  racc_action_default = [
234
- -1, -49, -2, -4, -5, -49, -8, -9, -10, -23,
235
- -49, -49, -19, -28, -30, -31, -49, -49, -6, -7,
236
- -49, -49, -38, -41, -49, -49, -29, -15, -22, -23,
237
- -30, -31, -36, 75, -3, -49, -15, -45, -42, -43,
238
- -41, -49, -22, -23, -14, -36, -21, -16, -24, -37,
239
- -26, -32, -38, -39, -40, -14, -11, -46, -47, -44,
240
- -45, -41, -36, -17, -49, -33, -35, -49, -48, -12,
241
- -45, -18, -25, -27, -13 ]
251
+ -1, -51, -2, -4, -5, -51, -8, -9, -10, -25,
252
+ -51, -51, -51, -21, -30, -32, -33, -49, -51, -51,
253
+ -6, -7, -51, -51, -40, -50, -43, -51, -51, -51,
254
+ -31, -16, -24, -25, -32, -33, -38, -51, -24, -25,
255
+ 82, -3, -51, -16, -47, -44, -45, -43, -51, -24,
256
+ -14, -15, -38, -23, -17, -26, -39, -28, -34, -40,
257
+ -41, -42, -38, -14, -11, -48, -46, -47, -43, -38,
258
+ -19, -51, -35, -37, -51, -18, -12, -47, -20, -27,
259
+ -29, -13 ]
242
260
 
243
261
  racc_goto_table = [
244
- 3, 37, 26, 50, 56, 18, 2, 9, 47, 23,
245
- 1, 19, 20, 26, 73, 27, 50, 3, 60, 64,
246
- 23, 63, 26, 34, 9, nil, 36, 69, 21, 40,
247
- 44, 43, 25, 50, nil, 72, 26, 74, 71, 70,
248
- 55, nil, nil, 35, nil, nil, 61, 41, nil, 65,
249
- nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
262
+ 3, 1, 45, 44, 54, 20, 64, 21, 31, 26,
263
+ 37, 23, 80, 2, 71, 28, 8, nil, 30, 3,
264
+ 70, 43, 26, 45, 67, 47, 50, 51, 42, 76,
265
+ 75, 30, 41, 48, nil, 8, nil, 78, 9, 81,
266
+ 63, nil, 30, 22, 45, 77, 68, 79, 30, nil,
267
+ 39, nil, nil, nil, nil, nil, 72, 9, nil, nil,
268
+ nil, nil, nil, nil, nil, 39, nil, 39, nil, nil,
250
269
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
251
- nil, nil, nil, 65 ]
270
+ 72 ]
252
271
 
253
272
  racc_goto_check = [
254
- 3, 8, 17, 16, 9, 3, 2, 7, 12, 3,
255
- 1, 4, 7, 17, 14, 10, 16, 3, 8, 15,
256
- 3, 12, 17, 2, 7, nil, 10, 9, 11, 10,
257
- 10, 7, 11, 16, nil, 16, 17, 9, 12, 8,
258
- 10, nil, nil, 11, nil, nil, 10, 11, nil, 3,
273
+ 3, 1, 17, 9, 13, 3, 10, 4, 11, 3,
274
+ 11, 12, 15, 2, 16, 12, 7, nil, 18, 3,
275
+ 13, 11, 3, 17, 9, 11, 11, 11, 12, 10,
276
+ 13, 18, 2, 12, nil, 7, nil, 13, 8, 10,
277
+ 11, nil, 18, 8, 17, 9, 11, 17, 18, nil,
278
+ 8, nil, nil, nil, nil, nil, 3, 8, nil, nil,
279
+ nil, nil, nil, nil, nil, 8, nil, 8, nil, nil,
259
280
  nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
260
- nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
261
- nil, nil, nil, 3 ]
281
+ 3 ]
262
282
 
263
283
  racc_goto_pointer = [
264
- nil, 10, 6, 0, 6, nil, nil, 7, -22, -33,
265
- 5, 23, -24, nil, -53, -30, -29, -7, nil ]
284
+ nil, 1, 13, 0, 2, nil, nil, 16, 38, -23,
285
+ -38, -2, 6, -32, nil, -62, -42, -24, 9 ]
266
286
 
267
287
  racc_goto_default = [
268
- nil, nil, nil, 51, 4, 5, 6, 29, nil, nil,
269
- 11, 10, nil, 48, 49, nil, 38, 13, 57 ]
288
+ nil, nil, nil, 58, 4, 5, 6, 65, 33, nil,
289
+ nil, 11, 10, nil, 55, 56, nil, 57, 14 ]
270
290
 
271
291
  racc_reduce_table = [
272
292
  0, 0, :racc_error,
273
- 0, 12, :_reduce_1,
274
- 1, 12, :_reduce_2,
275
- 3, 12, :_reduce_3,
276
- 1, 13, :_reduce_4,
277
- 1, 13, :_reduce_none,
278
- 2, 13, :_reduce_6,
279
- 2, 13, :_reduce_7,
280
- 1, 13, :_reduce_none,
281
- 1, 16, :_reduce_9,
282
- 1, 16, :_reduce_10,
283
- 4, 15, :_reduce_11,
284
- 5, 15, :_reduce_12,
285
- 6, 15, :_reduce_13,
286
- 3, 15, :_reduce_14,
287
- 2, 15, :_reduce_15,
288
- 3, 17, :_reduce_16,
289
- 4, 17, :_reduce_17,
290
- 5, 17, :_reduce_18,
291
- 1, 22, :_reduce_none,
292
- 2, 22, :_reduce_20,
293
- 3, 22, :_reduce_21,
294
- 1, 21, :_reduce_none,
295
- 1, 21, :_reduce_none,
296
- 1, 23, :_reduce_24,
297
- 3, 23, :_reduce_25,
298
- 1, 23, :_reduce_26,
299
- 3, 23, :_reduce_27,
300
- 1, 18, :_reduce_none,
301
- 2, 18, :_reduce_29,
302
- 1, 28, :_reduce_none,
303
- 1, 28, :_reduce_none,
304
- 1, 25, :_reduce_none,
305
- 2, 25, :_reduce_33,
306
- 0, 26, :_reduce_none,
307
- 1, 26, :_reduce_none,
308
- 0, 24, :_reduce_none,
309
- 1, 24, :_reduce_none,
310
- 1, 14, :_reduce_none,
293
+ 0, 13, :_reduce_1,
294
+ 1, 13, :_reduce_2,
295
+ 3, 13, :_reduce_3,
296
+ 1, 14, :_reduce_4,
311
297
  1, 14, :_reduce_none,
298
+ 2, 14, :_reduce_6,
299
+ 2, 14, :_reduce_7,
312
300
  1, 14, :_reduce_none,
313
- 0, 19, :_reduce_none,
314
- 1, 19, :_reduce_none,
315
- 1, 27, :_reduce_none,
316
- 2, 27, :_reduce_44,
317
- 0, 20, :_reduce_none,
301
+ 1, 17, :_reduce_9,
302
+ 1, 17, :_reduce_10,
303
+ 4, 16, :_reduce_11,
304
+ 5, 16, :_reduce_12,
305
+ 6, 16, :_reduce_13,
306
+ 3, 16, :_reduce_14,
307
+ 3, 16, :_reduce_15,
308
+ 2, 16, :_reduce_16,
309
+ 3, 18, :_reduce_17,
310
+ 4, 18, :_reduce_18,
311
+ 4, 18, :_reduce_19,
312
+ 5, 18, :_reduce_20,
313
+ 1, 24, :_reduce_none,
314
+ 2, 24, :_reduce_22,
315
+ 3, 24, :_reduce_23,
316
+ 1, 23, :_reduce_none,
317
+ 1, 23, :_reduce_none,
318
+ 1, 25, :_reduce_26,
319
+ 3, 25, :_reduce_27,
320
+ 1, 25, :_reduce_28,
321
+ 3, 25, :_reduce_29,
318
322
  1, 20, :_reduce_none,
323
+ 2, 20, :_reduce_31,
324
+ 1, 30, :_reduce_none,
325
+ 1, 30, :_reduce_none,
326
+ 1, 27, :_reduce_none,
327
+ 2, 27, :_reduce_35,
328
+ 0, 28, :_reduce_none,
329
+ 1, 28, :_reduce_none,
330
+ 0, 26, :_reduce_none,
331
+ 1, 26, :_reduce_none,
332
+ 1, 15, :_reduce_none,
333
+ 1, 15, :_reduce_none,
334
+ 1, 15, :_reduce_none,
335
+ 0, 21, :_reduce_none,
336
+ 1, 21, :_reduce_none,
319
337
  1, 29, :_reduce_none,
320
- 2, 29, :_reduce_48 ]
338
+ 2, 29, :_reduce_46,
339
+ 0, 22, :_reduce_none,
340
+ 1, 22, :_reduce_none,
341
+ 1, 19, :_reduce_none,
342
+ 2, 19, :_reduce_50 ]
321
343
 
322
- racc_reduce_n = 49
344
+ racc_reduce_n = 51
323
345
 
324
- racc_shift_n = 75
346
+ racc_shift_n = 82
325
347
 
326
348
  racc_token_table = {
327
349
  false => 0,
@@ -334,9 +356,10 @@ racc_token_table = {
334
356
  :AND => 7,
335
357
  :APPELLATION => 8,
336
358
  :TITLE => 9,
337
- :SUFFIX => 10 }
359
+ :SUFFIX => 10,
360
+ :UPARTICLE => 11 }
338
361
 
339
- racc_nt_base = 11
362
+ racc_nt_base = 12
340
363
 
341
364
  racc_use_result_var = true
342
365
 
@@ -368,6 +391,7 @@ Racc_token_to_s_table = [
368
391
  "APPELLATION",
369
392
  "TITLE",
370
393
  "SUFFIX",
394
+ "UPARTICLE",
371
395
  "$start",
372
396
  "names",
373
397
  "name",
@@ -375,6 +399,7 @@ Racc_token_to_s_table = [
375
399
  "display_order",
376
400
  "honorific",
377
401
  "sort_order",
402
+ "titles",
378
403
  "u_words",
379
404
  "opt_suffices",
380
405
  "opt_titles",
@@ -385,8 +410,7 @@ Racc_token_to_s_table = [
385
410
  "words",
386
411
  "opt_comma",
387
412
  "suffices",
388
- "u_word",
389
- "titles" ]
413
+ "u_word" ]
390
414
 
391
415
  Racc_debug_parser = false
392
416
 
@@ -493,14 +517,26 @@ module_eval(<<'.,.,', 'parser.y', 42)
493
517
 
494
518
  module_eval(<<'.,.,', 'parser.y', 47)
495
519
  def _reduce_15(val, _values, result)
496
- result = Name.new(:particle => val[0], :family => val[1])
520
+ result = if include_particle_in_family?
521
+ Name.new(:given => val[0], :family => val[1,2].join(' '))
522
+ else
523
+ Name.new(:given => val[0], :particle => val[1], :family => val[2])
524
+ end
497
525
 
498
526
  result
499
527
  end
500
528
  .,.,
501
529
 
502
- module_eval(<<'.,.,', 'parser.y', 52)
530
+ module_eval(<<'.,.,', 'parser.y', 55)
503
531
  def _reduce_16(val, _values, result)
532
+ result = Name.new(:particle => val[0], :family => val[1])
533
+
534
+ result
535
+ end
536
+ .,.,
537
+
538
+ module_eval(<<'.,.,', 'parser.y', 60)
539
+ def _reduce_17(val, _values, result)
504
540
  result = Name.new({ :family => val[0], :suffix => val[2][0],
505
541
  :given => val[2][1] }, !!val[2][0])
506
542
 
@@ -508,8 +544,20 @@ module_eval(<<'.,.,', 'parser.y', 52)
508
544
  end
509
545
  .,.,
510
546
 
511
- module_eval(<<'.,.,', 'parser.y', 57)
512
- def _reduce_17(val, _values, result)
547
+ module_eval(<<'.,.,', 'parser.y', 65)
548
+ def _reduce_18(val, _values, result)
549
+ result = if include_particle_in_family?
550
+ Name.new({ :family => val[0,2].join(' '), :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
551
+ else
552
+ Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
553
+ end
554
+
555
+ result
556
+ end
557
+ .,.,
558
+
559
+ module_eval(<<'.,.,', 'parser.y', 73)
560
+ def _reduce_19(val, _values, result)
513
561
  result = Name.new({ :particle => val[0], :family => val[1],
514
562
  :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
515
563
 
@@ -517,8 +565,8 @@ module_eval(<<'.,.,', 'parser.y', 57)
517
565
  end
518
566
  .,.,
519
567
 
520
- module_eval(<<'.,.,', 'parser.y', 62)
521
- def _reduce_18(val, _values, result)
568
+ module_eval(<<'.,.,', 'parser.y', 78)
569
+ def _reduce_20(val, _values, result)
522
570
  result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
523
571
  :suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
524
572
 
@@ -526,80 +574,76 @@ module_eval(<<'.,.,', 'parser.y', 62)
526
574
  end
527
575
  .,.,
528
576
 
529
- # reduce 19 omitted
577
+ # reduce 21 omitted
530
578
 
531
- module_eval(<<'.,.,', 'parser.y', 68)
532
- def _reduce_20(val, _values, result)
579
+ module_eval(<<'.,.,', 'parser.y', 84)
580
+ def _reduce_22(val, _values, result)
533
581
  result = val.join(' ')
534
582
  result
535
583
  end
536
584
  .,.,
537
585
 
538
- module_eval(<<'.,.,', 'parser.y', 69)
539
- def _reduce_21(val, _values, result)
586
+ module_eval(<<'.,.,', 'parser.y', 85)
587
+ def _reduce_23(val, _values, result)
540
588
  result = val.join(' ')
541
589
  result
542
590
  end
543
591
  .,.,
544
592
 
545
- # reduce 22 omitted
593
+ # reduce 24 omitted
546
594
 
547
- # reduce 23 omitted
595
+ # reduce 25 omitted
548
596
 
549
- module_eval(<<'.,.,', 'parser.y', 73)
550
- def _reduce_24(val, _values, result)
597
+ module_eval(<<'.,.,', 'parser.y', 89)
598
+ def _reduce_26(val, _values, result)
551
599
  result = [nil,val[0]]
552
600
  result
553
601
  end
554
602
  .,.,
555
603
 
556
- module_eval(<<'.,.,', 'parser.y', 74)
557
- def _reduce_25(val, _values, result)
604
+ module_eval(<<'.,.,', 'parser.y', 90)
605
+ def _reduce_27(val, _values, result)
558
606
  result = [val[2],val[0]]
559
607
  result
560
608
  end
561
609
  .,.,
562
610
 
563
- module_eval(<<'.,.,', 'parser.y', 75)
564
- def _reduce_26(val, _values, result)
611
+ module_eval(<<'.,.,', 'parser.y', 91)
612
+ def _reduce_28(val, _values, result)
565
613
  result = [val[0],nil]
566
614
  result
567
615
  end
568
616
  .,.,
569
617
 
570
- module_eval(<<'.,.,', 'parser.y', 76)
571
- def _reduce_27(val, _values, result)
618
+ module_eval(<<'.,.,', 'parser.y', 92)
619
+ def _reduce_29(val, _values, result)
572
620
  result = [val[0],val[2]]
573
621
  result
574
622
  end
575
623
  .,.,
576
624
 
577
- # reduce 28 omitted
625
+ # reduce 30 omitted
578
626
 
579
- module_eval(<<'.,.,', 'parser.y', 79)
580
- def _reduce_29(val, _values, result)
627
+ module_eval(<<'.,.,', 'parser.y', 95)
628
+ def _reduce_31(val, _values, result)
581
629
  result = val.join(' ')
582
630
  result
583
631
  end
584
632
  .,.,
585
633
 
586
- # reduce 30 omitted
634
+ # reduce 32 omitted
587
635
 
588
- # reduce 31 omitted
636
+ # reduce 33 omitted
589
637
 
590
- # reduce 32 omitted
638
+ # reduce 34 omitted
591
639
 
592
- module_eval(<<'.,.,', 'parser.y', 84)
593
- def _reduce_33(val, _values, result)
640
+ module_eval(<<'.,.,', 'parser.y', 100)
641
+ def _reduce_35(val, _values, result)
594
642
  result = val.join(' ')
595
643
  result
596
644
  end
597
645
  .,.,
598
646
 
599
- # reduce 34 omitted
600
-
601
- # reduce 35 omitted
602
-
603
647
  # reduce 36 omitted
604
648
 
605
649
  # reduce 37 omitted
@@ -616,21 +660,25 @@ module_eval(<<'.,.,', 'parser.y', 84)
616
660
 
617
661
  # reduce 43 omitted
618
662
 
619
- module_eval(<<'.,.,', 'parser.y', 94)
620
- def _reduce_44(val, _values, result)
663
+ # reduce 44 omitted
664
+
665
+ # reduce 45 omitted
666
+
667
+ module_eval(<<'.,.,', 'parser.y', 110)
668
+ def _reduce_46(val, _values, result)
621
669
  result = val.join(' ')
622
670
  result
623
671
  end
624
672
  .,.,
625
673
 
626
- # reduce 45 omitted
674
+ # reduce 47 omitted
627
675
 
628
- # reduce 46 omitted
676
+ # reduce 48 omitted
629
677
 
630
- # reduce 47 omitted
678
+ # reduce 49 omitted
631
679
 
632
- module_eval(<<'.,.,', 'parser.y', 99)
633
- def _reduce_48(val, _values, result)
680
+ module_eval(<<'.,.,', 'parser.y', 115)
681
+ def _reduce_50(val, _values, result)
634
682
  result = val.join(' ')
635
683
  result
636
684
  end
data/lib/namae/parser.y CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  class Namae::Parser
5
5
 
6
- token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX
6
+ token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX UPARTICLE
7
7
 
8
8
  expect 0
9
9
 
@@ -20,7 +20,7 @@ rule
20
20
  | sort_order
21
21
 
22
22
  honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
23
- | TITLE { result = Name.new(:title => val[0]) }
23
+ | titles { result = Name.new(:title => val[0]) }
24
24
 
25
25
  display_order : u_words word opt_suffices opt_titles
26
26
  {
@@ -43,6 +43,14 @@ rule
43
43
  result = Name.new(:given => val[0], :particle => val[1],
44
44
  :family => val[2])
45
45
  }
46
+ | u_words UPARTICLE last
47
+ {
48
+ result = if include_particle_in_family?
49
+ Name.new(:given => val[0], :family => val[1,2].join(' '))
50
+ else
51
+ Name.new(:given => val[0], :particle => val[1], :family => val[2])
52
+ end
53
+ }
46
54
  | von last
47
55
  {
48
56
  result = Name.new(:particle => val[0], :family => val[1])
@@ -53,6 +61,14 @@ rule
53
61
  result = Name.new({ :family => val[0], :suffix => val[2][0],
54
62
  :given => val[2][1] }, !!val[2][0])
55
63
  }
64
+ | UPARTICLE last COMMA first
65
+ {
66
+ result = if include_particle_in_family?
67
+ Name.new({ :family => val[0,2].join(' '), :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
68
+ else
69
+ Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
70
+ end
71
+ }
56
72
  | von last COMMA first
57
73
  {
58
74
  result = Name.new({ :particle => val[0], :family => val[1],
@@ -107,12 +123,14 @@ require 'strscan'
107
123
  @defaults = {
108
124
  :debug => false,
109
125
  :prefer_comma_as_separator => false,
126
+ :include_particle_in_family => false,
110
127
  :comma => ',',
111
128
  :stops => ',;',
112
129
  :separator => /\s*(\band\b|\&|;)\s*/i,
113
130
  :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
114
131
  :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
115
- :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
132
+ :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
133
+ :uppercase_particle => /\s*\b((Da|De|Di|De\sLa|Du|Der|Des|Da|St|Saint|Les|Van)\.?)(\s+|$)/
116
134
  }
117
135
 
118
136
  class << self
@@ -141,6 +159,10 @@ require 'strscan'
141
159
  options[:comma]
142
160
  end
143
161
 
162
+ def include_particle_in_family?
163
+ options[:include_particle_in_family]
164
+ end
165
+
144
166
  def stops
145
167
  options[:stops]
146
168
  end
@@ -157,6 +179,10 @@ require 'strscan'
157
179
  options[:appellation]
158
180
  end
159
181
 
182
+ def uppercase_particle
183
+ options[:uppercase_particle]
184
+ end
185
+
160
186
  def prefer_comma_as_separator?
161
187
  options[:prefer_comma_as_separator]
162
188
  end
@@ -262,6 +288,8 @@ require 'strscan'
262
288
  else
263
289
  consume_word(:UWORD, input.matched)
264
290
  end
291
+ when input.scan(uppercase_particle)
292
+ consume_word(:UPARTICLE, input.matched.strip)
265
293
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
266
294
  consume_word(:UWORD, input.matched)
267
295
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
data/lib/namae/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Namae
2
2
  module Version
3
3
  MAJOR = 1
4
- MINOR = 0
5
- PATCH = 2
4
+ MINOR = 1
5
+ PATCH = 0
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
data/namae.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: namae 1.0.2 ruby lib
5
+ # stub: namae 1.1.0 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "namae".freeze
9
- s.version = "1.0.2"
9
+ s.version = "1.1.0"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Sylvester Keil".freeze, "Dan Collis-Puro".freeze]
14
- s.date = "2021-01-07"
14
+ s.date = "2021-03-12"
15
15
  s.description = " Namae (\u540D\u524D) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
16
16
  s.email = ["sylvester@keil.or.at".freeze, "dan@collispuro.com".freeze]
17
17
  s.extra_rdoc_files = [
@@ -191,6 +191,66 @@ module Namae
191
191
  expect(parser.parse!('Bernado Franecki Ph.D.')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Ph.D.'])
192
192
  #expect(parser.parse!('Bernado Franecki, Ph.D.')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Ph.D.'])
193
193
  end
194
+
195
+ it 'parses consecutive titles in display order' do
196
+ expect(parser.parse!('Lt. Col. Bernado Franecki')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Lt. Col.'])
197
+ end
198
+
199
+ context 'when include_particle_in_family is false' do
200
+ let(:parser) { Parser.new(include_particle_in_family: false) }
201
+
202
+ it 'parses common capitalized particles as the family name in display order' do
203
+ expect(parser.parse!('Carlos De Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'De'])
204
+ end
205
+
206
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
207
+ expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
208
+ end
209
+
210
+ it 'parses common lowercase particles as a particle, not family name in display order' do
211
+ expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
212
+ end
213
+
214
+ it 'parses common capitalized particles as the family name in sort order' do
215
+ expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'De'])
216
+ end
217
+
218
+ it 'parses common lowercase particles as a particle, not family name in sort order' do
219
+ expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
220
+ end
221
+
222
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
223
+ expect(parser.parse!('St. Hilaire, Matt')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
224
+ end
225
+ end
226
+
227
+ context 'when include_particle_in_family is true' do
228
+ let(:parser) { Parser.new(include_particle_in_family: true) }
229
+
230
+ it 'parses common capitalized particles as the family name in display order' do
231
+ expect(parser.parse!('Carlos De Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
232
+ end
233
+
234
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
235
+ expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
236
+ end
237
+
238
+ it 'parses common lowercase particles as a particle, not family name in display order' do
239
+ expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
240
+ end
241
+
242
+ it 'parses common capitalized particles as the family name in sort order' do
243
+ expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
244
+ end
245
+
246
+ it 'parses common lowercase particles as a particle, not family name in sort order' do
247
+ expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
248
+ end
249
+
250
+ it 'parses common capitalized particles with punctuation as the family name in display order' do
251
+ expect(parser.parse!('St. Hilaire, Matt')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
252
+ end
253
+ end
194
254
  end
195
255
  end
196
256
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-01-07 00:00:00.000000000 Z
12
+ date: 2021-03-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: racc