namae 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82fa8955c4f650ccbcb6bf67db18f005eafd2f1f09252b8d98203a6f04949ed2
4
- data.tar.gz: c96965c52193db381f8fceb0e8bfc34453f62127c846b6cb593e804c25394908
3
+ metadata.gz: 348bf4a2385c1aa56c35759cc2219a8163fa7cb76e3c05482cd6db7a207906fb
4
+ data.tar.gz: 4329ea23260aef483460581391fcd43c80bde61ecebef419f89c2d23f0cfeffc
5
5
  SHA512:
6
- metadata.gz: 14644528eb8d587a2fd0064fbddfa6cfe8925d7a1b9b3f6a75b33c599f3bbb9a43b53e951e4ec8fab9506c5edc2ef62abff77fc1f43cca1ce3489c5ab4c36f8f
7
- data.tar.gz: 921b0c60e964b8e5f6154008ab8923d59a2df5523484e38618f3ddddbd89eb2d0ad99ffdbbc9d90629cced3051e38fffb05617657338018afd8913cefc6e891c
6
+ metadata.gz: 806964f1611f6931acd6e68e4f5a75069b30abf42795191bf084d89adbfe378d98f6b28f10f96cc5cc446ab9068ddafafd6794e71f2c53fd97d2a0aff5a59914
7
+ data.tar.gz: f235fb82617020393be215fe078bd74bbb947ab682f387f5aad87e7bb6e3b62323ee03da30409034ca747beca7b5319905f1a081a43bc8763a25dca5b40722c4
data/README.md CHANGED
@@ -121,6 +121,16 @@ ambiguous. For example, multiple family names are always possible in sort-order:
121
121
  Whilst in display-order, multiple family names are only supported when the
122
122
  name contains a particle or a nickname.
123
123
 
124
+ Namae tries to detect common particles using the `:uppercase_particle` lexer
125
+ pattern. If you prefer to always include particles with the family name, you
126
+ can set the `:include_particle_in_family` parser option.
127
+
128
+ Namae.parse 'Ludwig von Beethoven'
129
+ #-> [#<Name family="Beethoven" given="Ludwig" particle="von">]
130
+
131
+ Namae.options[:include_particle_in_family] = true
132
+ #-> [#<Name family="von Beethoven" given="Ludwig">]
133
+
124
134
  Configuration
125
135
  -------------
126
136
  You can tweak some of Namae's parse rules by configuring the parser's
@@ -34,3 +34,19 @@ Feature: Parse the names in the Readme file
34
34
  | Mr. Yukihiro "Matz" Matsumoto | Yukihiro | | Matsumoto | | | Mr. | Matz |
35
35
  | Yukihiro "Matz" Matsumoto Sr. | Yukihiro | | Matsumoto | Sr. | | | Matz |
36
36
  | Mr. Yukihiro "Matz" Matsumoto Sr. | Yukihiro | | Matsumoto | Sr. | | Mr. | Matz |
37
+
38
+ @particle
39
+ Scenarios: Particles
40
+ | name | given | particle | family | suffix | title | appellation | nick |
41
+ | Ludwig von Beethoven | Ludwig | von | Beethoven | | | | |
42
+ | Beethoven, Ludwig von | Ludwig von | | Beethoven | | | | |
43
+ | Vincent Van Gogh | Vincent | Van | Gogh | | | | |
44
+ | Vincent van Gogh | Vincent | van | Gogh | | | | |
45
+ | Van Gogh, Vincent | Vincent | Van | Gogh | | | | |
46
+ | van Gogh, Vincent | Vincent | van | Gogh | | | | |
47
+ | Walther von der Vogelheide | Walther | von der | Vogelheide | | | | |
48
+ | Don De Lillo | Don | De | Lillo | | | | |
49
+ | De Lillo, Don | Don | De | Lillo | | | | |
50
+ | Tom Van de Weghe | Tom | Van de | Weghe | | | | |
51
+ | Tom Van De Weghe | Tom | Van De | Weghe | | | | |
52
+
@@ -121,12 +121,12 @@ Feature: Parse a list of names
121
121
  Then the names should be:
122
122
  | given | family |
123
123
  | M. | Di Proctor |
124
- | P. | Cooper |
124
+ | P. | von Cooper |
125
125
  When I parse the names "Di Proctor, M, von Cooper, P"
126
126
  Then the names should be:
127
127
  | given | family |
128
128
  | M | Di Proctor |
129
- | P | Cooper |
129
+ | P | von Cooper |
130
130
 
131
131
  Scenario: A list of names with two consecutive accented characters
132
132
  Given I want to include particles in the family name
data/lib/namae.rb CHANGED
@@ -2,4 +2,4 @@ require 'namae/version'
2
2
 
3
3
  require 'namae/name'
4
4
  require 'namae/parser'
5
- require 'namae/utility'
5
+ require 'namae/utility'
data/lib/namae/name.rb CHANGED
@@ -183,6 +183,13 @@ module Namae
183
183
  self
184
184
  end
185
185
 
186
+ def merge_particles!
187
+ self.family = [dropping_particle, particle, family].compact.join(' ')
188
+ self.dropping_particle = nil
189
+ self.particle = nil
190
+ self
191
+ end
192
+
186
193
  # @return [String] a string representation of the name
187
194
  def inspect
188
195
  "#<Name #{each_pair.map { |k,v| [k,v.inspect].join('=') if v }.compact.join(' ')}>"
data/lib/namae/parser.rb CHANGED
@@ -11,7 +11,7 @@ require 'strscan'
11
11
  module Namae
12
12
  class Parser < Racc::Parser
13
13
 
14
- module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
14
+ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 111)
15
15
 
16
16
  @defaults = {
17
17
  :debug => false,
@@ -23,7 +23,7 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
23
23
  :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
24
24
  :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
25
25
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
26
- :uppercase_particle => /\s*\b((Da|De|Di|De\sLa|Du|Der|Des|Da|St|Saint|Les|Van)\.?)(\s+|$)/
26
+ :uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
27
27
  }
28
28
 
29
29
  class << self
@@ -90,7 +90,9 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
90
90
  def parse!(string)
91
91
  @input = StringScanner.new(normalize(string))
92
92
  reset
93
- do_parse
93
+ names = do_parse
94
+ names.map(&:merge_particles!) if include_particle_in_family?
95
+ names
94
96
  end
95
97
 
96
98
  def normalize(string)
@@ -207,86 +209,84 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 122)
207
209
  ##### State transition tables begin ###
208
210
 
209
211
  racc_action_table = [
210
- -41, 18, 25, 34, -42, 35, 36, -41, 19, -41,
211
- -41, -42, 40, -42, -42, 15, 13, 16, 46, 52,
212
- 7, 17, 62, 12, 15, 24, 16, 27, 15, 13,
213
- 16, 17, 29, 7, 17, 66, 12, 15, 24, 16,
214
- 27, 73, 60, 59, 61, 29, 74, 46, -40, -36,
215
- -24, 60, 59, 61, 66, -40, 69, 25, 46, 60,
216
- 59, 61, 60, 59, 61, 17, 46, 46, 46, 46,
217
- 60, 59, 61, 15, 24, 16, 17, 46, 34, 32,
218
- 35, 34, 38, 35, 34, 32, 35, -21, -21, -21,
219
- 34, 49, 35, 34, 32, 35, 34, 38, 35, -22,
220
- -22, -22, 34, 53, 35, 34, 32, 35, 34, 32,
221
- 35, -21, -21, -21, 60, 59, 61, 60, 59, 61,
222
- 66 ]
212
+ -43, 36, 26, 37, -41, 38, 39, -43, -42, -43,
213
+ -43, -41, -40, -41, -41, -42, 45, -42, -42, -40,
214
+ 50, -40, -40, 72, 59, 58, 60, 73, 16, 13,
215
+ 17, -36, 61, 7, 18, 65, 14, 16, 25, 17,
216
+ 16, 25, 17, 28, 18, 14, 65, 45, 14, 36,
217
+ 34, 37, 68, 16, 13, 17, 26, 35, 7, 18,
218
+ 18, 14, 16, 25, 17, 28, 36, 34, 37, 45,
219
+ 14, 36, 34, 37, 35, 36, 34, 37, 45, 35,
220
+ 36, 52, 37, 35, -22, -22, -22, 18, 35, 59,
221
+ 58, 60, -22, 36, 34, 37, 45, 61, 36, 34,
222
+ 37, 35, 59, 58, 60, 65, 35, nil, nil, 45,
223
+ 61, 59, 58, 60, 59, 58, 60, nil, 45, 61,
224
+ 19, nil, 61, 59, 58, 60, -40, 20, -24, nil,
225
+ nil, 61, nil, -40 ]
223
226
 
224
227
  racc_action_check = [
225
- 15, 1, 8, 39, 16, 39, 11, 15, 1, 15,
226
- 15, 16, 18, 16, 16, 0, 0, 0, 26, 31,
227
- 0, 0, 37, 0, 9, 9, 9, 9, 19, 19,
228
- 19, 44, 9, 19, 19, 45, 19, 22, 22, 22,
229
- 22, 56, 56, 56, 56, 22, 57, 47, 13, 56,
230
- 13, 36, 36, 36, 57, 13, 50, 65, 36, 52,
231
- 52, 52, 62, 62, 62, 67, 52, 68, 71, 62,
232
- 69, 69, 69, 5, 5, 5, 77, 69, 10, 10,
233
- 10, 12, 12, 12, 23, 23, 23, 24, 24, 24,
234
- 27, 27, 27, 28, 28, 28, 29, 29, 29, 32,
235
- 32, 32, 33, 33, 33, 42, 42, 42, 48, 48,
236
- 48, 49, 49, 49, 74, 74, 74, 80, 80, 80,
237
- 79 ]
228
+ 14, 48, 8, 48, 16, 11, 19, 14, 17, 14,
229
+ 14, 16, 25, 16, 16, 17, 27, 17, 17, 25,
230
+ 31, 25, 25, 55, 55, 55, 55, 56, 0, 0,
231
+ 0, 55, 55, 0, 0, 56, 0, 5, 5, 5,
232
+ 9, 9, 9, 9, 43, 5, 44, 46, 9, 10,
233
+ 10, 10, 49, 20, 20, 20, 64, 10, 20, 20,
234
+ 66, 20, 23, 23, 23, 23, 24, 24, 24, 67,
235
+ 23, 28, 28, 28, 24, 29, 29, 29, 70, 28,
236
+ 33, 33, 33, 29, 34, 34, 34, 75, 33, 38,
237
+ 38, 38, 34, 41, 41, 41, 38, 38, 47, 47,
238
+ 47, 41, 50, 50, 50, 77, 47, nil, nil, 50,
239
+ 50, 68, 68, 68, 73, 73, 73, nil, 68, 68,
240
+ 1, nil, 73, 78, 78, 78, 13, 1, 13, nil,
241
+ nil, 78, nil, 13 ]
238
242
 
239
243
  racc_action_pointer = [
240
- 12, 1, nil, nil, nil, 70, nil, nil, -7, 21,
241
- 75, 4, 78, 48, nil, 0, 4, nil, 12, 25,
242
- nil, nil, 34, 81, 84, nil, 8, 87, 90, 93,
243
- nil, 17, 96, 99, nil, nil, 48, 20, nil, 0,
244
- nil, nil, 102, nil, 22, 25, nil, 37, 105, 108,
245
- 54, nil, 56, nil, nil, nil, 39, 44, nil, nil,
246
- nil, nil, 59, nil, nil, 48, nil, 56, 57, 67,
247
- nil, 58, nil, nil, 111, nil, nil, 67, nil, 110,
248
- 114, nil ]
244
+ 25, 120, nil, nil, nil, 34, nil, nil, -7, 37,
245
+ 46, 3, nil, 126, 0, nil, 4, 8, nil, 6,
246
+ 50, nil, nil, 59, 63, 12, nil, 6, 68, 72,
247
+ nil, 18, nil, 77, 81, nil, nil, nil, 86, nil,
248
+ nil, 90, nil, 35, 36, nil, 37, 95, -2, 50,
249
+ 99, nil, nil, nil, nil, 21, 25, nil, nil, nil,
250
+ nil, nil, nil, nil, 47, nil, 51, 59, 108, nil,
251
+ 68, nil, nil, 111, nil, 78, nil, 95, 120, nil ]
249
252
 
250
253
  racc_action_default = [
251
- -1, -51, -2, -4, -5, -51, -8, -9, -10, -25,
252
- -51, -51, -51, -21, -30, -32, -33, -49, -51, -51,
253
- -6, -7, -51, -51, -40, -50, -43, -51, -51, -51,
254
- -31, -16, -24, -25, -32, -33, -38, -51, -24, -25,
255
- 82, -3, -51, -16, -47, -44, -45, -43, -51, -24,
256
- -14, -15, -38, -23, -17, -26, -39, -28, -34, -40,
257
- -41, -42, -38, -14, -11, -48, -46, -47, -43, -38,
258
- -19, -51, -35, -37, -51, -18, -12, -47, -20, -27,
259
- -29, -13 ]
254
+ -1, -52, -2, -4, -5, -52, -8, -9, -10, -25,
255
+ -52, -52, -19, -22, -23, -30, -32, -33, -50, -52,
256
+ -52, -6, -7, -52, -52, -22, -51, -44, -52, -52,
257
+ -31, -15, -20, -25, -24, -23, -32, -33, -38, 80,
258
+ -3, -52, -15, -48, -45, -46, -44, -52, -25, -14,
259
+ -38, -21, -22, -16, -26, -39, -28, -34, -40, -41,
260
+ -42, -43, -14, -11, -49, -47, -48, -44, -38, -17,
261
+ -52, -35, -37, -52, -12, -48, -18, -27, -29, -13 ]
260
262
 
261
263
  racc_goto_table = [
262
- 3, 1, 45, 44, 54, 20, 64, 21, 31, 26,
263
- 37, 23, 80, 2, 71, 28, 8, nil, 30, 3,
264
- 70, 43, 26, 45, 67, 47, 50, 51, 42, 76,
265
- 75, 30, 41, 48, nil, 8, nil, 78, 9, 81,
266
- 63, nil, 30, 22, 45, 77, 68, 79, 30, nil,
267
- 39, nil, nil, nil, nil, nil, 72, 9, nil, nil,
268
- nil, nil, nil, nil, nil, 39, nil, 39, nil, nil,
269
- nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
270
- 72 ]
264
+ 3, 30, 43, 1, 22, 21, 56, 53, 31, 27,
265
+ 32, 63, 78, 70, nil, 30, nil, nil, 56, 69,
266
+ 3, 66, 42, 27, 32, 30, 46, 49, 24, 32,
267
+ 9, nil, 29, 51, 74, 23, 56, 76, 77, 62,
268
+ 30, 32, 75, 79, 2, 67, 41, 32, 8, nil,
269
+ 9, 47, nil, nil, nil, 71, nil, nil, 48, nil,
270
+ nil, nil, nil, nil, 40, nil, nil, nil, 8, nil,
271
+ nil, nil, nil, nil, nil, nil, nil, nil, 71 ]
271
272
 
272
273
  racc_goto_check = [
273
- 3, 1, 17, 9, 13, 3, 10, 4, 11, 3,
274
- 11, 12, 15, 2, 16, 12, 7, nil, 18, 3,
275
- 13, 11, 3, 17, 9, 11, 11, 11, 12, 10,
276
- 13, 18, 2, 12, nil, 7, nil, 13, 8, 10,
277
- 11, nil, 18, 8, 17, 9, 11, 17, 18, nil,
278
- 8, nil, nil, nil, nil, nil, 3, 8, nil, nil,
279
- nil, nil, nil, nil, nil, 8, nil, 8, nil, nil,
280
- nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
281
- 3 ]
274
+ 3, 19, 9, 1, 4, 3, 18, 13, 11, 3,
275
+ 14, 10, 16, 17, nil, 19, nil, nil, 18, 13,
276
+ 3, 9, 11, 3, 14, 19, 11, 11, 12, 14,
277
+ 8, nil, 12, 14, 10, 8, 18, 13, 18, 11,
278
+ 19, 14, 9, 10, 2, 11, 12, 14, 7, nil,
279
+ 8, 12, nil, nil, nil, 3, nil, nil, 8, nil,
280
+ nil, nil, nil, nil, 2, nil, nil, nil, 7, nil,
281
+ nil, nil, nil, nil, nil, nil, nil, nil, 3 ]
282
282
 
283
283
  racc_goto_pointer = [
284
- nil, 1, 13, 0, 2, nil, nil, 16, 38, -23,
285
- -38, -2, 6, -32, nil, -62, -42, -24, 9 ]
284
+ nil, 3, 44, 0, -1, nil, nil, 48, 30, -25,
285
+ -32, -2, 23, -31, 0, nil, -61, -42, -32, -8 ]
286
286
 
287
287
  racc_goto_default = [
288
- nil, nil, nil, 58, 4, 5, 6, 65, 33, nil,
289
- nil, 11, 10, nil, 55, 56, nil, 57, 14 ]
288
+ nil, nil, nil, 57, 4, 5, 6, 64, 33, nil,
289
+ nil, 11, 10, nil, 12, 54, 55, nil, 44, 15 ]
290
290
 
291
291
  racc_reduce_table = [
292
292
  0, 0, :racc_error,
@@ -304,15 +304,15 @@ racc_reduce_table = [
304
304
  5, 16, :_reduce_12,
305
305
  6, 16, :_reduce_13,
306
306
  3, 16, :_reduce_14,
307
- 3, 16, :_reduce_15,
308
- 2, 16, :_reduce_16,
309
- 3, 18, :_reduce_17,
310
- 4, 18, :_reduce_18,
311
- 4, 18, :_reduce_19,
312
- 5, 18, :_reduce_20,
307
+ 2, 16, :_reduce_15,
308
+ 3, 18, :_reduce_16,
309
+ 4, 18, :_reduce_17,
310
+ 5, 18, :_reduce_18,
313
311
  1, 24, :_reduce_none,
314
- 2, 24, :_reduce_22,
315
- 3, 24, :_reduce_23,
312
+ 2, 24, :_reduce_20,
313
+ 3, 24, :_reduce_21,
314
+ 1, 26, :_reduce_none,
315
+ 1, 26, :_reduce_none,
316
316
  1, 23, :_reduce_none,
317
317
  1, 23, :_reduce_none,
318
318
  1, 25, :_reduce_26,
@@ -321,29 +321,30 @@ racc_reduce_table = [
321
321
  3, 25, :_reduce_29,
322
322
  1, 20, :_reduce_none,
323
323
  2, 20, :_reduce_31,
324
- 1, 30, :_reduce_none,
325
- 1, 30, :_reduce_none,
326
- 1, 27, :_reduce_none,
327
- 2, 27, :_reduce_35,
328
- 0, 28, :_reduce_none,
324
+ 1, 31, :_reduce_none,
325
+ 1, 31, :_reduce_none,
329
326
  1, 28, :_reduce_none,
330
- 0, 26, :_reduce_none,
331
- 1, 26, :_reduce_none,
327
+ 2, 28, :_reduce_35,
328
+ 0, 29, :_reduce_none,
329
+ 1, 29, :_reduce_none,
330
+ 0, 27, :_reduce_none,
331
+ 1, 27, :_reduce_none,
332
+ 1, 15, :_reduce_none,
332
333
  1, 15, :_reduce_none,
333
334
  1, 15, :_reduce_none,
334
335
  1, 15, :_reduce_none,
335
336
  0, 21, :_reduce_none,
336
337
  1, 21, :_reduce_none,
337
- 1, 29, :_reduce_none,
338
- 2, 29, :_reduce_46,
338
+ 1, 30, :_reduce_none,
339
+ 2, 30, :_reduce_47,
339
340
  0, 22, :_reduce_none,
340
341
  1, 22, :_reduce_none,
341
342
  1, 19, :_reduce_none,
342
- 2, 19, :_reduce_50 ]
343
+ 2, 19, :_reduce_51 ]
343
344
 
344
- racc_reduce_n = 51
345
+ racc_reduce_n = 52
345
346
 
346
- racc_shift_n = 82
347
+ racc_shift_n = 80
347
348
 
348
349
  racc_token_table = {
349
350
  false => 0,
@@ -406,6 +407,7 @@ Racc_token_to_s_table = [
406
407
  "last",
407
408
  "von",
408
409
  "first",
410
+ "particle",
409
411
  "opt_words",
410
412
  "words",
411
413
  "opt_comma",
@@ -480,36 +482,36 @@ module_eval(<<'.,.,', 'parser.y', 22)
480
482
 
481
483
  module_eval(<<'.,.,', 'parser.y', 26)
482
484
  def _reduce_11(val, _values, result)
483
- result = Name.new(:given => val[0], :family => val[1],
484
- :suffix => val[2], :title => val[3])
485
+ result = Name.new(
486
+ :given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
487
+ )
485
488
 
486
489
  result
487
490
  end
488
491
  .,.,
489
492
 
490
- module_eval(<<'.,.,', 'parser.y', 31)
493
+ module_eval(<<'.,.,', 'parser.y', 32)
491
494
  def _reduce_12(val, _values, result)
492
- result = Name.new(:given => val[0], :nick => val[1],
493
- :family => val[2], :suffix => val[3], :title => val[4])
495
+ result = Name.new(
496
+ :given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
497
+ )
494
498
 
495
499
  result
496
500
  end
497
501
  .,.,
498
502
 
499
- module_eval(<<'.,.,', 'parser.y', 36)
503
+ module_eval(<<'.,.,', 'parser.y', 38)
500
504
  def _reduce_13(val, _values, result)
501
- result = Name.new(:given => val[0], :nick => val[1],
502
- :particle => val[2], :family => val[3],
503
- :suffix => val[4], :title => val[5])
505
+ result = Name.new(
506
+ :given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
504
507
 
505
508
  result
506
509
  end
507
510
  .,.,
508
511
 
509
- module_eval(<<'.,.,', 'parser.y', 42)
512
+ module_eval(<<'.,.,', 'parser.y', 43)
510
513
  def _reduce_14(val, _values, result)
511
- result = Name.new(:given => val[0], :particle => val[1],
512
- :family => val[2])
514
+ result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
513
515
 
514
516
  result
515
517
  end
@@ -517,105 +519,88 @@ module_eval(<<'.,.,', 'parser.y', 42)
517
519
 
518
520
  module_eval(<<'.,.,', 'parser.y', 47)
519
521
  def _reduce_15(val, _values, result)
520
- result = if include_particle_in_family?
521
- Name.new(:given => val[0], :family => val[1,2].join(' '))
522
- else
523
- Name.new(:given => val[0], :particle => val[1], :family => val[2])
524
- end
522
+ result = Name.new(:particle => val[0], :family => val[1])
525
523
 
526
524
  result
527
525
  end
528
526
  .,.,
529
527
 
530
- module_eval(<<'.,.,', 'parser.y', 55)
528
+ module_eval(<<'.,.,', 'parser.y', 52)
531
529
  def _reduce_16(val, _values, result)
532
- result = Name.new(:particle => val[0], :family => val[1])
530
+ result = Name.new({
531
+ :family => val[0], :suffix => val[2][0], :given => val[2][1]
532
+ }, !!val[2][0])
533
533
 
534
534
  result
535
535
  end
536
536
  .,.,
537
537
 
538
- module_eval(<<'.,.,', 'parser.y', 60)
538
+ module_eval(<<'.,.,', 'parser.y', 58)
539
539
  def _reduce_17(val, _values, result)
540
- result = Name.new({ :family => val[0], :suffix => val[2][0],
541
- :given => val[2][1] }, !!val[2][0])
540
+ result = Name.new({
541
+ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
542
+ }, !!val[3][0])
542
543
 
543
544
  result
544
545
  end
545
546
  .,.,
546
547
 
547
- module_eval(<<'.,.,', 'parser.y', 65)
548
+ module_eval(<<'.,.,', 'parser.y', 64)
548
549
  def _reduce_18(val, _values, result)
549
- result = if include_particle_in_family?
550
- Name.new({ :family => val[0,2].join(' '), :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
551
- else
552
- Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
553
- end
550
+ result = Name.new({
551
+ :particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
552
+ }, !!val[4][0])
554
553
 
555
554
  result
556
555
  end
557
556
  .,.,
558
557
 
559
- module_eval(<<'.,.,', 'parser.y', 73)
560
- def _reduce_19(val, _values, result)
561
- result = Name.new({ :particle => val[0], :family => val[1],
562
- :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
563
-
564
- result
565
- end
566
- .,.,
558
+ # reduce 19 omitted
567
559
 
568
- module_eval(<<'.,.,', 'parser.y', 78)
560
+ module_eval(<<'.,.,', 'parser.y', 71)
569
561
  def _reduce_20(val, _values, result)
570
- result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
571
- :suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
572
-
573
- result
574
- end
575
- .,.,
576
-
577
- # reduce 21 omitted
578
-
579
- module_eval(<<'.,.,', 'parser.y', 84)
580
- def _reduce_22(val, _values, result)
581
562
  result = val.join(' ')
582
563
  result
583
564
  end
584
565
  .,.,
585
566
 
586
- module_eval(<<'.,.,', 'parser.y', 85)
587
- def _reduce_23(val, _values, result)
567
+ module_eval(<<'.,.,', 'parser.y', 72)
568
+ def _reduce_21(val, _values, result)
588
569
  result = val.join(' ')
589
570
  result
590
571
  end
591
572
  .,.,
592
573
 
574
+ # reduce 22 omitted
575
+
576
+ # reduce 23 omitted
577
+
593
578
  # reduce 24 omitted
594
579
 
595
580
  # reduce 25 omitted
596
581
 
597
- module_eval(<<'.,.,', 'parser.y', 89)
582
+ module_eval(<<'.,.,', 'parser.y', 78)
598
583
  def _reduce_26(val, _values, result)
599
584
  result = [nil,val[0]]
600
585
  result
601
586
  end
602
587
  .,.,
603
588
 
604
- module_eval(<<'.,.,', 'parser.y', 90)
589
+ module_eval(<<'.,.,', 'parser.y', 79)
605
590
  def _reduce_27(val, _values, result)
606
591
  result = [val[2],val[0]]
607
592
  result
608
593
  end
609
594
  .,.,
610
595
 
611
- module_eval(<<'.,.,', 'parser.y', 91)
596
+ module_eval(<<'.,.,', 'parser.y', 80)
612
597
  def _reduce_28(val, _values, result)
613
598
  result = [val[0],nil]
614
599
  result
615
600
  end
616
601
  .,.,
617
602
 
618
- module_eval(<<'.,.,', 'parser.y', 92)
603
+ module_eval(<<'.,.,', 'parser.y', 81)
619
604
  def _reduce_29(val, _values, result)
620
605
  result = [val[0],val[2]]
621
606
  result
@@ -624,7 +609,7 @@ module_eval(<<'.,.,', 'parser.y', 92)
624
609
 
625
610
  # reduce 30 omitted
626
611
 
627
- module_eval(<<'.,.,', 'parser.y', 95)
612
+ module_eval(<<'.,.,', 'parser.y', 84)
628
613
  def _reduce_31(val, _values, result)
629
614
  result = val.join(' ')
630
615
  result
@@ -637,7 +622,7 @@ module_eval(<<'.,.,', 'parser.y', 95)
637
622
 
638
623
  # reduce 34 omitted
639
624
 
640
- module_eval(<<'.,.,', 'parser.y', 100)
625
+ module_eval(<<'.,.,', 'parser.y', 89)
641
626
  def _reduce_35(val, _values, result)
642
627
  result = val.join(' ')
643
628
  result
@@ -664,21 +649,23 @@ module_eval(<<'.,.,', 'parser.y', 100)
664
649
 
665
650
  # reduce 45 omitted
666
651
 
667
- module_eval(<<'.,.,', 'parser.y', 110)
668
- def _reduce_46(val, _values, result)
652
+ # reduce 46 omitted
653
+
654
+ module_eval(<<'.,.,', 'parser.y', 99)
655
+ def _reduce_47(val, _values, result)
669
656
  result = val.join(' ')
670
657
  result
671
658
  end
672
659
  .,.,
673
660
 
674
- # reduce 47 omitted
675
-
676
661
  # reduce 48 omitted
677
662
 
678
663
  # reduce 49 omitted
679
664
 
680
- module_eval(<<'.,.,', 'parser.y', 115)
681
- def _reduce_50(val, _values, result)
665
+ # reduce 50 omitted
666
+
667
+ module_eval(<<'.,.,', 'parser.y', 104)
668
+ def _reduce_51(val, _values, result)
682
669
  result = val.join(' ')
683
670
  result
684
671
  end
data/lib/namae/parser.y CHANGED
@@ -24,32 +24,24 @@ rule
24
24
 
25
25
  display_order : u_words word opt_suffices opt_titles
26
26
  {
27
- result = Name.new(:given => val[0], :family => val[1],
28
- :suffix => val[2], :title => val[3])
27
+ result = Name.new(
28
+ :given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
29
+ )
29
30
  }
30
31
  | u_words NICK last opt_suffices opt_titles
31
32
  {
32
- result = Name.new(:given => val[0], :nick => val[1],
33
- :family => val[2], :suffix => val[3], :title => val[4])
33
+ result = Name.new(
34
+ :given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
35
+ )
34
36
  }
35
37
  | u_words NICK von last opt_suffices opt_titles
36
38
  {
37
- result = Name.new(:given => val[0], :nick => val[1],
38
- :particle => val[2], :family => val[3],
39
- :suffix => val[4], :title => val[5])
39
+ result = Name.new(
40
+ :given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
40
41
  }
41
42
  | u_words von last
42
43
  {
43
- result = Name.new(:given => val[0], :particle => val[1],
44
- :family => val[2])
45
- }
46
- | u_words UPARTICLE last
47
- {
48
- result = if include_particle_in_family?
49
- Name.new(:given => val[0], :family => val[1,2].join(' '))
50
- else
51
- Name.new(:given => val[0], :particle => val[1], :family => val[2])
52
- end
44
+ result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
53
45
  }
54
46
  | von last
55
47
  {
@@ -58,32 +50,29 @@ rule
58
50
 
59
51
  sort_order : last COMMA first
60
52
  {
61
- result = Name.new({ :family => val[0], :suffix => val[2][0],
62
- :given => val[2][1] }, !!val[2][0])
63
- }
64
- | UPARTICLE last COMMA first
65
- {
66
- result = if include_particle_in_family?
67
- Name.new({ :family => val[0,2].join(' '), :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
68
- else
69
- Name.new({ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
70
- end
53
+ result = Name.new({
54
+ :family => val[0], :suffix => val[2][0], :given => val[2][1]
55
+ }, !!val[2][0])
71
56
  }
72
57
  | von last COMMA first
73
58
  {
74
- result = Name.new({ :particle => val[0], :family => val[1],
75
- :suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
59
+ result = Name.new({
60
+ :particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
61
+ }, !!val[3][0])
76
62
  }
77
63
  | u_words von last COMMA first
78
64
  {
79
- result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
80
- :suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
65
+ result = Name.new({
66
+ :particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
67
+ }, !!val[4][0])
81
68
  }
82
69
  ;
83
70
 
84
- von : LWORD
85
- | von LWORD { result = val.join(' ') }
86
- | von u_words LWORD { result = val.join(' ') }
71
+ von : particle
72
+ | von particle { result = val.join(' ') }
73
+ | von u_words particle { result = val.join(' ') }
74
+
75
+ particle : LWORD | UPARTICLE
87
76
 
88
77
  last : LWORD | u_words
89
78
 
@@ -103,7 +92,7 @@ rule
103
92
  opt_comma : /* empty */ | COMMA
104
93
  opt_words : /* empty */ | words
105
94
 
106
- word : LWORD | UWORD | PWORD
95
+ word : LWORD | UWORD | PWORD | UPARTICLE
107
96
 
108
97
  opt_suffices : /* empty */ | suffices
109
98
 
@@ -130,7 +119,7 @@ require 'strscan'
130
119
  :title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
131
120
  :suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
132
121
  :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
133
- :uppercase_particle => /\s*\b((Da|De|Di|De\sLa|Du|Der|Des|Da|St|Saint|Les|Van)\.?)(\s+|$)/
122
+ :uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
134
123
  }
135
124
 
136
125
  class << self
@@ -197,7 +186,9 @@ require 'strscan'
197
186
  def parse!(string)
198
187
  @input = StringScanner.new(normalize(string))
199
188
  reset
200
- do_parse
189
+ names = do_parse
190
+ names.map(&:merge_particles!) if include_particle_in_family?
191
+ names
201
192
  end
202
193
 
203
194
  def normalize(string)
data/lib/namae/version.rb CHANGED
@@ -2,7 +2,7 @@ module Namae
2
2
  module Version
3
3
  MAJOR = 1
4
4
  MINOR = 1
5
- PATCH = 0
5
+ PATCH = 1
6
6
  BUILD = nil
7
7
 
8
8
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
data/namae.gemspec CHANGED
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: namae 1.1.0 ruby lib
5
+ # stub: namae 1.1.1 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "namae".freeze
9
- s.version = "1.1.0"
9
+ s.version = "1.1.1"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib".freeze]
13
13
  s.authors = ["Sylvester Keil".freeze, "Dan Collis-Puro".freeze]
14
- s.date = "2021-03-12"
14
+ s.date = "2021-03-14"
15
15
  s.description = " Namae (\u540D\u524D) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
16
16
  s.email = ["sylvester@keil.or.at".freeze, "dan@collispuro.com".freeze]
17
17
  s.extra_rdoc_files = [
@@ -207,6 +207,10 @@ module Namae
207
207
  expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
208
208
  end
209
209
 
210
+ it 'parses multiple common capitalized particles as the family name in display order' do
211
+ expect(parser.parse!('Tom Van De Weghe')[0].values_at(:given, :family, :particle)).to eq(['Tom', 'Weghe', 'Van De'])
212
+ end
213
+
210
214
  it 'parses common lowercase particles as a particle, not family name in display order' do
211
215
  expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
212
216
  end
@@ -235,16 +239,16 @@ module Namae
235
239
  expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
236
240
  end
237
241
 
238
- it 'parses common lowercase particles as a particle, not family name in display order' do
239
- expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
242
+ it 'parses common lowercase particles as family name in display order' do
243
+ expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
240
244
  end
241
245
 
242
246
  it 'parses common capitalized particles as the family name in sort order' do
243
247
  expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
244
248
  end
245
249
 
246
- it 'parses common lowercase particles as a particle, not family name in sort order' do
247
- expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
250
+ it 'parses common lowercase particles as family name in sort order' do
251
+ expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
248
252
  end
249
253
 
250
254
  it 'parses common capitalized particles with punctuation as the family name in display order' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: namae
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-03-12 00:00:00.000000000 Z
12
+ date: 2021-03-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: racc