namae 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +8 -3
- data/BSDL +1 -1
- data/README.md +11 -1
- data/features/examples.feature +16 -0
- data/features/lists.feature +13 -3
- data/features/step_definitions/namae_steps.rb +5 -0
- data/lib/namae/name.rb +7 -0
- data/lib/namae/parser.rb +227 -192
- data/lib/namae/parser.y +45 -26
- data/lib/namae/version.rb +2 -2
- data/lib/namae.rb +1 -1
- data/namae.gemspec +7 -9
- data/spec/namae/parser_spec.rb +65 -1
- metadata +6 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 348bf4a2385c1aa56c35759cc2219a8163fa7cb76e3c05482cd6db7a207906fb
|
|
4
|
+
data.tar.gz: 4329ea23260aef483460581391fcd43c80bde61ecebef419f89c2d23f0cfeffc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 806964f1611f6931acd6e68e4f5a75069b30abf42795191bf084d89adbfe378d98f6b28f10f96cc5cc446ab9068ddafafd6794e71f2c53fd97d2a0aff5a59914
|
|
7
|
+
data.tar.gz: f235fb82617020393be215fe078bd74bbb947ab682f387f5aad87e7bb6e3b62323ee03da30409034ca747beca7b5319905f1a081a43bc8763a25dca5b40722c4
|
data/.travis.yml
CHANGED
|
@@ -6,15 +6,20 @@ cache: bundler
|
|
|
6
6
|
matrix:
|
|
7
7
|
fast_finish: true
|
|
8
8
|
include:
|
|
9
|
-
- rvm:
|
|
9
|
+
- rvm: 3.0
|
|
10
10
|
env: WITH_COVERALLS=true
|
|
11
|
-
- rvm: 2.
|
|
11
|
+
- rvm: 2.7
|
|
12
12
|
env: WITH_COVERALLS=false
|
|
13
|
-
- rvm: 2.
|
|
13
|
+
- rvm: 2.6
|
|
14
|
+
env: WITH_COVERALLS=false
|
|
15
|
+
- rvm: 2.5
|
|
14
16
|
env: WITH_COVERALLS=false
|
|
15
17
|
- rvm: jruby-19mode
|
|
16
18
|
env: WITH_COVERALLS=false
|
|
17
19
|
|
|
20
|
+
before_install:
|
|
21
|
+
- gem update --system
|
|
22
|
+
|
|
18
23
|
install:
|
|
19
24
|
- if [[ $WITH_COVERALLS = "true" ]]; then
|
|
20
25
|
bundle install --without debug optional;
|
data/BSDL
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Namae. A personal name parser.
|
|
2
2
|
Copyright (C) 2012 President and Fellows of Harvard College
|
|
3
|
-
Copyright (C) 2013-
|
|
3
|
+
Copyright (C) 2013-2020 Sylvester Keil
|
|
4
4
|
|
|
5
5
|
Redistribution and use in source and binary forms, with or without
|
|
6
6
|
modification, are permitted provided that the following conditions are met:
|
data/README.md
CHANGED
|
@@ -121,6 +121,16 @@ ambiguous. For example, multiple family names are always possible in sort-order:
|
|
|
121
121
|
Whilst in display-order, multiple family names are only supported when the
|
|
122
122
|
name contains a particle or a nickname.
|
|
123
123
|
|
|
124
|
+
Namae tries to detect common particles using the `:uppercase_particle` lexer
|
|
125
|
+
pattern. If you prefer to always include particles with the family name, you
|
|
126
|
+
can set the `:include_particle_in_family` parser option.
|
|
127
|
+
|
|
128
|
+
Namae.parse 'Ludwig von Beethoven'
|
|
129
|
+
#-> [#<Name family="Beethoven" given="Ludwig" particle="von">]
|
|
130
|
+
|
|
131
|
+
Namae.options[:include_particle_in_family] = true
|
|
132
|
+
#-> [#<Name family="von Beethoven" given="Ludwig">]
|
|
133
|
+
|
|
124
134
|
Configuration
|
|
125
135
|
-------------
|
|
126
136
|
You can tweak some of Namae's parse rules by configuring the parser's
|
|
@@ -187,7 +197,7 @@ Namae was written as a part of a Google Summer of Code project. Thanks Google!
|
|
|
187
197
|
|
|
188
198
|
Copyright
|
|
189
199
|
---------
|
|
190
|
-
Copyright (c) 2013-
|
|
200
|
+
Copyright (c) 2013-2020 Sylvester Keil
|
|
191
201
|
|
|
192
202
|
Copyright (c) 2012 President and Fellows of Harvard College.
|
|
193
203
|
|
data/features/examples.feature
CHANGED
|
@@ -34,3 +34,19 @@ Feature: Parse the names in the Readme file
|
|
|
34
34
|
| Mr. Yukihiro "Matz" Matsumoto | Yukihiro | | Matsumoto | | | Mr. | Matz |
|
|
35
35
|
| Yukihiro "Matz" Matsumoto Sr. | Yukihiro | | Matsumoto | Sr. | | | Matz |
|
|
36
36
|
| Mr. Yukihiro "Matz" Matsumoto Sr. | Yukihiro | | Matsumoto | Sr. | | Mr. | Matz |
|
|
37
|
+
|
|
38
|
+
@particle
|
|
39
|
+
Scenarios: Particles
|
|
40
|
+
| name | given | particle | family | suffix | title | appellation | nick |
|
|
41
|
+
| Ludwig von Beethoven | Ludwig | von | Beethoven | | | | |
|
|
42
|
+
| Beethoven, Ludwig von | Ludwig von | | Beethoven | | | | |
|
|
43
|
+
| Vincent Van Gogh | Vincent | Van | Gogh | | | | |
|
|
44
|
+
| Vincent van Gogh | Vincent | van | Gogh | | | | |
|
|
45
|
+
| Van Gogh, Vincent | Vincent | Van | Gogh | | | | |
|
|
46
|
+
| van Gogh, Vincent | Vincent | van | Gogh | | | | |
|
|
47
|
+
| Walther von der Vogelheide | Walther | von der | Vogelheide | | | | |
|
|
48
|
+
| Don De Lillo | Don | De | Lillo | | | | |
|
|
49
|
+
| De Lillo, Don | Don | De | Lillo | | | | |
|
|
50
|
+
| Tom Van de Weghe | Tom | Van de | Weghe | | | | |
|
|
51
|
+
| Tom Van De Weghe | Tom | Van De | Weghe | | | | |
|
|
52
|
+
|
data/features/lists.feature
CHANGED
|
@@ -115,14 +115,24 @@ Feature: Parse a list of names
|
|
|
115
115
|
| B | Malcom |
|
|
116
116
|
|
|
117
117
|
Scenario: A list of names with particles separated by commas
|
|
118
|
-
Given
|
|
118
|
+
Given I want to include particles in the family name
|
|
119
|
+
And a parser that prefers commas as separators
|
|
119
120
|
When I parse the names "Di Proctor, M., von Cooper, P."
|
|
120
121
|
Then the names should be:
|
|
121
122
|
| given | family |
|
|
122
123
|
| M. | Di Proctor |
|
|
123
|
-
| P. | Cooper
|
|
124
|
+
| P. | von Cooper |
|
|
124
125
|
When I parse the names "Di Proctor, M, von Cooper, P"
|
|
125
126
|
Then the names should be:
|
|
126
127
|
| given | family |
|
|
127
128
|
| M | Di Proctor |
|
|
128
|
-
| P | Cooper
|
|
129
|
+
| P | von Cooper |
|
|
130
|
+
|
|
131
|
+
Scenario: A list of names with two consecutive accented characters
|
|
132
|
+
Given I want to include particles in the family name
|
|
133
|
+
And a parser that prefers commas as separators
|
|
134
|
+
When I parse the names "Çakıroğlu, Ü., Başıbüyük, B."
|
|
135
|
+
Then the names should be:
|
|
136
|
+
| given | family |
|
|
137
|
+
| Ü. | Çakıroğlu |
|
|
138
|
+
| B. | Başıbüyük |
|
|
@@ -2,6 +2,11 @@ Given /^a parser that prefers commas as separators$/ do
|
|
|
2
2
|
Namae::Parser.instance.options[:prefer_comma_as_separator] = true
|
|
3
3
|
end
|
|
4
4
|
|
|
5
|
+
Given /^I want to include particles in the family name$/ do
|
|
6
|
+
Namae::Parser.instance.options[:include_particle_in_family] = true
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
|
|
5
10
|
When /^I parse the name "(.*)"$/ do |string|
|
|
6
11
|
@name = Namae.parse!(string)[0]
|
|
7
12
|
end
|
data/lib/namae/name.rb
CHANGED
|
@@ -183,6 +183,13 @@ module Namae
|
|
|
183
183
|
self
|
|
184
184
|
end
|
|
185
185
|
|
|
186
|
+
def merge_particles!
|
|
187
|
+
self.family = [dropping_particle, particle, family].compact.join(' ')
|
|
188
|
+
self.dropping_particle = nil
|
|
189
|
+
self.particle = nil
|
|
190
|
+
self
|
|
191
|
+
end
|
|
192
|
+
|
|
186
193
|
# @return [String] a string representation of the name
|
|
187
194
|
def inspect
|
|
188
195
|
"#<Name #{each_pair.map { |k,v| [k,v.inspect].join('=') if v }.compact.join(' ')}>"
|
data/lib/namae/parser.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#
|
|
2
2
|
# DO NOT MODIFY!!!!
|
|
3
|
-
# This file is automatically generated by Racc 1.
|
|
4
|
-
# from Racc
|
|
3
|
+
# This file is automatically generated by Racc 1.5.2
|
|
4
|
+
# from Racc grammar file "".
|
|
5
5
|
#
|
|
6
6
|
|
|
7
7
|
require 'racc/parser.rb'
|
|
@@ -11,17 +11,19 @@ require 'strscan'
|
|
|
11
11
|
module Namae
|
|
12
12
|
class Parser < Racc::Parser
|
|
13
13
|
|
|
14
|
-
module_eval(<<'...end parser.y/module_eval...', 'parser.y',
|
|
14
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 111)
|
|
15
15
|
|
|
16
16
|
@defaults = {
|
|
17
17
|
:debug => false,
|
|
18
18
|
:prefer_comma_as_separator => false,
|
|
19
|
+
:include_particle_in_family => false,
|
|
19
20
|
:comma => ',',
|
|
20
21
|
:stops => ',;',
|
|
21
22
|
:separator => /\s*(\band\b|\&|;)\s*/i,
|
|
22
|
-
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
|
23
|
+
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
|
23
24
|
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
|
24
|
-
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
|
25
|
+
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
|
|
26
|
+
:uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
|
|
25
27
|
}
|
|
26
28
|
|
|
27
29
|
class << self
|
|
@@ -50,6 +52,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
|
|
|
50
52
|
options[:comma]
|
|
51
53
|
end
|
|
52
54
|
|
|
55
|
+
def include_particle_in_family?
|
|
56
|
+
options[:include_particle_in_family]
|
|
57
|
+
end
|
|
58
|
+
|
|
53
59
|
def stops
|
|
54
60
|
options[:stops]
|
|
55
61
|
end
|
|
@@ -66,6 +72,10 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
|
|
|
66
72
|
options[:appellation]
|
|
67
73
|
end
|
|
68
74
|
|
|
75
|
+
def uppercase_particle
|
|
76
|
+
options[:uppercase_particle]
|
|
77
|
+
end
|
|
78
|
+
|
|
69
79
|
def prefer_comma_as_separator?
|
|
70
80
|
options[:prefer_comma_as_separator]
|
|
71
81
|
end
|
|
@@ -80,7 +90,9 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
|
|
|
80
90
|
def parse!(string)
|
|
81
91
|
@input = StringScanner.new(normalize(string))
|
|
82
92
|
reset
|
|
83
|
-
do_parse
|
|
93
|
+
names = do_parse
|
|
94
|
+
names.map(&:merge_particles!) if include_particle_in_family?
|
|
95
|
+
names
|
|
84
96
|
end
|
|
85
97
|
|
|
86
98
|
def normalize(string)
|
|
@@ -135,11 +147,11 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
|
|
|
135
147
|
end
|
|
136
148
|
|
|
137
149
|
def will_see_suffix?
|
|
138
|
-
input.
|
|
150
|
+
input.rest.strip.split(/\s+/)[0] =~ suffix
|
|
139
151
|
end
|
|
140
152
|
|
|
141
153
|
def will_see_initial?
|
|
142
|
-
input.
|
|
154
|
+
input.rest.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
|
|
143
155
|
end
|
|
144
156
|
|
|
145
157
|
def seen_full_name?
|
|
@@ -171,6 +183,8 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
|
|
|
171
183
|
else
|
|
172
184
|
consume_word(:UWORD, input.matched)
|
|
173
185
|
end
|
|
186
|
+
when input.scan(uppercase_particle)
|
|
187
|
+
consume_word(:UPARTICLE, input.matched.strip)
|
|
174
188
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
|
|
175
189
|
consume_word(:UWORD, input.matched)
|
|
176
190
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
|
|
@@ -195,133 +209,142 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 106)
|
|
|
195
209
|
##### State transition tables begin ###
|
|
196
210
|
|
|
197
211
|
racc_action_table = [
|
|
198
|
-
-
|
|
199
|
-
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
212
|
+
-43, 36, 26, 37, -41, 38, 39, -43, -42, -43,
|
|
213
|
+
-43, -41, -40, -41, -41, -42, 45, -42, -42, -40,
|
|
214
|
+
50, -40, -40, 72, 59, 58, 60, 73, 16, 13,
|
|
215
|
+
17, -36, 61, 7, 18, 65, 14, 16, 25, 17,
|
|
216
|
+
16, 25, 17, 28, 18, 14, 65, 45, 14, 36,
|
|
217
|
+
34, 37, 68, 16, 13, 17, 26, 35, 7, 18,
|
|
218
|
+
18, 14, 16, 25, 17, 28, 36, 34, 37, 45,
|
|
219
|
+
14, 36, 34, 37, 35, 36, 34, 37, 45, 35,
|
|
220
|
+
36, 52, 37, 35, -22, -22, -22, 18, 35, 59,
|
|
221
|
+
58, 60, -22, 36, 34, 37, 45, 61, 36, 34,
|
|
222
|
+
37, 35, 59, 58, 60, 65, 35, nil, nil, 45,
|
|
223
|
+
61, 59, 58, 60, 59, 58, 60, nil, 45, 61,
|
|
224
|
+
19, nil, 61, 59, 58, 60, -40, 20, -24, nil,
|
|
225
|
+
nil, 61, nil, -40 ]
|
|
209
226
|
|
|
210
227
|
racc_action_check = [
|
|
211
|
-
14,
|
|
212
|
-
14,
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
228
|
+
14, 48, 8, 48, 16, 11, 19, 14, 17, 14,
|
|
229
|
+
14, 16, 25, 16, 16, 17, 27, 17, 17, 25,
|
|
230
|
+
31, 25, 25, 55, 55, 55, 55, 56, 0, 0,
|
|
231
|
+
0, 55, 55, 0, 0, 56, 0, 5, 5, 5,
|
|
232
|
+
9, 9, 9, 9, 43, 5, 44, 46, 9, 10,
|
|
233
|
+
10, 10, 49, 20, 20, 20, 64, 10, 20, 20,
|
|
234
|
+
66, 20, 23, 23, 23, 23, 24, 24, 24, 67,
|
|
235
|
+
23, 28, 28, 28, 24, 29, 29, 29, 70, 28,
|
|
236
|
+
33, 33, 33, 29, 34, 34, 34, 75, 33, 38,
|
|
237
|
+
38, 38, 34, 41, 41, 41, 38, 38, 47, 47,
|
|
238
|
+
47, 41, 50, 50, 50, 77, 47, nil, nil, 50,
|
|
239
|
+
50, 68, 68, 68, 73, 73, 73, nil, 68, 68,
|
|
240
|
+
1, nil, 73, 78, 78, 78, 13, 1, 13, nil,
|
|
241
|
+
nil, 78, nil, 13 ]
|
|
222
242
|
|
|
223
243
|
racc_action_pointer = [
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
nil, nil,
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
244
|
+
25, 120, nil, nil, nil, 34, nil, nil, -7, 37,
|
|
245
|
+
46, 3, nil, 126, 0, nil, 4, 8, nil, 6,
|
|
246
|
+
50, nil, nil, 59, 63, 12, nil, 6, 68, 72,
|
|
247
|
+
nil, 18, nil, 77, 81, nil, nil, nil, 86, nil,
|
|
248
|
+
nil, 90, nil, 35, 36, nil, 37, 95, -2, 50,
|
|
249
|
+
99, nil, nil, nil, nil, 21, 25, nil, nil, nil,
|
|
250
|
+
nil, nil, nil, nil, 47, nil, 51, 59, 108, nil,
|
|
251
|
+
68, nil, nil, 111, nil, 78, nil, 95, 120, nil ]
|
|
232
252
|
|
|
233
253
|
racc_action_default = [
|
|
234
|
-
-1, -
|
|
235
|
-
-
|
|
236
|
-
-
|
|
237
|
-
-
|
|
238
|
-
|
|
239
|
-
-
|
|
240
|
-
-
|
|
241
|
-
-
|
|
254
|
+
-1, -52, -2, -4, -5, -52, -8, -9, -10, -25,
|
|
255
|
+
-52, -52, -19, -22, -23, -30, -32, -33, -50, -52,
|
|
256
|
+
-52, -6, -7, -52, -52, -22, -51, -44, -52, -52,
|
|
257
|
+
-31, -15, -20, -25, -24, -23, -32, -33, -38, 80,
|
|
258
|
+
-3, -52, -15, -48, -45, -46, -44, -52, -25, -14,
|
|
259
|
+
-38, -21, -22, -16, -26, -39, -28, -34, -40, -41,
|
|
260
|
+
-42, -43, -14, -11, -49, -47, -48, -44, -38, -17,
|
|
261
|
+
-52, -35, -37, -52, -12, -48, -18, -27, -29, -13 ]
|
|
242
262
|
|
|
243
263
|
racc_goto_table = [
|
|
244
|
-
3,
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
nil, nil, nil, nil,
|
|
251
|
-
nil, nil, nil,
|
|
264
|
+
3, 30, 43, 1, 22, 21, 56, 53, 31, 27,
|
|
265
|
+
32, 63, 78, 70, nil, 30, nil, nil, 56, 69,
|
|
266
|
+
3, 66, 42, 27, 32, 30, 46, 49, 24, 32,
|
|
267
|
+
9, nil, 29, 51, 74, 23, 56, 76, 77, 62,
|
|
268
|
+
30, 32, 75, 79, 2, 67, 41, 32, 8, nil,
|
|
269
|
+
9, 47, nil, nil, nil, 71, nil, nil, 48, nil,
|
|
270
|
+
nil, nil, nil, nil, 40, nil, nil, nil, 8, nil,
|
|
271
|
+
nil, nil, nil, nil, nil, nil, nil, nil, 71 ]
|
|
252
272
|
|
|
253
273
|
racc_goto_check = [
|
|
254
|
-
3,
|
|
255
|
-
|
|
256
|
-
3,
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
nil, nil, nil, nil,
|
|
261
|
-
nil, nil, nil, 3 ]
|
|
274
|
+
3, 19, 9, 1, 4, 3, 18, 13, 11, 3,
|
|
275
|
+
14, 10, 16, 17, nil, 19, nil, nil, 18, 13,
|
|
276
|
+
3, 9, 11, 3, 14, 19, 11, 11, 12, 14,
|
|
277
|
+
8, nil, 12, 14, 10, 8, 18, 13, 18, 11,
|
|
278
|
+
19, 14, 9, 10, 2, 11, 12, 14, 7, nil,
|
|
279
|
+
8, 12, nil, nil, nil, 3, nil, nil, 8, nil,
|
|
280
|
+
nil, nil, nil, nil, 2, nil, nil, nil, 7, nil,
|
|
281
|
+
nil, nil, nil, nil, nil, nil, nil, nil, 3 ]
|
|
262
282
|
|
|
263
283
|
racc_goto_pointer = [
|
|
264
|
-
nil,
|
|
265
|
-
|
|
284
|
+
nil, 3, 44, 0, -1, nil, nil, 48, 30, -25,
|
|
285
|
+
-32, -2, 23, -31, 0, nil, -61, -42, -32, -8 ]
|
|
266
286
|
|
|
267
287
|
racc_goto_default = [
|
|
268
|
-
nil, nil, nil,
|
|
269
|
-
11, 10, nil,
|
|
288
|
+
nil, nil, nil, 57, 4, 5, 6, 64, 33, nil,
|
|
289
|
+
nil, 11, 10, nil, 12, 54, 55, nil, 44, 15 ]
|
|
270
290
|
|
|
271
291
|
racc_reduce_table = [
|
|
272
292
|
0, 0, :racc_error,
|
|
273
|
-
0,
|
|
274
|
-
1,
|
|
275
|
-
3,
|
|
276
|
-
1,
|
|
277
|
-
1, 13, :_reduce_none,
|
|
278
|
-
2, 13, :_reduce_6,
|
|
279
|
-
2, 13, :_reduce_7,
|
|
280
|
-
1, 13, :_reduce_none,
|
|
281
|
-
1, 16, :_reduce_9,
|
|
282
|
-
1, 16, :_reduce_10,
|
|
283
|
-
4, 15, :_reduce_11,
|
|
284
|
-
5, 15, :_reduce_12,
|
|
285
|
-
6, 15, :_reduce_13,
|
|
286
|
-
3, 15, :_reduce_14,
|
|
287
|
-
2, 15, :_reduce_15,
|
|
288
|
-
3, 17, :_reduce_16,
|
|
289
|
-
4, 17, :_reduce_17,
|
|
290
|
-
5, 17, :_reduce_18,
|
|
291
|
-
1, 22, :_reduce_none,
|
|
292
|
-
2, 22, :_reduce_20,
|
|
293
|
-
3, 22, :_reduce_21,
|
|
294
|
-
1, 21, :_reduce_none,
|
|
295
|
-
1, 21, :_reduce_none,
|
|
296
|
-
1, 23, :_reduce_24,
|
|
297
|
-
3, 23, :_reduce_25,
|
|
298
|
-
1, 23, :_reduce_26,
|
|
299
|
-
3, 23, :_reduce_27,
|
|
300
|
-
1, 18, :_reduce_none,
|
|
301
|
-
2, 18, :_reduce_29,
|
|
302
|
-
1, 28, :_reduce_none,
|
|
303
|
-
1, 28, :_reduce_none,
|
|
304
|
-
1, 25, :_reduce_none,
|
|
305
|
-
2, 25, :_reduce_33,
|
|
306
|
-
0, 26, :_reduce_none,
|
|
307
|
-
1, 26, :_reduce_none,
|
|
308
|
-
0, 24, :_reduce_none,
|
|
309
|
-
1, 24, :_reduce_none,
|
|
310
|
-
1, 14, :_reduce_none,
|
|
293
|
+
0, 13, :_reduce_1,
|
|
294
|
+
1, 13, :_reduce_2,
|
|
295
|
+
3, 13, :_reduce_3,
|
|
296
|
+
1, 14, :_reduce_4,
|
|
311
297
|
1, 14, :_reduce_none,
|
|
298
|
+
2, 14, :_reduce_6,
|
|
299
|
+
2, 14, :_reduce_7,
|
|
312
300
|
1, 14, :_reduce_none,
|
|
313
|
-
|
|
314
|
-
1,
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
301
|
+
1, 17, :_reduce_9,
|
|
302
|
+
1, 17, :_reduce_10,
|
|
303
|
+
4, 16, :_reduce_11,
|
|
304
|
+
5, 16, :_reduce_12,
|
|
305
|
+
6, 16, :_reduce_13,
|
|
306
|
+
3, 16, :_reduce_14,
|
|
307
|
+
2, 16, :_reduce_15,
|
|
308
|
+
3, 18, :_reduce_16,
|
|
309
|
+
4, 18, :_reduce_17,
|
|
310
|
+
5, 18, :_reduce_18,
|
|
311
|
+
1, 24, :_reduce_none,
|
|
312
|
+
2, 24, :_reduce_20,
|
|
313
|
+
3, 24, :_reduce_21,
|
|
314
|
+
1, 26, :_reduce_none,
|
|
315
|
+
1, 26, :_reduce_none,
|
|
316
|
+
1, 23, :_reduce_none,
|
|
317
|
+
1, 23, :_reduce_none,
|
|
318
|
+
1, 25, :_reduce_26,
|
|
319
|
+
3, 25, :_reduce_27,
|
|
320
|
+
1, 25, :_reduce_28,
|
|
321
|
+
3, 25, :_reduce_29,
|
|
318
322
|
1, 20, :_reduce_none,
|
|
323
|
+
2, 20, :_reduce_31,
|
|
324
|
+
1, 31, :_reduce_none,
|
|
325
|
+
1, 31, :_reduce_none,
|
|
326
|
+
1, 28, :_reduce_none,
|
|
327
|
+
2, 28, :_reduce_35,
|
|
328
|
+
0, 29, :_reduce_none,
|
|
319
329
|
1, 29, :_reduce_none,
|
|
320
|
-
|
|
330
|
+
0, 27, :_reduce_none,
|
|
331
|
+
1, 27, :_reduce_none,
|
|
332
|
+
1, 15, :_reduce_none,
|
|
333
|
+
1, 15, :_reduce_none,
|
|
334
|
+
1, 15, :_reduce_none,
|
|
335
|
+
1, 15, :_reduce_none,
|
|
336
|
+
0, 21, :_reduce_none,
|
|
337
|
+
1, 21, :_reduce_none,
|
|
338
|
+
1, 30, :_reduce_none,
|
|
339
|
+
2, 30, :_reduce_47,
|
|
340
|
+
0, 22, :_reduce_none,
|
|
341
|
+
1, 22, :_reduce_none,
|
|
342
|
+
1, 19, :_reduce_none,
|
|
343
|
+
2, 19, :_reduce_51 ]
|
|
321
344
|
|
|
322
|
-
racc_reduce_n =
|
|
345
|
+
racc_reduce_n = 52
|
|
323
346
|
|
|
324
|
-
racc_shift_n =
|
|
347
|
+
racc_shift_n = 80
|
|
325
348
|
|
|
326
349
|
racc_token_table = {
|
|
327
350
|
false => 0,
|
|
@@ -334,9 +357,10 @@ racc_token_table = {
|
|
|
334
357
|
:AND => 7,
|
|
335
358
|
:APPELLATION => 8,
|
|
336
359
|
:TITLE => 9,
|
|
337
|
-
:SUFFIX => 10
|
|
360
|
+
:SUFFIX => 10,
|
|
361
|
+
:UPARTICLE => 11 }
|
|
338
362
|
|
|
339
|
-
racc_nt_base =
|
|
363
|
+
racc_nt_base = 12
|
|
340
364
|
|
|
341
365
|
racc_use_result_var = true
|
|
342
366
|
|
|
@@ -368,6 +392,7 @@ Racc_token_to_s_table = [
|
|
|
368
392
|
"APPELLATION",
|
|
369
393
|
"TITLE",
|
|
370
394
|
"SUFFIX",
|
|
395
|
+
"UPARTICLE",
|
|
371
396
|
"$start",
|
|
372
397
|
"names",
|
|
373
398
|
"name",
|
|
@@ -375,18 +400,19 @@ Racc_token_to_s_table = [
|
|
|
375
400
|
"display_order",
|
|
376
401
|
"honorific",
|
|
377
402
|
"sort_order",
|
|
403
|
+
"titles",
|
|
378
404
|
"u_words",
|
|
379
405
|
"opt_suffices",
|
|
380
406
|
"opt_titles",
|
|
381
407
|
"last",
|
|
382
408
|
"von",
|
|
383
409
|
"first",
|
|
410
|
+
"particle",
|
|
384
411
|
"opt_words",
|
|
385
412
|
"words",
|
|
386
413
|
"opt_comma",
|
|
387
414
|
"suffices",
|
|
388
|
-
"u_word"
|
|
389
|
-
"titles" ]
|
|
415
|
+
"u_word" ]
|
|
390
416
|
|
|
391
417
|
Racc_debug_parser = false
|
|
392
418
|
|
|
@@ -396,28 +422,28 @@ Racc_debug_parser = false
|
|
|
396
422
|
|
|
397
423
|
module_eval(<<'.,.,', 'parser.y', 11)
|
|
398
424
|
def _reduce_1(val, _values, result)
|
|
399
|
-
result = []
|
|
425
|
+
result = []
|
|
400
426
|
result
|
|
401
427
|
end
|
|
402
428
|
.,.,
|
|
403
429
|
|
|
404
430
|
module_eval(<<'.,.,', 'parser.y', 12)
|
|
405
431
|
def _reduce_2(val, _values, result)
|
|
406
|
-
result = [val[0]]
|
|
432
|
+
result = [val[0]]
|
|
407
433
|
result
|
|
408
434
|
end
|
|
409
435
|
.,.,
|
|
410
436
|
|
|
411
437
|
module_eval(<<'.,.,', 'parser.y', 13)
|
|
412
438
|
def _reduce_3(val, _values, result)
|
|
413
|
-
result = val[0] << val[2]
|
|
439
|
+
result = val[0] << val[2]
|
|
414
440
|
result
|
|
415
441
|
end
|
|
416
442
|
.,.,
|
|
417
443
|
|
|
418
444
|
module_eval(<<'.,.,', 'parser.y', 15)
|
|
419
445
|
def _reduce_4(val, _values, result)
|
|
420
|
-
result = Name.new(:given => val[0])
|
|
446
|
+
result = Name.new(:given => val[0])
|
|
421
447
|
result
|
|
422
448
|
end
|
|
423
449
|
.,.,
|
|
@@ -426,14 +452,14 @@ module_eval(<<'.,.,', 'parser.y', 15)
|
|
|
426
452
|
|
|
427
453
|
module_eval(<<'.,.,', 'parser.y', 17)
|
|
428
454
|
def _reduce_6(val, _values, result)
|
|
429
|
-
result = val[0].merge(:family => val[1])
|
|
455
|
+
result = val[0].merge(:family => val[1])
|
|
430
456
|
result
|
|
431
457
|
end
|
|
432
458
|
.,.,
|
|
433
459
|
|
|
434
460
|
module_eval(<<'.,.,', 'parser.y', 18)
|
|
435
461
|
def _reduce_7(val, _values, result)
|
|
436
|
-
result = val[1].merge(val[0])
|
|
462
|
+
result = val[1].merge(val[0])
|
|
437
463
|
result
|
|
438
464
|
end
|
|
439
465
|
.,.,
|
|
@@ -442,51 +468,51 @@ module_eval(<<'.,.,', 'parser.y', 18)
|
|
|
442
468
|
|
|
443
469
|
module_eval(<<'.,.,', 'parser.y', 21)
|
|
444
470
|
def _reduce_9(val, _values, result)
|
|
445
|
-
result = Name.new(:appellation => val[0])
|
|
471
|
+
result = Name.new(:appellation => val[0])
|
|
446
472
|
result
|
|
447
473
|
end
|
|
448
474
|
.,.,
|
|
449
475
|
|
|
450
476
|
module_eval(<<'.,.,', 'parser.y', 22)
|
|
451
477
|
def _reduce_10(val, _values, result)
|
|
452
|
-
result = Name.new(:title => val[0])
|
|
478
|
+
result = Name.new(:title => val[0])
|
|
453
479
|
result
|
|
454
480
|
end
|
|
455
481
|
.,.,
|
|
456
482
|
|
|
457
483
|
module_eval(<<'.,.,', 'parser.y', 26)
|
|
458
484
|
def _reduce_11(val, _values, result)
|
|
459
|
-
result = Name.new(
|
|
460
|
-
:suffix => val[2], :title => val[3]
|
|
461
|
-
|
|
485
|
+
result = Name.new(
|
|
486
|
+
:given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
|
|
487
|
+
)
|
|
488
|
+
|
|
462
489
|
result
|
|
463
490
|
end
|
|
464
491
|
.,.,
|
|
465
492
|
|
|
466
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
493
|
+
module_eval(<<'.,.,', 'parser.y', 32)
|
|
467
494
|
def _reduce_12(val, _values, result)
|
|
468
|
-
result = Name.new(
|
|
469
|
-
:family => val[2], :suffix => val[3], :title => val[4]
|
|
470
|
-
|
|
495
|
+
result = Name.new(
|
|
496
|
+
:given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
|
|
497
|
+
)
|
|
498
|
+
|
|
471
499
|
result
|
|
472
500
|
end
|
|
473
501
|
.,.,
|
|
474
502
|
|
|
475
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
503
|
+
module_eval(<<'.,.,', 'parser.y', 38)
|
|
476
504
|
def _reduce_13(val, _values, result)
|
|
477
|
-
result = Name.new(
|
|
478
|
-
:particle => val[2], :family => val[3],
|
|
479
|
-
|
|
480
|
-
|
|
505
|
+
result = Name.new(
|
|
506
|
+
:given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
|
|
507
|
+
|
|
481
508
|
result
|
|
482
509
|
end
|
|
483
510
|
.,.,
|
|
484
511
|
|
|
485
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
512
|
+
module_eval(<<'.,.,', 'parser.y', 43)
|
|
486
513
|
def _reduce_14(val, _values, result)
|
|
487
|
-
result = Name.new(:given => val[0], :particle => val[1],
|
|
488
|
-
|
|
489
|
-
|
|
514
|
+
result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
|
515
|
+
|
|
490
516
|
result
|
|
491
517
|
end
|
|
492
518
|
.,.,
|
|
@@ -494,50 +520,53 @@ module_eval(<<'.,.,', 'parser.y', 42)
|
|
|
494
520
|
module_eval(<<'.,.,', 'parser.y', 47)
|
|
495
521
|
def _reduce_15(val, _values, result)
|
|
496
522
|
result = Name.new(:particle => val[0], :family => val[1])
|
|
497
|
-
|
|
523
|
+
|
|
498
524
|
result
|
|
499
525
|
end
|
|
500
526
|
.,.,
|
|
501
527
|
|
|
502
528
|
module_eval(<<'.,.,', 'parser.y', 52)
|
|
503
529
|
def _reduce_16(val, _values, result)
|
|
504
|
-
result = Name.new({
|
|
505
|
-
:
|
|
506
|
-
|
|
530
|
+
result = Name.new({
|
|
531
|
+
:family => val[0], :suffix => val[2][0], :given => val[2][1]
|
|
532
|
+
}, !!val[2][0])
|
|
533
|
+
|
|
507
534
|
result
|
|
508
535
|
end
|
|
509
536
|
.,.,
|
|
510
537
|
|
|
511
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
538
|
+
module_eval(<<'.,.,', 'parser.y', 58)
|
|
512
539
|
def _reduce_17(val, _values, result)
|
|
513
|
-
result = Name.new({
|
|
514
|
-
:
|
|
515
|
-
|
|
540
|
+
result = Name.new({
|
|
541
|
+
:particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
|
|
542
|
+
}, !!val[3][0])
|
|
543
|
+
|
|
516
544
|
result
|
|
517
545
|
end
|
|
518
546
|
.,.,
|
|
519
547
|
|
|
520
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
548
|
+
module_eval(<<'.,.,', 'parser.y', 64)
|
|
521
549
|
def _reduce_18(val, _values, result)
|
|
522
|
-
result = Name.new({
|
|
523
|
-
:
|
|
524
|
-
|
|
550
|
+
result = Name.new({
|
|
551
|
+
:particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
|
|
552
|
+
}, !!val[4][0])
|
|
553
|
+
|
|
525
554
|
result
|
|
526
555
|
end
|
|
527
556
|
.,.,
|
|
528
557
|
|
|
529
558
|
# reduce 19 omitted
|
|
530
559
|
|
|
531
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
560
|
+
module_eval(<<'.,.,', 'parser.y', 71)
|
|
532
561
|
def _reduce_20(val, _values, result)
|
|
533
|
-
result = val.join(' ')
|
|
562
|
+
result = val.join(' ')
|
|
534
563
|
result
|
|
535
564
|
end
|
|
536
565
|
.,.,
|
|
537
566
|
|
|
538
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
567
|
+
module_eval(<<'.,.,', 'parser.y', 72)
|
|
539
568
|
def _reduce_21(val, _values, result)
|
|
540
|
-
result = val.join(' ')
|
|
569
|
+
result = val.join(' ')
|
|
541
570
|
result
|
|
542
571
|
end
|
|
543
572
|
.,.,
|
|
@@ -546,59 +575,59 @@ module_eval(<<'.,.,', 'parser.y', 69)
|
|
|
546
575
|
|
|
547
576
|
# reduce 23 omitted
|
|
548
577
|
|
|
549
|
-
|
|
550
|
-
def _reduce_24(val, _values, result)
|
|
551
|
-
result = [nil,val[0]]
|
|
552
|
-
result
|
|
553
|
-
end
|
|
554
|
-
.,.,
|
|
578
|
+
# reduce 24 omitted
|
|
555
579
|
|
|
556
|
-
|
|
557
|
-
def _reduce_25(val, _values, result)
|
|
558
|
-
result = [val[2],val[0]]
|
|
559
|
-
result
|
|
560
|
-
end
|
|
561
|
-
.,.,
|
|
580
|
+
# reduce 25 omitted
|
|
562
581
|
|
|
563
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
582
|
+
module_eval(<<'.,.,', 'parser.y', 78)
|
|
564
583
|
def _reduce_26(val, _values, result)
|
|
565
|
-
result = [val[0]
|
|
584
|
+
result = [nil,val[0]]
|
|
566
585
|
result
|
|
567
586
|
end
|
|
568
587
|
.,.,
|
|
569
588
|
|
|
570
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
589
|
+
module_eval(<<'.,.,', 'parser.y', 79)
|
|
571
590
|
def _reduce_27(val, _values, result)
|
|
572
|
-
result = [val[
|
|
591
|
+
result = [val[2],val[0]]
|
|
573
592
|
result
|
|
574
593
|
end
|
|
575
594
|
.,.,
|
|
576
595
|
|
|
577
|
-
|
|
596
|
+
module_eval(<<'.,.,', 'parser.y', 80)
|
|
597
|
+
def _reduce_28(val, _values, result)
|
|
598
|
+
result = [val[0],nil]
|
|
599
|
+
result
|
|
600
|
+
end
|
|
601
|
+
.,.,
|
|
578
602
|
|
|
579
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
603
|
+
module_eval(<<'.,.,', 'parser.y', 81)
|
|
580
604
|
def _reduce_29(val, _values, result)
|
|
581
|
-
result = val
|
|
605
|
+
result = [val[0],val[2]]
|
|
582
606
|
result
|
|
583
607
|
end
|
|
584
608
|
.,.,
|
|
585
609
|
|
|
586
610
|
# reduce 30 omitted
|
|
587
611
|
|
|
588
|
-
# reduce 31 omitted
|
|
589
|
-
|
|
590
|
-
# reduce 32 omitted
|
|
591
|
-
|
|
592
612
|
module_eval(<<'.,.,', 'parser.y', 84)
|
|
593
|
-
def
|
|
594
|
-
result = val.join(' ')
|
|
613
|
+
def _reduce_31(val, _values, result)
|
|
614
|
+
result = val.join(' ')
|
|
595
615
|
result
|
|
596
616
|
end
|
|
597
617
|
.,.,
|
|
598
618
|
|
|
619
|
+
# reduce 32 omitted
|
|
620
|
+
|
|
621
|
+
# reduce 33 omitted
|
|
622
|
+
|
|
599
623
|
# reduce 34 omitted
|
|
600
624
|
|
|
601
|
-
|
|
625
|
+
module_eval(<<'.,.,', 'parser.y', 89)
|
|
626
|
+
def _reduce_35(val, _values, result)
|
|
627
|
+
result = val.join(' ')
|
|
628
|
+
result
|
|
629
|
+
end
|
|
630
|
+
.,.,
|
|
602
631
|
|
|
603
632
|
# reduce 36 omitted
|
|
604
633
|
|
|
@@ -616,22 +645,28 @@ module_eval(<<'.,.,', 'parser.y', 84)
|
|
|
616
645
|
|
|
617
646
|
# reduce 43 omitted
|
|
618
647
|
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
648
|
+
# reduce 44 omitted
|
|
649
|
+
|
|
650
|
+
# reduce 45 omitted
|
|
651
|
+
|
|
652
|
+
# reduce 46 omitted
|
|
653
|
+
|
|
654
|
+
module_eval(<<'.,.,', 'parser.y', 99)
|
|
655
|
+
def _reduce_47(val, _values, result)
|
|
656
|
+
result = val.join(' ')
|
|
622
657
|
result
|
|
623
658
|
end
|
|
624
659
|
.,.,
|
|
625
660
|
|
|
626
|
-
# reduce
|
|
661
|
+
# reduce 48 omitted
|
|
627
662
|
|
|
628
|
-
# reduce
|
|
663
|
+
# reduce 49 omitted
|
|
629
664
|
|
|
630
|
-
# reduce
|
|
665
|
+
# reduce 50 omitted
|
|
631
666
|
|
|
632
|
-
module_eval(<<'.,.,', 'parser.y',
|
|
633
|
-
def
|
|
634
|
-
result = val.join(' ')
|
|
667
|
+
module_eval(<<'.,.,', 'parser.y', 104)
|
|
668
|
+
def _reduce_51(val, _values, result)
|
|
669
|
+
result = val.join(' ')
|
|
635
670
|
result
|
|
636
671
|
end
|
|
637
672
|
.,.,
|
|
@@ -641,4 +676,4 @@ def _reduce_none(val, _values, result)
|
|
|
641
676
|
end
|
|
642
677
|
|
|
643
678
|
end # class Parser
|
|
644
|
-
|
|
679
|
+
end # module Namae
|
data/lib/namae/parser.y
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
class Namae::Parser
|
|
5
5
|
|
|
6
|
-
token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX
|
|
6
|
+
token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX UPARTICLE
|
|
7
7
|
|
|
8
8
|
expect 0
|
|
9
9
|
|
|
@@ -20,28 +20,28 @@ rule
|
|
|
20
20
|
| sort_order
|
|
21
21
|
|
|
22
22
|
honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
|
|
23
|
-
|
|
|
23
|
+
| titles { result = Name.new(:title => val[0]) }
|
|
24
24
|
|
|
25
25
|
display_order : u_words word opt_suffices opt_titles
|
|
26
26
|
{
|
|
27
|
-
result = Name.new(
|
|
28
|
-
:suffix => val[2], :title => val[3]
|
|
27
|
+
result = Name.new(
|
|
28
|
+
:given => val[0], :family => val[1], :suffix => val[2], :title => val[3]
|
|
29
|
+
)
|
|
29
30
|
}
|
|
30
31
|
| u_words NICK last opt_suffices opt_titles
|
|
31
32
|
{
|
|
32
|
-
result = Name.new(
|
|
33
|
-
:family => val[2], :suffix => val[3], :title => val[4]
|
|
33
|
+
result = Name.new(
|
|
34
|
+
:given => val[0], :nick => val[1], :family => val[2], :suffix => val[3], :title => val[4]
|
|
35
|
+
)
|
|
34
36
|
}
|
|
35
37
|
| u_words NICK von last opt_suffices opt_titles
|
|
36
38
|
{
|
|
37
|
-
result = Name.new(
|
|
38
|
-
:particle => val[2], :family => val[3],
|
|
39
|
-
:suffix => val[4], :title => val[5])
|
|
39
|
+
result = Name.new(
|
|
40
|
+
:given => val[0], :nick => val[1], :particle => val[2], :family => val[3], :suffix => val[4], :title => val[5])
|
|
40
41
|
}
|
|
41
42
|
| u_words von last
|
|
42
43
|
{
|
|
43
|
-
result = Name.new(:given => val[0], :particle => val[1],
|
|
44
|
-
:family => val[2])
|
|
44
|
+
result = Name.new(:given => val[0], :particle => val[1], :family => val[2])
|
|
45
45
|
}
|
|
46
46
|
| von last
|
|
47
47
|
{
|
|
@@ -50,24 +50,29 @@ rule
|
|
|
50
50
|
|
|
51
51
|
sort_order : last COMMA first
|
|
52
52
|
{
|
|
53
|
-
result = Name.new({
|
|
54
|
-
:
|
|
53
|
+
result = Name.new({
|
|
54
|
+
:family => val[0], :suffix => val[2][0], :given => val[2][1]
|
|
55
|
+
}, !!val[2][0])
|
|
55
56
|
}
|
|
56
57
|
| von last COMMA first
|
|
57
58
|
{
|
|
58
|
-
result = Name.new({
|
|
59
|
-
:
|
|
59
|
+
result = Name.new({
|
|
60
|
+
:particle => val[0], :family => val[1], :suffix => val[3][0], :given => val[3][1]
|
|
61
|
+
}, !!val[3][0])
|
|
60
62
|
}
|
|
61
63
|
| u_words von last COMMA first
|
|
62
64
|
{
|
|
63
|
-
result = Name.new({
|
|
64
|
-
:
|
|
65
|
+
result = Name.new({
|
|
66
|
+
:particle => val[0,2].join(' '), :family => val[2], :suffix => val[4][0], :given => val[4][1]
|
|
67
|
+
}, !!val[4][0])
|
|
65
68
|
}
|
|
66
69
|
;
|
|
67
70
|
|
|
68
|
-
von :
|
|
69
|
-
| von
|
|
70
|
-
| von u_words
|
|
71
|
+
von : particle
|
|
72
|
+
| von particle { result = val.join(' ') }
|
|
73
|
+
| von u_words particle { result = val.join(' ') }
|
|
74
|
+
|
|
75
|
+
particle : LWORD | UPARTICLE
|
|
71
76
|
|
|
72
77
|
last : LWORD | u_words
|
|
73
78
|
|
|
@@ -87,7 +92,7 @@ rule
|
|
|
87
92
|
opt_comma : /* empty */ | COMMA
|
|
88
93
|
opt_words : /* empty */ | words
|
|
89
94
|
|
|
90
|
-
word : LWORD | UWORD | PWORD
|
|
95
|
+
word : LWORD | UWORD | PWORD | UPARTICLE
|
|
91
96
|
|
|
92
97
|
opt_suffices : /* empty */ | suffices
|
|
93
98
|
|
|
@@ -107,12 +112,14 @@ require 'strscan'
|
|
|
107
112
|
@defaults = {
|
|
108
113
|
:debug => false,
|
|
109
114
|
:prefer_comma_as_separator => false,
|
|
115
|
+
:include_particle_in_family => false,
|
|
110
116
|
:comma => ',',
|
|
111
117
|
:stops => ',;',
|
|
112
118
|
:separator => /\s*(\band\b|\&|;)\s*/i,
|
|
113
|
-
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
|
119
|
+
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|pastor|pr|reverend|rev|elder|deacon|deaconess|father|fr|rabbi|cantor|vicar|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
|
114
120
|
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
|
115
|
-
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
|
121
|
+
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i,
|
|
122
|
+
:uppercase_particle => /\s*\b(D[aiu]|De[rs]?|St\.?|Saint|La|Les|V[ao]n)(\s+|$)/
|
|
116
123
|
}
|
|
117
124
|
|
|
118
125
|
class << self
|
|
@@ -141,6 +148,10 @@ require 'strscan'
|
|
|
141
148
|
options[:comma]
|
|
142
149
|
end
|
|
143
150
|
|
|
151
|
+
def include_particle_in_family?
|
|
152
|
+
options[:include_particle_in_family]
|
|
153
|
+
end
|
|
154
|
+
|
|
144
155
|
def stops
|
|
145
156
|
options[:stops]
|
|
146
157
|
end
|
|
@@ -157,6 +168,10 @@ require 'strscan'
|
|
|
157
168
|
options[:appellation]
|
|
158
169
|
end
|
|
159
170
|
|
|
171
|
+
def uppercase_particle
|
|
172
|
+
options[:uppercase_particle]
|
|
173
|
+
end
|
|
174
|
+
|
|
160
175
|
def prefer_comma_as_separator?
|
|
161
176
|
options[:prefer_comma_as_separator]
|
|
162
177
|
end
|
|
@@ -171,7 +186,9 @@ require 'strscan'
|
|
|
171
186
|
def parse!(string)
|
|
172
187
|
@input = StringScanner.new(normalize(string))
|
|
173
188
|
reset
|
|
174
|
-
do_parse
|
|
189
|
+
names = do_parse
|
|
190
|
+
names.map(&:merge_particles!) if include_particle_in_family?
|
|
191
|
+
names
|
|
175
192
|
end
|
|
176
193
|
|
|
177
194
|
def normalize(string)
|
|
@@ -226,11 +243,11 @@ require 'strscan'
|
|
|
226
243
|
end
|
|
227
244
|
|
|
228
245
|
def will_see_suffix?
|
|
229
|
-
input.
|
|
246
|
+
input.rest.strip.split(/\s+/)[0] =~ suffix
|
|
230
247
|
end
|
|
231
248
|
|
|
232
249
|
def will_see_initial?
|
|
233
|
-
input.
|
|
250
|
+
input.rest.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
|
|
234
251
|
end
|
|
235
252
|
|
|
236
253
|
def seen_full_name?
|
|
@@ -262,6 +279,8 @@ require 'strscan'
|
|
|
262
279
|
else
|
|
263
280
|
consume_word(:UWORD, input.matched)
|
|
264
281
|
end
|
|
282
|
+
when input.scan(uppercase_particle)
|
|
283
|
+
consume_word(:UPARTICLE, input.matched.strip)
|
|
265
284
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
|
|
266
285
|
consume_word(:UWORD, input.matched)
|
|
267
286
|
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
|
data/lib/namae/version.rb
CHANGED
data/lib/namae.rb
CHANGED
data/namae.gemspec
CHANGED
|
@@ -2,16 +2,16 @@
|
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
|
-
# stub: namae 1.
|
|
5
|
+
# stub: namae 1.1.1 ruby lib
|
|
6
6
|
|
|
7
7
|
Gem::Specification.new do |s|
|
|
8
8
|
s.name = "namae".freeze
|
|
9
|
-
s.version = "1.
|
|
9
|
+
s.version = "1.1.1"
|
|
10
10
|
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
|
12
12
|
s.require_paths = ["lib".freeze]
|
|
13
13
|
s.authors = ["Sylvester Keil".freeze, "Dan Collis-Puro".freeze]
|
|
14
|
-
s.date = "
|
|
14
|
+
s.date = "2021-03-14"
|
|
15
15
|
s.description = " Namae (\u540D\u524D) is a parser for human names. It recognizes personal names of various cultural backgrounds and tries to split them into their component parts (e.g., given and family names, honorifics etc.). ".freeze
|
|
16
16
|
s.email = ["sylvester@keil.or.at".freeze, "dan@collispuro.com".freeze]
|
|
17
17
|
s.extra_rdoc_files = [
|
|
@@ -54,17 +54,15 @@ Gem::Specification.new do |s|
|
|
|
54
54
|
]
|
|
55
55
|
s.homepage = "https://github.com/berkmancenter/namae".freeze
|
|
56
56
|
s.licenses = ["AGPL-3.0".freeze]
|
|
57
|
-
s.rubygems_version = "2.
|
|
57
|
+
s.rubygems_version = "3.2.3".freeze
|
|
58
58
|
s.summary = "Namae (\u540D\u524D) parses personal names and splits them into their component parts.".freeze
|
|
59
59
|
|
|
60
60
|
if s.respond_to? :specification_version then
|
|
61
61
|
s.specification_version = 4
|
|
62
|
+
end
|
|
62
63
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
else
|
|
66
|
-
s.add_dependency(%q<racc>.freeze, ["~> 1.4"])
|
|
67
|
-
end
|
|
64
|
+
if s.respond_to? :add_runtime_dependency then
|
|
65
|
+
s.add_development_dependency(%q<racc>.freeze, ["~> 1.4"])
|
|
68
66
|
else
|
|
69
67
|
s.add_dependency(%q<racc>.freeze, ["~> 1.4"])
|
|
70
68
|
end
|
data/spec/namae/parser_spec.rb
CHANGED
|
@@ -115,7 +115,7 @@ module Namae
|
|
|
115
115
|
end
|
|
116
116
|
end
|
|
117
117
|
|
|
118
|
-
%w{Pastor Pr. Reverend Rev. Elder Deacon Deaconess Father Fr. Vicar}.each do |title|
|
|
118
|
+
%w{Pastor Pr. Reverend Rev. Elder Deacon Deaconess Father Fr. Vicar Rabbi Cantor}.each do |title|
|
|
119
119
|
describe "the next token is #{title.inspect}" do
|
|
120
120
|
before { parser.send(:input).string = title }
|
|
121
121
|
it 'returns a TITLE token' do
|
|
@@ -191,6 +191,70 @@ module Namae
|
|
|
191
191
|
expect(parser.parse!('Bernado Franecki Ph.D.')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Ph.D.'])
|
|
192
192
|
#expect(parser.parse!('Bernado Franecki, Ph.D.')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Ph.D.'])
|
|
193
193
|
end
|
|
194
|
+
|
|
195
|
+
it 'parses consecutive titles in display order' do
|
|
196
|
+
expect(parser.parse!('Lt. Col. Bernado Franecki')[0].values_at(:given, :family, :title)).to eq(['Bernado', 'Franecki', 'Lt. Col.'])
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
context 'when include_particle_in_family is false' do
|
|
200
|
+
let(:parser) { Parser.new(include_particle_in_family: false) }
|
|
201
|
+
|
|
202
|
+
it 'parses common capitalized particles as the family name in display order' do
|
|
203
|
+
expect(parser.parse!('Carlos De Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'De'])
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
it 'parses common capitalized particles with punctuation as the family name in display order' do
|
|
207
|
+
expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
it 'parses multiple common capitalized particles as the family name in display order' do
|
|
211
|
+
expect(parser.parse!('Tom Van De Weghe')[0].values_at(:given, :family, :particle)).to eq(['Tom', 'Weghe', 'Van De'])
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
it 'parses common lowercase particles as a particle, not family name in display order' do
|
|
215
|
+
expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
it 'parses common capitalized particles as the family name in sort order' do
|
|
219
|
+
expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'De'])
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
it 'parses common lowercase particles as a particle, not family name in sort order' do
|
|
223
|
+
expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'Silva', 'de'])
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
it 'parses common capitalized particles with punctuation as the family name in display order' do
|
|
227
|
+
expect(parser.parse!('St. Hilaire, Matt')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'Hilaire', 'St.'])
|
|
228
|
+
end
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
context 'when include_particle_in_family is true' do
|
|
232
|
+
let(:parser) { Parser.new(include_particle_in_family: true) }
|
|
233
|
+
|
|
234
|
+
it 'parses common capitalized particles as the family name in display order' do
|
|
235
|
+
expect(parser.parse!('Carlos De Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
it 'parses common capitalized particles with punctuation as the family name in display order' do
|
|
239
|
+
expect(parser.parse!('Matt St. Hilaire')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
it 'parses common lowercase particles as family name in display order' do
|
|
243
|
+
expect(parser.parse!('Carlos de Silva')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
it 'parses common capitalized particles as the family name in sort order' do
|
|
247
|
+
expect(parser.parse!('De Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'De Silva', nil])
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
it 'parses common lowercase particles as family name in sort order' do
|
|
251
|
+
expect(parser.parse!('de Silva, Carlos')[0].values_at(:given, :family, :particle)).to eq(['Carlos', 'de Silva', nil])
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
it 'parses common capitalized particles with punctuation as the family name in display order' do
|
|
255
|
+
expect(parser.parse!('St. Hilaire, Matt')[0].values_at(:given, :family, :particle)).to eq(['Matt', 'St. Hilaire', nil])
|
|
256
|
+
end
|
|
257
|
+
end
|
|
194
258
|
end
|
|
195
259
|
end
|
|
196
260
|
|
metadata
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: namae
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.1.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sylvester Keil
|
|
8
8
|
- Dan Collis-Puro
|
|
9
|
-
autorequire:
|
|
9
|
+
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: racc
|
|
@@ -73,7 +73,7 @@ homepage: https://github.com/berkmancenter/namae
|
|
|
73
73
|
licenses:
|
|
74
74
|
- AGPL-3.0
|
|
75
75
|
metadata: {}
|
|
76
|
-
post_install_message:
|
|
76
|
+
post_install_message:
|
|
77
77
|
rdoc_options: []
|
|
78
78
|
require_paths:
|
|
79
79
|
- lib
|
|
@@ -88,9 +88,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
88
88
|
- !ruby/object:Gem::Version
|
|
89
89
|
version: '0'
|
|
90
90
|
requirements: []
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
signing_key:
|
|
91
|
+
rubygems_version: 3.2.3
|
|
92
|
+
signing_key:
|
|
94
93
|
specification_version: 4
|
|
95
94
|
summary: Namae (名前) parses personal names and splits them into their component parts.
|
|
96
95
|
test_files: []
|