namae 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +21 -0
- data/.document +6 -0
- data/.rspec +2 -0
- data/.simplecov +2 -0
- data/.travis.yml +11 -0
- data/.yardopts +3 -0
- data/Gemfile +25 -0
- data/LICENSE +661 -0
- data/README.md +133 -0
- data/Rakefile +62 -0
- data/cucumber.yml +1 -0
- data/features/bibtex.feature +78 -0
- data/features/examples.feature +24 -0
- data/features/step_definitions/namae_steps.rb +22 -0
- data/features/support/env.rb +19 -0
- data/lib/namae.rb +5 -0
- data/lib/namae/name.rb +119 -0
- data/lib/namae/parser.rb +470 -0
- data/lib/namae/parser.y +175 -0
- data/lib/namae/utility.rb +47 -0
- data/lib/namae/version.rb +10 -0
- data/namae.gemspec +80 -0
- data/spec/namae/name_spec.rb +65 -0
- data/spec/namae/parser_spec.rb +107 -0
- data/spec/namae/utility_spec.rb +21 -0
- data/spec/spec_helper.rb +19 -0
- metadata +147 -0
data/lib/namae/parser.rb
ADDED
@@ -0,0 +1,470 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by Racc 1.4.8
|
4
|
+
# from Racc grammar file "".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser.rb'
|
8
|
+
|
9
|
+
require 'singleton'
|
10
|
+
require 'strscan'
|
11
|
+
|
12
|
+
module Namae
|
13
|
+
class Parser < Racc::Parser
|
14
|
+
|
15
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 90)
|
16
|
+
|
17
|
+
include Singleton
|
18
|
+
|
19
|
+
attr_reader :options
|
20
|
+
|
21
|
+
# Sets up the scanner and the default parser options.
#
# The scanner starts on an empty string; #parse! assigns the real input.
# Defaults: debugging off, ',' as the comma character, and patterns for
# name separators ("and" / "&"), titles and appellations.
def initialize
  @options = {
    :debug => false,
    :comma => ',',
    :separator => /\s*(\band\b|\&)\s*/i,
    :title => /\s*\b(sir|lord|(prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
    :appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
  }
  @input = StringScanner.new('')
end
|
30
|
+
|
31
|
+
# Whether debugging is enabled: the :debug option when it is truthy,
# otherwise the value of the DEBUG environment variable (nil when unset).
def debug?
  return options[:debug] if options[:debug]
  ENV['DEBUG']
end
|
34
|
+
|
35
|
+
# Returns options[:separator], the pattern #next_token scans to emit an
# :AND token between names.
def separator
  options[:separator]
end
|
38
|
+
|
39
|
+
# Returns options[:comma], which #next_token interpolates into its word
# patterns as the character(s) that end a word.
def comma
  options[:comma]
end
|
42
|
+
|
43
|
+
# Returns options[:title], the pattern #next_token scans to emit a
# :TITLE token.
def title
  options[:title]
end
|
46
|
+
|
47
|
+
# Returns options[:appellation], the pattern #next_token scans to emit an
# :APPELLATION token.
def appellation
  options[:appellation]
end
|
50
|
+
|
51
|
+
# Parses +input+ without ever raising on malformed input.
#
# Delegates to #parse!. If that raises a StandardError, the error message
# is passed to warn (only when debugging is enabled) and an empty Array is
# returned instead.
def parse(input)
  begin
    parse!(input)
  rescue StandardError => failure
    warn(failure.message) if debug?
    []
  end
end
|
57
|
+
|
58
|
+
# Parses +string+ and returns the result of do_parse; errors propagate to
# the caller (use #parse for the non-raising variant).
#
# The input is stripped of surrounding whitespace before it is handed to
# the scanner.
def parse!(string)
  @yydebug = debug? # debug flag consulted by the Racc runtime
  stripped = string.strip
  input.string = stripped
  do_parse
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
# Scans the next token off the input for the Racc runtime.
#
# Returns nil when there is no scanner or the input is exhausted, otherwise
# a [type, value] pair whose type is :AND, :COMMA, :TITLE, :APPELLATION,
# :UWORD, :LWORD, :PWORD or :NICK. Whitespace between tokens is skipped.
#
# The :comma option is interpolated into character classes, so each of its
# characters both ends a word and is scanned as a :COMMA token. Scanning
# :COMMA with the configured characters (rather than a literal ',') keeps
# the two in agreement; otherwise a custom :comma would end words at a
# character that no token pattern could consume.
#
# Raises ArgumentError if the remaining input matches no token pattern.
def next_token
  # An empty (or nil) :comma means "no comma characters"; guard against
  # building an empty character class below.
  stops = comma.to_s

  case
  when input.nil?, input.eos?
    nil
  when input.scan(separator)
    [:AND, nil]
  when !stops.empty? && input.scan(/\s*[#{stops}]\s*/)
    [:COMMA, nil]
  when input.scan(/\s+/)
    next_token # whitespace carries no token; scan again
  when input.scan(title)
    [:TITLE, input.matched.strip]
  when input.scan(appellation)
    [:APPELLATION, input.matched.strip]
  # Words may begin with brace groups, each optionally preceded by a
  # backslash command; the first character after them decides the type.
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
    [:UWORD, input.matched]
  when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
    [:LWORD, input.matched]
  when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
    [:PWORD, input.matched]
  when input.scan(/('[^'\n]+')|("[^"\n]+")/)
    [:NICK, input.matched[1...-1]] # drop the surrounding quotes
  else
    raise ArgumentError,
      "Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
  end
end
|
93
|
+
|
94
|
+
# Error hook for the Racc runtime: always raises ArgumentError, reporting
# the offending +value+ and the inspected +stack+. +tid+ is not used.
def on_error(tid, value, stack)
  message = "Failed to parse name: unexpected '#{value}' at #{stack.inspect}"
  raise ArgumentError, message
end
|
98
|
+
|
99
|
+
attr_reader :input
|
100
|
+
|
101
|
+
# -*- racc -*-
|
102
|
+
...end parser.y/module_eval...
|
103
|
+
##### State transition tables begin ###
|
104
|
+
|
105
|
+
racc_action_table = [
|
106
|
+
-34, 16, -22, -36, -22, -35, -22, -34, 17, -22,
|
107
|
+
-36, -22, -35, -34, 53, -22, 14, 12, 15, 55,
|
108
|
+
-34, 7, 8, 14, 12, 15, 42, 33, 7, 8,
|
109
|
+
14, 22, 15, 24, 14, 22, 15, 24, 30, 28,
|
110
|
+
31, 30, 28, 31, 49, 48, 50, 30, 39, 31,
|
111
|
+
49, 48, 50, 49, 48, 50, 30, 28, 31, 30,
|
112
|
+
43, 31, 49, 48, 50, 30, 28, 31, 49, 48,
|
113
|
+
50, 14, 22, 15, 30, 32, 31, 30, 28, 31 ]
|
114
|
+
|
115
|
+
racc_action_check = [
|
116
|
+
22, 1, 39, 15, 28, 14, 28, 22, 1, 39,
|
117
|
+
15, 28, 14, 12, 41, 12, 0, 0, 0, 45,
|
118
|
+
12, 0, 0, 17, 17, 17, 27, 16, 17, 17,
|
119
|
+
20, 20, 20, 20, 9, 9, 9, 9, 25, 25,
|
120
|
+
25, 21, 21, 21, 55, 55, 55, 24, 24, 24,
|
121
|
+
53, 53, 53, 47, 47, 47, 10, 10, 10, 29,
|
122
|
+
29, 29, 32, 32, 32, 35, 35, 35, 42, 42,
|
123
|
+
42, 5, 5, 5, 40, 11, 40, 38, 38, 38 ]
|
124
|
+
|
125
|
+
racc_action_pointer = [
|
126
|
+
13, 1, nil, nil, nil, 68, nil, nil, nil, 31,
|
127
|
+
53, 73, 13, nil, 5, 3, 27, 20, nil, nil,
|
128
|
+
27, 38, 0, nil, 44, 35, nil, 24, 4, 56,
|
129
|
+
nil, nil, 59, nil, nil, 62, nil, nil, 74, 2,
|
130
|
+
71, 12, 65, nil, nil, 17, nil, 50, nil, nil,
|
131
|
+
nil, nil, nil, 47, nil, 41, nil, nil, nil ]
|
132
|
+
|
133
|
+
racc_action_default = [
|
134
|
+
-1, -37, -2, -4, -5, -37, -8, -9, -10, -23,
|
135
|
+
-37, -37, -19, -26, -28, -29, -37, -37, -6, -7,
|
136
|
+
-37, -37, -19, -11, -37, -37, -27, -15, -20, -23,
|
137
|
+
-28, -29, -32, 59, -3, -37, -15, -12, -37, -19,
|
138
|
+
-23, -14, -32, -21, -16, -24, -30, -33, -34, -35,
|
139
|
+
-36, -14, -13, -32, -17, -32, -31, -18, -25 ]
|
140
|
+
|
141
|
+
racc_goto_table = [
|
142
|
+
3, 26, 19, 44, 58, 18, 1, nil, 27, 23,
|
143
|
+
9, 2, 26, 54, nil, 20, nil, 3, nil, 36,
|
144
|
+
23, 26, 37, 41, 57, 21, nil, 9, 34, 25,
|
145
|
+
nil, nil, 26, 51, 40, nil, 52, nil, nil, nil,
|
146
|
+
35, nil, nil, nil, 38, nil, nil, 56 ]
|
147
|
+
|
148
|
+
racc_goto_check = [
|
149
|
+
3, 12, 4, 10, 11, 3, 1, nil, 8, 3,
|
150
|
+
7, 2, 12, 10, nil, 7, nil, 3, nil, 8,
|
151
|
+
3, 12, 8, 8, 10, 9, nil, 7, 2, 9,
|
152
|
+
nil, nil, 12, 8, 7, nil, 8, nil, nil, nil,
|
153
|
+
9, nil, nil, nil, 9, nil, nil, 3 ]
|
154
|
+
|
155
|
+
racc_goto_pointer = [
|
156
|
+
nil, 6, 11, 0, -3, nil, nil, 10, -2, 20,
|
157
|
+
-29, -51, -8, nil ]
|
158
|
+
|
159
|
+
racc_goto_default = [
|
160
|
+
nil, nil, nil, 46, 4, 5, 6, 29, 11, 10,
|
161
|
+
nil, 45, 13, 47 ]
|
162
|
+
|
163
|
+
racc_reduce_table = [
|
164
|
+
0, 0, :racc_error,
|
165
|
+
0, 11, :_reduce_1,
|
166
|
+
1, 11, :_reduce_2,
|
167
|
+
3, 11, :_reduce_3,
|
168
|
+
1, 12, :_reduce_4,
|
169
|
+
1, 12, :_reduce_none,
|
170
|
+
2, 12, :_reduce_6,
|
171
|
+
2, 12, :_reduce_7,
|
172
|
+
1, 12, :_reduce_none,
|
173
|
+
1, 15, :_reduce_9,
|
174
|
+
1, 15, :_reduce_10,
|
175
|
+
2, 14, :_reduce_11,
|
176
|
+
3, 14, :_reduce_12,
|
177
|
+
4, 14, :_reduce_13,
|
178
|
+
3, 14, :_reduce_14,
|
179
|
+
2, 14, :_reduce_15,
|
180
|
+
3, 16, :_reduce_16,
|
181
|
+
4, 16, :_reduce_17,
|
182
|
+
5, 16, :_reduce_18,
|
183
|
+
1, 19, :_reduce_none,
|
184
|
+
2, 19, :_reduce_20,
|
185
|
+
3, 19, :_reduce_21,
|
186
|
+
1, 18, :_reduce_none,
|
187
|
+
1, 18, :_reduce_none,
|
188
|
+
1, 20, :_reduce_24,
|
189
|
+
3, 20, :_reduce_25,
|
190
|
+
1, 17, :_reduce_none,
|
191
|
+
2, 17, :_reduce_27,
|
192
|
+
1, 22, :_reduce_none,
|
193
|
+
1, 22, :_reduce_none,
|
194
|
+
1, 23, :_reduce_none,
|
195
|
+
2, 23, :_reduce_31,
|
196
|
+
0, 21, :_reduce_none,
|
197
|
+
1, 21, :_reduce_none,
|
198
|
+
1, 13, :_reduce_none,
|
199
|
+
1, 13, :_reduce_none,
|
200
|
+
1, 13, :_reduce_none ]
|
201
|
+
|
202
|
+
racc_reduce_n = 37
|
203
|
+
|
204
|
+
racc_shift_n = 59
|
205
|
+
|
206
|
+
racc_token_table = {
|
207
|
+
false => 0,
|
208
|
+
:error => 1,
|
209
|
+
:COMMA => 2,
|
210
|
+
:UWORD => 3,
|
211
|
+
:LWORD => 4,
|
212
|
+
:PWORD => 5,
|
213
|
+
:NICK => 6,
|
214
|
+
:AND => 7,
|
215
|
+
:APPELLATION => 8,
|
216
|
+
:TITLE => 9 }
|
217
|
+
|
218
|
+
racc_nt_base = 10
|
219
|
+
|
220
|
+
racc_use_result_var = true
|
221
|
+
|
222
|
+
Racc_arg = [
|
223
|
+
racc_action_table,
|
224
|
+
racc_action_check,
|
225
|
+
racc_action_default,
|
226
|
+
racc_action_pointer,
|
227
|
+
racc_goto_table,
|
228
|
+
racc_goto_check,
|
229
|
+
racc_goto_default,
|
230
|
+
racc_goto_pointer,
|
231
|
+
racc_nt_base,
|
232
|
+
racc_reduce_table,
|
233
|
+
racc_token_table,
|
234
|
+
racc_shift_n,
|
235
|
+
racc_reduce_n,
|
236
|
+
racc_use_result_var ]
|
237
|
+
|
238
|
+
Racc_token_to_s_table = [
|
239
|
+
"$end",
|
240
|
+
"error",
|
241
|
+
"COMMA",
|
242
|
+
"UWORD",
|
243
|
+
"LWORD",
|
244
|
+
"PWORD",
|
245
|
+
"NICK",
|
246
|
+
"AND",
|
247
|
+
"APPELLATION",
|
248
|
+
"TITLE",
|
249
|
+
"$start",
|
250
|
+
"names",
|
251
|
+
"name",
|
252
|
+
"word",
|
253
|
+
"display_order",
|
254
|
+
"honorific",
|
255
|
+
"sort_order",
|
256
|
+
"u_words",
|
257
|
+
"last",
|
258
|
+
"von",
|
259
|
+
"first",
|
260
|
+
"opt_words",
|
261
|
+
"u_word",
|
262
|
+
"words" ]
|
263
|
+
|
264
|
+
Racc_debug_parser = false
|
265
|
+
|
266
|
+
##### State transition tables end #####
|
267
|
+
|
268
|
+
# reduce 0 omitted
|
269
|
+
|
270
|
+
module_eval(<<'.,.,', 'parser.y', 10)
|
271
|
+
# Semantic action: produces a new, empty list.
def _reduce_1(val, _values, result)
  []
end
|
275
|
+
.,.,
|
276
|
+
|
277
|
+
module_eval(<<'.,.,', 'parser.y', 11)
|
278
|
+
# Semantic action: wraps the first matched value in a one-element list.
def _reduce_2(val, _values, result)
  [val.first]
end
|
282
|
+
.,.,
|
283
|
+
|
284
|
+
module_eval(<<'.,.,', 'parser.y', 12)
|
285
|
+
# Semantic action: appends the third matched value to the list held in the
# first one (in place) and returns that list.
def _reduce_3(val, _values, result)
  list, _skipped, item = val
  list << item
end
|
289
|
+
.,.,
|
290
|
+
|
291
|
+
module_eval(<<'.,.,', 'parser.y', 14)
|
292
|
+
# Semantic action: builds a Name whose :given is the first matched value.
def _reduce_4(val, _values, result)
  Name.new(:given => val.first)
end
|
296
|
+
.,.,
|
297
|
+
|
298
|
+
# reduce 5 omitted
|
299
|
+
|
300
|
+
module_eval(<<'.,.,', 'parser.y', 16)
|
301
|
+
# Semantic action: merges the second matched value into the first as its
# :family and returns the result of that merge.
def _reduce_6(val, _values, result)
  base, family = val
  base.merge(:family => family)
end
|
305
|
+
.,.,
|
306
|
+
|
307
|
+
module_eval(<<'.,.,', 'parser.y', 17)
|
308
|
+
# Semantic action: merges the first matched value into the second and
# returns the result of that merge.
def _reduce_7(val, _values, result)
  prefix, name = val
  name.merge(prefix)
end
|
312
|
+
.,.,
|
313
|
+
|
314
|
+
# reduce 8 omitted
|
315
|
+
|
316
|
+
module_eval(<<'.,.,', 'parser.y', 20)
|
317
|
+
# Semantic action: builds a Name whose :appellation is the first matched
# value.
def _reduce_9(val, _values, result)
  Name.new(:appellation => val.first)
end
|
321
|
+
.,.,
|
322
|
+
|
323
|
+
module_eval(<<'.,.,', 'parser.y', 21)
|
324
|
+
# Semantic action: builds a Name whose :title is the first matched value.
def _reduce_10(val, _values, result)
  Name.new(:title => val.first)
end
|
328
|
+
.,.,
|
329
|
+
|
330
|
+
module_eval(<<'.,.,', 'parser.y', 25)
|
331
|
+
# Semantic action: builds a Name from two matched values, given name first
# and family name second.
def _reduce_11(val, _values, result)
  given, family = val
  Name.new(:given => given, :family => family)
end
|
336
|
+
.,.,
|
337
|
+
|
338
|
+
module_eval(<<'.,.,', 'parser.y', 29)
|
339
|
+
# Semantic action: builds a Name from three matched values, in the order
# given name, nickname, family name.
def _reduce_12(val, _values, result)
  given, nick, family = val
  Name.new(:given => given, :nick => nick, :family => family)
end
|
344
|
+
.,.,
|
345
|
+
|
346
|
+
module_eval(<<'.,.,', 'parser.y', 33)
|
347
|
+
# Semantic action: builds a Name from four matched values, in the order
# given name, nickname, particle, family name.
def _reduce_13(val, _values, result)
  given, nick, particle, family = val
  Name.new(:given => given, :nick => nick,
           :particle => particle, :family => family)
end
|
353
|
+
.,.,
|
354
|
+
|
355
|
+
module_eval(<<'.,.,', 'parser.y', 38)
|
356
|
+
# Semantic action: builds a Name from three matched values, in the order
# given name, particle, family name.
def _reduce_14(val, _values, result)
  given, particle, family = val
  Name.new(:given => given, :particle => particle, :family => family)
end
|
362
|
+
.,.,
|
363
|
+
|
364
|
+
module_eval(<<'.,.,', 'parser.y', 43)
|
365
|
+
# Semantic action: builds a Name from two matched values, particle first
# and family name second.
def _reduce_15(val, _values, result)
  particle, family = val
  Name.new(:particle => particle, :family => family)
end
|
370
|
+
.,.,
|
371
|
+
|
372
|
+
module_eval(<<'.,.,', 'parser.y', 48)
|
373
|
+
# Semantic action: builds a Name whose :family is the first matched value;
# the third matched value is indexed for :suffix ([0]) and :given ([1]).
def _reduce_16(val, _values, result)
  rest = val[2]
  Name.new(:family => val[0], :suffix => rest[0], :given => rest[1])
end
|
379
|
+
.,.,
|
380
|
+
|
381
|
+
module_eval(<<'.,.,', 'parser.y', 53)
|
382
|
+
# Semantic action: builds a Name with :particle and :family from the first
# two matched values; the fourth matched value is indexed for :suffix ([0])
# and :given ([1]).
def _reduce_17(val, _values, result)
  rest = val[3]
  Name.new(:particle => val[0], :family => val[1],
           :suffix => rest[0], :given => rest[1])
end
|
388
|
+
.,.,
|
389
|
+
|
390
|
+
module_eval(<<'.,.,', 'parser.y', 58)
|
391
|
+
# Semantic action: builds a Name whose :particle is the first two matched
# values joined by a space and whose :family is the third; the fifth
# matched value is indexed for :suffix ([0]) and :given ([1]).
def _reduce_18(val, _values, result)
  particle = val.first(2).join(' ')
  rest = val[4]
  Name.new(:particle => particle, :family => val[2],
           :suffix => rest[0], :given => rest[1])
end
|
397
|
+
.,.,
|
398
|
+
|
399
|
+
# reduce 19 omitted
|
400
|
+
|
401
|
+
module_eval(<<'.,.,', 'parser.y', 64)
|
402
|
+
# Semantic action: joins all matched values with single spaces.
def _reduce_20(val, _values, result)
  val.join(' ')
end
|
406
|
+
.,.,
|
407
|
+
|
408
|
+
module_eval(<<'.,.,', 'parser.y', 65)
|
409
|
+
# Semantic action: joins all matched values with single spaces.
def _reduce_21(val, _values, result)
  val.join(' ')
end
|
413
|
+
.,.,
|
414
|
+
|
415
|
+
# reduce 22 omitted
|
416
|
+
|
417
|
+
# reduce 23 omitted
|
418
|
+
|
419
|
+
module_eval(<<'.,.,', 'parser.y', 69)
|
420
|
+
# Semantic action: returns a two-element list holding nil followed by the
# first matched value.
def _reduce_24(val, _values, result)
  [nil, val.first]
end
|
424
|
+
.,.,
|
425
|
+
|
426
|
+
module_eval(<<'.,.,', 'parser.y', 70)
|
427
|
+
# Semantic action: returns a two-element list of the first and third
# matched values (the second is dropped).
def _reduce_25(val, _values, result)
  val.values_at(0, 2)
end
|
431
|
+
.,.,
|
432
|
+
|
433
|
+
# reduce 26 omitted
|
434
|
+
|
435
|
+
module_eval(<<'.,.,', 'parser.y', 73)
|
436
|
+
# Semantic action: joins all matched values with single spaces.
def _reduce_27(val, _values, result)
  val.join(' ')
end
|
440
|
+
.,.,
|
441
|
+
|
442
|
+
# reduce 28 omitted
|
443
|
+
|
444
|
+
# reduce 29 omitted
|
445
|
+
|
446
|
+
# reduce 30 omitted
|
447
|
+
|
448
|
+
module_eval(<<'.,.,', 'parser.y', 78)
|
449
|
+
# Semantic action: joins all matched values with single spaces.
def _reduce_31(val, _values, result)
  val.join(' ')
end
|
453
|
+
.,.,
|
454
|
+
|
455
|
+
# reduce 32 omitted
|
456
|
+
|
457
|
+
# reduce 33 omitted
|
458
|
+
|
459
|
+
# reduce 34 omitted
|
460
|
+
|
461
|
+
# reduce 35 omitted
|
462
|
+
|
463
|
+
# reduce 36 omitted
|
464
|
+
|
465
|
+
# Default semantic action: passes the first matched value through
# unchanged.
def _reduce_none(val, _values, result)
  val.first
end
|
468
|
+
|
469
|
+
end # class Parser
|
470
|
+
end # module Namae
|