racc 1.5.2 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.ja.rdoc +0 -1
- data/README.rdoc +6 -7
- data/{rdoc → doc}/en/NEWS.en.rdoc +0 -0
- data/{rdoc → doc}/en/grammar.en.rdoc +27 -31
- data/doc/en/grammar2.en.rdoc +219 -0
- data/{rdoc → doc}/ja/NEWS.ja.rdoc +0 -0
- data/{rdoc → doc}/ja/command.ja.html +0 -0
- data/{rdoc → doc}/ja/debug.ja.rdoc +0 -0
- data/{rdoc → doc}/ja/grammar.ja.rdoc +0 -0
- data/{rdoc → doc}/ja/index.ja.html +0 -0
- data/{rdoc → doc}/ja/parser.ja.rdoc +0 -0
- data/{rdoc → doc}/ja/usage.ja.html +0 -0
- data/ext/racc/cparse/cparse.c +1 -1
- data/ext/racc/cparse/extconf.rb +1 -0
- data/lib/racc/info.rb +1 -1
- data/lib/racc/parser-text.rb +1 -1
- data/lib/racc/parser.rb +1 -1
- data/lib/racc/parserfilegenerator.rb +0 -44
- data/lib/racc/statetransitiontable.rb +2 -8
- metadata +15 -121
- data/Rakefile +0 -79
- data/ext/racc/com/headius/racc/Cparse.java +0 -849
- data/lib/racc/pre-setup +0 -13
- data/sample/array.y +0 -67
- data/sample/array2.y +0 -59
- data/sample/calc-ja.y +0 -66
- data/sample/calc.y +0 -65
- data/sample/conflict.y +0 -15
- data/sample/hash.y +0 -60
- data/sample/lalr.y +0 -17
- data/sample/lists.y +0 -57
- data/sample/syntax.y +0 -46
- data/sample/yyerr.y +0 -46
- data/test/assets/cadenza.y +0 -170
- data/test/assets/cast.y +0 -926
- data/test/assets/chk.y +0 -126
- data/test/assets/conf.y +0 -16
- data/test/assets/csspool.y +0 -729
- data/test/assets/digraph.y +0 -29
- data/test/assets/echk.y +0 -118
- data/test/assets/edtf.y +0 -583
- data/test/assets/err.y +0 -60
- data/test/assets/error_recovery.y +0 -35
- data/test/assets/expect.y +0 -7
- data/test/assets/firstline.y +0 -4
- data/test/assets/huia.y +0 -318
- data/test/assets/ichk.y +0 -102
- data/test/assets/intp.y +0 -546
- data/test/assets/journey.y +0 -47
- data/test/assets/liquor.y +0 -313
- data/test/assets/machete.y +0 -423
- data/test/assets/macruby.y +0 -2197
- data/test/assets/mailp.y +0 -437
- data/test/assets/mediacloth.y +0 -599
- data/test/assets/mof.y +0 -649
- data/test/assets/namae.y +0 -302
- data/test/assets/nasl.y +0 -626
- data/test/assets/newsyn.y +0 -25
- data/test/assets/noend.y +0 -4
- data/test/assets/nokogiri-css.y +0 -255
- data/test/assets/nonass.y +0 -41
- data/test/assets/normal.y +0 -27
- data/test/assets/norule.y +0 -4
- data/test/assets/nullbug1.y +0 -25
- data/test/assets/nullbug2.y +0 -15
- data/test/assets/opal.y +0 -1807
- data/test/assets/opt.y +0 -123
- data/test/assets/percent.y +0 -35
- data/test/assets/php_serialization.y +0 -98
- data/test/assets/recv.y +0 -97
- data/test/assets/riml.y +0 -665
- data/test/assets/rrconf.y +0 -14
- data/test/assets/ruby18.y +0 -1943
- data/test/assets/ruby19.y +0 -2174
- data/test/assets/ruby20.y +0 -2350
- data/test/assets/ruby21.y +0 -2359
- data/test/assets/ruby22.y +0 -2381
- data/test/assets/scan.y +0 -72
- data/test/assets/syntax.y +0 -50
- data/test/assets/tp_plus.y +0 -622
- data/test/assets/twowaysql.y +0 -278
- data/test/assets/unterm.y +0 -5
- data/test/assets/useless.y +0 -12
- data/test/assets/yyerr.y +0 -46
- data/test/bench.y +0 -36
- data/test/helper.rb +0 -115
- data/test/infini.y +0 -8
- data/test/regress/cadenza +0 -796
- data/test/regress/cast +0 -3428
- data/test/regress/csspool +0 -2314
- data/test/regress/edtf +0 -1794
- data/test/regress/huia +0 -1392
- data/test/regress/journey +0 -222
- data/test/regress/liquor +0 -885
- data/test/regress/machete +0 -833
- data/test/regress/mediacloth +0 -1463
- data/test/regress/mof +0 -1368
- data/test/regress/namae +0 -634
- data/test/regress/nasl +0 -2058
- data/test/regress/nokogiri-css +0 -836
- data/test/regress/opal +0 -6431
- data/test/regress/php_serialization +0 -336
- data/test/regress/riml +0 -3283
- data/test/regress/ruby18 +0 -6344
- data/test/regress/ruby22 +0 -7460
- data/test/regress/tp_plus +0 -1933
- data/test/regress/twowaysql +0 -556
- data/test/scandata/brace +0 -7
- data/test/scandata/gvar +0 -1
- data/test/scandata/normal +0 -4
- data/test/scandata/percent +0 -18
- data/test/scandata/slash +0 -10
- data/test/src.intp +0 -34
- data/test/start.y +0 -20
- data/test/test_chk_y.rb +0 -52
- data/test/test_grammar_file_parser.rb +0 -15
- data/test/test_racc_command.rb +0 -339
- data/test/test_scan_y.rb +0 -52
- data/test/testscanner.rb +0 -51
- data/web/racc.en.rhtml +0 -42
- data/web/racc.ja.rhtml +0 -51
data/test/assets/namae.y
DELETED
@@ -1,302 +0,0 @@
|
|
1
|
-
# -*- ruby -*-
|
2
|
-
# vi: set ft=ruby :
|
3
|
-
|
4
|
-
# Copyright (C) 2012 President and Fellows of Harvard College
|
5
|
-
# Copyright (C) 2013-2014 Sylvester Keil
|
6
|
-
#
|
7
|
-
# Redistribution and use in source and binary forms, with or without
|
8
|
-
# modification, are permitted provided that the following conditions are met:
|
9
|
-
#
|
10
|
-
# 1. Redistributions of source code must retain the above copyright notice,
|
11
|
-
# this list of conditions and the following disclaimer.
|
12
|
-
#
|
13
|
-
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
14
|
-
# this list of conditions and the following disclaimer in the documentation
|
15
|
-
# and/or other materials provided with the distribution.
|
16
|
-
#
|
17
|
-
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS OR
|
18
|
-
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
19
|
-
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
20
|
-
# EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
21
|
-
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
22
|
-
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
23
|
-
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
24
|
-
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
25
|
-
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
26
|
-
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
27
|
-
#
|
28
|
-
# The views and conclusions contained in the software and documentation are
|
29
|
-
# those of the authors and should not be interpreted as representing official
|
30
|
-
# policies, either expressed or implied, of the copyright holder.
|
31
|
-
|
32
|
-
class Namae::Parser
|
33
|
-
|
34
|
-
token COMMA UWORD LWORD PWORD NICK AND APPELLATION TITLE SUFFIX
|
35
|
-
|
36
|
-
expect 0
|
37
|
-
|
38
|
-
rule
|
39
|
-
|
40
|
-
names : { result = [] }
|
41
|
-
| name { result = [val[0]] }
|
42
|
-
| names AND name { result = val[0] << val[2] }
|
43
|
-
|
44
|
-
name : word { result = Name.new(:given => val[0]) }
|
45
|
-
| display_order
|
46
|
-
| honorific word { result = val[0].merge(:family => val[1]) }
|
47
|
-
| honorific display_order { result = val[1].merge(val[0]) }
|
48
|
-
| sort_order
|
49
|
-
|
50
|
-
honorific : APPELLATION { result = Name.new(:appellation => val[0]) }
|
51
|
-
| TITLE { result = Name.new(:title => val[0]) }
|
52
|
-
|
53
|
-
display_order : u_words word opt_suffices opt_titles
|
54
|
-
{
|
55
|
-
result = Name.new(:given => val[0], :family => val[1],
|
56
|
-
:suffix => val[2], :title => val[3])
|
57
|
-
}
|
58
|
-
| u_words NICK last opt_suffices opt_titles
|
59
|
-
{
|
60
|
-
result = Name.new(:given => val[0], :nick => val[1],
|
61
|
-
:family => val[2], :suffix => val[3], :title => val[4])
|
62
|
-
}
|
63
|
-
| u_words NICK von last opt_suffices opt_titles
|
64
|
-
{
|
65
|
-
result = Name.new(:given => val[0], :nick => val[1],
|
66
|
-
:particle => val[2], :family => val[3],
|
67
|
-
:suffix => val[4], :title => val[5])
|
68
|
-
}
|
69
|
-
| u_words von last
|
70
|
-
{
|
71
|
-
result = Name.new(:given => val[0], :particle => val[1],
|
72
|
-
:family => val[2])
|
73
|
-
}
|
74
|
-
| von last
|
75
|
-
{
|
76
|
-
result = Name.new(:particle => val[0], :family => val[1])
|
77
|
-
}
|
78
|
-
|
79
|
-
sort_order : last COMMA first
|
80
|
-
{
|
81
|
-
result = Name.new({ :family => val[0], :suffix => val[2][0],
|
82
|
-
:given => val[2][1] }, !!val[2][0])
|
83
|
-
}
|
84
|
-
| von last COMMA first
|
85
|
-
{
|
86
|
-
result = Name.new({ :particle => val[0], :family => val[1],
|
87
|
-
:suffix => val[3][0], :given => val[3][1] }, !!val[3][0])
|
88
|
-
}
|
89
|
-
| u_words von last COMMA first
|
90
|
-
{
|
91
|
-
result = Name.new({ :particle => val[0,2].join(' '), :family => val[2],
|
92
|
-
:suffix => val[4][0], :given => val[4][1] }, !!val[4][0])
|
93
|
-
}
|
94
|
-
;
|
95
|
-
|
96
|
-
von : LWORD
|
97
|
-
| von LWORD { result = val.join(' ') }
|
98
|
-
| von u_words LWORD { result = val.join(' ') }
|
99
|
-
|
100
|
-
last : LWORD | u_words
|
101
|
-
|
102
|
-
first : opt_words { result = [nil,val[0]] }
|
103
|
-
| words opt_comma suffices { result = [val[2],val[0]] }
|
104
|
-
| suffices { result = [val[0],nil] }
|
105
|
-
| suffices COMMA words { result = [val[0],val[2]] }
|
106
|
-
|
107
|
-
u_words : u_word
|
108
|
-
| u_words u_word { result = val.join(' ') }
|
109
|
-
|
110
|
-
u_word : UWORD | PWORD
|
111
|
-
|
112
|
-
words : word
|
113
|
-
| words word { result = val.join(' ') }
|
114
|
-
|
115
|
-
opt_comma : /* empty */ | COMMA
|
116
|
-
opt_words : /* empty */ | words
|
117
|
-
|
118
|
-
word : LWORD | UWORD | PWORD
|
119
|
-
|
120
|
-
opt_suffices : /* empty */ | suffices
|
121
|
-
|
122
|
-
suffices : SUFFIX
|
123
|
-
| suffices SUFFIX { result = val.join(' ') }
|
124
|
-
|
125
|
-
opt_titles : /* empty */ | titles
|
126
|
-
|
127
|
-
titles : TITLE
|
128
|
-
| titles TITLE { result = val.join(' ') }
|
129
|
-
|
130
|
-
---- header
|
131
|
-
require 'singleton'
|
132
|
-
require 'strscan'
|
133
|
-
|
134
|
-
---- inner
|
135
|
-
|
136
|
-
include Singleton
|
137
|
-
|
138
|
-
attr_reader :options, :input
|
139
|
-
|
140
|
-
def initialize
|
141
|
-
@input, @options = StringScanner.new(''), {
|
142
|
-
:debug => false,
|
143
|
-
:prefer_comma_as_separator => false,
|
144
|
-
:comma => ',',
|
145
|
-
:stops => ',;',
|
146
|
-
:separator => /\s*(\band\b|\&|;)\s*/i,
|
147
|
-
:title => /\s*\b(sir|lord|count(ess)?|(gen|adm|col|maj|capt|cmdr|lt|sgt|cpl|pvt|prof|dr|md|ph\.?d)\.?)(\s+|$)/i,
|
148
|
-
:suffix => /\s*\b(JR|Jr|jr|SR|Sr|sr|[IVX]{2,})(\.|\b)/,
|
149
|
-
:appellation => /\s*\b((mrs?|ms|fr|hr)\.?|miss|herr|frau)(\s+|$)/i
|
150
|
-
}
|
151
|
-
end
|
152
|
-
|
153
|
-
def debug?
|
154
|
-
options[:debug] || ENV['DEBUG']
|
155
|
-
end
|
156
|
-
|
157
|
-
def separator
|
158
|
-
options[:separator]
|
159
|
-
end
|
160
|
-
|
161
|
-
def comma
|
162
|
-
options[:comma]
|
163
|
-
end
|
164
|
-
|
165
|
-
def stops
|
166
|
-
options[:stops]
|
167
|
-
end
|
168
|
-
|
169
|
-
def title
|
170
|
-
options[:title]
|
171
|
-
end
|
172
|
-
|
173
|
-
def suffix
|
174
|
-
options[:suffix]
|
175
|
-
end
|
176
|
-
|
177
|
-
def appellation
|
178
|
-
options[:appellation]
|
179
|
-
end
|
180
|
-
|
181
|
-
def prefer_comma_as_separator?
|
182
|
-
options[:prefer_comma_as_separator]
|
183
|
-
end
|
184
|
-
|
185
|
-
def parse(input)
|
186
|
-
parse!(input)
|
187
|
-
rescue => e
|
188
|
-
warn e.message if debug?
|
189
|
-
[]
|
190
|
-
end
|
191
|
-
|
192
|
-
def parse!(string)
|
193
|
-
input.string = normalize(string)
|
194
|
-
reset
|
195
|
-
do_parse
|
196
|
-
end
|
197
|
-
|
198
|
-
def normalize(string)
|
199
|
-
string = string.strip
|
200
|
-
string
|
201
|
-
end
|
202
|
-
|
203
|
-
def reset
|
204
|
-
@commas, @words, @initials, @suffices, @yydebug = 0, 0, 0, 0, debug?
|
205
|
-
self
|
206
|
-
end
|
207
|
-
|
208
|
-
private
|
209
|
-
|
210
|
-
def stack
|
211
|
-
@vstack || @racc_vstack || []
|
212
|
-
end
|
213
|
-
|
214
|
-
def last_token
|
215
|
-
stack[-1]
|
216
|
-
end
|
217
|
-
|
218
|
-
def consume_separator
|
219
|
-
return next_token if seen_separator?
|
220
|
-
@commas, @words, @initials, @suffices = 0, 0, 0, 0
|
221
|
-
[:AND, :AND]
|
222
|
-
end
|
223
|
-
|
224
|
-
def consume_comma
|
225
|
-
@commas += 1
|
226
|
-
[:COMMA, :COMMA]
|
227
|
-
end
|
228
|
-
|
229
|
-
def consume_word(type, word)
|
230
|
-
@words += 1
|
231
|
-
|
232
|
-
case type
|
233
|
-
when :UWORD
|
234
|
-
@initials += 1 if word =~ /^[[:upper:]]+\b/
|
235
|
-
when :SUFFIX
|
236
|
-
@suffices += 1
|
237
|
-
end
|
238
|
-
|
239
|
-
[type, word]
|
240
|
-
end
|
241
|
-
|
242
|
-
def seen_separator?
|
243
|
-
!stack.empty? && last_token == :AND
|
244
|
-
end
|
245
|
-
|
246
|
-
def suffix?
|
247
|
-
!@suffices.zero? || will_see_suffix?
|
248
|
-
end
|
249
|
-
|
250
|
-
def will_see_suffix?
|
251
|
-
input.peek(8).to_s.strip.split(/\s+/)[0] =~ suffix
|
252
|
-
end
|
253
|
-
|
254
|
-
def will_see_initial?
|
255
|
-
input.peek(6).to_s.strip.split(/\s+/)[0] =~ /^[[:upper:]]+\b/
|
256
|
-
end
|
257
|
-
|
258
|
-
def seen_full_name?
|
259
|
-
prefer_comma_as_separator? && @words > 1 &&
|
260
|
-
(@initials > 0 || !will_see_initial?) && !will_see_suffix?
|
261
|
-
end
|
262
|
-
|
263
|
-
def next_token
|
264
|
-
case
|
265
|
-
when input.nil?, input.eos?
|
266
|
-
nil
|
267
|
-
when input.scan(separator)
|
268
|
-
consume_separator
|
269
|
-
when input.scan(/\s*#{comma}\s*/)
|
270
|
-
if @commas.zero? && !seen_full_name? || @commas == 1 && suffix?
|
271
|
-
consume_comma
|
272
|
-
else
|
273
|
-
consume_separator
|
274
|
-
end
|
275
|
-
when input.scan(/\s+/)
|
276
|
-
next_token
|
277
|
-
when input.scan(title)
|
278
|
-
consume_word(:TITLE, input.matched.strip)
|
279
|
-
when input.scan(suffix)
|
280
|
-
consume_word(:SUFFIX, input.matched.strip)
|
281
|
-
when input.scan(appellation)
|
282
|
-
[:APPELLATION, input.matched.strip]
|
283
|
-
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:upper:]][^\s#{stops}]*/)
|
284
|
-
consume_word(:UWORD, input.matched)
|
285
|
-
when input.scan(/((\\\w+)?\{[^\}]*\})*[[:lower:]][^\s#{stops}]*/)
|
286
|
-
consume_word(:LWORD, input.matched)
|
287
|
-
when input.scan(/(\\\w+)?\{[^\}]*\}[^\s#{stops}]*/)
|
288
|
-
consume_word(:PWORD, input.matched)
|
289
|
-
when input.scan(/('[^'\n]+')|("[^"\n]+")/)
|
290
|
-
consume_word(:NICK, input.matched[1...-1])
|
291
|
-
else
|
292
|
-
raise ArgumentError,
|
293
|
-
"Failed to parse name #{input.string.inspect}: unmatched data at offset #{input.pos}"
|
294
|
-
end
|
295
|
-
end
|
296
|
-
|
297
|
-
def on_error(tid, value, stack)
|
298
|
-
raise ArgumentError,
|
299
|
-
"Failed to parse name: unexpected '#{value}' at #{stack.inspect}"
|
300
|
-
end
|
301
|
-
|
302
|
-
# -*- racc -*-
|