racc 1.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. data/.gitattributes +2 -0
  2. data/.gitignore +7 -0
  3. data/COPYING +515 -0
  4. data/ChangeLog +846 -0
  5. data/DEPENDS +4 -0
  6. data/README.en.rdoc +86 -0
  7. data/README.ja.rdoc +96 -0
  8. data/Rakefile +15 -0
  9. data/TODO +5 -0
  10. data/bin/racc +308 -0
  11. data/bin/racc2y +195 -0
  12. data/bin/y2racc +339 -0
  13. data/doc/en/NEWS.en.rdoc +282 -0
  14. data/doc/en/command.en.html +78 -0
  15. data/doc/en/debug.en.rdoc +20 -0
  16. data/doc/en/grammar.en.rdoc +230 -0
  17. data/doc/en/index.en.html +10 -0
  18. data/doc/en/parser.en.rdoc +74 -0
  19. data/doc/en/usage.en.html +92 -0
  20. data/doc/ja/NEWS.ja.rdoc +307 -0
  21. data/doc/ja/command.ja.html +94 -0
  22. data/doc/ja/debug.ja.rdoc +36 -0
  23. data/doc/ja/grammar.ja.rdoc +348 -0
  24. data/doc/ja/index.ja.html +10 -0
  25. data/doc/ja/parser.ja.rdoc +125 -0
  26. data/doc/ja/usage.ja.html +414 -0
  27. data/ext/racc/cparse/MANIFEST +4 -0
  28. data/ext/racc/cparse/cparse.c +824 -0
  29. data/ext/racc/cparse/depend +1 -0
  30. data/ext/racc/cparse/extconf.rb +7 -0
  31. data/fastcache/extconf.rb +2 -0
  32. data/fastcache/fastcache.c +185 -0
  33. data/lib/racc.rb +6 -0
  34. data/lib/racc/compat.rb +40 -0
  35. data/lib/racc/debugflags.rb +59 -0
  36. data/lib/racc/exception.rb +15 -0
  37. data/lib/racc/grammar.rb +1115 -0
  38. data/lib/racc/grammarfileparser.rb +559 -0
  39. data/lib/racc/info.rb +16 -0
  40. data/lib/racc/iset.rb +91 -0
  41. data/lib/racc/logfilegenerator.rb +214 -0
  42. data/lib/racc/parser.rb +439 -0
  43. data/lib/racc/parserfilegenerator.rb +511 -0
  44. data/lib/racc/pre-setup +13 -0
  45. data/lib/racc/sourcetext.rb +34 -0
  46. data/lib/racc/state.rb +971 -0
  47. data/lib/racc/statetransitiontable.rb +316 -0
  48. data/lib/racc/static.rb +5 -0
  49. data/misc/dist.sh +31 -0
  50. data/sample/array.y +67 -0
  51. data/sample/array2.y +59 -0
  52. data/sample/calc-ja.y +66 -0
  53. data/sample/calc.y +65 -0
  54. data/sample/conflict.y +15 -0
  55. data/sample/hash.y +60 -0
  56. data/sample/lalr.y +17 -0
  57. data/sample/lists.y +57 -0
  58. data/sample/syntax.y +46 -0
  59. data/sample/yyerr.y +46 -0
  60. data/setup.rb +1587 -0
  61. data/tasks/doc.rb +12 -0
  62. data/tasks/email.rb +55 -0
  63. data/tasks/file.rb +37 -0
  64. data/tasks/gem.rb +37 -0
  65. data/tasks/test.rb +16 -0
  66. data/test/assets/chk.y +126 -0
  67. data/test/assets/conf.y +16 -0
  68. data/test/assets/digraph.y +29 -0
  69. data/test/assets/echk.y +118 -0
  70. data/test/assets/err.y +60 -0
  71. data/test/assets/expect.y +7 -0
  72. data/test/assets/firstline.y +4 -0
  73. data/test/assets/ichk.y +102 -0
  74. data/test/assets/intp.y +546 -0
  75. data/test/assets/mailp.y +437 -0
  76. data/test/assets/newsyn.y +25 -0
  77. data/test/assets/noend.y +4 -0
  78. data/test/assets/nonass.y +41 -0
  79. data/test/assets/normal.y +27 -0
  80. data/test/assets/norule.y +4 -0
  81. data/test/assets/nullbug1.y +25 -0
  82. data/test/assets/nullbug2.y +15 -0
  83. data/test/assets/opt.y +123 -0
  84. data/test/assets/percent.y +35 -0
  85. data/test/assets/recv.y +97 -0
  86. data/test/assets/rrconf.y +14 -0
  87. data/test/assets/scan.y +72 -0
  88. data/test/assets/syntax.y +50 -0
  89. data/test/assets/unterm.y +5 -0
  90. data/test/assets/useless.y +12 -0
  91. data/test/assets/yyerr.y +46 -0
  92. data/test/bench.y +36 -0
  93. data/test/helper.rb +88 -0
  94. data/test/infini.y +8 -0
  95. data/test/scandata/brace +7 -0
  96. data/test/scandata/gvar +1 -0
  97. data/test/scandata/normal +4 -0
  98. data/test/scandata/percent +18 -0
  99. data/test/scandata/slash +10 -0
  100. data/test/src.intp +34 -0
  101. data/test/start.y +20 -0
  102. data/test/test_chk_y.rb +51 -0
  103. data/test/test_grammar_file_parser.rb +15 -0
  104. data/test/test_racc_command.rb +155 -0
  105. data/test/test_scan_y.rb +51 -0
  106. data/test/testscanner.rb +51 -0
  107. data/web/racc.en.rhtml +42 -0
  108. data/web/racc.ja.rhtml +51 -0
  109. metadata +166 -0
@@ -0,0 +1,559 @@
1
+ #
2
+ # $Id$
3
+ #
4
+ # Copyright (c) 1999-2006 Minero Aoki
5
+ #
6
+ # This program is free software.
7
+ # You can distribute/modify this program under the terms of
8
+ # the GNU LGPL, Lesser General Public License version 2.1.
9
+ # For details of the GNU LGPL, see the file "COPYING".
10
+ #
11
+
12
+ require 'racc'
13
+ require 'racc/compat'
14
+ require 'racc/grammar'
15
+ require 'racc/parserfilegenerator'
16
+ require 'racc/sourcetext'
17
+ require 'stringio'
18
+
19
+ module Racc
20
+ # Bootstrap grammar for racc grammar files, written with the Grammar.define DSL; the parser class is derived from it below via grammar.parser_class.
21
+ grammar = Grammar.define {
22
+ g = self
23
+ # Overall file shape: CLASS <name> <params...> RULE <rules> [END].
24
+ g.class = seq(:CLASS, :cname, many(:param), :RULE, :rules, option(:END))
25
+ # Class name, optionally followed by "< Superclass"; both are stored in @result.params.
26
+ g.cname = seq(:rubyconst) {|name|
27
+ @result.params.classname = name
28
+ }\
29
+ | seq(:rubyconst, "<", :rubyconst) {|c, _, s|
30
+ @result.params.classname = c
31
+ @result.params.superclass = s
32
+ }
33
+ # Constant path: SYMBOL tokens separated by colon2, joined into a single "A::B" string.
34
+ g.rubyconst = separated_by1(:colon2, :SYMBOL) {|syms|
35
+ syms.map {|s| s.to_s }.join('::')
36
+ }
37
+ # "::" is matched as two consecutive ':' tokens.
38
+ g.colon2 = seq(':', ':')
39
+ # Declarations accepted between the class name and RULE: convert, precedence, start, token, options, expect.
40
+ g.param = seq(:CONV, many1(:convdef), :END) {|*|
41
+ #@grammar.end_convert_block # FIXME
42
+ }\
43
+ | seq(:PRECHIGH, many1(:precdef), :PRECLOW) {|*|
44
+ @grammar.end_precedence_declaration true
45
+ }\
46
+ | seq(:PRECLOW, many1(:precdef), :PRECHIGH) {|*|
47
+ @grammar.end_precedence_declaration false
48
+ }\
49
+ | seq(:START, :symbol) {|_, sym|
50
+ @grammar.start_symbol = sym
51
+ }\
52
+ | seq(:TOKEN, :symbols) {|_, syms|
53
+ syms.each do |s|
54
+ s.should_terminal
55
+ end
56
+ }\
57
+ | seq(:OPTION, :options) {|_, syms|
58
+ syms.each do |opt|
59
+ case opt
60
+ when 'result_var'
61
+ @result.params.result_var = true
62
+ when 'no_result_var'
63
+ @result.params.result_var = false
64
+ when 'omit_action_call'
65
+ @result.params.omit_action_call = true
66
+ when 'no_omit_action_call'
67
+ @result.params.omit_action_call = false
68
+ else
69
+ raise CompileError, "unknown option: #{opt}"
70
+ end
71
+ end
72
+ }\
73
+ | seq(:EXPECT, :DIGIT) {|_, num|
74
+ if @grammar.n_expected_srconflicts
75
+ raise CompileError, "`expect' seen twice"
76
+ end
77
+ @grammar.n_expected_srconflicts = num
78
+ }
79
+ # One entry of a convert block: a symbol followed by a string, stored in sym.serialized.
80
+ g.convdef = seq(:symbol, :STRING) {|sym, code|
81
+ sym.serialized = code
82
+ }
83
+ # One associativity line (left / right / nonassoc) inside a precedence block.
84
+ g.precdef = seq(:LEFT, :symbols) {|_, syms|
85
+ @grammar.declare_precedence :Left, syms
86
+ }\
87
+ | seq(:RIGHT, :symbols) {|_, syms|
88
+ @grammar.declare_precedence :Right, syms
89
+ }\
90
+ | seq(:NONASSOC, :symbols) {|_, syms|
91
+ @grammar.declare_precedence :Nonassoc, syms
92
+ }
93
+ # Left-recursive symbol list; a "|" between symbols is accepted (presumably the default action passes the list through -- TODO confirm).
94
+ g.symbols = seq(:symbol) {|sym|
95
+ [sym]
96
+ }\
97
+ | seq(:symbols, :symbol) {|list, sym|
98
+ list.push sym
99
+ list
100
+ }\
101
+ | seq(:symbols, "|")
102
+ # A grammar symbol written as a bare identifier or as a quoted string; either form is interned in @grammar.
103
+ g.symbol = seq(:SYMBOL) {|sym| @grammar.intern(sym) }\
104
+ | seq(:STRING) {|str| @grammar.intern(str) }
105
+ # Option names, converted to Strings for the case/when in g.param.
106
+ g.options = many(:SYMBOL) {|syms| syms.map {|s| s.to_s } }
107
+ # The rule section: whatever is still accumulated at the end is flushed as a final rule block.
108
+ g.rules = option(:rules_core) {|list|
109
+ add_rule_block list unless list.empty?
110
+ nil
111
+ }
112
+ # Flat accumulation of rule items: ';' flushes the list; ':' pops the last item as the next rule's target and flushes the rest.
113
+ g.rules_core = seq(:symbol) {|sym|
114
+ [sym]
115
+ }\
116
+ | seq(:rules_core, :rule_item) {|list, i|
117
+ list.push i
118
+ list
119
+ }\
120
+ | seq(:rules_core, ';') {|list, *|
121
+ add_rule_block list unless list.empty?
122
+ list.clear
123
+ list
124
+ }\
125
+ | seq(:rules_core, ':') {|list, *|
126
+ next_target = list.pop
127
+ add_rule_block list unless list.empty?
128
+ [next_target]
129
+ }
130
+ # Items allowed in a rule body: a symbol, an alternation mark, an "= symbol" precedence override, or an action block.
131
+ g.rule_item = seq(:symbol)\
132
+ | seq("|") {|*|
133
+ OrMark.new(@scanner.lineno)
134
+ }\
135
+ | seq("=", :symbol) {|_, sym|
136
+ Prec.new(sym, @scanner.lineno)
137
+ }\
138
+ | seq(:ACTION) {|src|
139
+ UserAction.source_text(src)
140
+ }
141
+ }
142
+
143
# Build the parser class from the bootstrap grammar defined above.
GrammarFileParser = grammar.parser_class

# The bootstrap grammar itself must be conflict-free; abort loading otherwise.
raise 'Racc boot script fatal: S/R conflict in build' if grammar.states.srconflict_exist?
raise 'Racc boot script fatal: R/R conflict in build' if grammar.states.rrconflict_exist?
151
+
152
class GrammarFileParser # reopen

  # Value returned by GrammarFileParser#parse: the Grammar that was built
  # together with the ParserFileGenerator::Params collected while parsing.
  class Result
    def initialize(grammar)
      @grammar = grammar
      @params = ParserFileGenerator::Params.new
    end

    attr_reader :grammar
    attr_reader :params
  end

  # Reads +filename+ and parses its contents, numbering lines from 1.
  # Returns a Result.
  def self.parse_file(filename)
    parse(File.read(filename), filename, 1)
  end

  # Parses +src+ with a parser constructed from the default debug flags.
  # Returns a Result.
  def self.parse(src, filename = '-', lineno = 1)
    new.parse(src, filename, lineno)
  end

  # debug_flags - object whose #parse value is kept in @yydebug and later
  #               handed to the scanner as its debug switch.
  def initialize(debug_flags = DebugFlags.new)
    @yydebug = debug_flags.parse
  end

  # Parses grammar source text and returns a Result.
  #
  # src      - text of the grammar file.
  # filename - name used in error locations and attached to user code.
  # lineno   - line number of the first line of +src+; used by #location.
  #
  # Raises CompileError for problems detected by this class's helpers.
  def parse(src, filename = '-', lineno = 1)
    @filename = filename
    @lineno = lineno
    @scanner = GrammarFileScanner.new(src, @filename)
    @scanner.debug = @yydebug
    @grammar = Grammar.new
    @result = Result.new(@grammar)
    @embedded_action_seq = 0
    yyparse @scanner, :yylex
    # User code blocks live after the grammar section and are handled
    # separately, once the scanner has captured the epilogue.
    parse_user_code
    @result.grammar.init
    @result
  end

  private

  # NOTE(review): #parse drives the scanner through yyparse/:yylex, and no
  # public #scan is visible on GrammarFileScanner, so this looks unused --
  # confirm before relying on it.
  def next_token
    @scanner.scan
  end

  # Turns an unexpected token into a CompileError carrying the current
  # location. +val+ is rendered via id2name when available, as-is when it is
  # a String, and via inspect otherwise.
  def on_error(tok, val, _values)
    if val.respond_to?(:id2name)
      v = val.id2name
    elsif val.kind_of?(String)
      v = val
    else
      v = val.inspect
    end
    raise CompileError, "#{location()}: unexpected token '#{v}'"
  end

  # "file:line" of the scanner's current position, offset by the line number
  # that was passed to #parse.
  def location
    "#{@filename}:#{@lineno - 1 + @scanner.lineno}"
  end

  # Converts one accumulated rule block into grammar rules.
  #
  # list - the rule target followed by its body items; OrMark items separate
  #        alternatives and a Prec item sets the explicit precedence of the
  #        alternative it appears in. The list is consumed (target shifted).
  #
  # Raises CompileError when the block does not start with a plain symbol or
  # when one alternative carries two Prec items.
  def add_rule_block(list)
    target = list.shift
    case target
    when OrMark, UserAction, Prec
      raise CompileError, "#{target.lineno}: unexpected symbol #{target.name}"
    end
    sprec = nil
    curr = []
    list.each do |item|
      case item
      when OrMark
        # End of one alternative: emit it and start collecting the next.
        add_rule target, curr, sprec
        curr = []
        sprec = nil
      when Prec
        raise CompileError, "'=<prec>' used twice in one rule" if sprec
        sprec = item.symbol
      else
        curr.push item
      end
    end
    add_rule target, curr, sprec
  end

  # Adds a single rule "target : list" to the grammar. A trailing UserAction
  # becomes the rule's action; any UserAction elsewhere in the body is
  # replaced by a generated symbol (see #embedded_action).
  def add_rule(target, list, sprec)
    if list.last.kind_of?(UserAction)
      act = list.pop
    else
      act = UserAction.empty
    end
    list.map! {|s| s.kind_of?(UserAction) ? embedded_action(s) : s }
    rule = Rule.new(target, list, act)
    rule.specified_prec = sprec
    @grammar.add rule
  end

  # Registers an empty rule that carries +act+ under a fresh "@<n>" symbol
  # and returns that symbol so it can stand in for the mid-rule action.
  def embedded_action(act)
    sym = @grammar.intern("@#{@embedded_action_seq += 1}".intern, true)
    @grammar.add Rule.new(sym, [], act)
    sym
  end

  #
  # User Code Block
  #

  # Splits the scanner's epilogue on lines starting with "----" and stores
  # each block under its label. A header of the form "label = path ..."
  # additionally pulls in the named files before the inline body.
  def parse_user_code
    line = @scanner.lineno
    _, *blocks = *@scanner.epilogue.split(/^----/)
    blocks.each do |block|
      # String#to_a no longer exists on Ruby >= 1.9; String#lines gives the
      # same newline-preserving split.
      header, *body = block.lines.to_a
      label0, paths = *header.sub(/\A-+/, '').split('=', 2)
      label = canonical_label(label0)
      (paths ? paths.strip.split(' ') : []).each do |path|
        add_user_code label, SourceText.new(File.read(path), path, 1)
      end
      add_user_code label, SourceText.new(body.join(''), @filename, line + 1)
      # Skip the header line plus the body to reach the next block's header.
      line += (1 + body.size)
    end
  end

  # Accepted user-code labels mapped to the Params attribute that stores them.
  USER_CODE_LABELS = {
    'header'  => :header,
    'prepare' => :header,   # obsolete
    'inner'   => :inner,
    'footer'  => :footer,
    'driver'  => :footer    # obsolete
  }.freeze

  # Normalizes a raw block header (surrounding whitespace stripped,
  # downcased, first word taken) and returns it.
  # Raises CompileError if the result is not a key of USER_CODE_LABELS.
  def canonical_label(src)
    label = src.to_s.strip.downcase.slice(/\w+/)
    unless USER_CODE_LABELS.key?(label)
      raise CompileError, "unknown user code type: #{label.inspect}"
    end
    label
  end

  # Appends +src+ to the Params list selected by +label+.
  def add_user_code(label, src)
    @result.params.send(USER_CODE_LABELS[label]).push src
  end

end
293
+
294
+
295
# Line-oriented tokenizer for racc grammar files. It yields [symbol, value]
# pairs through #yylex, and once the grammar section ends (end of input or a
# line starting with "----") exposes the joined source text via #epilogue.
class GrammarFileScanner

  # str      - complete grammar file text.
  # filename - name attached to the SourceText created for each action.
  def initialize(str, filename = '-')
    @lines = str.split(/\n|\r\n|\r/)
    @filename = filename
    @lineno = -1          # index into @lines; next_line moves it to 0
    @line_head = true     # true until the first identifier of a line is seen
    @in_rule_blk = false
    @in_conv_blk = false
    @in_block = nil       # label of the multi-line construct being scanned
    @epilogue = ''
    @debug = false
    next_line
  end

  attr_reader :epilogue

  # 1-based number of the line currently being scanned.
  def lineno
    @lineno + 1
  end

  attr_accessor :debug

  # Yields every token of the grammar section as [symbol, value], then a
  # final end marker (nil, or [nil, nil] in debug mode). With #debug set,
  # each token is also printed to $stderr.
  def yylex(&block)
    if @debug
      yylex0 do |sym, tok|
        $stderr.printf "%7d %-10s %s\n", lineno(), sym.inspect, tok.inspect
        yield [sym, tok]
      end
    else
      yylex0(&block)
    end
  end

  private

  # Core tokenizer loop. @line is nil when there is no grammar line to scan
  # (empty input, or input starting with "----"); then only the end marker
  # is yielded and the epilogue captured by #initialize is left untouched.
  def yylex0
    while @line
      until @line.empty?
        @line.sub!(/\A\s+/, '')
        if /\A\#/ =~ @line
          # '#' comment: ignore the rest of the line.
          break
        elsif /\A\/\*/ =~ @line
          skip_comment
        elsif s = reads(/\A[a-zA-Z_]\w*/)
          yield [atom_symbol(s), s.intern]
        elsif s = reads(/\A\d+/)
          yield [:DIGIT, s.to_i]
        elsif ch = reads(/\A./)
          case ch
          when '"', "'"
            # NOTE(review): the quoted token text is evaluated as Ruby code,
            # so a grammar file can execute arbitrary code here; only scan
            # grammar files from trusted sources.
            yield [:STRING, eval(scan_quoted(ch))]
          when '{'
            lineno = lineno()
            yield [:ACTION, SourceText.new(scan_action(), @filename, lineno)]
          else
            @line_head = false if ch == '|'
            yield [ch, ch]
          end
        end
      end
      break unless next_line
    end
    yield nil
  end

  # Advances to the next line. Returns true when a grammar line is available
  # in @line. At end of input or at a line starting with "----" it stores all
  # lines (joined with "\n") in @epilogue, clears @lines, sets @line to nil
  # and returns false.
  # Raises CompileError if that happens inside an unterminated construct.
  def next_line
    @lineno += 1
    @line = @lines[@lineno]
    if not @line or /\A----/ =~ @line
      @epilogue = @lines.join("\n")
      @lines.clear
      @line = nil
      if @in_block
        @lineno -= 1
        scan_error! sprintf('unterminated %s', @in_block)
      end
      false
    else
      @line.sub!(/(?:\n|\r\n|\r)\z/, '')
      @line_head = true
      true
    end
  end

  # Keywords recognized at the head of a line outside rule/convert blocks.
  ReservedWord = {
    'right'    => :RIGHT,
    'left'     => :LEFT,
    'nonassoc' => :NONASSOC,
    'preclow'  => :PRECLOW,
    'prechigh' => :PRECHIGH,
    'token'    => :TOKEN,
    'convert'  => :CONV,
    'options'  => :OPTION,
    'start'    => :START,
    'expect'   => :EXPECT,
    'class'    => :CLASS,
    'rule'     => :RULE,
    'end'      => :END
  }.freeze

  # Classifies an identifier. 'end' is always :END and closes any rule or
  # convert block. Other reserved words count only as the first identifier
  # of a line outside those blocks; everything else is :SYMBOL.
  def atom_symbol(token)
    if token == 'end'
      symbol = :END
      @in_conv_blk = false
      @in_rule_blk = false
    else
      if @line_head and not @in_conv_blk and not @in_rule_blk
        symbol = ReservedWord[token] || :SYMBOL
      else
        symbol = :SYMBOL
      end
      case symbol
      when :RULE then @in_rule_blk = true
      when :CONV then @in_conv_blk = true
      end
    end
    @line_head = false
    symbol
  end

  # Skips a /* ... */ comment, which may span lines. An unterminated comment
  # is reported by next_line because @in_block is set while scanning.
  def skip_comment
    @in_block = 'comment'
    until m = /\*\//.match(@line)
      next_line
    end
    @line = m.post_match
    @in_block = nil
  end

  # When true, scanned literals and operators are wrapped in "||tag->"
  # markers in the returned text (see scan_quoted / scan_action).
  $raccs_print_type = false

  # Scans a Ruby action body; the opening '{' has already been consumed.
  # Returns the text up to, but not including, the matching '}'. Nested
  # braces are counted, and strings, % literals, regexps, '#' comments and
  # "$x" globals are skipped so that braces inside them are not counted.
  # Raises CompileError (through next_line) if the input ends first.
  def scan_action
    buf = ''
    nest = 1
    pre = nil            # previously scanned fragment, used by literal_head?
    @in_block = 'action'
    begin
      pre = nil
      if s = reads(/\A\s+/)
        # does not set 'pre'
        buf << s
      end
      until @line.empty?
        if s = reads(/\A[^'"`{}%#\/\$]+/)
          buf << (pre = s)
          next
        end
        case ch = read(1)
        when '{'
          nest += 1
          buf << (pre = ch)
        when '}'
          nest -= 1
          if nest == 0
            @in_block = nil
            return buf
          end
          buf << (pre = ch)
        when '#'   # comment
          buf << ch << @line
          break
        when "'", '"', '`'
          buf << (pre = scan_quoted(ch))
        when '%'
          if literal_head? pre, @line
            # % string, regexp, array
            buf << ch
            case ch = read(1)
            when /[qQx]/n
              buf << ch << (pre = scan_quoted(read(1), '%string'))
            when /[wW]/n
              # Character class, so that a single 'w' or 'W' actually matches.
              buf << ch << (pre = scan_quoted(read(1), '%array'))
            when /s/n
              buf << ch << (pre = scan_quoted(read(1), '%symbol'))
            when /r/n
              buf << ch << (pre = scan_quoted(read(1), '%regexp'))
            when /[a-zA-Z0-9= ]/n   # does not include "_"
              scan_error! "unknown type of % literal '%#{ch}'"
            else
              buf << (pre = scan_quoted(ch, '%string'))
            end
          else
            # operator
            buf << '||op->' if $raccs_print_type
            buf << (pre = ch)
          end
        when '/'
          if literal_head? pre, @line
            # regexp
            buf << (pre = scan_quoted(ch, 'regexp'))
          else
            # operator
            buf << '||op->' if $raccs_print_type
            buf << (pre = ch)
          end
        when '$'   # gvar
          buf << ch << (pre = read(1))
        else
          raise 'racc: fatal: must not happen'
        end
      end
      buf << "\n"
    end while next_line()
    raise 'racc: fatal: scan finished before parser finished'
  end

  # Heuristic: can a '%' or '/' here start a literal rather than be an
  # operator? True when the preceding fragment does not end in a word
  # character and the following text is non-empty and does not start with
  # whitespace or '='.
  def literal_head?(pre, post)
    (!pre || /[a-zA-Z_0-9]/n !~ pre[-1,1]) &&
      !post.empty? && /\A[\s\=]/n !~ post
  end

  # Consumes and returns the next +len+ characters of the current line.
  # @line stays a String (possibly empty) even when reading at end of line.
  def read(len)
    s = @line[0, len]
    @line = @line[len .. -1] || ''
    s
  end

  # Consumes and returns the text matched by +re+ on the current line, or
  # returns nil (consuming nothing) when it does not match.
  def reads(re)
    m = re.match(@line) or return nil
    @line = m.post_match
    m[0]
  end

  # Scans a quoted literal whose opening delimiter +left+ has already been
  # consumed. Returns the literal including both delimiters.
  #
  # left - opening delimiter character.
  # tag  - construct name used in "unterminated ..." errors and type markers.
  #
  # Raises CompileError (through next_line) if the literal is never closed.
  def scan_quoted(left, tag = 'string')
    buf = left.dup
    buf = "||#{tag}->" + buf if $raccs_print_type
    re = get_quoted_re(left)
    sv, @in_block = @in_block, tag
    begin
      if s = reads(re)
        buf << s
        break
      else
        # The literal continues on the next line: keep the line break so the
        # literal's content and the line count of the text stay intact.
        buf << @line << "\n"
      end
    end while next_line()
    @in_block = sv
    buf << "<-#{tag}||" if $raccs_print_type
    buf
  end

  # Closing delimiter for each bracket-style opening delimiter.
  LEFT_TO_RIGHT = {
    '(' => ')',
    '{' => '}',
    '[' => ']',
    '<' => '>'
  }.freeze

  # Memoized terminator regexps, keyed by opening delimiter.
  CACHE = {}

  # Regexp matching the rest of a literal opened by +left+: any run of
  # characters with backslash escapes honoured, up to and including the
  # closing delimiter.
  def get_quoted_re(left)
    term = Regexp.quote(LEFT_TO_RIGHT[left] || left)
    CACHE[left] ||= /\A[^#{term}\\]*(?:\\.[^\\#{term}]*)*#{term}/
  end

  # Raises CompileError with the current line number prepended to +msg+.
  def scan_error!(msg)
    raise CompileError, "#{lineno()}: #{msg}"
  end

end
558
+
559
+ end # module Racc