racc 1.4.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (109)
  1. data/.gitattributes +2 -0
  2. data/.gitignore +7 -0
  3. data/COPYING +515 -0
  4. data/ChangeLog +846 -0
  5. data/DEPENDS +4 -0
  6. data/README.en.rdoc +86 -0
  7. data/README.ja.rdoc +96 -0
  8. data/Rakefile +15 -0
  9. data/TODO +5 -0
  10. data/bin/racc +308 -0
  11. data/bin/racc2y +195 -0
  12. data/bin/y2racc +339 -0
  13. data/doc/en/NEWS.en.rdoc +282 -0
  14. data/doc/en/command.en.html +78 -0
  15. data/doc/en/debug.en.rdoc +20 -0
  16. data/doc/en/grammar.en.rdoc +230 -0
  17. data/doc/en/index.en.html +10 -0
  18. data/doc/en/parser.en.rdoc +74 -0
  19. data/doc/en/usage.en.html +92 -0
  20. data/doc/ja/NEWS.ja.rdoc +307 -0
  21. data/doc/ja/command.ja.html +94 -0
  22. data/doc/ja/debug.ja.rdoc +36 -0
  23. data/doc/ja/grammar.ja.rdoc +348 -0
  24. data/doc/ja/index.ja.html +10 -0
  25. data/doc/ja/parser.ja.rdoc +125 -0
  26. data/doc/ja/usage.ja.html +414 -0
  27. data/ext/racc/cparse/MANIFEST +4 -0
  28. data/ext/racc/cparse/cparse.c +824 -0
  29. data/ext/racc/cparse/depend +1 -0
  30. data/ext/racc/cparse/extconf.rb +7 -0
  31. data/fastcache/extconf.rb +2 -0
  32. data/fastcache/fastcache.c +185 -0
  33. data/lib/racc.rb +6 -0
  34. data/lib/racc/compat.rb +40 -0
  35. data/lib/racc/debugflags.rb +59 -0
  36. data/lib/racc/exception.rb +15 -0
  37. data/lib/racc/grammar.rb +1115 -0
  38. data/lib/racc/grammarfileparser.rb +559 -0
  39. data/lib/racc/info.rb +16 -0
  40. data/lib/racc/iset.rb +91 -0
  41. data/lib/racc/logfilegenerator.rb +214 -0
  42. data/lib/racc/parser.rb +439 -0
  43. data/lib/racc/parserfilegenerator.rb +511 -0
  44. data/lib/racc/pre-setup +13 -0
  45. data/lib/racc/sourcetext.rb +34 -0
  46. data/lib/racc/state.rb +971 -0
  47. data/lib/racc/statetransitiontable.rb +316 -0
  48. data/lib/racc/static.rb +5 -0
  49. data/misc/dist.sh +31 -0
  50. data/sample/array.y +67 -0
  51. data/sample/array2.y +59 -0
  52. data/sample/calc-ja.y +66 -0
  53. data/sample/calc.y +65 -0
  54. data/sample/conflict.y +15 -0
  55. data/sample/hash.y +60 -0
  56. data/sample/lalr.y +17 -0
  57. data/sample/lists.y +57 -0
  58. data/sample/syntax.y +46 -0
  59. data/sample/yyerr.y +46 -0
  60. data/setup.rb +1587 -0
  61. data/tasks/doc.rb +12 -0
  62. data/tasks/email.rb +55 -0
  63. data/tasks/file.rb +37 -0
  64. data/tasks/gem.rb +37 -0
  65. data/tasks/test.rb +16 -0
  66. data/test/assets/chk.y +126 -0
  67. data/test/assets/conf.y +16 -0
  68. data/test/assets/digraph.y +29 -0
  69. data/test/assets/echk.y +118 -0
  70. data/test/assets/err.y +60 -0
  71. data/test/assets/expect.y +7 -0
  72. data/test/assets/firstline.y +4 -0
  73. data/test/assets/ichk.y +102 -0
  74. data/test/assets/intp.y +546 -0
  75. data/test/assets/mailp.y +437 -0
  76. data/test/assets/newsyn.y +25 -0
  77. data/test/assets/noend.y +4 -0
  78. data/test/assets/nonass.y +41 -0
  79. data/test/assets/normal.y +27 -0
  80. data/test/assets/norule.y +4 -0
  81. data/test/assets/nullbug1.y +25 -0
  82. data/test/assets/nullbug2.y +15 -0
  83. data/test/assets/opt.y +123 -0
  84. data/test/assets/percent.y +35 -0
  85. data/test/assets/recv.y +97 -0
  86. data/test/assets/rrconf.y +14 -0
  87. data/test/assets/scan.y +72 -0
  88. data/test/assets/syntax.y +50 -0
  89. data/test/assets/unterm.y +5 -0
  90. data/test/assets/useless.y +12 -0
  91. data/test/assets/yyerr.y +46 -0
  92. data/test/bench.y +36 -0
  93. data/test/helper.rb +88 -0
  94. data/test/infini.y +8 -0
  95. data/test/scandata/brace +7 -0
  96. data/test/scandata/gvar +1 -0
  97. data/test/scandata/normal +4 -0
  98. data/test/scandata/percent +18 -0
  99. data/test/scandata/slash +10 -0
  100. data/test/src.intp +34 -0
  101. data/test/start.y +20 -0
  102. data/test/test_chk_y.rb +51 -0
  103. data/test/test_grammar_file_parser.rb +15 -0
  104. data/test/test_racc_command.rb +155 -0
  105. data/test/test_scan_y.rb +51 -0
  106. data/test/testscanner.rb +51 -0
  107. data/web/racc.en.rhtml +42 -0
  108. data/web/racc.ja.rhtml +51 -0
  109. metadata +166 -0
@@ -0,0 +1,559 @@
1
+ #
2
+ # $Id$
3
+ #
4
+ # Copyright (c) 1999-2006 Minero Aoki
5
+ #
6
+ # This program is free software.
7
+ # You can distribute/modify this program under the terms of
8
+ # the GNU LGPL, Lesser General Public License version 2.1.
9
+ # For details of the GNU LGPL, see the file "COPYING".
10
+ #
11
+
12
+ require 'racc'
13
+ require 'racc/compat'
14
+ require 'racc/grammar'
15
+ require 'racc/parserfilegenerator'
16
+ require 'racc/sourcetext'
17
+ require 'stringio'
18
+
19
+ module Racc
20
+
21
# Bootstrap grammar describing the syntax of a Racc grammar file, written
# with the Grammar.define DSL.  Each `g.<name> = ...` assigns the
# alternatives for one nonterminal; alternatives are joined with `|` and the
# attached blocks are the semantic actions.  The actions read and write
# @grammar, @result and @scanner and call add_rule_block, so they are
# presumably evaluated in the context of the generated parser instance --
# confirm against Grammar.define.
grammar = Grammar.define {
  g = self

  # Top-level shape: class header, declarations, the rule section, and an
  # optional closing END token.
  g.class = seq(:CLASS, :cname, many(:param), :RULE, :rules, option(:END))

  # Class name, optionally followed by "< Superclass".
  g.cname =
    seq(:rubyconst) {|name|
      @result.params.classname = name
    } |
    seq(:rubyconst, "<", :rubyconst) {|c, _, s|
      @result.params.classname = c
      @result.params.superclass = s
    }

  # A (possibly namespaced) constant name, rebuilt as "A::B::C".
  g.rubyconst = separated_by1(:colon2, :SYMBOL) {|syms|
    syms.map(&:to_s).join('::')
  }

  g.colon2 = seq(':', ':')

  # Declarations that may appear between the class header and the rules.
  g.param =
    seq(:CONV, many1(:convdef), :END) {|*|
      #@grammar.end_convert_block   # FIXME
    } |
    seq(:PRECHIGH, many1(:precdef), :PRECLOW) {|*|
      @grammar.end_precedence_declaration true
    } |
    seq(:PRECLOW, many1(:precdef), :PRECHIGH) {|*|
      @grammar.end_precedence_declaration false
    } |
    seq(:START, :symbol) {|_, sym|
      @grammar.start_symbol = sym
    } |
    seq(:TOKEN, :symbols) {|_, syms|
      syms.each {|s| s.should_terminal }
    } |
    seq(:OPTION, :options) {|_, syms|
      # Each option name toggles one flag on the parser-file parameters;
      # anything unrecognized is a compile error.
      syms.each do |opt|
        case opt
        when 'result_var'          then @result.params.result_var = true
        when 'no_result_var'       then @result.params.result_var = false
        when 'omit_action_call'    then @result.params.omit_action_call = true
        when 'no_omit_action_call' then @result.params.omit_action_call = false
        else
          raise CompileError, "unknown option: #{opt}"
        end
      end
    } |
    seq(:EXPECT, :DIGIT) {|_, num|
      # Only one `expect` declaration is allowed per grammar.
      if @grammar.n_expected_srconflicts
        raise CompileError, "`expect' seen twice"
      end
      @grammar.n_expected_srconflicts = num
    }

  # One entry of a convert block: a symbol and its replacement code string.
  g.convdef = seq(:symbol, :STRING) {|sym, code|
    sym.serialized = code
  }

  # One line of a precedence table.
  g.precdef =
    seq(:LEFT, :symbols) {|_, syms|
      @grammar.declare_precedence :Left, syms
    } |
    seq(:RIGHT, :symbols) {|_, syms|
      @grammar.declare_precedence :Right, syms
    } |
    seq(:NONASSOC, :symbols) {|_, syms|
      @grammar.declare_precedence :Nonassoc, syms
    }

  # Non-empty list of symbols; a "|" between symbols is accepted and has no
  # action of its own.
  g.symbols =
    seq(:symbol) {|sym|
      [sym]
    } |
    seq(:symbols, :symbol) {|list, sym|
      list.push sym
      list
    } |
    seq(:symbols, "|")

  # A grammar symbol written either bare or as a quoted string.
  g.symbol =
    seq(:SYMBOL) {|sym| @grammar.intern(sym) } |
    seq(:STRING) {|str| @grammar.intern(str) }

  g.options = many(:SYMBOL) {|syms| syms.map(&:to_s) }

  # The rule section; whatever is still pending in the list at the end is
  # flushed as one last rule block.
  g.rules = option(:rules_core) {|list|
    add_rule_block list  unless list.empty?
    nil
  }

  # Rule items are accumulated in a flat list.  ';' flushes the list as a
  # rule block.  ':' means the item just read was the target of the *next*
  # rule, so it is popped, the rest is flushed, and a new list is started
  # with that target.
  g.rules_core =
    seq(:symbol) {|sym|
      [sym]
    } |
    seq(:rules_core, :rule_item) {|list, i|
      list.push i
      list
    } |
    seq(:rules_core, ';') {|list, *|
      add_rule_block list  unless list.empty?
      list.clear
      list
    } |
    seq(:rules_core, ':') {|list, *|
      next_target = list.pop
      add_rule_block list  unless list.empty?
      [next_target]
    }

  # A single element of a rule body: a symbol, an alternation mark, an
  # explicit precedence ("= sym"), or an action.
  g.rule_item =
    seq(:symbol) |
    seq("|") {|*|
      OrMark.new(@scanner.lineno)
    } |
    seq("=", :symbol) {|_, sym|
      Prec.new(sym, @scanner.lineno)
    } |
    seq(:ACTION) {|src|
      UserAction.source_text(src)
    }
}
142
+
143
# Parser class produced from the bootstrap grammar above.
GrammarFileParser = grammar.parser_class

# The bootstrap grammar must be free of conflicts; refuse to load otherwise.
raise 'Racc boot script fatal: S/R conflict in build' if grammar.states.srconflict_exist?
raise 'Racc boot script fatal: R/R conflict in build' if grammar.states.rrconflict_exist?
151
+
152
class GrammarFileParser # reopen

  # Value returned by #parse: the grammar that was built plus the
  # parser-file parameters collected while reading the grammar file.
  class Result
    def initialize(grammar)
      @grammar = grammar
      @params = ParserFileGenerator::Params.new
    end

    attr_reader :grammar
    attr_reader :params
  end

  # Reads +filename+ and parses its contents.  Returns a Result.
  def self.parse_file(filename)
    parse(File.read(filename), filename, 1)
  end

  # Parses the grammar text +src+ with a fresh parser.  +filename+ and
  # +lineno+ are only used for locations in messages and source text.
  def self.parse(src, filename = '-', lineno = 1)
    new().parse(src, filename, lineno)
  end

  def initialize(debug_flags = DebugFlags.new)
    @yydebug = debug_flags.parse
  end

  # Parses +src+ and returns a Result.
  #
  # Steps, in order: scan and parse the grammar section via yyparse, collect
  # the user code blocks that follow it, then call #init on the grammar.
  def parse(src, filename = '-', lineno = 1)
    @filename = filename
    @lineno = lineno
    @scanner = GrammarFileScanner.new(src, @filename)
    @scanner.debug = @yydebug
    @grammar = Grammar.new
    @result = Result.new(@grammar)
    @embedded_action_seq = 0
    yyparse @scanner, :yylex
    parse_user_code
    @result.grammar.init
    @result
  end

  private

  # NOTE(review): GrammarFileScanner, as defined in this file, has no #scan
  # method, and #parse drives the scanner through yyparse/yylex instead.
  # This is presumably unused -- confirm before relying on it.
  def next_token
    @scanner.scan
  end

  # Turns a parse error into a CompileError naming the offending token.
  def on_error(tok, val, _values)
    v = if val.respond_to?(:id2name)
          val.id2name
        elsif val.kind_of?(String)
          val
        else
          val.inspect
        end
    raise CompileError, "#{location()}: unexpected token '#{v}'"
  end

  # "file:line" of the scanner's current position, shifted by the starting
  # line number given to #parse.
  def location
    "#{@filename}:#{@lineno - 1 + @scanner.lineno}"
  end

  # Converts one flat item list, as accumulated by the grammar actions, into
  # rules.  The first item is the rule target; OrMark items separate
  # alternatives; a Prec item sets the explicit precedence of the
  # alternative it appears in (at most one per alternative).
  def add_rule_block(list)
    sprec = nil
    target = list.shift
    case target
    when OrMark, UserAction, Prec
      raise CompileError, "#{target.lineno}: unexpected symbol #{target.name}"
    end
    curr = []
    list.each do |i|
      case i
      when OrMark
        add_rule target, curr, sprec
        curr = []
        sprec = nil
      when Prec
        raise CompileError, "'=<prec>' used twice in one rule" if sprec
        sprec = i.symbol
      else
        curr.push i
      end
    end
    add_rule target, curr, sprec
  end

  # Adds a single rule.  A trailing UserAction becomes the rule's action;
  # every other UserAction in the body is replaced by a generated symbol
  # (see #embedded_action).
  def add_rule(target, list, sprec)
    act = list.last.kind_of?(UserAction) ? list.pop : UserAction.empty
    list.map! {|s| s.kind_of?(UserAction) ? embedded_action(s) : s }
    rule = Rule.new(target, list, act)
    rule.specified_prec = sprec
    @grammar.add rule
  end

  # Registers an empty rule "@N" (N counts up per parse) carrying +act+ and
  # returns its symbol.
  def embedded_action(act)
    sym = @grammar.intern("@#{@embedded_action_seq += 1}".intern, true)
    @grammar.add Rule.new(sym, [], act)
    sym
  end

  #
  # User Code Block
  #

  # Splits the scanner's epilogue at lines starting with "----" and stores
  # each block under its label.  A header of the form "label = path path"
  # additionally loads the named files (before the inline body).
  def parse_user_code
    line = @scanner.lineno
    _, *blocks = *@scanner.epilogue.split(/^----/)
    blocks.each do |block|
      # String#to_a no longer exists since Ruby 1.9; String#lines splits the
      # block into lines (terminators kept) the same way.
      header, *body = block.lines.to_a
      label0, pathes = *header.sub(/\A-+/, '').split('=', 2)
      label = canonical_label(label0)
      (pathes ? pathes.strip.split(' ') : []).each do |path|
        add_user_code label, SourceText.new(File.read(path), path, 1)
      end
      # The body starts on the line right after its "----" header.
      add_user_code label, SourceText.new(body.join(''), @filename, line + 1)
      line += (1 + body.size)
    end
  end

  # Accepted block labels and the Params list each one is appended to.
  USER_CODE_LABELS = {
    'header'  => :header,
    'prepare' => :header,   # obsolete
    'inner'   => :inner,
    'footer'  => :footer,
    'driver'  => :footer    # obsolete
  }.freeze

  # Normalizes a raw header label (trimmed, lower-cased, first word only).
  # Raises CompileError when the label is not in USER_CODE_LABELS.
  def canonical_label(src)
    label = src.to_s.strip.downcase.slice(/\w+/)
    unless USER_CODE_LABELS.key?(label)
      raise CompileError, "unknown user code type: #{label.inspect}"
    end
    label
  end

  def add_user_code(label, src)
    @result.params.send(USER_CODE_LABELS[label]).push src
  end

end
293
+
294
+
295
# Line-oriented tokenizer for Racc grammar files.
#
# #yylex yields [token_symbol, value] pairs and finally nil.  Scanning stops
# at end of input or at the first line starting with "----"; the text is
# then made available through #epilogue.
class GrammarFileScanner

  def initialize(str, filename = '-')
    @lines  = str.split(/\n|\r\n|\r/)
    @filename = filename
    @lineno = -1            # 0-based index of the current line in @lines
    @line_head   = true     # no word token seen yet on the current line
    @in_rule_blk = false
    @in_conv_blk = false
    @in_block = nil         # name of the multi-line construct being scanned
    @epilogue = ''
    @debug = false
    next_line
  end

  attr_reader :epilogue

  # 1-based number of the current line.
  def lineno
    @lineno + 1
  end

  attr_accessor :debug

  # Yields every token of the grammar section.  With #debug set, each token
  # is also printed to $stderr before being passed on.
  def yylex(&block)
    return yylex0(&block) unless @debug

    yylex0 do |sym, tok|
      $stderr.printf "%7d %-10s %s\n", lineno(), sym.inspect, tok.inspect
      yield [sym, tok]
    end
  end

  private

  def yylex0
    # @line is nil when there is nothing to scan (empty input, or input that
    # starts with "----"); in that case only the end marker is yielded.
    while @line
      until @line.empty?
        @line.sub!(/\A\s+/, '')
        if /\A\#/ =~ @line
          break
        elsif /\A\/\*/ =~ @line
          skip_comment
        elsif s = reads(/\A[a-zA-Z_]\w*/)
          yield [atom_symbol(s), s.intern]
        elsif s = reads(/\A\d+/)
          yield [:DIGIT, s.to_i]
        elsif ch = reads(/\A./)
          case ch
          when '"', "'"
            # NOTE(review): the quoted token text is passed to eval, so a
            # grammar file can execute arbitrary Ruby here.  Only process
            # grammar files you trust.
            yield [:STRING, eval(scan_quoted(ch))]
          when '{'
            lineno = lineno()
            yield [:ACTION, SourceText.new(scan_action(), @filename, lineno)]
          else
            @line_head = false if ch == '|'
            yield [ch, ch]
          end
        end
      end
      next_line
    end
    yield nil
  end

  # Advances to the next line.  Returns true when there is one.  At end of
  # input or at a "----" line it saves the epilogue, returns false, and
  # raises CompileError if a multi-line construct is still open.
  def next_line
    @lineno += 1
    @line = @lines[@lineno]
    if @line && /\A----/ !~ @line
      @line.sub!(/(?:\n|\r\n|\r)\z/, '')
      @line_head = true
      return true
    end

    # @lines is emptied below, so a repeated call after the end must not
    # overwrite the epilogue that was already saved.
    @epilogue = @lines.join("\n") unless @lines.empty?
    @lines.clear
    @line = nil
    if @in_block
      @lineno -= 1
      scan_error! sprintf('unterminated %s', @in_block)
    end
    false
  end

  ReservedWord = {
    'right'    => :RIGHT,
    'left'     => :LEFT,
    'nonassoc' => :NONASSOC,
    'preclow'  => :PRECLOW,
    'prechigh' => :PRECHIGH,
    'token'    => :TOKEN,
    'convert'  => :CONV,
    'options'  => :OPTION,
    'start'    => :START,
    'expect'   => :EXPECT,
    'class'    => :CLASS,
    'rule'     => :RULE,
    'end'      => :END
  }.freeze

  # Classifies a word.  'end' is always :END and closes any rule/convert
  # block.  Other reserved words are only recognized as the first word of a
  # line and outside rule/convert blocks; everything else is :SYMBOL.
  def atom_symbol(token)
    if token == 'end'
      symbol = :END
      @in_conv_blk = false
      @in_rule_blk = false
    else
      if @line_head and not @in_conv_blk and not @in_rule_blk
        symbol = ReservedWord[token] || :SYMBOL
      else
        symbol = :SYMBOL
      end
      case symbol
      when :RULE then @in_rule_blk = true
      when :CONV then @in_conv_blk = true
      end
    end
    @line_head = false
    symbol
  end

  # Skips a /* ... */ comment, which may span several lines.
  def skip_comment
    @in_block = 'comment'
    until m = /\*\//.match(@line)
      next_line
    end
    @line = m.post_match
    @in_block = nil
  end

  # When true, scan_action/scan_quoted embed "||type->" markers in the text
  # they return.
  $raccs_print_type = false

  # Reads the body of a { ... } action (the opening brace has already been
  # consumed) and returns its text without the outer braces.  Nested braces,
  # comments, quoted literals, % literals and regexp literals are tracked so
  # that a '}' inside them does not end the action.
  def scan_action
    buf = ''.dup
    nest = 1
    pre = nil
    @in_block = 'action'
    loop do
      pre = nil
      if s = reads(/\A\s+/)
        # does not set 'pre'
        buf << s
      end
      until @line.empty?
        if s = reads(/\A[^'"`{}%#\/\$]+/)
          buf << (pre = s)
          next
        end
        case ch = read(1)
        when '{'
          nest += 1
          buf << (pre = ch)
        when '}'
          nest -= 1
          if nest == 0
            @in_block = nil
            return buf
          end
          buf << (pre = ch)
        when '#'   # comment
          buf << ch << @line
          break
        when "'", '"', '`'
          buf << (pre = scan_quoted(ch))
        when '%'
          if literal_head? pre, @line
            # % string, regexp, array
            buf << ch
            case ch = read(1)
            when /[qQx]/n
              buf << ch << (pre = scan_quoted(read(1), '%string'))
            when /[wW]/n
              # The pattern must be a character class: a single character
              # can never match the two-character sequence "wW".
              buf << ch << (pre = scan_quoted(read(1), '%array'))
            when /s/n
              buf << ch << (pre = scan_quoted(read(1), '%symbol'))
            when /r/n
              buf << ch << (pre = scan_quoted(read(1), '%regexp'))
            when /[a-zA-Z0-9= ]/n   # does not include "_"
              scan_error! "unknown type of % literal '%#{ch}'"
            else
              buf << (pre = scan_quoted(ch, '%string'))
            end
          else
            # operator
            buf << '||op->' if $raccs_print_type
            buf << (pre = ch)
          end
        when '/'
          if literal_head? pre, @line
            # regexp
            buf << (pre = scan_quoted(ch, 'regexp'))
          else
            # operator
            buf << '||op->' if $raccs_print_type
            buf << (pre = ch)
          end
        when '$'   # gvar
          buf << ch << (pre = read(1))
        else
          raise 'racc: fatal: must not happen'
        end
      end
      buf << "\n"
      break unless next_line
    end
    raise 'racc: fatal: scan finished before parser finished'
  end

  # Decides whether a '%' or '/' starts a literal rather than being an
  # operator: the preceding text must not end in a word character, and the
  # following text must be non-empty and not start with whitespace or '='.
  def literal_head?(pre, post)
    (!pre || /[a-zA-Z_0-9]/n !~ pre[-1,1]) &&
        !post.empty? && /\A[\s\=]/n !~ post
  end

  # Consumes and returns the next +len+ characters of the current line.
  # @line stays a String (possibly empty) even when reading past its end.
  def read(len)
    s = @line[0, len]
    @line = @line[len .. -1] || ''
    s
  end

  # Consumes and returns the text matched by +re+, or returns nil (consuming
  # nothing) when +re+ does not match the current line.
  def reads(re)
    m = re.match(@line) or return nil
    @line = m.post_match
    m[0]
  end

  # Reads a quoted literal whose opening delimiter +left+ has already been
  # consumed and returns it including both delimiters.  +tag+ names the
  # construct in "unterminated ..." errors.
  #
  # NOTE(review): when the literal continues on following lines, the lines
  # are appended without a separating newline -- confirm this is intended.
  def scan_quoted(left, tag = 'string')
    buf = left.dup
    buf = "||#{tag}->" + buf if $raccs_print_type
    re = get_quoted_re(left)
    sv, @in_block = @in_block, tag
    loop do
      if s = reads(re)
        buf << s
        break
      end
      buf << @line
      break unless next_line
    end
    @in_block = sv
    buf << "<-#{tag}||" if $raccs_print_type
    buf
  end

  # Closing delimiter for each bracket-style opening delimiter.
  LEFT_TO_RIGHT = {
    '(' => ')',
    '{' => '}',
    '[' => ']',
    '<' => '>'
  }.freeze

  # Compiled "rest of quoted literal" regexps, keyed by opening delimiter.
  CACHE = {}

  # Regexp matching everything up to and including the closing delimiter
  # for +left+, allowing backslash escapes.
  def get_quoted_re(left)
    CACHE[left] ||= begin
      term = Regexp.quote(LEFT_TO_RIGHT[left] || left)
      /\A[^#{term}\\]*(?:\\.[^\\#{term}]*)*#{term}/
    end
  end

  def scan_error!(msg)
    raise CompileError, "#{lineno()}: #{msg}"
  end

end
558
+
559
+ end # module Racc