lrama 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/test.yaml +72 -0
- data/.gitignore +4 -0
- data/.rspec +2 -0
- data/Gemfile +8 -0
- data/LEGAL.md +26 -0
- data/MIT +21 -0
- data/README.md +32 -0
- data/Rakefile +1 -0
- data/doc/TODO.md +50 -0
- data/exe/lrama +7 -0
- data/lib/lrama/command.rb +129 -0
- data/lib/lrama/context.rb +510 -0
- data/lib/lrama/grammar.rb +850 -0
- data/lib/lrama/lexer.rb +349 -0
- data/lib/lrama/output.rb +268 -0
- data/lib/lrama/parser.rb +321 -0
- data/lib/lrama/report.rb +35 -0
- data/lib/lrama/states.rb +1124 -0
- data/lib/lrama/version.rb +3 -0
- data/lib/lrama.rb +9 -0
- data/lrama.gemspec +22 -0
- data/template/bison/yacc.c +1750 -0
- data/template/bison/yacc.h +112 -0
- metadata +67 -0
@@ -0,0 +1,850 @@
|
|
1
|
+
require "forwardable"
|
2
|
+
require "lrama/lexer"
|
3
|
+
|
4
|
+
module Lrama
|
5
|
+
# A single production of the grammar: `lhs` derives the symbols in `rhs`.
# `code` is the optional action attached to the rule and `precedence_sym`
# is the symbol the rule takes its precedence from.
Rule = Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) do
  # Renders the rule as "lhs -> a, b"; an empty right-hand side is shown as "ε".
  #
  # TODO: Change this to display_name
  def to_s
    lhs_text = lhs.id.s_value
    rhs_text =
      if rhs.empty?
        "ε"
      else
        rhs.map { |sym| sym.id.s_value }.join(", ")
      end

    "#{lhs_text} -> #{rhs_text}"
  end

  # Renders the rule as "lhs: a b" using display names; an empty
  # right-hand side is shown as "%empty".
  #
  # Used by #user_actions
  def as_comment
    lhs_text = lhs.id.s_value
    rhs_text =
      if rhs.empty?
        "%empty"
      else
        rhs.map { |sym| sym.display_name }.join(" ")
      end

    "#{lhs_text}: #{rhs_text}"
  end

  # Precedence of the rule's precedence symbol, or the falsy
  # `precedence_sym` itself when none is set.
  def precedence
    sym = precedence_sym
    return sym unless sym

    sym.precedence
  end

  # True for the rule whose id is 0.
  def initial_rule?
    id == 0
  end

  # Translated action code, or nil when the rule has no action.
  def translated_code
    code ? code.translated_code : nil
  end
end
|
38
|
+
|
39
|
+
# Symbol represents both nonterminals (nterm) and terminals (term)
|
40
|
+
# `number` is both for nterm and term
|
41
|
+
# `token_id` is tokentype for term, internal sequence number for nterm
|
42
|
+
#
|
43
|
+
# TODO: Add validation for ASCII code range for Token::Char
|
44
|
+
Symbol = Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, keyword_init: true) do
  # Flags marking the special symbols; they are set from outside the struct.
  attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol

  # Truthy when this symbol is a terminal.
  def term?
    term
  end

  # True when this symbol is a nonterminal.
  def nterm?
    !term
  end

  def eof_symbol?
    @eof_symbol ? true : false
  end

  def error_symbol?
    @error_symbol ? true : false
  end

  def undef_symbol?
    @undef_symbol ? true : false
  end

  def accept_symbol?
    @accept_symbol ? true : false
  end

  # The alias when one is set, otherwise the identifier text.
  def display_name
    alias_name || id.s_value
  end

  # name for yysymbol_kind_t
  #
  # See: b4_symbol_kind_base
  #
  # Raises when the symbol matches none of the known shapes.
  def enum_name
    base =
      if accept_symbol?
        "YYACCEPT"
      elsif eof_symbol?
        "YYEOF"
      elsif term? && id.type == Token::Char
        number.to_s + (alias_name || id.s_value)
      elsif term? && id.type == Token::Ident
        id.s_value
      elsif nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
        number.to_s + id.s_value
      elsif nterm?
        id.s_value
      else
        raise "Unexpected #{self}"
      end

    # Every run of characters outside [a-zA-Z_0-9] collapses to one "_".
    "YYSYMBOL_" + base.gsub(/[^a-zA-Z_0-9]+/, "_")
  end

  # comment for yysymbol_kind_t
  def comment
    # YYSYMBOL_YYACCEPT
    return id.s_value if accept_symbol?
    # YYEOF
    return alias_name if eof_symbol?
    # YYSYMBOL_3_backslash_, YYSYMBOL_14_
    return (alias_name || id.s_value) if term? && 0 < token_id && token_id < 128
    # YYSYMBOL_21_1
    return id.s_value if id.s_value.include?("$") || id.s_value.include?("@")

    # YYSYMBOL_keyword_class, YYSYMBOL_strings_1
    alias_name || id.s_value
  end
end
|
128
|
+
|
129
|
+
# A type declaration: pairs the declared symbol's `id` with the `tag` that
# Grammar#fill_nterm_type later copies onto the matching nonterminal.
Type = Struct.new(:id, :tag, keyword_init: true)
|
130
|
+
|
131
|
+
# A piece of user-written C code (`token_code`) together with the kind of
# place it came from (`type`). References such as $$, $n, @$ and @n inside
# the code are rewritten to the matching C expressions.
Code = Struct.new(:type, :token_code, keyword_init: true) do
  extend Forwardable

  def_delegators "token_code", :s_value, :line, :column, :references

  # $$, $n, @$, @n is translated to C code
  #
  # Returns the translated string for :user_code and :initial_action,
  # and nil for any other `type`.
  def translated_code
    case type
    when :user_code
      translated_user_code
    when :initial_action
      translated_initial_action_code
    end
  end

  # Translates the code as a %printer body for a symbol tagged `tag`.
  #
  # * ($1) error
  # * ($$) *yyvaluep
  # * (@1) error
  # * (@$) *yylocationp
  #
  # Raises RuntimeError when the code uses $n / @n or an unknown reference.
  def translated_printer_code(tag)
    substitute_references do |ref|
      case
      when ref.number == "$" && ref.type == :dollar # $$
        # Omit "<>"
        member = tag.s_value[1..-2]
        "((*yyvaluep).#{member})"
      when ref.number == "$" && ref.type == :at # @$
        "(*yylocationp)"
      when ref.type == :dollar # $n
        raise "$#{ref.number} can not be used in %printer."
      when ref.type == :at # @n
        raise "@#{ref.number} can not be used in %printer."
      else
        # Interpolate self: this struct has no `code` member, so naming it
        # here would raise NameError instead of this message.
        raise "Unexpected. #{self}, #{ref}"
      end
    end
  end


  private

  # Returns a copy of the source text in which every reference has been
  # replaced by the string the block returns for it. References are handled
  # last-to-first so that earlier column positions stay valid while the
  # text changes length.
  def substitute_references
    t_code = s_value.dup

    references.reverse_each do |ref|
      t_code[ref.first_column..ref.last_column] = yield(ref)
    end

    t_code
  end

  # * ($1) yyvsp[i]
  # * ($$) yyval
  # * (@1) yylsp[i]
  # * (@$) yyloc
  def translated_user_code
    substitute_references do |ref|
      case
      when ref.number == "$" && ref.type == :dollar # $$
        # Omit "<>"
        member = ref.tag.s_value[1..-2]
        "(yyval.#{member})"
      when ref.number == "$" && ref.type == :at # @$
        "(yyloc)"
      when ref.type == :dollar # $n
        # Stack offset relative to the position of the action in the RHS
        i = -ref.position_in_rhs + ref.number
        # Omit "<>"
        member = ref.tag.s_value[1..-2]
        "(yyvsp[#{i}].#{member})"
      when ref.type == :at # @n
        i = -ref.position_in_rhs + ref.number
        "(yylsp[#{i}])"
      else
        raise "Unexpected. #{self}, #{ref}"
      end
    end
  end

  # * ($1) error
  # * ($$) yylval
  # * (@1) error
  # * (@$) yylloc
  def translated_initial_action_code
    substitute_references do |ref|
      case
      when ref.number == "$" && ref.type == :dollar # $$
        "yylval"
      when ref.number == "$" && ref.type == :at # @$
        "yylloc"
      when ref.type == :dollar # $n
        raise "$#{ref.number} can not be used in initial_action."
      when ref.type == :at # @n
        raise "@#{ref.number} can not be used in initial_action."
      else
        raise "Unexpected. #{self}, #{ref}"
      end
    end
  end
end
|
247
|
+
|
248
|
+
# type: :dollar or :at
|
249
|
+
# ex_tag: "$<tag>1" (Optional)
|
250
|
+
Reference = Struct.new(:type, :number, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
  # The explicit tag written on the reference when there is one,
  # otherwise the tag of the symbol the reference points at.
  def tag
    ex_tag || referring_symbol.tag
  end
end
|
259
|
+
|
260
|
+
# Associativity (`type`) plus precedence level; instances are ordered by
# their `precedence` value only.
Precedence = Struct.new(:type, :precedence, keyword_init: true) do
  include Comparable

  # Orders two Precedence objects by level, ignoring `type`.
  def <=>(other)
    precedence <=> other.precedence
  end
end
|
267
|
+
|
268
|
+
# A printer declaration: the identifiers/tags it applies to, its code and
# the line it was declared on.
Printer = Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) do
  # Delegates to the code object, handing over `member` as the tag to use.
  def translated_code(member)
    printer_code = code
    printer_code.translated_printer_code(member)
  end
end
|
273
|
+
|
274
|
+
# The union declaration: its code and the line it was declared on.
Union = Struct.new(:code, :lineno, keyword_init: true) do
  # Source text with the first and last character (the braces) dropped.
  def braces_less_code
    code.s_value.slice(1..-2)
  end
end
|
280
|
+
|
281
|
+
Token = Lrama::Lexer::Token
|
282
|
+
|
283
|
+
# Grammar is the result of parsing an input grammar file
|
284
|
+
class Grammar
|
285
|
+
# Grammar file information not used by States but by Output
|
286
|
+
Aux = Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true)
|
287
|
+
|
288
|
+
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
289
|
+
attr_accessor :union, :expect,
|
290
|
+
:printers,
|
291
|
+
:lex_param, :parse_param, :initial_action,
|
292
|
+
:symbols, :types,
|
293
|
+
:rules, :_rules,
|
294
|
+
:sym_to_rules
|
295
|
+
|
296
|
+
# Starts with empty collections and no special symbols, then registers
# the built-in symbols through append_special_symbols.
def initialize
  @printers, @symbols, @types = [], [], []
  @_rules, @rules = [], []
  @sym_to_rules = {}
  @empty_symbol = @eof_symbol = @error_symbol = @undef_symbol = @accept_symbol = nil
  @aux = Aux.new

  append_special_symbols
end
|
312
|
+
|
313
|
+
# Records a new Printer built from the given declaration parts.
def add_printer(ident_or_tags:, code:, lineno:)
  printer = Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
  @printers << printer
end
|
316
|
+
|
317
|
+
# Registers a terminal symbol and returns it.
#
# An existing symbol is returned instead of creating a new one when
# * `token_id` is given and some symbol already uses it (with
#   `replace: true` that symbol's id, alias and tag are overwritten), or
# * some symbol already has the same `id`.
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
  same_token_id = token_id && @symbols.find { |s| s.token_id == token_id }
  if same_token_id
    if replace
      same_token_id.id = id
      same_token_id.alias_name = alias_name
      same_token_id.tag = tag
    end

    return same_token_id
  end

  same_id = @symbols.find { |s| s.id == id }
  return same_id if same_id

  created = Symbol.new(
    id: id, alias_name: alias_name, number: nil, tag: tag,
    term: true, token_id: token_id, nullable: false
  )
  @symbols << created
  # Drop the memoized list of terminals so it is rebuilt on next use
  @terms = nil

  created
end
|
341
|
+
|
342
|
+
# Registers a nonterminal symbol and returns it.
# Returns nil without adding anything when a symbol with the same `id`
# is already registered.
def add_nterm(id:, alias_name: nil, tag: nil)
  return if @symbols.any? { |s| s.id == id }

  created = Symbol.new(
    id: id, alias_name: alias_name, number: nil, tag: tag,
    term: false, token_id: nil, nullable: nil,
  )
  @symbols << created
  # Drop the memoized list of nonterminals so it is rebuilt on next use
  @nterms = nil

  created
end
|
354
|
+
|
355
|
+
# Records a Type declaration pairing `id` with `tag`.
def add_type(id:, tag:)
  declared = Type.new(id: id, tag: tag)
  @types << declared
end
|
358
|
+
|
359
|
+
# Gives `sym` a non-associative precedence at level `precedence`.
def add_nonassoc(sym, precedence)
  level = Precedence.new(type: :nonassoc, precedence: precedence)
  set_precedence(sym, level)
end
|
362
|
+
|
363
|
+
# Gives `sym` a left-associative precedence at level `precedence`.
def add_left(sym, precedence)
  level = Precedence.new(type: :left, precedence: precedence)
  set_precedence(sym, level)
end
|
366
|
+
|
367
|
+
# Gives `sym` a right-associative precedence at level `precedence`.
def add_right(sym, precedence)
  level = Precedence.new(type: :right, precedence: precedence)
  set_precedence(sym, level)
end
|
370
|
+
|
371
|
+
# Assigns `precedence` to the terminal `sym`.
#
# Raises RuntimeError when `sym` is a nonterminal; the message names the
# offending symbol so the failure can be diagnosed.
def set_precedence(sym, precedence)
  if sym.nterm?
    raise "Precedence can not be assigned to a nonterminal symbol: #{sym.display_name}"
  end

  sym.precedence = precedence
end
|
375
|
+
|
376
|
+
# Replaces the grammar's union declaration with one built from
# `code` and `lineno`.
def set_union(code, lineno)
  declared = Union.new(code: code, lineno: lineno)
  @union = declared
end
|
379
|
+
|
380
|
+
# Queues a raw rule as a [lhs, rhs, lineno] triple; the triples are
# turned into Rule objects by normalize_rules.
def add_rule(lhs:, rhs:, lineno:)
  @_rules.push([lhs, rhs, lineno])
end
|
383
|
+
|
384
|
+
# Converts, in place, each raw reference tuple
# (type, number, tag, first_column, last_column) held by `token_code`
# into a Reference. Returns `token_code` itself.
def build_references(token_code)
  token_code.references.map! do |kind, num, explicit_tag, col_from, col_to|
    Reference.new(
      type: kind,
      number: num,
      ex_tag: explicit_tag,
      first_column: col_from,
      last_column: col_to
    )
  end

  token_code
end
|
391
|
+
|
392
|
+
# Wraps `token_code` in a Code of the given `type`, after its raw
# references have been converted by build_references.
def build_code(type, token_code)
  Code.new(type: type, token_code: build_references(token_code))
end
|
396
|
+
|
397
|
+
# Stores the first line number of the prologue on the auxiliary record.
def prologue_first_lineno=(prologue_first_lineno)
  @aux[:prologue_first_lineno] = prologue_first_lineno
end
|
400
|
+
|
401
|
+
# Stores the prologue on the auxiliary record.
def prologue=(prologue)
  @aux[:prologue] = prologue
end
|
404
|
+
|
405
|
+
# Stores the first line number of the epilogue on the auxiliary record.
def epilogue_first_lineno=(epilogue_first_lineno)
  @aux[:epilogue_first_lineno] = epilogue_first_lineno
end
|
408
|
+
|
409
|
+
# Stores the epilogue on the auxiliary record.
def epilogue=(epilogue)
  @aux[:epilogue] = epilogue
end
|
412
|
+
|
413
|
+
# Runs every preparation step in its fixed order, then sorts the symbol
# table by symbol number.
def prepare
  steps = %i[
    normalize_rules
    collect_symbols
    replace_token_with_symbol
    fill_symbol_number
    fill_default_precedence
    fill_sym_to_rules
    fill_nterm_type
    fill_symbol_printer
  ]
  steps.each { |step| send(step) }

  @symbols.sort_by!(&:number)
end
|
424
|
+
|
425
|
+
# TODO: More validation methods
|
426
|
+
# Runs the grammar validations; at present the only one is the check
# that no two symbols share a number.
def validate!
  validate_symbol_number_uniqueness!
end
|
429
|
+
|
430
|
+
# Fills in `nullable` for every rule and every nonterminal.
#
# A rule with an empty RHS is nullable and a rule containing a terminal is
# not. The remaining rules and nonterminals are resolved by repeating two
# passes until neither settles anything new; whatever is still undecided
# after that is marked as not nullable.
def compute_nullable
  @rules.each do |rule|
    if rule.rhs.empty?
      rule.nullable = true
    elsif rule.rhs.any?(&:term)
      rule.nullable = false
    end
  end

  loop do
    open_rules = @rules.select { |rule| rule.nullable.nil? }
    open_nterms = nterms.select { |nterm| nterm.nullable.nil? }

    # A rule becomes nullable once every symbol of its RHS is nullable
    open_rules.each do |rule|
      rule.nullable = true if rule.rhs.all?(&:nullable)
    end

    # A nonterminal becomes nullable once one of its rules is nullable
    open_nterms.each do |nterm|
      find_rules_by_symbol!(nterm).each do |rule|
        nterm.nullable = true if rule.nullable
      end
    end

    rules_left = @rules.count { |rule| rule.nullable.nil? }
    nterms_left = nterms.count { |nterm| nterm.nullable.nil? }

    # Stop as soon as a whole round settled nothing
    break if open_rules.count == rules_left && open_nterms.count == nterms_left
  end

  @rules.select { |rule| rule.nullable.nil? }.each do |rule|
    rule.nullable = false
  end

  nterms.select { |nterm| nterm.nullable.nil? }.each do |nterm|
    nterm.nullable = false
  end
end
|
478
|
+
|
479
|
+
# First symbol whose identifier text equals `s_value`, or nil.
def find_symbol_by_s_value(s_value)
  @symbols.find { |candidate| candidate.id.s_value == s_value }
end
|
484
|
+
|
485
|
+
# Like find_symbol_by_s_value, but raises when nothing matches.
def find_symbol_by_s_value!(s_value)
  found = find_symbol_by_s_value(s_value)
  return found if found

  raise "Symbol not found: #{s_value}"
end
|
488
|
+
|
489
|
+
# First symbol whose id equals `id` or whose alias equals the text of
# `id`, or nil.
def find_symbol_by_id(id)
  @symbols.find do |candidate|
    # TODO: validate uniqueness of Token#s_value and Symbol#alias_name
    next true if candidate.id == id

    candidate.alias_name == id.s_value
  end
end
|
495
|
+
|
496
|
+
# Like find_symbol_by_id, but raises when nothing matches.
def find_symbol_by_id!(id)
  found = find_symbol_by_id(id)
  return found if found

  raise "Symbol not found: #{id}"
end
|
499
|
+
|
500
|
+
# Symbol stored at index `number` of the symbol table.
# Raises when the slot is empty, or when the symbol found there carries a
# different number.
def find_symbol_by_number!(number)
  found = @symbols[number]

  if !found
    raise "Symbol not found: #{number}"
  elsif found.number != number
    raise "[BUG] Symbol number mismatch. #{number}, #{found}"
  end

  found
end
|
508
|
+
|
509
|
+
# Like find_rules_by_symbol, but raises when the symbol has no rules.
def find_rules_by_symbol!(sym)
  found = find_rules_by_symbol(sym)
  return found if found

  raise "Rules for #{sym} not found"
end
|
512
|
+
|
513
|
+
# Rules recorded under the number of `sym`, or nil when there are none.
def find_rules_by_symbol(sym)
  key = sym.number
  @sym_to_rules[key]
end
|
516
|
+
|
517
|
+
# Number of terminal symbols.
def terms_count
  terms.length
end
|
520
|
+
|
521
|
+
# Terminal symbols, memoized in @terms until that cache is reset.
def terms
  @terms ||= @symbols.select { |sym| sym.term? }
end
|
524
|
+
|
525
|
+
# Number of nonterminal symbols.
def nterms_count
  nterms.length
end
|
528
|
+
|
529
|
+
# Nonterminal symbols, memoized in @nterms until that cache is reset.
def nterms
  @nterms ||= @symbols.select { |sym| sym.nterm? }
end
|
532
|
+
|
533
|
+
private
|
534
|
+
|
535
|
+
# Nonterminal whose id equals `id`; raises when there is none.
def find_nterm_by_id!(id)
  found = nterms.find { |nterm| nterm.id == id }
  return found if found

  raise "Nterm not found: #{id}"
end
|
540
|
+
|
541
|
+
|
542
|
+
# Registers the built-in symbols and remembers each of them:
# YYEOF (number 0, token_id 0), YYerror (number 1), YYUNDEF (number 2)
# and the $accept nonterminal.
def append_special_symbols
  # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
  # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
  # term.number = -2
  # @empty_symbol = term

  # YYEOF
  eof_id = Token.new(type: Token::Ident, s_value: "YYEOF")
  @eof_symbol = add_term(id: eof_id, alias_name: "\"end of file\"", token_id: 0).tap do |sym|
    sym.number = 0
    sym.eof_symbol = true
  end

  # YYerror
  error_id = Token.new(type: Token::Ident, s_value: "YYerror")
  @error_symbol = add_term(id: error_id, alias_name: "error").tap do |sym|
    sym.number = 1
    sym.error_symbol = true
  end

  # YYUNDEF
  undef_id = Token.new(type: Token::Ident, s_value: "YYUNDEF")
  @undef_symbol = add_term(id: undef_id, alias_name: "\"invalid token\"").tap do |sym|
    sym.number = 2
    sym.undef_symbol = true
  end

  # $accept
  accept_id = Token.new(type: Token::Ident, s_value: "$accept")
  @accept_symbol = add_nterm(id: accept_id).tap do |sym|
    sym.accept_symbol = true
  end
end
|
571
|
+
|
572
|
+
# 1. Add $accept rule to the top of rules
|
573
|
+
# 2. Extract precedence and last action
|
574
|
+
# 3. Extract action in the middle of RHS into new Empty rule
|
575
|
+
# 4. Append id and extract action then create Rule
|
576
|
+
#
|
577
|
+
# Bison 3.8.2 uses different orders for symbol number and rule number
|
578
|
+
# when a rule has actions in the middle of a rule.
|
579
|
+
#
|
580
|
+
# For example,
|
581
|
+
#
|
582
|
+
# `program: $@1 top_compstmt`
|
583
|
+
#
|
584
|
+
# Rules are ordered like below,
|
585
|
+
#
|
586
|
+
# 1 $@1: ε
|
587
|
+
# 2 program: $@1 top_compstmt
|
588
|
+
#
|
589
|
+
# Symbols are ordered like below,
|
590
|
+
#
|
591
|
+
# 164 program
|
592
|
+
# 165 $@1
|
593
|
+
#
|
594
|
+
# Turns the raw [lhs, rhs, lineno] triples collected by add_rule into Rule
# objects appended to @rules, and registers every LHS (including the
# generated ones) as a nonterminal.
#
# Raises RuntimeError when no rule has been added, when an action refers
# to a component at or after its own position, and whatever the symbol
# lookups raise.
def normalize_rules
  # The $accept rule is built from the first user rule, so an empty rule
  # list cannot be normalized. Fail before @rules is touched.
  raise "No rules in the input grammar" if @_rules.empty?

  # 1. Add $accept rule to the top of rules
  accept = find_symbol_by_s_value!("$accept")
  eof = find_symbol_by_number!(0)
  first_lhs, _first_rhs, first_lineno = @_rules.first
  @rules << Rule.new(id: @rules.count, lhs: accept, rhs: [first_lhs, eof], code: nil, lineno: first_lineno)

  extracted_action_number = 1 # @n as nterm

  @_rules.each do |lhs, rhs, rule_lineno|
    a = []
    rhs1 = []
    code = nil
    precedence_sym = nil

    # 2. Extract precedence and last action
    #
    # Scanning from the end: only a user-code token seen before any other
    # component counts as the action of the rule itself.
    rhs.reverse.each do |r|
      case
      when r.is_a?(Symbol) # precedence_sym
        precedence_sym = r
      when (r.type == Token::User_code) && precedence_sym.nil? && code.nil? && rhs1.empty?
        code = r
      else
        rhs1 << r
      end
    end
    rhs1.reverse!

    # Bison n'th component is 1-origin
    (rhs1 + [code]).compact.each.with_index(1) do |token, i|
      if token.type == Token::User_code
        token.references.each do |ref|
          # Need to keep position_in_rhs for actions in the middle of RHS
          ref.position_in_rhs = i - 1
          next if ref.type == :at
          # $$, $n, @$, @n can be used in any actions
          number = ref.number

          if number == "$"
            # TODO: Should be postponed after middle actions are extracted?
            ref.referring_symbol = lhs
          else
            raise "Can not refer following component. #{number} >= #{i}. #{token}" if number >= i
            rhs1[number - 1].referred = true
            ref.referring_symbol = rhs1[number - 1]
          end
        end
      end
    end

    # 3. Replace each action in the middle of the RHS by a generated
    #    identifier: "@n" when a later action refers to it, "$@n" otherwise.
    rhs2 = rhs1.map do |token|
      if token.type == Token::User_code
        prefix = token.referred ? "@" : "$@"
        new_token = Token.new(type: Token::Ident, s_value: prefix + extracted_action_number.to_s)
        extracted_action_number += 1
        a << [new_token, token]
        new_token
      else
        token
      end
    end

    # Extract actions in the middle of RHS
    # into new rules.
    a.each do |new_token, action|
      @rules << Rule.new(id: @rules.count, lhs: new_token, rhs: [], code: Code.new(type: :user_code, token_code: action), lineno: action.line)
    end

    # 4. Append id and extract action then create Rule
    c = code ? Code.new(type: :user_code, token_code: code) : nil
    @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: rule_lineno)

    add_nterm(id: lhs)
    a.each do |new_token, _|
      add_nterm(id: new_token)
    end
  end
end
|
671
|
+
|
672
|
+
# Collect symbols from rules
|
673
|
+
# Walks every RHS component of every rule and registers each
# character-literal token as a terminal. Symbols and other tokens are
# skipped; anything else raises.
def collect_symbols
  @rules.flat_map(&:rhs).each do |component|
    if component.is_a?(Token)
      add_term(id: component) if component.type == Token::Char
    elsif !component.is_a?(Symbol)
      raise "Unknown class: #{component}"
    end
  end
end
|
687
|
+
|
688
|
+
# Fill #number and #token_id
|
689
|
+
# Fill #number and #token_id
#
# Terminals are visited before nonterminals. A symbol without a number
# gets the next free one, counting from 3. A terminal without a token_id
# gets the code of its character when it is a character literal and the
# next id counting from 256 otherwise. A nonterminal without a token_id
# gets the next id counting from 0.
def fill_symbol_number
  # TODO: why start from 256
  next_token_id = 256

  # YYEMPTY = -2
  # YYEOF   =  0
  # YYerror =  1
  # YYUNDEF =  2
  next_number = 3

  next_nterm_token_id = 0
  taken_numbers = @symbols.map(&:number).each_with_object({}) do |n, seen|
    seen[n] = true
  end

  # Codes for the escape sequences that can appear between the quotes
  escape_codes = {
    "\\b" => 8,
    "\\f" => 12,
    "\\n" => 10,
    "\\r" => 13,
    "\\t" => 9,
    "\\v" => 11,
    "\"" => 34,
    "\'" => 39,
    "\\\\" => 92,
  }

  [:term?, :nterm?].flat_map { |kind| @symbols.select(&kind) }.each do |sym|
    next_number += 1 while taken_numbers[next_number]

    if sym.number.nil?
      sym.number = next_number
      next_number += 1
    end

    # If id is Token::Char, it uses ASCII code
    if sym.term? && sym.token_id.nil?
      if sym.id.type == Token::Char
        # Ignore ' on both sides
        body = sym.id.s_value[1..-2]
        sym.token_id =
          if escape_codes.key?(body)
            escape_codes[body]
          elsif (octal = /\A\\(\d+)\z/.match(body))
            # Backslash followed by digits is read as an octal code
            Integer(octal[1], 8)
          elsif (single = /\A(.)\z/.match(body))
            single[1].bytes.first
          else
            raise "Unknown Char s_value #{sym}"
          end
      else
        sym.token_id = next_token_id
        next_token_id += 1
      end
    end

    if sym.nterm? && sym.token_id.nil?
      sym.token_id = next_nterm_token_id
      next_nterm_token_id += 1
    end
  end
end
|
758
|
+
|
759
|
+
# Swaps, for every rule, the tokens in the LHS and RHS for the symbols
# they name, and does the same for the symbol each `$` reference of the
# rule's action points at. `@` references, and references pointing at a
# user-code token, are left as they are.
def replace_token_with_symbol
  @rules.each do |rule|
    rule.lhs = token_to_symbol(rule.lhs)
    rule.rhs.map! { |component| token_to_symbol(component) }

    next unless rule.code

    rule.code.references.each do |ref|
      next if ref.type == :at
      next if ref.referring_symbol.type == Token::User_code

      ref.referring_symbol = token_to_symbol(ref.referring_symbol)
    end
  end
end
|
778
|
+
|
779
|
+
# Resolves a Token to the symbol registered for it; a Symbol is returned
# unchanged. Anything else raises.
def token_to_symbol(token)
  return find_symbol_by_id!(token) if token.is_a?(Token)
  return token if token.is_a?(Symbol)

  raise "Unknown class: #{token}"
end
|
789
|
+
|
790
|
+
# Rule inherits precedence from the last term in RHS.
|
791
|
+
#
|
792
|
+
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
|
793
|
+
# Rule inherits precedence from the last term in RHS.
#
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
def fill_default_precedence
  @rules.each do |rule|
    # Explicitly specified precedence has the highest priority
    next if rule.precedence_sym

    # Last terminal of the RHS, nil when the RHS has none
    rule.precedence_sym = rule.rhs.reverse_each.find { |sym| sym.term? }
  end
end
|
806
|
+
|
807
|
+
# Groups the rules by the number of their LHS symbol into @sym_to_rules,
# keeping rule order within each group.
def fill_sym_to_rules
  @rules.each do |rule|
    bucket = (@sym_to_rules[rule.lhs.number] ||= [])
    bucket << rule
  end
end
|
814
|
+
|
815
|
+
# Fill nterm's tag defined by %type decl
|
816
|
+
# Fill nterm's tag defined by %type decl
#
# Raises (through find_nterm_by_id!) when a declaration names an unknown
# nonterminal.
def fill_nterm_type
  @types.each do |declared|
    find_nterm_by_id!(declared.id).tag = declared.tag
  end
end
|
822
|
+
|
823
|
+
# Attaches to each symbol the printer declared for its identifier or for
# its tag. When several declarations match, the one seen last wins.
# Raises when a declaration target is neither an identifier nor a tag.
def fill_symbol_printer
  @symbols.each do |sym|
    @printers.each do |printer|
      printer.ident_or_tags.each do |target|
        matched =
          case target.type
          when Token::Ident
            sym.id == target
          when Token::Tag
            sym.tag == target
          else
            raise "Unknown token type. #{printer}"
          end

        sym.printer = printer if matched
      end
    end
  end
end
|
839
|
+
|
840
|
+
# Raises when two or more symbols share the same number; the message
# lists the offending groups. Returns nil otherwise.
def validate_symbol_number_uniqueness!
  clashes = @symbols.group_by(&:number).select { |_number, group| group.count > 1 }

  raise "Symbol number is dupulicated. #{clashes}" unless clashes.empty?
end
|
849
|
+
end
|
850
|
+
end
|