lrama 0.5.2 → 0.5.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +10 -1
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/LEGAL.md +1 -16
- data/README.md +11 -1
- data/Steepfile +2 -1
- data/doc/TODO.md +8 -3
- data/exe/lrama +1 -1
- data/lib/lrama/command.rb +91 -72
- data/lib/lrama/context.rb +11 -1
- data/lib/lrama/counterexamples/derivation.rb +63 -0
- data/lib/lrama/counterexamples/example.rb +124 -0
- data/lib/lrama/counterexamples/path.rb +69 -0
- data/lib/lrama/counterexamples/state_item.rb +6 -0
- data/lib/lrama/counterexamples/triple.rb +21 -0
- data/lib/lrama/counterexamples.rb +285 -0
- data/lib/lrama/digraph.rb +2 -3
- data/lib/lrama/grammar/auxiliary.rb +7 -0
- data/lib/lrama/grammar/code.rb +123 -0
- data/lib/lrama/grammar/error_token.rb +9 -0
- data/lib/lrama/grammar/precedence.rb +11 -0
- data/lib/lrama/grammar/printer.rb +9 -0
- data/lib/lrama/grammar/reference.rb +22 -0
- data/lib/lrama/grammar/rule.rb +39 -0
- data/lib/lrama/grammar/symbol.rb +87 -0
- data/lib/lrama/grammar/union.rb +10 -0
- data/lib/lrama/grammar.rb +89 -282
- data/lib/lrama/lexer/token/type.rb +8 -0
- data/lib/lrama/lexer/token.rb +77 -0
- data/lib/lrama/lexer.rb +4 -74
- data/lib/lrama/output.rb +32 -4
- data/lib/lrama/parser/token_scanner.rb +3 -6
- data/lib/lrama/parser.rb +9 -1
- data/lib/lrama/report/duration.rb +25 -0
- data/lib/lrama/report/profile.rb +25 -0
- data/lib/lrama/report.rb +2 -47
- data/lib/lrama/state/reduce_reduce_conflict.rb +9 -0
- data/lib/lrama/state/resolved_conflict.rb +29 -0
- data/lib/lrama/state/shift_reduce_conflict.rb +9 -0
- data/lib/lrama/state.rb +13 -30
- data/lib/lrama/states/item.rb +79 -0
- data/lib/lrama/states.rb +24 -73
- data/lib/lrama/states_reporter.rb +28 -3
- data/lib/lrama/type.rb +4 -0
- data/lib/lrama/version.rb +1 -1
- data/lib/lrama.rb +2 -0
- data/lrama.gemspec +1 -1
- data/sig/lrama/{report.rbs → report/duration.rbs} +0 -4
- data/sig/lrama/report/profile.rbs +7 -0
- data/template/bison/yacc.c +371 -0
- metadata +30 -5
data/lib/lrama/grammar.rb
CHANGED
@@ -1,293 +1,23 @@
|
|
1
|
-
require "forwardable"
|
1
|
+
require "lrama/grammar/auxiliary"
|
2
|
+
require "lrama/grammar/code"
|
3
|
+
require "lrama/grammar/error_token"
|
4
|
+
require "lrama/grammar/precedence"
|
5
|
+
require "lrama/grammar/printer"
|
6
|
+
require "lrama/grammar/reference"
|
7
|
+
require "lrama/grammar/rule"
|
8
|
+
require "lrama/grammar/symbol"
|
9
|
+
require "lrama/grammar/union"
|
2
10
|
require "lrama/lexer"
|
11
|
+
require "lrama/type"
|
3
12
|
|
4
13
|
module Lrama
|
5
|
-
Rule = Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) do
|
6
|
-
# TODO: Change this to display_name
|
7
|
-
def to_s
|
8
|
-
l = lhs.id.s_value
|
9
|
-
r = rhs.empty? ? "ε" : rhs.map {|r| r.id.s_value }.join(", ")
|
10
|
-
|
11
|
-
"#{l} -> #{r}"
|
12
|
-
end
|
13
|
-
|
14
|
-
# Used by #user_actions
|
15
|
-
def as_comment
|
16
|
-
l = lhs.id.s_value
|
17
|
-
r = rhs.empty? ? "%empty" : rhs.map {|r| r.display_name }.join(" ")
|
18
|
-
|
19
|
-
"#{l}: #{r}"
|
20
|
-
end
|
21
|
-
|
22
|
-
def precedence
|
23
|
-
precedence_sym && precedence_sym.precedence
|
24
|
-
end
|
25
|
-
|
26
|
-
def initial_rule?
|
27
|
-
id == 0
|
28
|
-
end
|
29
|
-
|
30
|
-
def translated_code
|
31
|
-
if code
|
32
|
-
code.translated_code
|
33
|
-
else
|
34
|
-
nil
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Symbol is both of nterm and term
|
40
|
-
# `number` is both for nterm and term
|
41
|
-
# `token_id` is tokentype for term, internal sequence number for nterm
|
42
|
-
#
|
43
|
-
# TODO: Add validation for ASCII code range for Token::Char
|
44
|
-
Symbol = Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, keyword_init: true) do
|
45
|
-
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
|
46
|
-
|
47
|
-
def term?
|
48
|
-
term
|
49
|
-
end
|
50
|
-
|
51
|
-
def nterm?
|
52
|
-
!term
|
53
|
-
end
|
54
|
-
|
55
|
-
def eof_symbol?
|
56
|
-
!!@eof_symbol
|
57
|
-
end
|
58
|
-
|
59
|
-
def error_symbol?
|
60
|
-
!!@error_symbol
|
61
|
-
end
|
62
|
-
|
63
|
-
def undef_symbol?
|
64
|
-
!!@undef_symbol
|
65
|
-
end
|
66
|
-
|
67
|
-
def accept_symbol?
|
68
|
-
!!@accept_symbol
|
69
|
-
end
|
70
|
-
|
71
|
-
def display_name
|
72
|
-
if alias_name
|
73
|
-
alias_name
|
74
|
-
else
|
75
|
-
id.s_value
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
# name for yysymbol_kind_t
|
80
|
-
#
|
81
|
-
# See: b4_symbol_kind_base
|
82
|
-
def enum_name
|
83
|
-
case
|
84
|
-
when accept_symbol?
|
85
|
-
name = "YYACCEPT"
|
86
|
-
when eof_symbol?
|
87
|
-
name = "YYEOF"
|
88
|
-
when term? && id.type == Token::Char
|
89
|
-
if alias_name
|
90
|
-
name = number.to_s + alias_name
|
91
|
-
else
|
92
|
-
name = number.to_s + id.s_value
|
93
|
-
end
|
94
|
-
when term? && id.type == Token::Ident
|
95
|
-
name = id.s_value
|
96
|
-
when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
|
97
|
-
name = number.to_s + id.s_value
|
98
|
-
when nterm?
|
99
|
-
name = id.s_value
|
100
|
-
else
|
101
|
-
raise "Unexpected #{self}"
|
102
|
-
end
|
103
|
-
|
104
|
-
"YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_")
|
105
|
-
end
|
106
|
-
|
107
|
-
# comment for yysymbol_kind_t
|
108
|
-
def comment
|
109
|
-
case
|
110
|
-
when accept_symbol?
|
111
|
-
# YYSYMBOL_YYACCEPT
|
112
|
-
id.s_value
|
113
|
-
when eof_symbol?
|
114
|
-
# YYEOF
|
115
|
-
alias_name
|
116
|
-
when (term? && 0 < token_id && token_id < 128)
|
117
|
-
# YYSYMBOL_3_backslash_, YYSYMBOL_14_
|
118
|
-
alias_name || id.s_value
|
119
|
-
when id.s_value.include?("$") || id.s_value.include?("@")
|
120
|
-
# YYSYMBOL_21_1
|
121
|
-
id.s_value
|
122
|
-
else
|
123
|
-
# YYSYMBOL_keyword_class, YYSYMBOL_strings_1
|
124
|
-
alias_name || id.s_value
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
Type = Struct.new(:id, :tag, keyword_init: true)
|
130
|
-
|
131
|
-
Code = Struct.new(:type, :token_code, keyword_init: true) do
|
132
|
-
extend Forwardable
|
133
|
-
|
134
|
-
def_delegators "token_code", :s_value, :line, :column, :references
|
135
|
-
|
136
|
-
# $$, $n, @$, @n is translated to C code
|
137
|
-
def translated_code
|
138
|
-
case type
|
139
|
-
when :user_code
|
140
|
-
translated_user_code
|
141
|
-
when :initial_action
|
142
|
-
translated_initial_action_code
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
# * ($1) error
|
147
|
-
# * ($$) *yyvaluep
|
148
|
-
# * (@1) error
|
149
|
-
# * (@$) *yylocationp
|
150
|
-
def translated_printer_code(tag)
|
151
|
-
t_code = s_value.dup
|
152
|
-
|
153
|
-
references.reverse.each do |ref|
|
154
|
-
first_column = ref.first_column
|
155
|
-
last_column = ref.last_column
|
156
|
-
|
157
|
-
case
|
158
|
-
when ref.value == "$" && ref.type == :dollar # $$
|
159
|
-
# Omit "<>"
|
160
|
-
member = tag.s_value[1..-2]
|
161
|
-
str = "((*yyvaluep).#{member})"
|
162
|
-
when ref.value == "$" && ref.type == :at # @$
|
163
|
-
str = "(*yylocationp)"
|
164
|
-
when ref.type == :dollar # $n
|
165
|
-
raise "$#{ref.value} can not be used in %printer."
|
166
|
-
when ref.type == :at # @n
|
167
|
-
raise "@#{ref.value} can not be used in %printer."
|
168
|
-
else
|
169
|
-
raise "Unexpected. #{self}, #{ref}"
|
170
|
-
end
|
171
|
-
|
172
|
-
t_code[first_column..last_column] = str
|
173
|
-
end
|
174
|
-
|
175
|
-
return t_code
|
176
|
-
end
|
177
|
-
|
178
|
-
|
179
|
-
private
|
180
|
-
|
181
|
-
# * ($1) yyvsp[i]
|
182
|
-
# * ($$) yyval
|
183
|
-
# * (@1) yylsp[i]
|
184
|
-
# * (@$) yyloc
|
185
|
-
def translated_user_code
|
186
|
-
t_code = s_value.dup
|
187
|
-
|
188
|
-
references.reverse.each do |ref|
|
189
|
-
first_column = ref.first_column
|
190
|
-
last_column = ref.last_column
|
191
|
-
|
192
|
-
case
|
193
|
-
when ref.value == "$" && ref.type == :dollar # $$
|
194
|
-
# Omit "<>"
|
195
|
-
member = ref.tag.s_value[1..-2]
|
196
|
-
str = "(yyval.#{member})"
|
197
|
-
when ref.value == "$" && ref.type == :at # @$
|
198
|
-
str = "(yyloc)"
|
199
|
-
when ref.type == :dollar # $n
|
200
|
-
i = -ref.position_in_rhs + ref.value
|
201
|
-
# Omit "<>"
|
202
|
-
member = ref.tag.s_value[1..-2]
|
203
|
-
str = "(yyvsp[#{i}].#{member})"
|
204
|
-
when ref.type == :at # @n
|
205
|
-
i = -ref.position_in_rhs + ref.value
|
206
|
-
str = "(yylsp[#{i}])"
|
207
|
-
else
|
208
|
-
raise "Unexpected. #{self}, #{ref}"
|
209
|
-
end
|
210
|
-
|
211
|
-
t_code[first_column..last_column] = str
|
212
|
-
end
|
213
|
-
|
214
|
-
return t_code
|
215
|
-
end
|
216
|
-
|
217
|
-
# * ($1) error
|
218
|
-
# * ($$) yylval
|
219
|
-
# * (@1) error
|
220
|
-
# * (@$) yylloc
|
221
|
-
def translated_initial_action_code
|
222
|
-
t_code = s_value.dup
|
223
|
-
|
224
|
-
references.reverse.each do |ref|
|
225
|
-
first_column = ref.first_column
|
226
|
-
last_column = ref.last_column
|
227
|
-
|
228
|
-
case
|
229
|
-
when ref.value == "$" && ref.type == :dollar # $$
|
230
|
-
str = "yylval"
|
231
|
-
when ref.value == "$" && ref.type == :at # @$
|
232
|
-
str = "yylloc"
|
233
|
-
when ref.type == :dollar # $n
|
234
|
-
raise "$#{ref.value} can not be used in initial_action."
|
235
|
-
when ref.type == :at # @n
|
236
|
-
raise "@#{ref.value} can not be used in initial_action."
|
237
|
-
else
|
238
|
-
raise "Unexpected. #{self}, #{ref}"
|
239
|
-
end
|
240
|
-
|
241
|
-
t_code[first_column..last_column] = str
|
242
|
-
end
|
243
|
-
|
244
|
-
return t_code
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
# type: :dollar or :at
|
249
|
-
# ex_tag: "$<tag>1" (Optional)
|
250
|
-
Reference = Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
|
251
|
-
def tag
|
252
|
-
if ex_tag
|
253
|
-
ex_tag
|
254
|
-
else
|
255
|
-
referring_symbol.tag
|
256
|
-
end
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
Precedence = Struct.new(:type, :precedence, keyword_init: true) do
|
261
|
-
include Comparable
|
262
|
-
|
263
|
-
def <=>(other)
|
264
|
-
self.precedence <=> other.precedence
|
265
|
-
end
|
266
|
-
end
|
267
|
-
|
268
|
-
Printer = Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) do
|
269
|
-
def translated_code(member)
|
270
|
-
code.translated_printer_code(member)
|
271
|
-
end
|
272
|
-
end
|
273
|
-
|
274
|
-
Union = Struct.new(:code, :lineno, keyword_init: true) do
|
275
|
-
def braces_less_code
|
276
|
-
# Remove braces
|
277
|
-
code.s_value[1..-2]
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
14
|
Token = Lrama::Lexer::Token
|
282
15
|
|
283
16
|
# Grammar is the result of parsing an input grammar file
|
284
17
|
class Grammar
|
285
|
-
# Grammar file information not used by States but by Output
|
286
|
-
Aux = Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true)
|
287
|
-
|
288
18
|
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
289
19
|
attr_accessor :union, :expect,
|
290
|
-
:printers,
|
20
|
+
:printers, :error_tokens,
|
291
21
|
:lex_param, :parse_param, :initial_action,
|
292
22
|
:symbols, :types,
|
293
23
|
:rules, :_rules,
|
@@ -295,6 +25,7 @@ module Lrama
|
|
295
25
|
|
296
26
|
def initialize
|
297
27
|
@printers = []
|
28
|
+
@error_tokens = []
|
298
29
|
@symbols = []
|
299
30
|
@types = []
|
300
31
|
@_rules = []
|
@@ -305,7 +36,7 @@ module Lrama
|
|
305
36
|
@error_symbol = nil
|
306
37
|
@undef_symbol = nil
|
307
38
|
@accept_symbol = nil
|
308
|
-
@aux = Aux.new
|
39
|
+
@aux = Auxiliary.new
|
309
40
|
|
310
41
|
append_special_symbols
|
311
42
|
end
|
@@ -314,6 +45,10 @@ module Lrama
|
|
314
45
|
@printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
|
315
46
|
end
|
316
47
|
|
48
|
+
def add_error_token(ident_or_tags:, code:, lineno:)
|
49
|
+
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
|
50
|
+
end
|
51
|
+
|
317
52
|
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
318
53
|
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
|
319
54
|
if replace
|
@@ -419,12 +154,14 @@ module Lrama
|
|
419
154
|
fill_sym_to_rules
|
420
155
|
fill_nterm_type
|
421
156
|
fill_symbol_printer
|
157
|
+
fill_symbol_error_token
|
422
158
|
@symbols.sort_by!(&:number)
|
423
159
|
end
|
424
160
|
|
425
161
|
# TODO: More validation methods
|
426
162
|
def validate!
|
427
163
|
validate_symbol_number_uniqueness!
|
164
|
+
validate_no_declared_type_reference!
|
428
165
|
end
|
429
166
|
|
430
167
|
def compute_nullable
|
@@ -476,6 +213,41 @@ module Lrama
|
|
476
213
|
end
|
477
214
|
end
|
478
215
|
|
216
|
+
def compute_first_set
|
217
|
+
terms.each do |term|
|
218
|
+
term.first_set = Set.new([term]).freeze
|
219
|
+
term.first_set_bitmap = Lrama::Bitmap.from_array([term.number])
|
220
|
+
end
|
221
|
+
|
222
|
+
nterms.each do |nterm|
|
223
|
+
nterm.first_set = Set.new([]).freeze
|
224
|
+
nterm.first_set_bitmap = Lrama::Bitmap.from_array([])
|
225
|
+
end
|
226
|
+
|
227
|
+
while true do
|
228
|
+
changed = false
|
229
|
+
|
230
|
+
@rules.each do |rule|
|
231
|
+
rule.rhs.each do |r|
|
232
|
+
if rule.lhs.first_set_bitmap | r.first_set_bitmap != rule.lhs.first_set_bitmap
|
233
|
+
changed = true
|
234
|
+
rule.lhs.first_set_bitmap = rule.lhs.first_set_bitmap | r.first_set_bitmap
|
235
|
+
end
|
236
|
+
|
237
|
+
break unless r.nullable
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
break unless changed
|
242
|
+
end
|
243
|
+
|
244
|
+
nterms.each do |nterm|
|
245
|
+
nterm.first_set = Lrama::Bitmap.to_array(nterm.first_set_bitmap).map do |number|
|
246
|
+
find_symbol_by_number!(number)
|
247
|
+
end.to_set
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
479
251
|
def find_symbol_by_s_value(s_value)
|
480
252
|
@symbols.find do |sym|
|
481
253
|
sym.id.s_value == s_value
|
@@ -845,6 +617,23 @@ module Lrama
|
|
845
617
|
end
|
846
618
|
end
|
847
619
|
|
620
|
+
def fill_symbol_error_token
|
621
|
+
@symbols.each do |sym|
|
622
|
+
@error_tokens.each do |error_token|
|
623
|
+
error_token.ident_or_tags.each do |ident_or_tag|
|
624
|
+
case ident_or_tag.type
|
625
|
+
when Token::Ident
|
626
|
+
sym.error_token = error_token if sym.id == ident_or_tag
|
627
|
+
when Token::Tag
|
628
|
+
sym.error_token = error_token if sym.tag == ident_or_tag
|
629
|
+
else
|
630
|
+
raise "Unknown token type. #{error_token}"
|
631
|
+
end
|
632
|
+
end
|
633
|
+
end
|
634
|
+
end
|
635
|
+
end
|
636
|
+
|
848
637
|
def validate_symbol_number_uniqueness!
|
849
638
|
invalid = @symbols.group_by(&:number).select do |number, syms|
|
850
639
|
syms.count > 1
|
@@ -854,5 +643,23 @@ module Lrama
|
|
854
643
|
|
855
644
|
raise "Symbol number is duplicated. #{invalid}"
|
856
645
|
end
|
646
|
+
|
647
|
+
def validate_no_declared_type_reference!
|
648
|
+
errors = []
|
649
|
+
|
650
|
+
rules.each do |rule|
|
651
|
+
next unless rule.code
|
652
|
+
|
653
|
+
rule.code.references.select do |ref|
|
654
|
+
ref.type == :dollar && !ref.tag
|
655
|
+
end.each do |ref|
|
656
|
+
errors << "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type"
|
657
|
+
end
|
658
|
+
end
|
659
|
+
|
660
|
+
return if errors.empty?
|
661
|
+
|
662
|
+
raise errors.join("\n")
|
663
|
+
end
|
857
664
|
end
|
858
665
|
end
|
data/lib/lrama/lexer/token.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'lrama/lexer/token/type'
|
2
|
+
|
3
|
+
module Lrama
|
4
|
+
class Lexer
|
5
|
+
class Token
|
6
|
+
|
7
|
+
attr_accessor :line, :column, :referred
|
8
|
+
# For User_code
|
9
|
+
attr_accessor :references
|
10
|
+
|
11
|
+
def to_s
|
12
|
+
"#{super} line: #{line}, column: #{column}"
|
13
|
+
end
|
14
|
+
|
15
|
+
def referred_by?(string)
|
16
|
+
[self.s_value, self.alias].include?(string)
|
17
|
+
end
|
18
|
+
|
19
|
+
def ==(other)
|
20
|
+
self.class == other.class && self.type == other.type && self.s_value == other.s_value
|
21
|
+
end
|
22
|
+
|
23
|
+
def numberize_references(lhs, rhs)
|
24
|
+
self.references.map! {|ref|
|
25
|
+
ref_name = ref[1]
|
26
|
+
if ref_name.is_a?(::String) && ref_name != '$'
|
27
|
+
value =
|
28
|
+
if lhs.referred_by?(ref_name)
|
29
|
+
'$'
|
30
|
+
else
|
31
|
+
rhs.find_index {|token| token.referred_by?(ref_name) } + 1
|
32
|
+
end
|
33
|
+
[ref[0], value, ref[2], ref[3], ref[4]]
|
34
|
+
else
|
35
|
+
ref
|
36
|
+
end
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
@i = 0
|
41
|
+
@types = []
|
42
|
+
|
43
|
+
def self.define_type(name)
|
44
|
+
type = Type.new(id: @i, name: name.to_s)
|
45
|
+
const_set(name, type)
|
46
|
+
@types << type
|
47
|
+
@i += 1
|
48
|
+
end
|
49
|
+
|
50
|
+
# Token types
|
51
|
+
define_type(:P_expect) # %expect
|
52
|
+
define_type(:P_define) # %define
|
53
|
+
define_type(:P_printer) # %printer
|
54
|
+
define_type(:P_error_token) # %error-token
|
55
|
+
define_type(:P_lex_param) # %lex-param
|
56
|
+
define_type(:P_parse_param) # %parse-param
|
57
|
+
define_type(:P_initial_action) # %initial-action
|
58
|
+
define_type(:P_union) # %union
|
59
|
+
define_type(:P_token) # %token
|
60
|
+
define_type(:P_type) # %type
|
61
|
+
define_type(:P_nonassoc) # %nonassoc
|
62
|
+
define_type(:P_left) # %left
|
63
|
+
define_type(:P_right) # %right
|
64
|
+
define_type(:P_prec) # %prec
|
65
|
+
define_type(:User_code) # { ... }
|
66
|
+
define_type(:Tag) # <int>
|
67
|
+
define_type(:Number) # 0
|
68
|
+
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
69
|
+
define_type(:Ident) # api.pure, tNUMBER
|
70
|
+
define_type(:Named_Ref) # [foo]
|
71
|
+
define_type(:Semicolon) # ;
|
72
|
+
define_type(:Bar) # |
|
73
|
+
define_type(:String) # "str"
|
74
|
+
define_type(:Char) # '+'
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/lrama/lexer.rb
CHANGED
@@ -1,84 +1,12 @@
|
|
1
1
|
require "strscan"
|
2
|
-
require "lrama/report"
|
2
|
+
require "lrama/report/duration"
|
3
|
+
require "lrama/lexer/token"
|
3
4
|
|
4
5
|
module Lrama
|
5
6
|
# Lexer for parse.y
|
6
7
|
class Lexer
|
7
8
|
include Lrama::Report::Duration
|
8
9
|
|
9
|
-
# s_value is semantic value
|
10
|
-
Token = Struct.new(:type, :s_value, :alias, keyword_init: true) do
|
11
|
-
Type = Struct.new(:id, :name, keyword_init: true)
|
12
|
-
|
13
|
-
attr_accessor :line, :column, :referred
|
14
|
-
# For User_code
|
15
|
-
attr_accessor :references
|
16
|
-
|
17
|
-
def to_s
|
18
|
-
"#{super} line: #{line}, column: #{column}"
|
19
|
-
end
|
20
|
-
|
21
|
-
def referred_by?(string)
|
22
|
-
[self.s_value, self.alias].include?(string)
|
23
|
-
end
|
24
|
-
|
25
|
-
def ==(other)
|
26
|
-
self.class == other.class && self.type == other.type && self.s_value == other.s_value
|
27
|
-
end
|
28
|
-
|
29
|
-
def numberize_references(lhs, rhs)
|
30
|
-
self.references.map! {|ref|
|
31
|
-
ref_name = ref[1]
|
32
|
-
if ref_name.is_a?(String) && ref_name != '$'
|
33
|
-
value =
|
34
|
-
if lhs.referred_by?(ref_name)
|
35
|
-
'$'
|
36
|
-
else
|
37
|
-
rhs.find_index {|token| token.referred_by?(ref_name) } + 1
|
38
|
-
end
|
39
|
-
[ref[0], value, ref[2], ref[3], ref[4]]
|
40
|
-
else
|
41
|
-
ref
|
42
|
-
end
|
43
|
-
}
|
44
|
-
end
|
45
|
-
|
46
|
-
@i = 0
|
47
|
-
@types = []
|
48
|
-
|
49
|
-
def self.define_type(name)
|
50
|
-
type = Type.new(id: @i, name: name.to_s)
|
51
|
-
const_set(name, type)
|
52
|
-
@types << type
|
53
|
-
@i += 1
|
54
|
-
end
|
55
|
-
|
56
|
-
# Token types
|
57
|
-
define_type(:P_expect) # %expect
|
58
|
-
define_type(:P_define) # %define
|
59
|
-
define_type(:P_printer) # %printer
|
60
|
-
define_type(:P_lex_param) # %lex-param
|
61
|
-
define_type(:P_parse_param) # %parse-param
|
62
|
-
define_type(:P_initial_action) # %initial-action
|
63
|
-
define_type(:P_union) # %union
|
64
|
-
define_type(:P_token) # %token
|
65
|
-
define_type(:P_type) # %type
|
66
|
-
define_type(:P_nonassoc) # %nonassoc
|
67
|
-
define_type(:P_left) # %left
|
68
|
-
define_type(:P_right) # %right
|
69
|
-
define_type(:P_prec) # %prec
|
70
|
-
define_type(:User_code) # { ... }
|
71
|
-
define_type(:Tag) # <int>
|
72
|
-
define_type(:Number) # 0
|
73
|
-
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
74
|
-
define_type(:Ident) # api.pure, tNUMBER
|
75
|
-
define_type(:Named_Ref) # [foo]
|
76
|
-
define_type(:Semicolon) # ;
|
77
|
-
define_type(:Bar) # |
|
78
|
-
define_type(:String) # "str"
|
79
|
-
define_type(:Char) # '+'
|
80
|
-
end
|
81
|
-
|
82
10
|
# States
|
83
11
|
#
|
84
12
|
# See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
|
@@ -207,6 +135,8 @@ module Lrama
|
|
207
135
|
tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
|
208
136
|
when ss.scan(/%printer/)
|
209
137
|
tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
|
138
|
+
when ss.scan(/%error-token/)
|
139
|
+
tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
|
210
140
|
when ss.scan(/%lex-param/)
|
211
141
|
tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
|
212
142
|
when ss.scan(/%parse-param/)
|
data/lib/lrama/output.rb
CHANGED
@@ -1,20 +1,24 @@
|
|
1
1
|
require "erb"
|
2
2
|
require "forwardable"
|
3
|
-
require "lrama/report"
|
3
|
+
require "lrama/report/duration"
|
4
4
|
|
5
5
|
module Lrama
|
6
6
|
class Output
|
7
7
|
extend Forwardable
|
8
8
|
include Report::Duration
|
9
9
|
|
10
|
-
attr_reader :grammar_file_path, :context, :grammar
|
10
|
+
attr_reader :grammar_file_path, :context, :grammar, :error_recovery
|
11
11
|
|
12
12
|
def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
|
13
13
|
:yymaxutok, :yypact_ninf, :yytable_ninf
|
14
14
|
|
15
15
|
def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
|
16
16
|
|
17
|
-
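def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:)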
|
17
|
+
def initialize(
|
18
|
+
out:, output_file_path:, template_name:, grammar_file_path:,
|
19
|
+
header_out: nil, header_file_path: nil,
|
20
|
+
context:, grammar:, error_recovery: false
|
21
|
+
)
|
18
22
|
@out = out
|
19
23
|
@output_file_path = output_file_path
|
20
24
|
@template_name = template_name
|
@@ -23,6 +27,7 @@ module Lrama
|
|
23
27
|
@header_file_path = header_file_path
|
24
28
|
@context = context
|
25
29
|
@grammar = grammar
|
30
|
+
@error_recovery = error_recovery
|
26
31
|
end
|
27
32
|
|
28
33
|
if ERB.instance_method(:initialize).parameters.last.first == :key
|
@@ -98,6 +103,10 @@ module Lrama
|
|
98
103
|
int_array_to_string(@context.yytranslate)
|
99
104
|
end
|
100
105
|
|
106
|
+
def yytranslate_inverted
|
107
|
+
int_array_to_string(@context.yytranslate_inverted)
|
108
|
+
end
|
109
|
+
|
101
110
|
def yyrline
|
102
111
|
int_array_to_string(@context.yyrline)
|
103
112
|
end
|
@@ -155,6 +164,25 @@ module Lrama
|
|
155
164
|
STR
|
156
165
|
end
|
157
166
|
|
167
|
+
def symbol_actions_for_error_token
|
168
|
+
str = ""
|
169
|
+
|
170
|
+
@grammar.symbols.each do |sym|
|
171
|
+
next unless sym.error_token
|
172
|
+
|
173
|
+
str << <<-STR
|
174
|
+
case #{sym.enum_name}: /* #{sym.comment} */
|
175
|
+
#line #{sym.error_token.lineno} "#{@grammar_file_path}"
|
176
|
+
#{sym.error_token.translated_code(sym.tag)}
|
177
|
+
#line [@oline@] [@ofile@]
|
178
|
+
break;
|
179
|
+
|
180
|
+
STR
|
181
|
+
end
|
182
|
+
|
183
|
+
str
|
184
|
+
end
|
185
|
+
|
158
186
|
# b4_user_actions
|
159
187
|
def user_actions
|
160
188
|
str = ""
|
@@ -224,7 +252,7 @@ module Lrama
|
|
224
252
|
end
|
225
253
|
|
226
254
|
def extract_param_name(param)
|
227
|
-
/\A(
|
255
|
+
/\A(\W*)([a-zA-Z0-9_]+)\z/.match(param.split.last)[2]
|
228
256
|
end
|
229
257
|
|
230
258
|
def parse_param_name
|