lrama 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +10 -1
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/LEGAL.md +1 -16
- data/README.md +11 -1
- data/Steepfile +2 -1
- data/doc/TODO.md +8 -3
- data/exe/lrama +1 -1
- data/lib/lrama/command.rb +91 -72
- data/lib/lrama/context.rb +11 -1
- data/lib/lrama/counterexamples/derivation.rb +63 -0
- data/lib/lrama/counterexamples/example.rb +124 -0
- data/lib/lrama/counterexamples/path.rb +69 -0
- data/lib/lrama/counterexamples/state_item.rb +6 -0
- data/lib/lrama/counterexamples/triple.rb +21 -0
- data/lib/lrama/counterexamples.rb +285 -0
- data/lib/lrama/digraph.rb +2 -3
- data/lib/lrama/grammar/auxiliary.rb +7 -0
- data/lib/lrama/grammar/code.rb +123 -0
- data/lib/lrama/grammar/error_token.rb +9 -0
- data/lib/lrama/grammar/precedence.rb +11 -0
- data/lib/lrama/grammar/printer.rb +9 -0
- data/lib/lrama/grammar/reference.rb +22 -0
- data/lib/lrama/grammar/rule.rb +39 -0
- data/lib/lrama/grammar/symbol.rb +87 -0
- data/lib/lrama/grammar/union.rb +10 -0
- data/lib/lrama/grammar.rb +89 -282
- data/lib/lrama/lexer/token/type.rb +8 -0
- data/lib/lrama/lexer/token.rb +77 -0
- data/lib/lrama/lexer.rb +4 -74
- data/lib/lrama/output.rb +32 -4
- data/lib/lrama/parser/token_scanner.rb +3 -6
- data/lib/lrama/parser.rb +9 -1
- data/lib/lrama/report/duration.rb +25 -0
- data/lib/lrama/report/profile.rb +25 -0
- data/lib/lrama/report.rb +2 -47
- data/lib/lrama/state/reduce_reduce_conflict.rb +9 -0
- data/lib/lrama/state/resolved_conflict.rb +29 -0
- data/lib/lrama/state/shift_reduce_conflict.rb +9 -0
- data/lib/lrama/state.rb +13 -30
- data/lib/lrama/states/item.rb +79 -0
- data/lib/lrama/states.rb +24 -73
- data/lib/lrama/states_reporter.rb +28 -3
- data/lib/lrama/type.rb +4 -0
- data/lib/lrama/version.rb +1 -1
- data/lib/lrama.rb +2 -0
- data/lrama.gemspec +1 -1
- data/sig/lrama/{report.rbs → report/duration.rbs} +0 -4
- data/sig/lrama/report/profile.rbs +7 -0
- data/template/bison/yacc.c +371 -0
- metadata +30 -5
data/lib/lrama/grammar.rb
CHANGED
@@ -1,293 +1,23 @@
|
|
1
|
-
require "forwardable"
|
1
|
+
require "lrama/grammar/auxiliary"
|
2
|
+
require "lrama/grammar/code"
|
3
|
+
require "lrama/grammar/error_token"
|
4
|
+
require "lrama/grammar/precedence"
|
5
|
+
require "lrama/grammar/printer"
|
6
|
+
require "lrama/grammar/reference"
|
7
|
+
require "lrama/grammar/rule"
|
8
|
+
require "lrama/grammar/symbol"
|
9
|
+
require "lrama/grammar/union"
|
2
10
|
require "lrama/lexer"
|
11
|
+
require "lrama/type"
|
3
12
|
|
4
13
|
module Lrama
|
5
|
-
Rule = Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) do
|
6
|
-
# TODO: Change this to display_name
|
7
|
-
def to_s
|
8
|
-
l = lhs.id.s_value
|
9
|
-
r = rhs.empty? ? "ε" : rhs.map {|r| r.id.s_value }.join(", ")
|
10
|
-
|
11
|
-
"#{l} -> #{r}"
|
12
|
-
end
|
13
|
-
|
14
|
-
# Used by #user_actions
|
15
|
-
def as_comment
|
16
|
-
l = lhs.id.s_value
|
17
|
-
r = rhs.empty? ? "%empty" : rhs.map {|r| r.display_name }.join(" ")
|
18
|
-
|
19
|
-
"#{l}: #{r}"
|
20
|
-
end
|
21
|
-
|
22
|
-
def precedence
|
23
|
-
precedence_sym && precedence_sym.precedence
|
24
|
-
end
|
25
|
-
|
26
|
-
def initial_rule?
|
27
|
-
id == 0
|
28
|
-
end
|
29
|
-
|
30
|
-
def translated_code
|
31
|
-
if code
|
32
|
-
code.translated_code
|
33
|
-
else
|
34
|
-
nil
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Symbol is both of nterm and term
|
40
|
-
# `number` is both for nterm and term
|
41
|
-
# `token_id` is tokentype for term, internal sequence number for nterm
|
42
|
-
#
|
43
|
-
# TODO: Add validation for ASCII code range for Token::Char
|
44
|
-
Symbol = Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, keyword_init: true) do
|
45
|
-
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
|
46
|
-
|
47
|
-
def term?
|
48
|
-
term
|
49
|
-
end
|
50
|
-
|
51
|
-
def nterm?
|
52
|
-
!term
|
53
|
-
end
|
54
|
-
|
55
|
-
def eof_symbol?
|
56
|
-
!!@eof_symbol
|
57
|
-
end
|
58
|
-
|
59
|
-
def error_symbol?
|
60
|
-
!!@error_symbol
|
61
|
-
end
|
62
|
-
|
63
|
-
def undef_symbol?
|
64
|
-
!!@undef_symbol
|
65
|
-
end
|
66
|
-
|
67
|
-
def accept_symbol?
|
68
|
-
!!@accept_symbol
|
69
|
-
end
|
70
|
-
|
71
|
-
def display_name
|
72
|
-
if alias_name
|
73
|
-
alias_name
|
74
|
-
else
|
75
|
-
id.s_value
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
# name for yysymbol_kind_t
|
80
|
-
#
|
81
|
-
# See: b4_symbol_kind_base
|
82
|
-
def enum_name
|
83
|
-
case
|
84
|
-
when accept_symbol?
|
85
|
-
name = "YYACCEPT"
|
86
|
-
when eof_symbol?
|
87
|
-
name = "YYEOF"
|
88
|
-
when term? && id.type == Token::Char
|
89
|
-
if alias_name
|
90
|
-
name = number.to_s + alias_name
|
91
|
-
else
|
92
|
-
name = number.to_s + id.s_value
|
93
|
-
end
|
94
|
-
when term? && id.type == Token::Ident
|
95
|
-
name = id.s_value
|
96
|
-
when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
|
97
|
-
name = number.to_s + id.s_value
|
98
|
-
when nterm?
|
99
|
-
name = id.s_value
|
100
|
-
else
|
101
|
-
raise "Unexpected #{self}"
|
102
|
-
end
|
103
|
-
|
104
|
-
"YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_")
|
105
|
-
end
|
106
|
-
|
107
|
-
# comment for yysymbol_kind_t
|
108
|
-
def comment
|
109
|
-
case
|
110
|
-
when accept_symbol?
|
111
|
-
# YYSYMBOL_YYACCEPT
|
112
|
-
id.s_value
|
113
|
-
when eof_symbol?
|
114
|
-
# YYEOF
|
115
|
-
alias_name
|
116
|
-
when (term? && 0 < token_id && token_id < 128)
|
117
|
-
# YYSYMBOL_3_backslash_, YYSYMBOL_14_
|
118
|
-
alias_name || id.s_value
|
119
|
-
when id.s_value.include?("$") || id.s_value.include?("@")
|
120
|
-
# YYSYMBOL_21_1
|
121
|
-
id.s_value
|
122
|
-
else
|
123
|
-
# YYSYMBOL_keyword_class, YYSYMBOL_strings_1
|
124
|
-
alias_name || id.s_value
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
Type = Struct.new(:id, :tag, keyword_init: true)
|
130
|
-
|
131
|
-
Code = Struct.new(:type, :token_code, keyword_init: true) do
|
132
|
-
extend Forwardable
|
133
|
-
|
134
|
-
def_delegators "token_code", :s_value, :line, :column, :references
|
135
|
-
|
136
|
-
# $$, $n, @$, @n is translated to C code
|
137
|
-
def translated_code
|
138
|
-
case type
|
139
|
-
when :user_code
|
140
|
-
translated_user_code
|
141
|
-
when :initial_action
|
142
|
-
translated_initial_action_code
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
# * ($1) error
|
147
|
-
# * ($$) *yyvaluep
|
148
|
-
# * (@1) error
|
149
|
-
# * (@$) *yylocationp
|
150
|
-
def translated_printer_code(tag)
|
151
|
-
t_code = s_value.dup
|
152
|
-
|
153
|
-
references.reverse.each do |ref|
|
154
|
-
first_column = ref.first_column
|
155
|
-
last_column = ref.last_column
|
156
|
-
|
157
|
-
case
|
158
|
-
when ref.value == "$" && ref.type == :dollar # $$
|
159
|
-
# Omit "<>"
|
160
|
-
member = tag.s_value[1..-2]
|
161
|
-
str = "((*yyvaluep).#{member})"
|
162
|
-
when ref.value == "$" && ref.type == :at # @$
|
163
|
-
str = "(*yylocationp)"
|
164
|
-
when ref.type == :dollar # $n
|
165
|
-
raise "$#{ref.value} can not be used in %printer."
|
166
|
-
when ref.type == :at # @n
|
167
|
-
raise "@#{ref.value} can not be used in %printer."
|
168
|
-
else
|
169
|
-
raise "Unexpected. #{self}, #{ref}"
|
170
|
-
end
|
171
|
-
|
172
|
-
t_code[first_column..last_column] = str
|
173
|
-
end
|
174
|
-
|
175
|
-
return t_code
|
176
|
-
end
|
177
|
-
|
178
|
-
|
179
|
-
private
|
180
|
-
|
181
|
-
# * ($1) yyvsp[i]
|
182
|
-
# * ($$) yyval
|
183
|
-
# * (@1) yylsp[i]
|
184
|
-
# * (@$) yyloc
|
185
|
-
def translated_user_code
|
186
|
-
t_code = s_value.dup
|
187
|
-
|
188
|
-
references.reverse.each do |ref|
|
189
|
-
first_column = ref.first_column
|
190
|
-
last_column = ref.last_column
|
191
|
-
|
192
|
-
case
|
193
|
-
when ref.value == "$" && ref.type == :dollar # $$
|
194
|
-
# Omit "<>"
|
195
|
-
member = ref.tag.s_value[1..-2]
|
196
|
-
str = "(yyval.#{member})"
|
197
|
-
when ref.value == "$" && ref.type == :at # @$
|
198
|
-
str = "(yyloc)"
|
199
|
-
when ref.type == :dollar # $n
|
200
|
-
i = -ref.position_in_rhs + ref.value
|
201
|
-
# Omit "<>"
|
202
|
-
member = ref.tag.s_value[1..-2]
|
203
|
-
str = "(yyvsp[#{i}].#{member})"
|
204
|
-
when ref.type == :at # @n
|
205
|
-
i = -ref.position_in_rhs + ref.value
|
206
|
-
str = "(yylsp[#{i}])"
|
207
|
-
else
|
208
|
-
raise "Unexpected. #{self}, #{ref}"
|
209
|
-
end
|
210
|
-
|
211
|
-
t_code[first_column..last_column] = str
|
212
|
-
end
|
213
|
-
|
214
|
-
return t_code
|
215
|
-
end
|
216
|
-
|
217
|
-
# * ($1) error
|
218
|
-
# * ($$) yylval
|
219
|
-
# * (@1) error
|
220
|
-
# * (@$) yylloc
|
221
|
-
def translated_initial_action_code
|
222
|
-
t_code = s_value.dup
|
223
|
-
|
224
|
-
references.reverse.each do |ref|
|
225
|
-
first_column = ref.first_column
|
226
|
-
last_column = ref.last_column
|
227
|
-
|
228
|
-
case
|
229
|
-
when ref.value == "$" && ref.type == :dollar # $$
|
230
|
-
str = "yylval"
|
231
|
-
when ref.value == "$" && ref.type == :at # @$
|
232
|
-
str = "yylloc"
|
233
|
-
when ref.type == :dollar # $n
|
234
|
-
raise "$#{ref.value} can not be used in initial_action."
|
235
|
-
when ref.type == :at # @n
|
236
|
-
raise "@#{ref.value} can not be used in initial_action."
|
237
|
-
else
|
238
|
-
raise "Unexpected. #{self}, #{ref}"
|
239
|
-
end
|
240
|
-
|
241
|
-
t_code[first_column..last_column] = str
|
242
|
-
end
|
243
|
-
|
244
|
-
return t_code
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
# type: :dollar or :at
|
249
|
-
# ex_tag: "$<tag>1" (Optional)
|
250
|
-
Reference = Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
|
251
|
-
def tag
|
252
|
-
if ex_tag
|
253
|
-
ex_tag
|
254
|
-
else
|
255
|
-
referring_symbol.tag
|
256
|
-
end
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
Precedence = Struct.new(:type, :precedence, keyword_init: true) do
|
261
|
-
include Comparable
|
262
|
-
|
263
|
-
def <=>(other)
|
264
|
-
self.precedence <=> other.precedence
|
265
|
-
end
|
266
|
-
end
|
267
|
-
|
268
|
-
Printer = Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) do
|
269
|
-
def translated_code(member)
|
270
|
-
code.translated_printer_code(member)
|
271
|
-
end
|
272
|
-
end
|
273
|
-
|
274
|
-
Union = Struct.new(:code, :lineno, keyword_init: true) do
|
275
|
-
def braces_less_code
|
276
|
-
# Remove braces
|
277
|
-
code.s_value[1..-2]
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
14
|
Token = Lrama::Lexer::Token
|
282
15
|
|
283
16
|
# Grammar is the result of parsing an input grammar file
|
284
17
|
class Grammar
|
285
|
-
# Grammar file information not used by States but by Output
|
286
|
-
Aux = Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true)
|
287
|
-
|
288
18
|
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
289
19
|
attr_accessor :union, :expect,
|
290
|
-
:printers,
|
20
|
+
:printers, :error_tokens,
|
291
21
|
:lex_param, :parse_param, :initial_action,
|
292
22
|
:symbols, :types,
|
293
23
|
:rules, :_rules,
|
@@ -295,6 +25,7 @@ module Lrama
|
|
295
25
|
|
296
26
|
def initialize
|
297
27
|
@printers = []
|
28
|
+
@error_tokens = []
|
298
29
|
@symbols = []
|
299
30
|
@types = []
|
300
31
|
@_rules = []
|
@@ -305,7 +36,7 @@ module Lrama
|
|
305
36
|
@error_symbol = nil
|
306
37
|
@undef_symbol = nil
|
307
38
|
@accept_symbol = nil
|
308
|
-
@aux = Aux.new
|
39
|
+
@aux = Auxiliary.new
|
309
40
|
|
310
41
|
append_special_symbols
|
311
42
|
end
|
@@ -314,6 +45,10 @@ module Lrama
|
|
314
45
|
@printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
|
315
46
|
end
|
316
47
|
|
48
|
+
def add_error_token(ident_or_tags:, code:, lineno:)
|
49
|
+
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
|
50
|
+
end
|
51
|
+
|
317
52
|
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
318
53
|
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
|
319
54
|
if replace
|
@@ -419,12 +154,14 @@ module Lrama
|
|
419
154
|
fill_sym_to_rules
|
420
155
|
fill_nterm_type
|
421
156
|
fill_symbol_printer
|
157
|
+
fill_symbol_error_token
|
422
158
|
@symbols.sort_by!(&:number)
|
423
159
|
end
|
424
160
|
|
425
161
|
# TODO: More validation methods
|
426
162
|
def validate!
|
427
163
|
validate_symbol_number_uniqueness!
|
164
|
+
validate_no_declared_type_reference!
|
428
165
|
end
|
429
166
|
|
430
167
|
def compute_nullable
|
@@ -476,6 +213,41 @@ module Lrama
|
|
476
213
|
end
|
477
214
|
end
|
478
215
|
|
216
|
+
def compute_first_set
|
217
|
+
terms.each do |term|
|
218
|
+
term.first_set = Set.new([term]).freeze
|
219
|
+
term.first_set_bitmap = Lrama::Bitmap.from_array([term.number])
|
220
|
+
end
|
221
|
+
|
222
|
+
nterms.each do |nterm|
|
223
|
+
nterm.first_set = Set.new([]).freeze
|
224
|
+
nterm.first_set_bitmap = Lrama::Bitmap.from_array([])
|
225
|
+
end
|
226
|
+
|
227
|
+
while true do
|
228
|
+
changed = false
|
229
|
+
|
230
|
+
@rules.each do |rule|
|
231
|
+
rule.rhs.each do |r|
|
232
|
+
if rule.lhs.first_set_bitmap | r.first_set_bitmap != rule.lhs.first_set_bitmap
|
233
|
+
changed = true
|
234
|
+
rule.lhs.first_set_bitmap = rule.lhs.first_set_bitmap | r.first_set_bitmap
|
235
|
+
end
|
236
|
+
|
237
|
+
break unless r.nullable
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
break unless changed
|
242
|
+
end
|
243
|
+
|
244
|
+
nterms.each do |nterm|
|
245
|
+
nterm.first_set = Lrama::Bitmap.to_array(nterm.first_set_bitmap).map do |number|
|
246
|
+
find_symbol_by_number!(number)
|
247
|
+
end.to_set
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
479
251
|
def find_symbol_by_s_value(s_value)
|
480
252
|
@symbols.find do |sym|
|
481
253
|
sym.id.s_value == s_value
|
@@ -845,6 +617,23 @@ module Lrama
|
|
845
617
|
end
|
846
618
|
end
|
847
619
|
|
620
|
+
def fill_symbol_error_token
|
621
|
+
@symbols.each do |sym|
|
622
|
+
@error_tokens.each do |error_token|
|
623
|
+
error_token.ident_or_tags.each do |ident_or_tag|
|
624
|
+
case ident_or_tag.type
|
625
|
+
when Token::Ident
|
626
|
+
sym.error_token = error_token if sym.id == ident_or_tag
|
627
|
+
when Token::Tag
|
628
|
+
sym.error_token = error_token if sym.tag == ident_or_tag
|
629
|
+
else
|
630
|
+
raise "Unknown token type. #{error_token}"
|
631
|
+
end
|
632
|
+
end
|
633
|
+
end
|
634
|
+
end
|
635
|
+
end
|
636
|
+
|
848
637
|
def validate_symbol_number_uniqueness!
|
849
638
|
invalid = @symbols.group_by(&:number).select do |number, syms|
|
850
639
|
syms.count > 1
|
@@ -854,5 +643,23 @@ module Lrama
|
|
854
643
|
|
855
644
|
raise "Symbol number is duplicated. #{invalid}"
|
856
645
|
end
|
646
|
+
|
647
|
+
def validate_no_declared_type_reference!
|
648
|
+
errors = []
|
649
|
+
|
650
|
+
rules.each do |rule|
|
651
|
+
next unless rule.code
|
652
|
+
|
653
|
+
rule.code.references.select do |ref|
|
654
|
+
ref.type == :dollar && !ref.tag
|
655
|
+
end.each do |ref|
|
656
|
+
errors << "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type"
|
657
|
+
end
|
658
|
+
end
|
659
|
+
|
660
|
+
return if errors.empty?
|
661
|
+
|
662
|
+
raise errors.join("\n")
|
663
|
+
end
|
857
664
|
end
|
858
665
|
end
|
data/lib/lrama/lexer/token.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'lrama/lexer/token/type'
|
2
|
+
|
3
|
+
module Lrama
|
4
|
+
class Lexer
|
5
|
+
class Token
|
6
|
+
|
7
|
+
attr_accessor :line, :column, :referred
|
8
|
+
# For User_code
|
9
|
+
attr_accessor :references
|
10
|
+
|
11
|
+
def to_s
|
12
|
+
"#{super} line: #{line}, column: #{column}"
|
13
|
+
end
|
14
|
+
|
15
|
+
def referred_by?(string)
|
16
|
+
[self.s_value, self.alias].include?(string)
|
17
|
+
end
|
18
|
+
|
19
|
+
def ==(other)
|
20
|
+
self.class == other.class && self.type == other.type && self.s_value == other.s_value
|
21
|
+
end
|
22
|
+
|
23
|
+
def numberize_references(lhs, rhs)
|
24
|
+
self.references.map! {|ref|
|
25
|
+
ref_name = ref[1]
|
26
|
+
if ref_name.is_a?(::String) && ref_name != '$'
|
27
|
+
value =
|
28
|
+
if lhs.referred_by?(ref_name)
|
29
|
+
'$'
|
30
|
+
else
|
31
|
+
rhs.find_index {|token| token.referred_by?(ref_name) } + 1
|
32
|
+
end
|
33
|
+
[ref[0], value, ref[2], ref[3], ref[4]]
|
34
|
+
else
|
35
|
+
ref
|
36
|
+
end
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
@i = 0
|
41
|
+
@types = []
|
42
|
+
|
43
|
+
def self.define_type(name)
|
44
|
+
type = Type.new(id: @i, name: name.to_s)
|
45
|
+
const_set(name, type)
|
46
|
+
@types << type
|
47
|
+
@i += 1
|
48
|
+
end
|
49
|
+
|
50
|
+
# Token types
|
51
|
+
define_type(:P_expect) # %expect
|
52
|
+
define_type(:P_define) # %define
|
53
|
+
define_type(:P_printer) # %printer
|
54
|
+
define_type(:P_error_token) # %error-token
|
55
|
+
define_type(:P_lex_param) # %lex-param
|
56
|
+
define_type(:P_parse_param) # %parse-param
|
57
|
+
define_type(:P_initial_action) # %initial-action
|
58
|
+
define_type(:P_union) # %union
|
59
|
+
define_type(:P_token) # %token
|
60
|
+
define_type(:P_type) # %type
|
61
|
+
define_type(:P_nonassoc) # %nonassoc
|
62
|
+
define_type(:P_left) # %left
|
63
|
+
define_type(:P_right) # %right
|
64
|
+
define_type(:P_prec) # %prec
|
65
|
+
define_type(:User_code) # { ... }
|
66
|
+
define_type(:Tag) # <int>
|
67
|
+
define_type(:Number) # 0
|
68
|
+
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
69
|
+
define_type(:Ident) # api.pure, tNUMBER
|
70
|
+
define_type(:Named_Ref) # [foo]
|
71
|
+
define_type(:Semicolon) # ;
|
72
|
+
define_type(:Bar) # |
|
73
|
+
define_type(:String) # "str"
|
74
|
+
define_type(:Char) # '+'
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/lrama/lexer.rb
CHANGED
@@ -1,84 +1,12 @@
|
|
1
1
|
require "strscan"
|
2
|
-
require "lrama/report"
|
2
|
+
require "lrama/report/duration"
|
3
|
+
require "lrama/lexer/token"
|
3
4
|
|
4
5
|
module Lrama
|
5
6
|
# Lexer for parse.y
|
6
7
|
class Lexer
|
7
8
|
include Lrama::Report::Duration
|
8
9
|
|
9
|
-
# s_value is semantic value
|
10
|
-
Token = Struct.new(:type, :s_value, :alias, keyword_init: true) do
|
11
|
-
Type = Struct.new(:id, :name, keyword_init: true)
|
12
|
-
|
13
|
-
attr_accessor :line, :column, :referred
|
14
|
-
# For User_code
|
15
|
-
attr_accessor :references
|
16
|
-
|
17
|
-
def to_s
|
18
|
-
"#{super} line: #{line}, column: #{column}"
|
19
|
-
end
|
20
|
-
|
21
|
-
def referred_by?(string)
|
22
|
-
[self.s_value, self.alias].include?(string)
|
23
|
-
end
|
24
|
-
|
25
|
-
def ==(other)
|
26
|
-
self.class == other.class && self.type == other.type && self.s_value == other.s_value
|
27
|
-
end
|
28
|
-
|
29
|
-
def numberize_references(lhs, rhs)
|
30
|
-
self.references.map! {|ref|
|
31
|
-
ref_name = ref[1]
|
32
|
-
if ref_name.is_a?(String) && ref_name != '$'
|
33
|
-
value =
|
34
|
-
if lhs.referred_by?(ref_name)
|
35
|
-
'$'
|
36
|
-
else
|
37
|
-
rhs.find_index {|token| token.referred_by?(ref_name) } + 1
|
38
|
-
end
|
39
|
-
[ref[0], value, ref[2], ref[3], ref[4]]
|
40
|
-
else
|
41
|
-
ref
|
42
|
-
end
|
43
|
-
}
|
44
|
-
end
|
45
|
-
|
46
|
-
@i = 0
|
47
|
-
@types = []
|
48
|
-
|
49
|
-
def self.define_type(name)
|
50
|
-
type = Type.new(id: @i, name: name.to_s)
|
51
|
-
const_set(name, type)
|
52
|
-
@types << type
|
53
|
-
@i += 1
|
54
|
-
end
|
55
|
-
|
56
|
-
# Token types
|
57
|
-
define_type(:P_expect) # %expect
|
58
|
-
define_type(:P_define) # %define
|
59
|
-
define_type(:P_printer) # %printer
|
60
|
-
define_type(:P_lex_param) # %lex-param
|
61
|
-
define_type(:P_parse_param) # %parse-param
|
62
|
-
define_type(:P_initial_action) # %initial-action
|
63
|
-
define_type(:P_union) # %union
|
64
|
-
define_type(:P_token) # %token
|
65
|
-
define_type(:P_type) # %type
|
66
|
-
define_type(:P_nonassoc) # %nonassoc
|
67
|
-
define_type(:P_left) # %left
|
68
|
-
define_type(:P_right) # %right
|
69
|
-
define_type(:P_prec) # %prec
|
70
|
-
define_type(:User_code) # { ... }
|
71
|
-
define_type(:Tag) # <int>
|
72
|
-
define_type(:Number) # 0
|
73
|
-
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
74
|
-
define_type(:Ident) # api.pure, tNUMBER
|
75
|
-
define_type(:Named_Ref) # [foo]
|
76
|
-
define_type(:Semicolon) # ;
|
77
|
-
define_type(:Bar) # |
|
78
|
-
define_type(:String) # "str"
|
79
|
-
define_type(:Char) # '+'
|
80
|
-
end
|
81
|
-
|
82
10
|
# States
|
83
11
|
#
|
84
12
|
# See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
|
@@ -207,6 +135,8 @@ module Lrama
|
|
207
135
|
tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
|
208
136
|
when ss.scan(/%printer/)
|
209
137
|
tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
|
138
|
+
when ss.scan(/%error-token/)
|
139
|
+
tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
|
210
140
|
when ss.scan(/%lex-param/)
|
211
141
|
tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
|
212
142
|
when ss.scan(/%parse-param/)
|
data/lib/lrama/output.rb
CHANGED
@@ -1,20 +1,24 @@
|
|
1
1
|
require "erb"
|
2
2
|
require "forwardable"
|
3
|
-
require "lrama/report"
|
3
|
+
require "lrama/report/duration"
|
4
4
|
|
5
5
|
module Lrama
|
6
6
|
class Output
|
7
7
|
extend Forwardable
|
8
8
|
include Report::Duration
|
9
9
|
|
10
|
-
attr_reader :grammar_file_path, :context, :grammar
|
10
|
+
attr_reader :grammar_file_path, :context, :grammar, :error_recovery
|
11
11
|
|
12
12
|
def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
|
13
13
|
:yymaxutok, :yypact_ninf, :yytable_ninf
|
14
14
|
|
15
15
|
def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
|
16
16
|
|
17
|
-
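def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:)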
|
17
|
+
def initialize(
|
18
|
+
out:, output_file_path:, template_name:, grammar_file_path:,
|
19
|
+
header_out: nil, header_file_path: nil,
|
20
|
+
context:, grammar:, error_recovery: false
|
21
|
+
)
|
18
22
|
@out = out
|
19
23
|
@output_file_path = output_file_path
|
20
24
|
@template_name = template_name
|
@@ -23,6 +27,7 @@ module Lrama
|
|
23
27
|
@header_file_path = header_file_path
|
24
28
|
@context = context
|
25
29
|
@grammar = grammar
|
30
|
+
@error_recovery = error_recovery
|
26
31
|
end
|
27
32
|
|
28
33
|
if ERB.instance_method(:initialize).parameters.last.first == :key
|
@@ -98,6 +103,10 @@ module Lrama
|
|
98
103
|
int_array_to_string(@context.yytranslate)
|
99
104
|
end
|
100
105
|
|
106
|
+
def yytranslate_inverted
|
107
|
+
int_array_to_string(@context.yytranslate_inverted)
|
108
|
+
end
|
109
|
+
|
101
110
|
def yyrline
|
102
111
|
int_array_to_string(@context.yyrline)
|
103
112
|
end
|
@@ -155,6 +164,25 @@ module Lrama
|
|
155
164
|
STR
|
156
165
|
end
|
157
166
|
|
167
|
+
def symbol_actions_for_error_token
|
168
|
+
str = ""
|
169
|
+
|
170
|
+
@grammar.symbols.each do |sym|
|
171
|
+
next unless sym.error_token
|
172
|
+
|
173
|
+
str << <<-STR
|
174
|
+
case #{sym.enum_name}: /* #{sym.comment} */
|
175
|
+
#line #{sym.error_token.lineno} "#{@grammar_file_path}"
|
176
|
+
#{sym.error_token.translated_code(sym.tag)}
|
177
|
+
#line [@oline@] [@ofile@]
|
178
|
+
break;
|
179
|
+
|
180
|
+
STR
|
181
|
+
end
|
182
|
+
|
183
|
+
str
|
184
|
+
end
|
185
|
+
|
158
186
|
# b4_user_actions
|
159
187
|
def user_actions
|
160
188
|
str = ""
|
@@ -224,7 +252,7 @@ module Lrama
|
|
224
252
|
end
|
225
253
|
|
226
254
|
def extract_param_name(param)
|
227
|
-
/\A(
|
255
|
+
/\A(\W*)([a-zA-Z0-9_]+)\z/.match(param.split.last)[2]
|
228
256
|
end
|
229
257
|
|
230
258
|
def parse_param_name
|