lrama 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +2 -3
- data/.gitignore +2 -0
- data/Gemfile +1 -0
- data/LEGAL.md +1 -16
- data/README.md +1 -1
- data/Steepfile +3 -0
- data/doc/TODO.md +4 -3
- data/exe/lrama +1 -1
- data/lib/lrama/command.rb +90 -71
- data/lib/lrama/context.rb +11 -1
- data/lib/lrama/grammar/code.rb +123 -0
- data/lib/lrama/grammar/error_token.rb +9 -0
- data/lib/lrama/grammar/precedence.rb +11 -0
- data/lib/lrama/grammar/printer.rb +9 -0
- data/lib/lrama/grammar/reference.rb +22 -0
- data/lib/lrama/grammar/rule.rb +33 -0
- data/lib/lrama/grammar/symbol.rb +94 -0
- data/lib/lrama/grammar/union.rb +10 -0
- data/lib/lrama/grammar.rb +67 -285
- data/lib/lrama/lexer/token.rb +76 -0
- data/lib/lrama/lexer.rb +12 -48
- data/lib/lrama/output.rb +31 -3
- data/lib/lrama/parser/token_scanner.rb +4 -0
- data/lib/lrama/parser.rb +19 -5
- data/lib/lrama/report/duration.rb +25 -0
- data/lib/lrama/report/profile.rb +25 -0
- data/lib/lrama/report.rb +2 -47
- data/lib/lrama/state/resolved_conflict.rb +29 -0
- data/lib/lrama/state.rb +3 -28
- data/lib/lrama/states/item.rb +43 -0
- data/lib/lrama/states.rb +3 -41
- data/lib/lrama/version.rb +1 -1
- data/lrama.gemspec +2 -2
- data/rbs_collection.lock.yaml +26 -0
- data/rbs_collection.yaml +22 -0
- data/sig/lrama/report/duration.rbs +11 -0
- data/sig/lrama/report/profile.rbs +7 -0
- data/sig/lrama/warning.rbs +16 -0
- data/template/bison/yacc.c +397 -32
- metadata +23 -5
data/lib/lrama/grammar.rb
CHANGED
@@ -1,283 +1,15 @@
|
|
1
|
-
require "
|
1
|
+
require "lrama/grammar/code"
|
2
|
+
require "lrama/grammar/error_token"
|
3
|
+
require "lrama/grammar/precedence"
|
4
|
+
require "lrama/grammar/printer"
|
5
|
+
require "lrama/grammar/reference"
|
6
|
+
require "lrama/grammar/rule"
|
7
|
+
require "lrama/grammar/symbol"
|
8
|
+
require "lrama/grammar/union"
|
2
9
|
require "lrama/lexer"
|
3
10
|
|
4
11
|
module Lrama
|
5
|
-
Rule = Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) do
|
6
|
-
# TODO: Change this to display_name
|
7
|
-
def to_s
|
8
|
-
l = lhs.id.s_value
|
9
|
-
r = rhs.empty? ? "ε" : rhs.map {|r| r.id.s_value }.join(", ")
|
10
|
-
|
11
|
-
"#{l} -> #{r}"
|
12
|
-
end
|
13
|
-
|
14
|
-
# Used by #user_actions
|
15
|
-
def as_comment
|
16
|
-
l = lhs.id.s_value
|
17
|
-
r = rhs.empty? ? "%empty" : rhs.map {|r| r.display_name }.join(" ")
|
18
|
-
|
19
|
-
"#{l}: #{r}"
|
20
|
-
end
|
21
|
-
|
22
|
-
def precedence
|
23
|
-
precedence_sym && precedence_sym.precedence
|
24
|
-
end
|
25
|
-
|
26
|
-
def initial_rule?
|
27
|
-
id == 0
|
28
|
-
end
|
29
|
-
|
30
|
-
def translated_code
|
31
|
-
if code
|
32
|
-
code.translated_code
|
33
|
-
else
|
34
|
-
nil
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Symbol is both of nterm and term
|
40
|
-
# `number` is both for nterm and term
|
41
|
-
# `token_id` is tokentype for term, internal sequence number for nterm
|
42
|
-
#
|
43
|
-
# TODO: Add validation for ASCII code range for Token::Char
|
44
|
-
Symbol = Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, keyword_init: true) do
|
45
|
-
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
|
46
|
-
|
47
|
-
def term?
|
48
|
-
term
|
49
|
-
end
|
50
|
-
|
51
|
-
def nterm?
|
52
|
-
!term
|
53
|
-
end
|
54
|
-
|
55
|
-
def eof_symbol?
|
56
|
-
!!@eof_symbol
|
57
|
-
end
|
58
|
-
|
59
|
-
def error_symbol?
|
60
|
-
!!@error_symbol
|
61
|
-
end
|
62
|
-
|
63
|
-
def undef_symbol?
|
64
|
-
!!@undef_symbol
|
65
|
-
end
|
66
|
-
|
67
|
-
def accept_symbol?
|
68
|
-
!!@accept_symbol
|
69
|
-
end
|
70
|
-
|
71
|
-
def display_name
|
72
|
-
if alias_name
|
73
|
-
alias_name
|
74
|
-
else
|
75
|
-
id.s_value
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
# name for yysymbol_kind_t
|
80
|
-
#
|
81
|
-
# See: b4_symbol_kind_base
|
82
|
-
def enum_name
|
83
|
-
case
|
84
|
-
when accept_symbol?
|
85
|
-
name = "YYACCEPT"
|
86
|
-
when eof_symbol?
|
87
|
-
name = "YYEOF"
|
88
|
-
when term? && id.type == Token::Char
|
89
|
-
if alias_name
|
90
|
-
name = number.to_s + alias_name
|
91
|
-
else
|
92
|
-
name = number.to_s + id.s_value
|
93
|
-
end
|
94
|
-
when term? && id.type == Token::Ident
|
95
|
-
name = id.s_value
|
96
|
-
when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
|
97
|
-
name = number.to_s + id.s_value
|
98
|
-
when nterm?
|
99
|
-
name = id.s_value
|
100
|
-
else
|
101
|
-
raise "Unexpected #{self}"
|
102
|
-
end
|
103
|
-
|
104
|
-
"YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_")
|
105
|
-
end
|
106
|
-
|
107
|
-
# comment for yysymbol_kind_t
|
108
|
-
def comment
|
109
|
-
case
|
110
|
-
when accept_symbol?
|
111
|
-
# YYSYMBOL_YYACCEPT
|
112
|
-
id.s_value
|
113
|
-
when eof_symbol?
|
114
|
-
# YYEOF
|
115
|
-
alias_name
|
116
|
-
when (term? && 0 < token_id && token_id < 128)
|
117
|
-
# YYSYMBOL_3_backslash_, YYSYMBOL_14_
|
118
|
-
alias_name || id.s_value
|
119
|
-
when id.s_value.include?("$") || id.s_value.include?("@")
|
120
|
-
# YYSYMBOL_21_1
|
121
|
-
id.s_value
|
122
|
-
else
|
123
|
-
# YYSYMBOL_keyword_class, YYSYMBOL_strings_1
|
124
|
-
alias_name || id.s_value
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
12
|
Type = Struct.new(:id, :tag, keyword_init: true)
|
130
|
-
|
131
|
-
Code = Struct.new(:type, :token_code, keyword_init: true) do
|
132
|
-
extend Forwardable
|
133
|
-
|
134
|
-
def_delegators "token_code", :s_value, :line, :column, :references
|
135
|
-
|
136
|
-
# $$, $n, @$, @n is translated to C code
|
137
|
-
def translated_code
|
138
|
-
case type
|
139
|
-
when :user_code
|
140
|
-
translated_user_code
|
141
|
-
when :initial_action
|
142
|
-
translated_initial_action_code
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
# * ($1) error
|
147
|
-
# * ($$) *yyvaluep
|
148
|
-
# * (@1) error
|
149
|
-
# * (@$) *yylocationp
|
150
|
-
def translated_printer_code(tag)
|
151
|
-
t_code = s_value.dup
|
152
|
-
|
153
|
-
references.reverse.each do |ref|
|
154
|
-
first_column = ref.first_column
|
155
|
-
last_column = ref.last_column
|
156
|
-
|
157
|
-
case
|
158
|
-
when ref.number == "$" && ref.type == :dollar # $$
|
159
|
-
# Omit "<>"
|
160
|
-
member = tag.s_value[1..-2]
|
161
|
-
str = "((*yyvaluep).#{member})"
|
162
|
-
when ref.number == "$" && ref.type == :at # @$
|
163
|
-
str = "(*yylocationp)"
|
164
|
-
when ref.type == :dollar # $n
|
165
|
-
raise "$#{ref.number} can not be used in %printer."
|
166
|
-
when ref.type == :at # @n
|
167
|
-
raise "@#{ref.number} can not be used in %printer."
|
168
|
-
else
|
169
|
-
raise "Unexpected. #{self}, #{ref}"
|
170
|
-
end
|
171
|
-
|
172
|
-
t_code[first_column..last_column] = str
|
173
|
-
end
|
174
|
-
|
175
|
-
return t_code
|
176
|
-
end
|
177
|
-
|
178
|
-
|
179
|
-
private
|
180
|
-
|
181
|
-
# * ($1) yyvsp[i]
|
182
|
-
# * ($$) yyval
|
183
|
-
# * (@1) yylsp[i]
|
184
|
-
# * (@$) yyloc
|
185
|
-
def translated_user_code
|
186
|
-
t_code = s_value.dup
|
187
|
-
|
188
|
-
references.reverse.each do |ref|
|
189
|
-
first_column = ref.first_column
|
190
|
-
last_column = ref.last_column
|
191
|
-
|
192
|
-
case
|
193
|
-
when ref.number == "$" && ref.type == :dollar # $$
|
194
|
-
# Omit "<>"
|
195
|
-
member = ref.tag.s_value[1..-2]
|
196
|
-
str = "(yyval.#{member})"
|
197
|
-
when ref.number == "$" && ref.type == :at # @$
|
198
|
-
str = "(yyloc)"
|
199
|
-
when ref.type == :dollar # $n
|
200
|
-
i = -ref.position_in_rhs + ref.number
|
201
|
-
# Omit "<>"
|
202
|
-
member = ref.tag.s_value[1..-2]
|
203
|
-
str = "(yyvsp[#{i}].#{member})"
|
204
|
-
when ref.type == :at # @n
|
205
|
-
i = -ref.position_in_rhs + ref.number
|
206
|
-
str = "(yylsp[#{i}])"
|
207
|
-
else
|
208
|
-
raise "Unexpected. #{self}, #{ref}"
|
209
|
-
end
|
210
|
-
|
211
|
-
t_code[first_column..last_column] = str
|
212
|
-
end
|
213
|
-
|
214
|
-
return t_code
|
215
|
-
end
|
216
|
-
|
217
|
-
# * ($1) error
|
218
|
-
# * ($$) yylval
|
219
|
-
# * (@1) error
|
220
|
-
# * (@$) yylloc
|
221
|
-
def translated_initial_action_code
|
222
|
-
t_code = s_value.dup
|
223
|
-
|
224
|
-
references.reverse.each do |ref|
|
225
|
-
first_column = ref.first_column
|
226
|
-
last_column = ref.last_column
|
227
|
-
|
228
|
-
case
|
229
|
-
when ref.number == "$" && ref.type == :dollar # $$
|
230
|
-
str = "yylval"
|
231
|
-
when ref.number == "$" && ref.type == :at # @$
|
232
|
-
str = "yylloc"
|
233
|
-
when ref.type == :dollar # $n
|
234
|
-
raise "$#{ref.number} can not be used in initial_action."
|
235
|
-
when ref.type == :at # @n
|
236
|
-
raise "@#{ref.number} can not be used in initial_action."
|
237
|
-
else
|
238
|
-
raise "Unexpected. #{self}, #{ref}"
|
239
|
-
end
|
240
|
-
|
241
|
-
t_code[first_column..last_column] = str
|
242
|
-
end
|
243
|
-
|
244
|
-
return t_code
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
# type: :dollar or :at
|
249
|
-
# ex_tag: "$<tag>1" (Optional)
|
250
|
-
Reference = Struct.new(:type, :number, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
|
251
|
-
def tag
|
252
|
-
if ex_tag
|
253
|
-
ex_tag
|
254
|
-
else
|
255
|
-
referring_symbol.tag
|
256
|
-
end
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
Precedence = Struct.new(:type, :precedence, keyword_init: true) do
|
261
|
-
include Comparable
|
262
|
-
|
263
|
-
def <=>(other)
|
264
|
-
self.precedence <=> other.precedence
|
265
|
-
end
|
266
|
-
end
|
267
|
-
|
268
|
-
Printer = Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) do
|
269
|
-
def translated_code(member)
|
270
|
-
code.translated_printer_code(member)
|
271
|
-
end
|
272
|
-
end
|
273
|
-
|
274
|
-
Union = Struct.new(:code, :lineno, keyword_init: true) do
|
275
|
-
def braces_less_code
|
276
|
-
# Remove braces
|
277
|
-
code.s_value[1..-2]
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
13
|
Token = Lrama::Lexer::Token
|
282
14
|
|
283
15
|
# Grammar is the result of parsing an input grammar file
|
@@ -287,7 +19,7 @@ module Lrama
|
|
287
19
|
|
288
20
|
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
289
21
|
attr_accessor :union, :expect,
|
290
|
-
:printers,
|
22
|
+
:printers, :error_tokens,
|
291
23
|
:lex_param, :parse_param, :initial_action,
|
292
24
|
:symbols, :types,
|
293
25
|
:rules, :_rules,
|
@@ -295,6 +27,7 @@ module Lrama
|
|
295
27
|
|
296
28
|
def initialize
|
297
29
|
@printers = []
|
30
|
+
@error_tokens = []
|
298
31
|
@symbols = []
|
299
32
|
@types = []
|
300
33
|
@_rules = []
|
@@ -314,6 +47,10 @@ module Lrama
|
|
314
47
|
@printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
|
315
48
|
end
|
316
49
|
|
50
|
+
def add_error_token(ident_or_tags:, code:, lineno:)
|
51
|
+
@error_tokens << ErrorToken.new(ident_or_tags, code, lineno)
|
52
|
+
end
|
53
|
+
|
317
54
|
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
318
55
|
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
|
319
56
|
if replace
|
@@ -382,8 +119,8 @@ module Lrama
|
|
382
119
|
end
|
383
120
|
|
384
121
|
def build_references(token_code)
|
385
|
-
token_code.references.map! do |type,
|
386
|
-
Reference.new(type: type,
|
122
|
+
token_code.references.map! do |type, value, tag, first_column, last_column|
|
123
|
+
Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
|
387
124
|
end
|
388
125
|
|
389
126
|
token_code
|
@@ -419,12 +156,14 @@ module Lrama
|
|
419
156
|
fill_sym_to_rules
|
420
157
|
fill_nterm_type
|
421
158
|
fill_symbol_printer
|
159
|
+
fill_symbol_error_token
|
422
160
|
@symbols.sort_by!(&:number)
|
423
161
|
end
|
424
162
|
|
425
163
|
# TODO: More validation methods
|
426
164
|
def validate!
|
427
165
|
validate_symbol_number_uniqueness!
|
166
|
+
validate_no_declared_type_reference!
|
428
167
|
end
|
429
168
|
|
430
169
|
def compute_nullable
|
@@ -627,15 +366,23 @@ module Lrama
|
|
627
366
|
ref.position_in_rhs = i - 1
|
628
367
|
next if ref.type == :at
|
629
368
|
# $$, $n, @$, @n can be used in any actions
|
630
|
-
number = ref.number
|
631
369
|
|
632
|
-
if
|
370
|
+
if ref.value == "$"
|
633
371
|
# TODO: Should be postponed after middle actions are extracted?
|
634
372
|
ref.referring_symbol = lhs
|
635
|
-
|
636
|
-
raise "Can not refer following component. #{
|
637
|
-
rhs1[
|
638
|
-
ref.referring_symbol = rhs1[
|
373
|
+
elsif ref.value.is_a?(Integer)
|
374
|
+
raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i
|
375
|
+
rhs1[ref.value - 1].referred = true
|
376
|
+
ref.referring_symbol = rhs1[ref.value - 1]
|
377
|
+
elsif ref.value.is_a?(String)
|
378
|
+
target_tokens = ([lhs] + rhs1 + [code]).compact.first(i)
|
379
|
+
referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) }
|
380
|
+
raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2
|
381
|
+
raise "Referring symbol `#{ref.value}` is not found. #{token}" if referring_symbol_candidate.count == 0
|
382
|
+
|
383
|
+
referring_symbol = referring_symbol_candidate.first
|
384
|
+
referring_symbol.referred = true
|
385
|
+
ref.referring_symbol = referring_symbol
|
639
386
|
end
|
640
387
|
end
|
641
388
|
end
|
@@ -837,6 +584,23 @@ module Lrama
|
|
837
584
|
end
|
838
585
|
end
|
839
586
|
|
587
|
+
def fill_symbol_error_token
|
588
|
+
@symbols.each do |sym|
|
589
|
+
@error_tokens.each do |error_token|
|
590
|
+
error_token.ident_or_tags.each do |ident_or_tag|
|
591
|
+
case ident_or_tag.type
|
592
|
+
when Token::Ident
|
593
|
+
sym.error_token = error_token if sym.id == ident_or_tag
|
594
|
+
when Token::Tag
|
595
|
+
sym.error_token = error_token if sym.tag == ident_or_tag
|
596
|
+
else
|
597
|
+
raise "Unknown token type. #{error_token}"
|
598
|
+
end
|
599
|
+
end
|
600
|
+
end
|
601
|
+
end
|
602
|
+
end
|
603
|
+
|
840
604
|
def validate_symbol_number_uniqueness!
|
841
605
|
invalid = @symbols.group_by(&:number).select do |number, syms|
|
842
606
|
syms.count > 1
|
@@ -846,5 +610,23 @@ module Lrama
|
|
846
610
|
|
847
611
|
raise "Symbol number is duplicated. #{invalid}"
|
848
612
|
end
|
613
|
+
|
614
|
+
def validate_no_declared_type_reference!
|
615
|
+
errors = []
|
616
|
+
|
617
|
+
rules.each do |rule|
|
618
|
+
next unless rule.code
|
619
|
+
|
620
|
+
rule.code.references.select do |ref|
|
621
|
+
ref.type == :dollar && !ref.tag
|
622
|
+
end.each do |ref|
|
623
|
+
errors << "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type"
|
624
|
+
end
|
625
|
+
end
|
626
|
+
|
627
|
+
return if errors.empty?
|
628
|
+
|
629
|
+
raise errors.join("\n")
|
630
|
+
end
|
849
631
|
end
|
850
632
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Token < Struct.new(:type, :s_value, :alias, keyword_init: true)
|
4
|
+
Type = Struct.new(:id, :name, keyword_init: true)
|
5
|
+
|
6
|
+
attr_accessor :line, :column, :referred
|
7
|
+
# For User_code
|
8
|
+
attr_accessor :references
|
9
|
+
|
10
|
+
def to_s
|
11
|
+
"#{super} line: #{line}, column: #{column}"
|
12
|
+
end
|
13
|
+
|
14
|
+
def referred_by?(string)
|
15
|
+
[self.s_value, self.alias].include?(string)
|
16
|
+
end
|
17
|
+
|
18
|
+
def ==(other)
|
19
|
+
self.class == other.class && self.type == other.type && self.s_value == other.s_value
|
20
|
+
end
|
21
|
+
|
22
|
+
def numberize_references(lhs, rhs)
|
23
|
+
self.references.map! {|ref|
|
24
|
+
ref_name = ref[1]
|
25
|
+
if ref_name.is_a?(::String) && ref_name != '$'
|
26
|
+
value =
|
27
|
+
if lhs.referred_by?(ref_name)
|
28
|
+
'$'
|
29
|
+
else
|
30
|
+
rhs.find_index {|token| token.referred_by?(ref_name) } + 1
|
31
|
+
end
|
32
|
+
[ref[0], value, ref[2], ref[3], ref[4]]
|
33
|
+
else
|
34
|
+
ref
|
35
|
+
end
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
@i = 0
|
40
|
+
@types = []
|
41
|
+
|
42
|
+
def self.define_type(name)
|
43
|
+
type = Type.new(id: @i, name: name.to_s)
|
44
|
+
const_set(name, type)
|
45
|
+
@types << type
|
46
|
+
@i += 1
|
47
|
+
end
|
48
|
+
|
49
|
+
# Token types
|
50
|
+
define_type(:P_expect) # %expect
|
51
|
+
define_type(:P_define) # %define
|
52
|
+
define_type(:P_printer) # %printer
|
53
|
+
define_type(:P_error_token) # %error-token
|
54
|
+
define_type(:P_lex_param) # %lex-param
|
55
|
+
define_type(:P_parse_param) # %parse-param
|
56
|
+
define_type(:P_initial_action) # %initial-action
|
57
|
+
define_type(:P_union) # %union
|
58
|
+
define_type(:P_token) # %token
|
59
|
+
define_type(:P_type) # %type
|
60
|
+
define_type(:P_nonassoc) # %nonassoc
|
61
|
+
define_type(:P_left) # %left
|
62
|
+
define_type(:P_right) # %right
|
63
|
+
define_type(:P_prec) # %prec
|
64
|
+
define_type(:User_code) # { ... }
|
65
|
+
define_type(:Tag) # <int>
|
66
|
+
define_type(:Number) # 0
|
67
|
+
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
68
|
+
define_type(:Ident) # api.pure, tNUMBER
|
69
|
+
define_type(:Named_Ref) # [foo]
|
70
|
+
define_type(:Semicolon) # ;
|
71
|
+
define_type(:Bar) # |
|
72
|
+
define_type(:String) # "str"
|
73
|
+
define_type(:Char) # '+'
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
data/lib/lrama/lexer.rb
CHANGED
@@ -1,58 +1,12 @@
|
|
1
1
|
require "strscan"
|
2
|
-
require "lrama/report"
|
2
|
+
require "lrama/report/duration"
|
3
|
+
require "lrama/lexer/token"
|
3
4
|
|
4
5
|
module Lrama
|
5
6
|
# Lexer for parse.y
|
6
7
|
class Lexer
|
7
8
|
include Lrama::Report::Duration
|
8
9
|
|
9
|
-
# s_value is semantic value
|
10
|
-
Token = Struct.new(:type, :s_value, keyword_init: true) do
|
11
|
-
Type = Struct.new(:id, :name, keyword_init: true)
|
12
|
-
|
13
|
-
attr_accessor :line, :column, :referred
|
14
|
-
# For User_code
|
15
|
-
attr_accessor :references
|
16
|
-
|
17
|
-
def to_s
|
18
|
-
"#{super} line: #{line}, column: #{column}"
|
19
|
-
end
|
20
|
-
|
21
|
-
@i = 0
|
22
|
-
@types = []
|
23
|
-
|
24
|
-
def self.define_type(name)
|
25
|
-
type = Type.new(id: @i, name: name.to_s)
|
26
|
-
const_set(name, type)
|
27
|
-
@types << type
|
28
|
-
@i += 1
|
29
|
-
end
|
30
|
-
|
31
|
-
# Token types
|
32
|
-
define_type(:P_expect) # %expect
|
33
|
-
define_type(:P_define) # %define
|
34
|
-
define_type(:P_printer) # %printer
|
35
|
-
define_type(:P_lex_param) # %lex-param
|
36
|
-
define_type(:P_parse_param) # %parse-param
|
37
|
-
define_type(:P_initial_action) # %initial-action
|
38
|
-
define_type(:P_union) # %union
|
39
|
-
define_type(:P_token) # %token
|
40
|
-
define_type(:P_type) # %type
|
41
|
-
define_type(:P_nonassoc) # %nonassoc
|
42
|
-
define_type(:P_left) # %left
|
43
|
-
define_type(:P_right) # %right
|
44
|
-
define_type(:P_prec) # %prec
|
45
|
-
define_type(:User_code) # { ... }
|
46
|
-
define_type(:Tag) # <int>
|
47
|
-
define_type(:Number) # 0
|
48
|
-
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
49
|
-
define_type(:Ident) # api.pure, tNUMBER
|
50
|
-
define_type(:Semicolon) # ;
|
51
|
-
define_type(:Bar) # |
|
52
|
-
define_type(:String) # "str"
|
53
|
-
define_type(:Char) # '+'
|
54
|
-
end
|
55
|
-
|
56
10
|
# States
|
57
11
|
#
|
58
12
|
# See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
|
@@ -166,16 +120,23 @@ module Lrama
|
|
166
120
|
tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
|
167
121
|
when ss.scan(/(<[a-zA-Z0-9_]+>)/)
|
168
122
|
tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
|
123
|
+
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
|
124
|
+
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
|
125
|
+
tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
|
169
126
|
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
|
170
127
|
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
|
171
128
|
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
|
172
129
|
tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
|
130
|
+
when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
|
131
|
+
tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
|
173
132
|
when ss.scan(/%expect/)
|
174
133
|
tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
|
175
134
|
when ss.scan(/%define/)
|
176
135
|
tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
|
177
136
|
when ss.scan(/%printer/)
|
178
137
|
tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
|
138
|
+
when ss.scan(/%error-token/)
|
139
|
+
tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
|
179
140
|
when ss.scan(/%lex-param/)
|
180
141
|
tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
|
181
142
|
when ss.scan(/%parse-param/)
|
@@ -257,6 +218,9 @@ module Lrama
|
|
257
218
|
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
258
219
|
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
|
259
220
|
references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
|
221
|
+
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program
|
222
|
+
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
|
223
|
+
references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
|
260
224
|
when ss.scan(/@\$/) # @$
|
261
225
|
references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
|
262
226
|
when ss.scan(/@(\d)+/) # @1
|
data/lib/lrama/output.rb
CHANGED
@@ -1,20 +1,24 @@
|
|
1
1
|
require "erb"
|
2
2
|
require "forwardable"
|
3
|
-
require "lrama/report"
|
3
|
+
require "lrama/report/duration"
|
4
4
|
|
5
5
|
module Lrama
|
6
6
|
class Output
|
7
7
|
extend Forwardable
|
8
8
|
include Report::Duration
|
9
9
|
|
10
|
-
attr_reader :grammar_file_path, :context, :grammar
|
10
|
+
attr_reader :grammar_file_path, :context, :grammar, :error_recovery
|
11
11
|
|
12
12
|
def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
|
13
13
|
:yymaxutok, :yypact_ninf, :yytable_ninf
|
14
14
|
|
15
15
|
def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
|
16
16
|
|
17
|
-
def initialize(
|
17
|
+
def initialize(
|
18
|
+
out:, output_file_path:, template_name:, grammar_file_path:,
|
19
|
+
header_out: nil, header_file_path: nil,
|
20
|
+
context:, grammar:, error_recovery: false
|
21
|
+
)
|
18
22
|
@out = out
|
19
23
|
@output_file_path = output_file_path
|
20
24
|
@template_name = template_name
|
@@ -23,6 +27,7 @@ module Lrama
|
|
23
27
|
@header_file_path = header_file_path
|
24
28
|
@context = context
|
25
29
|
@grammar = grammar
|
30
|
+
@error_recovery = error_recovery
|
26
31
|
end
|
27
32
|
|
28
33
|
if ERB.instance_method(:initialize).parameters.last.first == :key
|
@@ -98,6 +103,10 @@ module Lrama
|
|
98
103
|
int_array_to_string(@context.yytranslate)
|
99
104
|
end
|
100
105
|
|
106
|
+
def yytranslate_inverted
|
107
|
+
int_array_to_string(@context.yytranslate_inverted)
|
108
|
+
end
|
109
|
+
|
101
110
|
def yyrline
|
102
111
|
int_array_to_string(@context.yyrline)
|
103
112
|
end
|
@@ -155,6 +164,25 @@ module Lrama
|
|
155
164
|
STR
|
156
165
|
end
|
157
166
|
|
167
|
+
def symbol_actions_for_error_token
|
168
|
+
str = ""
|
169
|
+
|
170
|
+
@grammar.symbols.each do |sym|
|
171
|
+
next unless sym.error_token
|
172
|
+
|
173
|
+
str << <<-STR
|
174
|
+
case #{sym.enum_name}: /* #{sym.comment} */
|
175
|
+
#line #{sym.error_token.lineno} "#{@grammar_file_path}"
|
176
|
+
#{sym.error_token.translated_code(sym.tag)}
|
177
|
+
#line [@oline@] [@ofile@]
|
178
|
+
break;
|
179
|
+
|
180
|
+
STR
|
181
|
+
end
|
182
|
+
|
183
|
+
str
|
184
|
+
end
|
185
|
+
|
158
186
|
# b4_user_actions
|
159
187
|
def user_actions
|
160
188
|
str = ""
|