lrama 0.6.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +2 -3
- data/Gemfile +1 -1
- data/NEWS.md +101 -1
- data/README.md +23 -0
- data/Steepfile +5 -0
- data/lib/lrama/context.rb +4 -4
- data/lib/lrama/grammar/code/destructor_code.rb +40 -0
- data/lib/lrama/grammar/code/initial_action_code.rb +6 -0
- data/lib/lrama/grammar/code/no_reference_code.rb +4 -0
- data/lib/lrama/grammar/code/printer_code.rb +6 -0
- data/lib/lrama/grammar/code/rule_action.rb +11 -1
- data/lib/lrama/grammar/code.rb +1 -0
- data/lib/lrama/grammar/destructor.rb +9 -0
- data/lib/lrama/grammar/reference.rb +4 -3
- data/lib/lrama/grammar/rule_builder.rb +10 -3
- data/lib/lrama/grammar/stdlib.y +42 -0
- data/lib/lrama/grammar/symbol.rb +4 -2
- data/lib/lrama/grammar/symbols/resolver.rb +293 -0
- data/lib/lrama/grammar/symbols.rb +1 -0
- data/lib/lrama/grammar.rb +32 -244
- data/lib/lrama/lexer/token/user_code.rb +13 -2
- data/lib/lrama/lexer/token.rb +1 -1
- data/lib/lrama/lexer.rb +7 -0
- data/lib/lrama/option_parser.rb +25 -12
- data/lib/lrama/options.rb +1 -0
- data/lib/lrama/output.rb +75 -2
- data/lib/lrama/parser.rb +537 -464
- data/lib/lrama/state.rb +4 -4
- data/lib/lrama/states/item.rb +6 -8
- data/lib/lrama/states_reporter.rb +2 -2
- data/lib/lrama/version.rb +1 -1
- data/lrama.gemspec +7 -0
- data/parser.y +27 -0
- data/sig/lrama/grammar/binding.rbs +0 -1
- data/sig/lrama/grammar/code/destructor_code.rbs +15 -0
- data/sig/lrama/grammar/destructor.rbs +11 -0
- data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +0 -1
- data/sig/lrama/grammar/reference.rbs +2 -2
- data/sig/lrama/grammar/symbol.rbs +5 -4
- data/sig/lrama/grammar/symbols/resolver.rbs +41 -0
- data/sig/lrama/grammar/type.rbs +11 -0
- data/sig/lrama/options.rbs +17 -0
- data/template/bison/yacc.c +12 -1
- metadata +17 -3
@@ -0,0 +1,293 @@
|
|
1
|
+
module Lrama
  class Grammar
    class Symbols
      # Holds the terminal (+terms+) and nonterminal (+nterms+) symbols of a
      # grammar and provides registration, lookup, number/token id assignment,
      # printer/destructor/error-token attachment and uniqueness validation.
      class Resolver
        # Token ids of character literals that are written as an escape
        # sequence or a quote character. Keys are the text found between the
        # surrounding single quotes of the literal.
        ESCAPED_CHAR_TOKEN_IDS = {
          "\\b" => 8,
          "\\f" => 12,
          "\\n" => 10,
          "\\r" => 13,
          "\\t" => 9,
          "\\v" => 11,
          "\"" => 34,
          "'" => 39,
          "\\\\" => 92
        }.freeze

        attr_reader :terms, :nterms

        def initialize
          @terms = []
          @nterms = []
        end

        # Returns every symbol, terms first and nterms after them. The combined
        # array is memoized; adding a new symbol discards the memo.
        def symbols
          @symbols ||= (@terms + @nterms)
        end

        # Sorts the memoized combined symbol array in place by symbol number.
        def sort_by_number!
          symbols.sort_by!(&:number)
        end

        # Registers a terminal symbol and returns it.
        #
        # When +token_id+ is given and some symbol already has that token id,
        # that symbol is returned; its id, alias name and tag are overwritten
        # first if +replace+ is true. Otherwise, when a symbol matching +id+
        # exists it is returned untouched. Only if neither lookup succeeds is a
        # new term created and appended to +terms+.
        def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
          existing = token_id && find_symbol_by_token_id(token_id)
          if existing
            if replace
              existing.id = id
              existing.alias_name = alias_name
              existing.tag = tag
            end

            return existing
          end

          existing = find_symbol_by_id(id)
          return existing if existing

          @symbols = nil
          term = Symbol.new(
            id: id, alias_name: alias_name, number: nil, tag: tag,
            term: true, token_id: token_id, nullable: false
          )
          @terms << term
          term
        end

        # Registers a nonterminal symbol and returns it. Returns nil without
        # adding anything when a symbol matching +id+ already exists.
        def add_nterm(id:, alias_name: nil, tag: nil)
          return if find_symbol_by_id(id)

          @symbols = nil
          nterm = Symbol.new(
            id: id, alias_name: alias_name, number: nil, tag: tag,
            term: false, token_id: nil, nullable: nil
          )
          @nterms << nterm
          nterm
        end

        # Returns the first symbol whose id has the given +s_value+, or nil.
        def find_symbol_by_s_value(s_value)
          symbols.find { |sym| sym.id.s_value == s_value }
        end

        # Same as #find_symbol_by_s_value but raises when nothing matches.
        def find_symbol_by_s_value!(s_value)
          find_symbol_by_s_value(s_value) || (raise "Symbol not found. value: `#{s_value}`")
        end

        # Returns the first symbol whose id equals +id+ or whose alias name
        # equals the s_value of +id+, or nil.
        def find_symbol_by_id(id)
          symbols.find { |sym| sym.id == id || sym.alias_name == id.s_value }
        end

        # Same as #find_symbol_by_id but raises when nothing matches.
        def find_symbol_by_id!(id)
          find_symbol_by_id(id) || (raise "Symbol not found. #{id}")
        end

        # Returns the first symbol with the given +token_id+, or nil.
        def find_symbol_by_token_id(token_id)
          symbols.find { |sym| sym.token_id == token_id }
        end

        # Returns the symbol stored at index +number+ of #symbols.
        # Raises when that slot is empty or when the symbol found there carries
        # a different number.
        def find_symbol_by_number!(number)
          sym = symbols[number]

          raise "Symbol not found. number: `#{number}`" unless sym
          raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number

          sym
        end

        # Assigns #number and #token_id to every symbol that lacks one,
        # terms first and nterms afterwards.
        def fill_symbol_number
          # YYEMPTY = -2
          # YYEOF   =  0
          # YYerror =  1
          # YYUNDEF =  2
          @number = 3
          fill_terms_number
          fill_nterms_number
        end

        # Copies the tag of each entry in +types+ onto the nterm with the same
        # id. Raises when no such nterm exists.
        def fill_nterm_type(types)
          types.each do |type|
            find_nterm_by_id!(type.id).tag = type.tag
          end
        end

        # Sets #printer on every symbol named (by id or by tag) in +printers+.
        # When several printers match a symbol, the last one wins.
        def fill_printer(printers)
          each_matching_symbol(printers) { |sym, printer| sym.printer = printer }
        end

        # Sets #destructor on every symbol named (by id or by tag) in
        # +destructors+. When several match a symbol, the last one wins.
        def fill_destructor(destructors)
          each_matching_symbol(destructors) { |sym, destructor| sym.destructor = destructor }
        end

        # Sets #error_token on every symbol named (by id or by tag) in
        # +error_tokens+. When several match a symbol, the last one wins.
        def fill_error_token(error_tokens)
          each_matching_symbol(error_tokens) { |sym, token| sym.error_token = token }
        end

        # Resolves a lexer token to its symbol. Raises when +token+ is not a
        # Lrama::Lexer::Token or when no symbol matches it.
        def token_to_symbol(token)
          raise "Unknown class: #{token}" unless token.is_a?(Lrama::Lexer::Token)

          find_symbol_by_id!(token)
        end

        # Raises if two symbols share a number or a (non-nil) alias name.
        def validate!
          validate_number_uniqueness!
          validate_alias_name_uniqueness!
        end

        private

        # Yields (symbol, declaration) for each declaration whose
        # ident_or_tags entry matches the symbol: Ident entries are compared
        # with the symbol id, Tag entries with the symbol tag. Any other entry
        # type raises.
        def each_matching_symbol(declarations)
          symbols.each do |sym|
            declarations.each do |declaration|
              declaration.ident_or_tags.each do |ident_or_tag|
                matched =
                  case ident_or_tag
                  when Lrama::Lexer::Token::Ident then sym.id == ident_or_tag
                  when Lrama::Lexer::Token::Tag then sym.tag == ident_or_tag
                  else raise "Unknown token type. #{declaration}"
                  end

                yield sym, declaration if matched
              end
            end
          end
        end

        def find_nterm_by_id!(id)
          @nterms.find { |sym| sym.id == id } || (raise "Symbol not found. #{id}")
        end

        def fill_terms_number
          # Character literal in grammar file has
          # token id corresponding to ASCII code by default,
          # so start token_id from 256.
          token_id = 256

          @terms.each do |sym|
            assign_number(sym)
            next unless sym.token_id.nil?

            if sym.id.is_a?(Lrama::Lexer::Token::Char)
              sym.token_id = char_token_id(sym)
            else
              sym.token_id = token_id
              token_id += 1
            end
          end
        end

        def fill_nterms_number
          token_id = 0

          @nterms.each do |sym|
            assign_number(sym)
            next unless sym.token_id.nil?

            sym.token_id = token_id
            token_id += 1
          end
        end

        # Advances @number past numbers that are already taken and, if +sym+
        # has no number yet, gives it the next free one.
        def assign_number(sym)
          @number += 1 while used_numbers[@number]
          return unless sym.number.nil?

          sym.number = @number
          used_numbers[@number] = true
          @number += 1
        end

        # Computes the token id of a character-literal term.
        #
        # Only octal digits are accepted after a backslash, so a malformed
        # escape such as '\9' reaches the explicit "Unknown Char" error below
        # instead of surfacing as an ArgumentError from Integer().
        def char_token_id(sym)
          # Ignore ' on the both sides
          body = sym.id.s_value[1..-2]
          escaped = ESCAPED_CHAR_TOKEN_IDS[body]
          return escaped if escaped

          case body
          when /\A\\([0-7]+)\z/
            Integer(Regexp.last_match(1), 8)
          when /\A(.)\z/
            Regexp.last_match(1).bytes.first
          else
            raise "Unknown Char s_value #{sym}"
          end
        end

        # Numbers already taken by symbols, as a number => true hash. Built
        # once from the symbols known at the first call and then updated by
        # #assign_number.
        def used_numbers
          return @used_numbers if defined?(@used_numbers)

          @used_numbers = symbols.each_with_object({}) do |sym, used|
            used[sym.number] = true
          end
        end

        def validate_number_uniqueness!
          invalid = symbols.group_by(&:number).select { |_number, syms| syms.count > 1 }
          return if invalid.empty?

          raise "Symbol number is duplicated. #{invalid}"
        end

        def validate_alias_name_uniqueness!
          aliased = symbols.select(&:alias_name)
          invalid = aliased.group_by(&:alias_name).select { |_alias_name, syms| syms.count > 1 }
          return if invalid.empty?

          raise "Symbol alias name is duplicated. #{invalid}"
        end
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative "symbols/resolver"
|
data/lib/lrama/grammar.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
|
+
require "forwardable"
|
1
2
|
require "lrama/grammar/auxiliary"
|
2
3
|
require "lrama/grammar/binding"
|
3
4
|
require "lrama/grammar/code"
|
4
5
|
require "lrama/grammar/counter"
|
6
|
+
require "lrama/grammar/destructor"
|
5
7
|
require "lrama/grammar/error_token"
|
8
|
+
require "lrama/grammar/parameterizing_rule"
|
6
9
|
require "lrama/grammar/percent_code"
|
7
10
|
require "lrama/grammar/precedence"
|
8
11
|
require "lrama/grammar/printer"
|
9
12
|
require "lrama/grammar/reference"
|
10
13
|
require "lrama/grammar/rule"
|
11
14
|
require "lrama/grammar/rule_builder"
|
12
|
-
require "lrama/grammar/parameterizing_rule"
|
13
15
|
require "lrama/grammar/symbol"
|
16
|
+
require "lrama/grammar/symbols"
|
14
17
|
require "lrama/grammar/type"
|
15
18
|
require "lrama/grammar/union"
|
16
19
|
require "lrama/lexer"
|
@@ -18,22 +21,32 @@ require "lrama/lexer"
|
|
18
21
|
module Lrama
|
19
22
|
# Grammar is the result of parsing an input grammar file
|
20
23
|
class Grammar
|
24
|
+
extend Forwardable
|
25
|
+
|
21
26
|
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
22
27
|
attr_accessor :union, :expect,
|
23
28
|
:printers, :error_tokens,
|
24
29
|
:lex_param, :parse_param, :initial_action,
|
25
|
-
:
|
30
|
+
:after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
|
31
|
+
:symbols_resolver, :types,
|
26
32
|
:rules, :rule_builders,
|
27
33
|
:sym_to_rules, :no_stdlib
|
28
34
|
|
35
|
+
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
|
36
|
+
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
|
37
|
+
:find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
|
38
|
+
:fill_printer, :fill_destructor, :fill_error_token, :sort_by_number!
|
39
|
+
|
40
|
+
|
29
41
|
def initialize(rule_counter)
|
30
42
|
@rule_counter = rule_counter
|
31
43
|
|
32
44
|
# Code defined by "%code"
|
33
45
|
@percent_codes = []
|
34
46
|
@printers = []
|
47
|
+
@destructors = []
|
35
48
|
@error_tokens = []
|
36
|
-
@
|
49
|
+
@symbols_resolver = Grammar::Symbols::Resolver.new
|
37
50
|
@types = []
|
38
51
|
@rule_builders = []
|
39
52
|
@rules = []
|
@@ -54,6 +67,10 @@ module Lrama
|
|
54
67
|
@percent_codes << PercentCode.new(id.s_value, code.s_value)
|
55
68
|
end
|
56
69
|
|
70
|
+
# Builds a Destructor from the given keyword arguments and appends it to @destructors.
def add_destructor(ident_or_tags:, token_code:, lineno:)
|
71
|
+
@destructors << Destructor.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
72
|
+
end
|
73
|
+
|
57
74
|
# Builds a Printer from the given keyword arguments and appends it to @printers.
def add_printer(ident_or_tags:, token_code:, lineno:)
|
58
75
|
@printers << Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
59
76
|
end
|
@@ -62,44 +79,6 @@ module Lrama
|
|
62
79
|
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
63
80
|
end
|
64
81
|
|
65
|
-
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
66
|
-
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
|
67
|
-
if replace
|
68
|
-
sym.id = id
|
69
|
-
sym.alias_name = alias_name
|
70
|
-
sym.tag = tag
|
71
|
-
end
|
72
|
-
|
73
|
-
return sym
|
74
|
-
end
|
75
|
-
|
76
|
-
if (sym = @symbols.find {|s| s.id == id })
|
77
|
-
return sym
|
78
|
-
end
|
79
|
-
|
80
|
-
sym = Symbol.new(
|
81
|
-
id: id, alias_name: alias_name, number: nil, tag: tag,
|
82
|
-
term: true, token_id: token_id, nullable: false
|
83
|
-
)
|
84
|
-
@symbols << sym
|
85
|
-
@terms = nil
|
86
|
-
|
87
|
-
return sym
|
88
|
-
end
|
89
|
-
|
90
|
-
def add_nterm(id:, alias_name: nil, tag: nil)
|
91
|
-
return if @symbols.find {|s| s.id == id }
|
92
|
-
|
93
|
-
sym = Symbol.new(
|
94
|
-
id: id, alias_name: alias_name, number: nil, tag: tag,
|
95
|
-
term: false, token_id: nil, nullable: nil,
|
96
|
-
)
|
97
|
-
@symbols << sym
|
98
|
-
@nterms = nil
|
99
|
-
|
100
|
-
return sym
|
101
|
-
end
|
102
|
-
|
103
82
|
# Records a Type pairing +id+ with +tag+ in @types.
def add_type(id:, tag:)
|
104
83
|
@types << Type.new(id: id, tag: tag)
|
105
84
|
end
|
@@ -165,13 +144,9 @@ module Lrama
|
|
165
144
|
normalize_rules
|
166
145
|
collect_symbols
|
167
146
|
set_lhs_and_rhs
|
168
|
-
fill_symbol_number
|
169
147
|
fill_default_precedence
|
148
|
+
fill_symbols
|
170
149
|
fill_sym_to_rules
|
171
|
-
fill_nterm_type
|
172
|
-
fill_symbol_printer
|
173
|
-
fill_symbol_error_token
|
174
|
-
@symbols.sort_by!(&:number)
|
175
150
|
compute_nullable
|
176
151
|
compute_first_set
|
177
152
|
end
|
@@ -180,40 +155,10 @@ module Lrama
|
|
180
155
|
#
|
181
156
|
# * Validation for no_declared_type_reference
|
182
157
|
def validate!
|
183
|
-
|
184
|
-
validate_symbol_alias_name_uniqueness!
|
158
|
+
@symbols_resolver.validate!
|
185
159
|
validate_rule_lhs_is_nterm!
|
186
160
|
end
|
187
161
|
|
188
|
-
def find_symbol_by_s_value(s_value)
|
189
|
-
@symbols.find do |sym|
|
190
|
-
sym.id.s_value == s_value
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
def find_symbol_by_s_value!(s_value)
|
195
|
-
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
|
196
|
-
end
|
197
|
-
|
198
|
-
def find_symbol_by_id(id)
|
199
|
-
@symbols.find do |sym|
|
200
|
-
sym.id == id || sym.alias_name == id.s_value
|
201
|
-
end
|
202
|
-
end
|
203
|
-
|
204
|
-
def find_symbol_by_id!(id)
|
205
|
-
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
|
206
|
-
end
|
207
|
-
|
208
|
-
def find_symbol_by_number!(number)
|
209
|
-
sym = @symbols[number]
|
210
|
-
|
211
|
-
raise "Symbol not found: #{number}" unless sym
|
212
|
-
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
|
213
|
-
|
214
|
-
sym
|
215
|
-
end
|
216
|
-
|
217
162
|
# Returns the result of find_rules_by_symbol(sym), raising when that result is nil or false.
def find_rules_by_symbol!(sym)
|
218
163
|
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
|
219
164
|
end
|
@@ -222,22 +167,6 @@ module Lrama
|
|
222
167
|
@sym_to_rules[sym.number]
|
223
168
|
end
|
224
169
|
|
225
|
-
def terms_count
|
226
|
-
terms.count
|
227
|
-
end
|
228
|
-
|
229
|
-
def terms
|
230
|
-
@terms ||= @symbols.select(&:term?)
|
231
|
-
end
|
232
|
-
|
233
|
-
def nterms_count
|
234
|
-
nterms.count
|
235
|
-
end
|
236
|
-
|
237
|
-
def nterms
|
238
|
-
@nterms ||= @symbols.select(&:nterm?)
|
239
|
-
end
|
240
|
-
|
241
170
|
private
|
242
171
|
|
243
172
|
def compute_nullable
|
@@ -284,7 +213,7 @@ module Lrama
|
|
284
213
|
rule.nullable = false
|
285
214
|
end
|
286
215
|
|
287
|
-
nterms.select {|
|
216
|
+
nterms.select {|e| e.nullable.nil? }.each do |nterm|
|
288
217
|
nterm.nullable = false
|
289
218
|
end
|
290
219
|
end
|
@@ -330,12 +259,6 @@ module Lrama
|
|
330
259
|
end
|
331
260
|
end
|
332
261
|
|
333
|
-
def find_nterm_by_id!(id)
|
334
|
-
nterms.find do |nterm|
|
335
|
-
nterm.id == id
|
336
|
-
end || (raise "Nterm not found: #{id}")
|
337
|
-
end
|
338
|
-
|
339
262
|
def append_special_symbols
|
340
263
|
# YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
|
341
264
|
# term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
|
@@ -397,79 +320,6 @@ module Lrama
|
|
397
320
|
end
|
398
321
|
end
|
399
322
|
|
400
|
-
# Fill #number and #token_id
|
401
|
-
def fill_symbol_number
|
402
|
-
# Character literal in grammar file has
|
403
|
-
# token id corresponding to ASCII code by default,
|
404
|
-
# so start token_id from 256.
|
405
|
-
token_id = 256
|
406
|
-
|
407
|
-
# YYEMPTY = -2
|
408
|
-
# YYEOF = 0
|
409
|
-
# YYerror = 1
|
410
|
-
# YYUNDEF = 2
|
411
|
-
number = 3
|
412
|
-
|
413
|
-
nterm_token_id = 0
|
414
|
-
used_numbers = {}
|
415
|
-
|
416
|
-
@symbols.map(&:number).each do |n|
|
417
|
-
used_numbers[n] = true
|
418
|
-
end
|
419
|
-
|
420
|
-
(@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
|
421
|
-
while used_numbers[number] do
|
422
|
-
number += 1
|
423
|
-
end
|
424
|
-
|
425
|
-
if sym.number.nil?
|
426
|
-
sym.number = number
|
427
|
-
number += 1
|
428
|
-
end
|
429
|
-
|
430
|
-
# If id is Token::Char, it uses ASCII code
|
431
|
-
if sym.term? && sym.token_id.nil?
|
432
|
-
if sym.id.is_a?(Lrama::Lexer::Token::Char)
|
433
|
-
# Ignore ' on the both sides
|
434
|
-
case sym.id.s_value[1..-2]
|
435
|
-
when "\\b"
|
436
|
-
sym.token_id = 8
|
437
|
-
when "\\f"
|
438
|
-
sym.token_id = 12
|
439
|
-
when "\\n"
|
440
|
-
sym.token_id = 10
|
441
|
-
when "\\r"
|
442
|
-
sym.token_id = 13
|
443
|
-
when "\\t"
|
444
|
-
sym.token_id = 9
|
445
|
-
when "\\v"
|
446
|
-
sym.token_id = 11
|
447
|
-
when "\""
|
448
|
-
sym.token_id = 34
|
449
|
-
when "'"
|
450
|
-
sym.token_id = 39
|
451
|
-
when "\\\\"
|
452
|
-
sym.token_id = 92
|
453
|
-
when /\A\\(\d+)\z/
|
454
|
-
sym.token_id = Integer($1, 8)
|
455
|
-
when /\A(.)\z/
|
456
|
-
sym.token_id = $1.bytes.first
|
457
|
-
else
|
458
|
-
raise "Unknown Char s_value #{sym}"
|
459
|
-
end
|
460
|
-
else
|
461
|
-
sym.token_id = token_id
|
462
|
-
token_id += 1
|
463
|
-
end
|
464
|
-
end
|
465
|
-
|
466
|
-
if sym.nterm? && sym.token_id.nil?
|
467
|
-
sym.token_id = nterm_token_id
|
468
|
-
nterm_token_id += 1
|
469
|
-
end
|
470
|
-
end
|
471
|
-
end
|
472
|
-
|
473
323
|
def set_lhs_and_rhs
|
474
324
|
@rules.each do |rule|
|
475
325
|
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
|
@@ -480,15 +330,6 @@ module Lrama
|
|
480
330
|
end
|
481
331
|
end
|
482
332
|
|
483
|
-
def token_to_symbol(token)
|
484
|
-
case token
|
485
|
-
when Lrama::Lexer::Token
|
486
|
-
find_symbol_by_id!(token)
|
487
|
-
else
|
488
|
-
raise "Unknown class: #{token}"
|
489
|
-
end
|
490
|
-
end
|
491
|
-
|
492
333
|
# Rule inherits precedence from the last term in RHS.
|
493
334
|
#
|
494
335
|
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
|
@@ -506,6 +347,15 @@ module Lrama
|
|
506
347
|
end
|
507
348
|
end
|
508
349
|
|
350
|
+
# Runs the symbol-filling steps in order: numbers, nterm types from @types,
# printers, destructors, error tokens, and finally sorting by number.
# The called methods are delegated to @symbols_resolver.
def fill_symbols
|
351
|
+
fill_symbol_number
|
352
|
+
fill_nterm_type(@types)
|
353
|
+
fill_printer(@printers)
|
354
|
+
fill_destructor(@destructors)
|
355
|
+
fill_error_token(@error_tokens)
|
356
|
+
sort_by_number!
|
357
|
+
end
|
358
|
+
|
509
359
|
def fill_sym_to_rules
|
510
360
|
@rules.each do |rule|
|
511
361
|
key = rule.lhs.number
|
@@ -514,68 +364,6 @@ module Lrama
|
|
514
364
|
end
|
515
365
|
end
|
516
366
|
|
517
|
-
# Fill nterm's tag defined by %type decl
|
518
|
-
def fill_nterm_type
|
519
|
-
@types.each do |type|
|
520
|
-
nterm = find_nterm_by_id!(type.id)
|
521
|
-
nterm.tag = type.tag
|
522
|
-
end
|
523
|
-
end
|
524
|
-
|
525
|
-
def fill_symbol_printer
|
526
|
-
@symbols.each do |sym|
|
527
|
-
@printers.each do |printer|
|
528
|
-
printer.ident_or_tags.each do |ident_or_tag|
|
529
|
-
case ident_or_tag
|
530
|
-
when Lrama::Lexer::Token::Ident
|
531
|
-
sym.printer = printer if sym.id == ident_or_tag
|
532
|
-
when Lrama::Lexer::Token::Tag
|
533
|
-
sym.printer = printer if sym.tag == ident_or_tag
|
534
|
-
else
|
535
|
-
raise "Unknown token type. #{printer}"
|
536
|
-
end
|
537
|
-
end
|
538
|
-
end
|
539
|
-
end
|
540
|
-
end
|
541
|
-
|
542
|
-
def fill_symbol_error_token
|
543
|
-
@symbols.each do |sym|
|
544
|
-
@error_tokens.each do |error_token|
|
545
|
-
error_token.ident_or_tags.each do |ident_or_tag|
|
546
|
-
case ident_or_tag
|
547
|
-
when Lrama::Lexer::Token::Ident
|
548
|
-
sym.error_token = error_token if sym.id == ident_or_tag
|
549
|
-
when Lrama::Lexer::Token::Tag
|
550
|
-
sym.error_token = error_token if sym.tag == ident_or_tag
|
551
|
-
else
|
552
|
-
raise "Unknown token type. #{error_token}"
|
553
|
-
end
|
554
|
-
end
|
555
|
-
end
|
556
|
-
end
|
557
|
-
end
|
558
|
-
|
559
|
-
def validate_symbol_number_uniqueness!
|
560
|
-
invalid = @symbols.group_by(&:number).select do |number, syms|
|
561
|
-
syms.count > 1
|
562
|
-
end
|
563
|
-
|
564
|
-
return if invalid.empty?
|
565
|
-
|
566
|
-
raise "Symbol number is duplicated. #{invalid}"
|
567
|
-
end
|
568
|
-
|
569
|
-
def validate_symbol_alias_name_uniqueness!
|
570
|
-
invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
|
571
|
-
syms.count > 1
|
572
|
-
end
|
573
|
-
|
574
|
-
return if invalid.empty?
|
575
|
-
|
576
|
-
raise "Symbol alias name is duplicated. #{invalid}"
|
577
|
-
end
|
578
|
-
|
579
367
|
def validate_rule_lhs_is_nterm!
|
580
368
|
errors = []
|
581
369
|
|