lrama 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +7 -2
- data/NEWS.md +43 -0
- data/README.md +23 -0
- data/Steepfile +2 -0
- data/lib/lrama/command.rb +10 -2
- data/lib/lrama/context.rb +6 -6
- data/lib/lrama/counterexamples/example.rb +2 -2
- data/lib/lrama/grammar/code/initial_action_code.rb +6 -0
- data/lib/lrama/grammar/code/no_reference_code.rb +4 -0
- data/lib/lrama/grammar/code/printer_code.rb +6 -0
- data/lib/lrama/grammar/code/rule_action.rb +11 -1
- data/lib/lrama/grammar/code.rb +1 -1
- data/lib/lrama/grammar/parameterizing_rule/resolver.rb +17 -9
- data/lib/lrama/grammar/reference.rb +4 -3
- data/lib/lrama/grammar/rule.rb +2 -2
- data/lib/lrama/grammar/rule_builder.rb +38 -36
- data/lib/lrama/grammar/stdlib.y +80 -0
- data/lib/lrama/grammar/symbol.rb +1 -1
- data/lib/lrama/grammar/symbols/resolver.rb +276 -0
- data/lib/lrama/grammar/symbols.rb +1 -0
- data/lib/lrama/grammar.rb +36 -246
- data/lib/lrama/lexer/token/user_code.rb +13 -2
- data/lib/lrama/lexer.rb +7 -0
- data/lib/lrama/output.rb +56 -2
- data/lib/lrama/parser.rb +571 -485
- data/lib/lrama/state.rb +4 -4
- data/lib/lrama/states/item.rb +19 -17
- data/lib/lrama/states_reporter.rb +10 -12
- data/lib/lrama/version.rb +1 -1
- data/lrama.gemspec +7 -0
- data/parser.y +24 -5
- data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +1 -0
- data/sig/lrama/grammar/reference.rbs +2 -1
- data/sig/lrama/grammar/symbol.rbs +4 -4
- data/sig/lrama/grammar/symbols/resolver.rbs +41 -0
- data/sig/lrama/grammar/type.rbs +11 -0
- data/template/bison/yacc.c +6 -0
- metadata +13 -17
- data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +0 -36
- data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +0 -28
- data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +0 -28
- data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +0 -28
- data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +0 -39
- data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +0 -34
- data/lib/lrama/grammar/parameterizing_rules/builder.rb +0 -60
- data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +0 -28
- data/sig/lrama/grammar/parameterizing_rules/builder/list.rbs +0 -10
- data/sig/lrama/grammar/parameterizing_rules/builder/nonempty_list.rbs +0 -10
- data/sig/lrama/grammar/parameterizing_rules/builder/option.rbs +0 -10
- data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +0 -13
- data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +0 -13
- data/sig/lrama/grammar/parameterizing_rules/builder.rbs +0 -24
data/lib/lrama/grammar/symbol.rb
CHANGED
@@ -11,7 +11,7 @@ module Lrama
|
|
11
11
|
attr_reader :term
|
12
12
|
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
|
13
13
|
|
14
|
-
def initialize(id:, alias_name: nil, number: nil, tag: nil, term:, token_id: nil, nullable: nil, precedence: nil, printer: nil)
|
14
|
+
def initialize(id:, term:, alias_name: nil, number: nil, tag: nil, token_id: nil, nullable: nil, precedence: nil, printer: nil)
|
15
15
|
@id = id
|
16
16
|
@alias_name = alias_name
|
17
17
|
@number = number
|
@@ -0,0 +1,276 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Grammar
|
3
|
+
class Symbols
|
4
|
+
class Resolver
|
5
|
+
attr_reader :terms, :nterms
|
6
|
+
|
7
|
+
def initialize
|
8
|
+
@terms = []
|
9
|
+
@nterms = []
|
10
|
+
end
|
11
|
+
|
12
|
+
def symbols
|
13
|
+
@symbols ||= (@terms + @nterms)
|
14
|
+
end
|
15
|
+
|
16
|
+
def sort_by_number!
|
17
|
+
symbols.sort_by!(&:number)
|
18
|
+
end
|
19
|
+
|
20
|
+
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
21
|
+
if token_id && (sym = find_symbol_by_token_id(token_id))
|
22
|
+
if replace
|
23
|
+
sym.id = id
|
24
|
+
sym.alias_name = alias_name
|
25
|
+
sym.tag = tag
|
26
|
+
end
|
27
|
+
|
28
|
+
return sym
|
29
|
+
end
|
30
|
+
|
31
|
+
if (sym = find_symbol_by_id(id))
|
32
|
+
return sym
|
33
|
+
end
|
34
|
+
|
35
|
+
@symbols = nil
|
36
|
+
term = Symbol.new(
|
37
|
+
id: id, alias_name: alias_name, number: nil, tag: tag,
|
38
|
+
term: true, token_id: token_id, nullable: false
|
39
|
+
)
|
40
|
+
@terms << term
|
41
|
+
term
|
42
|
+
end
|
43
|
+
|
44
|
+
def add_nterm(id:, alias_name: nil, tag: nil)
|
45
|
+
return if find_symbol_by_id(id)
|
46
|
+
|
47
|
+
@symbols = nil
|
48
|
+
nterm = Symbol.new(
|
49
|
+
id: id, alias_name: alias_name, number: nil, tag: tag,
|
50
|
+
term: false, token_id: nil, nullable: nil,
|
51
|
+
)
|
52
|
+
@nterms << nterm
|
53
|
+
nterm
|
54
|
+
end
|
55
|
+
|
56
|
+
def find_symbol_by_s_value(s_value)
|
57
|
+
symbols.find { |s| s.id.s_value == s_value }
|
58
|
+
end
|
59
|
+
|
60
|
+
def find_symbol_by_s_value!(s_value)
|
61
|
+
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
|
62
|
+
end
|
63
|
+
|
64
|
+
def find_symbol_by_id(id)
|
65
|
+
symbols.find do |s|
|
66
|
+
s.id == id || s.alias_name == id.s_value
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def find_symbol_by_id!(id)
|
71
|
+
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
|
72
|
+
end
|
73
|
+
|
74
|
+
def find_symbol_by_token_id(token_id)
|
75
|
+
symbols.find {|s| s.token_id == token_id }
|
76
|
+
end
|
77
|
+
|
78
|
+
def find_symbol_by_number!(number)
|
79
|
+
sym = symbols[number]
|
80
|
+
|
81
|
+
raise "Symbol not found: #{number}" unless sym
|
82
|
+
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
|
83
|
+
|
84
|
+
sym
|
85
|
+
end
|
86
|
+
|
87
|
+
def fill_symbol_number
|
88
|
+
# YYEMPTY = -2
|
89
|
+
# YYEOF = 0
|
90
|
+
# YYerror = 1
|
91
|
+
# YYUNDEF = 2
|
92
|
+
@number = 3
|
93
|
+
fill_terms_number
|
94
|
+
fill_nterms_number
|
95
|
+
end
|
96
|
+
|
97
|
+
def fill_nterm_type(types)
|
98
|
+
types.each do |type|
|
99
|
+
nterm = find_nterm_by_id!(type.id)
|
100
|
+
nterm.tag = type.tag
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
def fill_printer(printers)
|
105
|
+
symbols.each do |sym|
|
106
|
+
printers.each do |printer|
|
107
|
+
printer.ident_or_tags.each do |ident_or_tag|
|
108
|
+
case ident_or_tag
|
109
|
+
when Lrama::Lexer::Token::Ident
|
110
|
+
sym.printer = printer if sym.id == ident_or_tag
|
111
|
+
when Lrama::Lexer::Token::Tag
|
112
|
+
sym.printer = printer if sym.tag == ident_or_tag
|
113
|
+
else
|
114
|
+
raise "Unknown token type. #{printer}"
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
def fill_error_token(error_tokens)
|
122
|
+
symbols.each do |sym|
|
123
|
+
error_tokens.each do |token|
|
124
|
+
token.ident_or_tags.each do |ident_or_tag|
|
125
|
+
case ident_or_tag
|
126
|
+
when Lrama::Lexer::Token::Ident
|
127
|
+
sym.error_token = token if sym.id == ident_or_tag
|
128
|
+
when Lrama::Lexer::Token::Tag
|
129
|
+
sym.error_token = token if sym.tag == ident_or_tag
|
130
|
+
else
|
131
|
+
raise "Unknown token type. #{token}"
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
def token_to_symbol(token)
|
139
|
+
case token
|
140
|
+
when Lrama::Lexer::Token
|
141
|
+
find_symbol_by_id!(token)
|
142
|
+
else
|
143
|
+
raise "Unknown class: #{token}"
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
def validate!
|
148
|
+
validate_number_uniqueness!
|
149
|
+
validate_alias_name_uniqueness!
|
150
|
+
end
|
151
|
+
|
152
|
+
private
|
153
|
+
|
154
|
+
def find_nterm_by_id!(id)
|
155
|
+
@nterms.find do |s|
|
156
|
+
s.id == id
|
157
|
+
end || (raise "Symbol not found: #{id}")
|
158
|
+
end
|
159
|
+
|
160
|
+
def fill_terms_number
|
161
|
+
# Character literal in grammar file has
|
162
|
+
# token id corresponding to ASCII code by default,
|
163
|
+
# so start token_id from 256.
|
164
|
+
token_id = 256
|
165
|
+
|
166
|
+
@terms.each do |sym|
|
167
|
+
while used_numbers[@number] do
|
168
|
+
@number += 1
|
169
|
+
end
|
170
|
+
|
171
|
+
if sym.number.nil?
|
172
|
+
sym.number = @number
|
173
|
+
used_numbers[@number] = true
|
174
|
+
@number += 1
|
175
|
+
end
|
176
|
+
|
177
|
+
# If id is Token::Char, it uses ASCII code
|
178
|
+
if sym.token_id.nil?
|
179
|
+
if sym.id.is_a?(Lrama::Lexer::Token::Char)
|
180
|
+
# Ignore ' on the both sides
|
181
|
+
case sym.id.s_value[1..-2]
|
182
|
+
when "\\b"
|
183
|
+
sym.token_id = 8
|
184
|
+
when "\\f"
|
185
|
+
sym.token_id = 12
|
186
|
+
when "\\n"
|
187
|
+
sym.token_id = 10
|
188
|
+
when "\\r"
|
189
|
+
sym.token_id = 13
|
190
|
+
when "\\t"
|
191
|
+
sym.token_id = 9
|
192
|
+
when "\\v"
|
193
|
+
sym.token_id = 11
|
194
|
+
when "\""
|
195
|
+
sym.token_id = 34
|
196
|
+
when "'"
|
197
|
+
sym.token_id = 39
|
198
|
+
when "\\\\"
|
199
|
+
sym.token_id = 92
|
200
|
+
when /\A\\(\d+)\z/
|
201
|
+
unless (id = Integer($1, 8)).nil?
|
202
|
+
sym.token_id = id
|
203
|
+
else
|
204
|
+
raise "Unknown Char s_value #{sym}"
|
205
|
+
end
|
206
|
+
when /\A(.)\z/
|
207
|
+
unless (id = $1&.bytes&.first).nil?
|
208
|
+
sym.token_id = id
|
209
|
+
else
|
210
|
+
raise "Unknown Char s_value #{sym}"
|
211
|
+
end
|
212
|
+
else
|
213
|
+
raise "Unknown Char s_value #{sym}"
|
214
|
+
end
|
215
|
+
else
|
216
|
+
sym.token_id = token_id
|
217
|
+
token_id += 1
|
218
|
+
end
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
|
223
|
+
def fill_nterms_number
|
224
|
+
token_id = 0
|
225
|
+
|
226
|
+
@nterms.each do |sym|
|
227
|
+
while used_numbers[@number] do
|
228
|
+
@number += 1
|
229
|
+
end
|
230
|
+
|
231
|
+
if sym.number.nil?
|
232
|
+
sym.number = @number
|
233
|
+
used_numbers[@number] = true
|
234
|
+
@number += 1
|
235
|
+
end
|
236
|
+
|
237
|
+
if sym.token_id.nil?
|
238
|
+
sym.token_id = token_id
|
239
|
+
token_id += 1
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|
243
|
+
|
244
|
+
def used_numbers
|
245
|
+
return @used_numbers if defined?(@used_numbers)
|
246
|
+
|
247
|
+
@used_numbers = {}
|
248
|
+
symbols.map(&:number).each do |n|
|
249
|
+
@used_numbers[n] = true
|
250
|
+
end
|
251
|
+
@used_numbers
|
252
|
+
end
|
253
|
+
|
254
|
+
def validate_number_uniqueness!
|
255
|
+
invalid = symbols.group_by(&:number).select do |number, syms|
|
256
|
+
syms.count > 1
|
257
|
+
end
|
258
|
+
|
259
|
+
return if invalid.empty?
|
260
|
+
|
261
|
+
raise "Symbol number is duplicated. #{invalid}"
|
262
|
+
end
|
263
|
+
|
264
|
+
def validate_alias_name_uniqueness!
|
265
|
+
invalid = symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
|
266
|
+
syms.count > 1
|
267
|
+
end
|
268
|
+
|
269
|
+
return if invalid.empty?
|
270
|
+
|
271
|
+
raise "Symbol alias name is duplicated. #{invalid}"
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
275
|
+
end
|
276
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require_relative "symbols/resolver"
|
data/lib/lrama/grammar.rb
CHANGED
@@ -1,16 +1,18 @@
|
|
1
|
+
require "forwardable"
|
1
2
|
require "lrama/grammar/auxiliary"
|
2
3
|
require "lrama/grammar/binding"
|
3
4
|
require "lrama/grammar/code"
|
4
5
|
require "lrama/grammar/counter"
|
5
6
|
require "lrama/grammar/error_token"
|
7
|
+
require "lrama/grammar/parameterizing_rule"
|
6
8
|
require "lrama/grammar/percent_code"
|
7
9
|
require "lrama/grammar/precedence"
|
8
10
|
require "lrama/grammar/printer"
|
9
11
|
require "lrama/grammar/reference"
|
10
12
|
require "lrama/grammar/rule"
|
11
13
|
require "lrama/grammar/rule_builder"
|
12
|
-
require "lrama/grammar/parameterizing_rule"
|
13
14
|
require "lrama/grammar/symbol"
|
15
|
+
require "lrama/grammar/symbols"
|
14
16
|
require "lrama/grammar/type"
|
15
17
|
require "lrama/grammar/union"
|
16
18
|
require "lrama/lexer"
|
@@ -18,13 +20,22 @@ require "lrama/lexer"
|
|
18
20
|
module Lrama
|
19
21
|
# Grammar is the result of parsing an input grammar file
|
20
22
|
class Grammar
|
23
|
+
extend Forwardable
|
24
|
+
|
21
25
|
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
22
26
|
attr_accessor :union, :expect,
|
23
27
|
:printers, :error_tokens,
|
24
28
|
:lex_param, :parse_param, :initial_action,
|
25
|
-
:symbols, :types,
|
29
|
+
:after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
|
30
|
+
:symbols_resolver, :types,
|
26
31
|
:rules, :rule_builders,
|
27
|
-
:sym_to_rules
|
32
|
+
:sym_to_rules, :no_stdlib
|
33
|
+
|
34
|
+
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
|
35
|
+
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
|
36
|
+
:find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
|
37
|
+
:fill_printer, :fill_error_token, :sort_by_number!
|
38
|
+
|
28
39
|
|
29
40
|
def initialize(rule_counter)
|
30
41
|
@rule_counter = rule_counter
|
@@ -33,7 +44,7 @@ module Lrama
|
|
33
44
|
@percent_codes = []
|
34
45
|
@printers = []
|
35
46
|
@error_tokens = []
|
36
|
-
@symbols = []
|
47
|
+
@symbols_resolver = Grammar::Symbols::Resolver.new
|
37
48
|
@types = []
|
38
49
|
@rule_builders = []
|
39
50
|
@rules = []
|
@@ -45,6 +56,7 @@ module Lrama
|
|
45
56
|
@undef_symbol = nil
|
46
57
|
@accept_symbol = nil
|
47
58
|
@aux = Auxiliary.new
|
59
|
+
@no_stdlib = false
|
48
60
|
|
49
61
|
append_special_symbols
|
50
62
|
end
|
@@ -61,44 +73,6 @@ module Lrama
|
|
61
73
|
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
62
74
|
end
|
63
75
|
|
64
|
-
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
65
|
-
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
|
66
|
-
if replace
|
67
|
-
sym.id = id
|
68
|
-
sym.alias_name = alias_name
|
69
|
-
sym.tag = tag
|
70
|
-
end
|
71
|
-
|
72
|
-
return sym
|
73
|
-
end
|
74
|
-
|
75
|
-
if (sym = @symbols.find {|s| s.id == id })
|
76
|
-
return sym
|
77
|
-
end
|
78
|
-
|
79
|
-
sym = Symbol.new(
|
80
|
-
id: id, alias_name: alias_name, number: nil, tag: tag,
|
81
|
-
term: true, token_id: token_id, nullable: false
|
82
|
-
)
|
83
|
-
@symbols << sym
|
84
|
-
@terms = nil
|
85
|
-
|
86
|
-
return sym
|
87
|
-
end
|
88
|
-
|
89
|
-
def add_nterm(id:, alias_name: nil, tag: nil)
|
90
|
-
return if @symbols.find {|s| s.id == id }
|
91
|
-
|
92
|
-
sym = Symbol.new(
|
93
|
-
id: id, alias_name: alias_name, number: nil, tag: tag,
|
94
|
-
term: false, token_id: nil, nullable: nil,
|
95
|
-
)
|
96
|
-
@symbols << sym
|
97
|
-
@nterms = nil
|
98
|
-
|
99
|
-
return sym
|
100
|
-
end
|
101
|
-
|
102
76
|
def add_type(id:, tag:)
|
103
77
|
@types << Type.new(id: id, tag: tag)
|
104
78
|
end
|
@@ -136,6 +110,14 @@ module Lrama
|
|
136
110
|
@parameterizing_rule_resolver.add_parameterizing_rule(rule)
|
137
111
|
end
|
138
112
|
|
113
|
+
def parameterizing_rules
|
114
|
+
@parameterizing_rule_resolver.rules
|
115
|
+
end
|
116
|
+
|
117
|
+
def insert_before_parameterizing_rules(rules)
|
118
|
+
@parameterizing_rule_resolver.rules = rules + @parameterizing_rule_resolver.rules
|
119
|
+
end
|
120
|
+
|
139
121
|
def prologue_first_lineno=(prologue_first_lineno)
|
140
122
|
@aux.prologue_first_lineno = prologue_first_lineno
|
141
123
|
end
|
@@ -156,13 +138,9 @@ module Lrama
|
|
156
138
|
normalize_rules
|
157
139
|
collect_symbols
|
158
140
|
set_lhs_and_rhs
|
159
|
-
fill_symbol_number
|
160
141
|
fill_default_precedence
|
142
|
+
fill_symbols
|
161
143
|
fill_sym_to_rules
|
162
|
-
fill_nterm_type
|
163
|
-
fill_symbol_printer
|
164
|
-
fill_symbol_error_token
|
165
|
-
@symbols.sort_by!(&:number)
|
166
144
|
compute_nullable
|
167
145
|
compute_first_set
|
168
146
|
end
|
@@ -171,40 +149,10 @@ module Lrama
|
|
171
149
|
#
|
172
150
|
# * Validation for no_declared_type_reference
|
173
151
|
def validate!
|
174
|
-
|
175
|
-
validate_symbol_alias_name_uniqueness!
|
152
|
+
@symbols_resolver.validate!
|
176
153
|
validate_rule_lhs_is_nterm!
|
177
154
|
end
|
178
155
|
|
179
|
-
def find_symbol_by_s_value(s_value)
|
180
|
-
@symbols.find do |sym|
|
181
|
-
sym.id.s_value == s_value
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
def find_symbol_by_s_value!(s_value)
|
186
|
-
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
|
187
|
-
end
|
188
|
-
|
189
|
-
def find_symbol_by_id(id)
|
190
|
-
@symbols.find do |sym|
|
191
|
-
sym.id == id || sym.alias_name == id.s_value
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
def find_symbol_by_id!(id)
|
196
|
-
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
|
197
|
-
end
|
198
|
-
|
199
|
-
def find_symbol_by_number!(number)
|
200
|
-
sym = @symbols[number]
|
201
|
-
|
202
|
-
raise "Symbol not found: #{number}" unless sym
|
203
|
-
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
|
204
|
-
|
205
|
-
sym
|
206
|
-
end
|
207
|
-
|
208
156
|
def find_rules_by_symbol!(sym)
|
209
157
|
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
|
210
158
|
end
|
@@ -213,28 +161,12 @@ module Lrama
|
|
213
161
|
@sym_to_rules[sym.number]
|
214
162
|
end
|
215
163
|
|
216
|
-
def terms_count
|
217
|
-
terms.count
|
218
|
-
end
|
219
|
-
|
220
|
-
def terms
|
221
|
-
@terms ||= @symbols.select(&:term?)
|
222
|
-
end
|
223
|
-
|
224
|
-
def nterms_count
|
225
|
-
nterms.count
|
226
|
-
end
|
227
|
-
|
228
|
-
def nterms
|
229
|
-
@nterms ||= @symbols.select(&:nterm?)
|
230
|
-
end
|
231
|
-
|
232
164
|
private
|
233
165
|
|
234
166
|
def compute_nullable
|
235
167
|
@rules.each do |rule|
|
236
168
|
case
|
237
|
-
when rule.rhs.empty?
|
169
|
+
when rule.empty_rule?
|
238
170
|
rule.nullable = true
|
239
171
|
when rule.rhs.any?(&:term)
|
240
172
|
rule.nullable = false
|
@@ -275,7 +207,7 @@ module Lrama
|
|
275
207
|
rule.nullable = false
|
276
208
|
end
|
277
209
|
|
278
|
-
nterms.select {|
|
210
|
+
nterms.select {|e| e.nullable.nil? }.each do |nterm|
|
279
211
|
nterm.nullable = false
|
280
212
|
end
|
281
213
|
end
|
@@ -321,12 +253,6 @@ module Lrama
|
|
321
253
|
end
|
322
254
|
end
|
323
255
|
|
324
|
-
def find_nterm_by_id!(id)
|
325
|
-
nterms.find do |nterm|
|
326
|
-
nterm.id == id
|
327
|
-
end || (raise "Nterm not found: #{id}")
|
328
|
-
end
|
329
|
-
|
330
256
|
def append_special_symbols
|
331
257
|
# YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
|
332
258
|
# term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
|
@@ -388,79 +314,6 @@ module Lrama
|
|
388
314
|
end
|
389
315
|
end
|
390
316
|
|
391
|
-
# Fill #number and #token_id
|
392
|
-
def fill_symbol_number
|
393
|
-
# Character literal in grammar file has
|
394
|
-
# token id corresponding to ASCII code by default,
|
395
|
-
# so start token_id from 256.
|
396
|
-
token_id = 256
|
397
|
-
|
398
|
-
# YYEMPTY = -2
|
399
|
-
# YYEOF = 0
|
400
|
-
# YYerror = 1
|
401
|
-
# YYUNDEF = 2
|
402
|
-
number = 3
|
403
|
-
|
404
|
-
nterm_token_id = 0
|
405
|
-
used_numbers = {}
|
406
|
-
|
407
|
-
@symbols.map(&:number).each do |n|
|
408
|
-
used_numbers[n] = true
|
409
|
-
end
|
410
|
-
|
411
|
-
(@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
|
412
|
-
while used_numbers[number] do
|
413
|
-
number += 1
|
414
|
-
end
|
415
|
-
|
416
|
-
if sym.number.nil?
|
417
|
-
sym.number = number
|
418
|
-
number += 1
|
419
|
-
end
|
420
|
-
|
421
|
-
# If id is Token::Char, it uses ASCII code
|
422
|
-
if sym.term? && sym.token_id.nil?
|
423
|
-
if sym.id.is_a?(Lrama::Lexer::Token::Char)
|
424
|
-
# Ignore ' on the both sides
|
425
|
-
case sym.id.s_value[1..-2]
|
426
|
-
when "\\b"
|
427
|
-
sym.token_id = 8
|
428
|
-
when "\\f"
|
429
|
-
sym.token_id = 12
|
430
|
-
when "\\n"
|
431
|
-
sym.token_id = 10
|
432
|
-
when "\\r"
|
433
|
-
sym.token_id = 13
|
434
|
-
when "\\t"
|
435
|
-
sym.token_id = 9
|
436
|
-
when "\\v"
|
437
|
-
sym.token_id = 11
|
438
|
-
when "\""
|
439
|
-
sym.token_id = 34
|
440
|
-
when "'"
|
441
|
-
sym.token_id = 39
|
442
|
-
when "\\\\"
|
443
|
-
sym.token_id = 92
|
444
|
-
when /\A\\(\d+)\z/
|
445
|
-
sym.token_id = Integer($1, 8)
|
446
|
-
when /\A(.)\z/
|
447
|
-
sym.token_id = $1.bytes.first
|
448
|
-
else
|
449
|
-
raise "Unknown Char s_value #{sym}"
|
450
|
-
end
|
451
|
-
else
|
452
|
-
sym.token_id = token_id
|
453
|
-
token_id += 1
|
454
|
-
end
|
455
|
-
end
|
456
|
-
|
457
|
-
if sym.nterm? && sym.token_id.nil?
|
458
|
-
sym.token_id = nterm_token_id
|
459
|
-
nterm_token_id += 1
|
460
|
-
end
|
461
|
-
end
|
462
|
-
end
|
463
|
-
|
464
317
|
def set_lhs_and_rhs
|
465
318
|
@rules.each do |rule|
|
466
319
|
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
|
@@ -471,15 +324,6 @@ module Lrama
|
|
471
324
|
end
|
472
325
|
end
|
473
326
|
|
474
|
-
def token_to_symbol(token)
|
475
|
-
case token
|
476
|
-
when Lrama::Lexer::Token
|
477
|
-
find_symbol_by_id!(token)
|
478
|
-
else
|
479
|
-
raise "Unknown class: #{token}"
|
480
|
-
end
|
481
|
-
end
|
482
|
-
|
483
327
|
# Rule inherits precedence from the last term in RHS.
|
484
328
|
#
|
485
329
|
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
|
@@ -497,6 +341,14 @@ module Lrama
|
|
497
341
|
end
|
498
342
|
end
|
499
343
|
|
344
|
+
def fill_symbols
|
345
|
+
fill_symbol_number
|
346
|
+
fill_nterm_type(@types)
|
347
|
+
fill_printer(@printers)
|
348
|
+
fill_error_token(@error_tokens)
|
349
|
+
sort_by_number!
|
350
|
+
end
|
351
|
+
|
500
352
|
def fill_sym_to_rules
|
501
353
|
@rules.each do |rule|
|
502
354
|
key = rule.lhs.number
|
@@ -505,68 +357,6 @@ module Lrama
|
|
505
357
|
end
|
506
358
|
end
|
507
359
|
|
508
|
-
# Fill nterm's tag defined by %type decl
|
509
|
-
def fill_nterm_type
|
510
|
-
@types.each do |type|
|
511
|
-
nterm = find_nterm_by_id!(type.id)
|
512
|
-
nterm.tag = type.tag
|
513
|
-
end
|
514
|
-
end
|
515
|
-
|
516
|
-
def fill_symbol_printer
|
517
|
-
@symbols.each do |sym|
|
518
|
-
@printers.each do |printer|
|
519
|
-
printer.ident_or_tags.each do |ident_or_tag|
|
520
|
-
case ident_or_tag
|
521
|
-
when Lrama::Lexer::Token::Ident
|
522
|
-
sym.printer = printer if sym.id == ident_or_tag
|
523
|
-
when Lrama::Lexer::Token::Tag
|
524
|
-
sym.printer = printer if sym.tag == ident_or_tag
|
525
|
-
else
|
526
|
-
raise "Unknown token type. #{printer}"
|
527
|
-
end
|
528
|
-
end
|
529
|
-
end
|
530
|
-
end
|
531
|
-
end
|
532
|
-
|
533
|
-
def fill_symbol_error_token
|
534
|
-
@symbols.each do |sym|
|
535
|
-
@error_tokens.each do |error_token|
|
536
|
-
error_token.ident_or_tags.each do |ident_or_tag|
|
537
|
-
case ident_or_tag
|
538
|
-
when Lrama::Lexer::Token::Ident
|
539
|
-
sym.error_token = error_token if sym.id == ident_or_tag
|
540
|
-
when Lrama::Lexer::Token::Tag
|
541
|
-
sym.error_token = error_token if sym.tag == ident_or_tag
|
542
|
-
else
|
543
|
-
raise "Unknown token type. #{error_token}"
|
544
|
-
end
|
545
|
-
end
|
546
|
-
end
|
547
|
-
end
|
548
|
-
end
|
549
|
-
|
550
|
-
def validate_symbol_number_uniqueness!
|
551
|
-
invalid = @symbols.group_by(&:number).select do |number, syms|
|
552
|
-
syms.count > 1
|
553
|
-
end
|
554
|
-
|
555
|
-
return if invalid.empty?
|
556
|
-
|
557
|
-
raise "Symbol number is duplicated. #{invalid}"
|
558
|
-
end
|
559
|
-
|
560
|
-
def validate_symbol_alias_name_uniqueness!
|
561
|
-
invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
|
562
|
-
syms.count > 1
|
563
|
-
end
|
564
|
-
|
565
|
-
return if invalid.empty?
|
566
|
-
|
567
|
-
raise "Symbol alias name is duplicated. #{invalid}"
|
568
|
-
end
|
569
|
-
|
570
360
|
def validate_rule_lhs_is_nterm!
|
571
361
|
errors = []
|
572
362
|
|