lrama 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/test.yaml +72 -0
- data/.gitignore +4 -0
- data/.rspec +2 -0
- data/Gemfile +8 -0
- data/LEGAL.md +26 -0
- data/MIT +21 -0
- data/README.md +32 -0
- data/Rakefile +1 -0
- data/doc/TODO.md +50 -0
- data/exe/lrama +7 -0
- data/lib/lrama/command.rb +129 -0
- data/lib/lrama/context.rb +510 -0
- data/lib/lrama/grammar.rb +850 -0
- data/lib/lrama/lexer.rb +349 -0
- data/lib/lrama/output.rb +268 -0
- data/lib/lrama/parser.rb +321 -0
- data/lib/lrama/report.rb +35 -0
- data/lib/lrama/states.rb +1124 -0
- data/lib/lrama/version.rb +3 -0
- data/lib/lrama.rb +9 -0
- data/lrama.gemspec +22 -0
- data/template/bison/yacc.c +1750 -0
- data/template/bison/yacc.h +112 -0
- metadata +67 -0
data/lib/lrama/parser.rb
ADDED
@@ -0,0 +1,321 @@
|
|
1
|
+
require "lrama/report"
|
2
|
+
|
3
|
+
module Lrama
|
4
|
+
# Parser for parse.y, generates a grammar
|
5
|
+
class Parser
|
6
|
+
include Lrama::Report::Duration
|
7
|
+
|
8
|
+
T = Lrama::Lexer::Token
|
9
|
+
|
10
|
+
# Forward-only cursor over an array of tokens.
#
# Tokens only need to respond to +type+; that value is compared against the
# token types passed to the consume* methods.
class TokenScanner
  # @param tokens [Array] tokens to scan, in order
  def initialize(tokens)
    @tokens = tokens
    @index = 0
  end

  # @return [Object, nil] the token under the cursor, nil once past the end
  def current_token
    @tokens[@index]
  end

  # @return [Object, nil] type of the current token, nil once past the end
  def current_type
    current_token && current_token.type
  end

  # Advances the cursor by one.
  #
  # @return [Object, nil] the token that was under the cursor before advancing
  def next
    token = current_token
    @index += 1
    token
  end

  # Takes the current token if its type is one of +token_types+.
  #
  # @return [Object, nil] the consumed token; nil (cursor untouched) on mismatch
  def consume(*token_types)
    return nil unless token_types.include?(current_type)

    self.next
  end

  # Same as #consume, but a mismatch is an error.
  #
  # @raise [RuntimeError] when the current token is not one of +token_types+
  def consume!(*token_types)
    consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}")
  end

  # Takes every consecutive token whose type is one of +token_types+.
  #
  # @return [Array] the consumed tokens, never empty
  # @raise [RuntimeError] when not even one token matches
  def consume_multi(*token_types)
    consumed = []

    # Stop at end of input explicitly: current_type is nil there, so a nil
    # entry in token_types would otherwise keep this loop running forever.
    consumed << self.next while !eots? && token_types.include?(current_type)

    raise "No token is consumed. #{token_types}" if consumed.empty?

    consumed
  end

  # @return [Boolean] true once every token has been consumed
  def eots?
    current_token.nil?
  end
end
|
61
|
+
|
62
|
+
# @param text [Object] grammar source handed to Lexer.new by #parse
#   (presumably the contents of a .y file as a String -- confirm with callers)
def initialize(text)
  @text = text
end
|
65
|
+
|
66
|
+
# Runs the whole front end over the text given to the constructor.
#
# The lexer splits the input into prologue, declarations, rules and
# epilogue; each part is fed into a fresh Grammar, which is then prepared,
# has its nullable information computed and is validated. The work is
# wrapped in report_duration(:parse).
#
# @return [Grammar] the populated grammar
def parse
  report_duration(:parse) do
    lexer = Lexer.new(@text)
    grammar = Grammar.new

    process_prologue(grammar, lexer)
    parse_bison_declarations(TokenScanner.new(lexer.bison_declarations_tokens), grammar)
    parse_grammar_rules(TokenScanner.new(lexer.grammar_rules_tokens), grammar)
    process_epilogue(grammar, lexer)

    grammar.prepare
    grammar.compute_nullable
    grammar.validate!

    grammar
  end
end
|
81
|
+
|
82
|
+
private
|
83
|
+
|
84
|
+
# Copies the prologue collected by the lexer onto the grammar.
#
# Each entry of lexer.prologue is indexed as [text, line number] (element 0
# is joined into the prologue text, element 1 of the first entry becomes the
# first line number). The line number is left untouched when the prologue
# is empty.
def process_prologue(grammar, lexer)
  first_entry = lexer.prologue.first
  grammar.prologue_first_lineno = first_entry[1] if first_entry
  grammar.prologue = lexer.prologue.map(&:first).join
end
|
88
|
+
|
89
|
+
# Copies the epilogue collected by the lexer onto the grammar.
#
# Each entry of lexer.epilogue is indexed as [text, line number] (element 0
# is joined into the epilogue text, element 1 of the first entry becomes the
# first line number). The line number is left untouched when the epilogue
# is empty.
def process_epilogue(grammar, lexer)
  first_entry = lexer.epilogue.first
  grammar.epilogue_first_lineno = first_entry[1] if first_entry
  grammar.epilogue = lexer.epilogue.map(&:first).join
end
|
93
|
+
|
94
|
+
# Parses the declarations section (the %-directives) and records every
# declaration on +grammar+.
#
# @param ts [TokenScanner] scanner over the declaration tokens
# @param grammar [Grammar] receiver of the parsed declarations
# @raise [RuntimeError] on a token that does not start a known directive, or
#   when a directive is missing a mandatory operand
def parse_bison_declarations(ts, grammar)
  # All symbols on one %nonassoc/%left/%right line share a number; the
  # number is bumped after each such line.
  precedence_number = 0

  until ts.eots?
    case ts.current_type
    when T::P_expect
      ts.next
      grammar.expect = ts.consume!(T::Number).s_value
    when T::P_define
      ts.next
      # Ignore
      ts.consume_multi(T::Ident)
    when T::P_printer
      lineno = ts.current_token.line
      ts.next
      code = ts.consume!(T::User_code)
      code = grammar.build_code(:printer, code)
      ident_or_tags = ts.consume_multi(T::Ident, T::Tag)
      grammar.add_printer(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
    when T::P_lex_param
      ts.next
      code = ts.consume!(T::User_code)
      code = grammar.build_code(:lex_param, code)
      grammar.lex_param = code.token_code.s_value
    when T::P_parse_param
      ts.next
      code = ts.consume!(T::User_code)
      code = grammar.build_code(:parse_param, code)
      grammar.parse_param = code.token_code.s_value
    when T::P_initial_action
      ts.next
      code = ts.consume!(T::User_code)
      code = grammar.build_code(:initial_action, code)
      ts.consume(T::Semicolon) # trailing ";" is optional
      grammar.initial_action = code
    when T::P_union
      lineno = ts.current_token.line
      ts.next
      code = ts.consume!(T::User_code)
      code = grammar.build_code(:union, code)
      ts.consume(T::Semicolon) # trailing ";" is optional
      grammar.set_union(code, lineno)
    when T::P_token
      # %token tag? (ident number? string?)+
      #
      # * ident can be char, e.g. '\\', '\t', '\13'
      # * number is a token_id for term
      #
      # These are valid token declaration (from CRuby parse.y)
      #
      # %token END_OF_INPUT 0 "end-of-input"
      # %token <id> '\\' "backslash"
      # %token tSP "escaped space"
      # %token tUPLUS 132 "unary+"
      # %token tCOLON3 ":: at EXPR_BEG"
      # %token tSTRING_DBEG tSTRING_DVAR tLAMBEG tLABEL_END
      #
      #
      # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
      ts.next
      opt_tag = ts.consume(T::Tag)

      while (id = ts.consume(T::Ident, T::Char))
        opt_number = ts.consume(T::Number)
        opt_string = ts.consume(T::String)
        # Can replace 0 (EOF)
        grammar.add_term(
          id: id,
          alias_name: opt_string && opt_string.s_value,
          token_id: opt_number && opt_number.s_value,
          tag: opt_tag,
          replace: true,
        )
      end
    when T::P_type
      # %type tag? (ident|char|string)+
      #
      # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
      ts.next
      opt_tag = ts.consume(T::Tag)

      while (id = ts.consume(T::Ident, T::Char, T::String))
        grammar.add_type(
          id: id,
          tag: opt_tag
        )
      end
    when T::P_nonassoc
      # %nonassoc (ident|char|string)+
      declare_precedence(ts, grammar) { |sym| grammar.add_nonassoc(sym, precedence_number) }
      precedence_number += 1
    when T::P_left
      # %left (ident|char|string)+
      declare_precedence(ts, grammar) { |sym| grammar.add_left(sym, precedence_number) }
      precedence_number += 1
    when T::P_right
      # %right (ident|char|string)+
      declare_precedence(ts, grammar) { |sym| grammar.add_right(sym, precedence_number) }
      precedence_number += 1
    when nil
      # end of input
      # Defensive only: the loop guard already stops at end of input, so
      # this is reached just for a token whose type is nil.
      raise "Reach to end of input within declarations"
    else
      raise "Unexpected token: #{ts.current_token}"
    end
  end
end

# Shared body of %nonassoc/%left/%right: skips the directive token, then
# registers each following ident/char/string as a term and yields the value
# returned by grammar.add_term so the caller can attach the associativity.
def declare_precedence(ts, grammar)
  ts.next
  while (id = ts.consume(T::Ident, T::Char, T::String))
    yield grammar.add_term(id: id)
  end
end
|
213
|
+
|
214
|
+
# Parses grammar rules one after another until the scanner is exhausted.
#
# @param ts [TokenScanner] scanner over the tokens of the rules section
# @param grammar [Grammar] receiver of the parsed rules
# @return [nil]
def parse_grammar_rules(ts, grammar)
  parse_grammar_rule(ts, grammar) until ts.eots?
end
|
219
|
+
|
220
|
+
# TODO: Take care of %prec of rule.
|
221
|
+
# If %prec exists, user code before %prec
|
222
|
+
# is NOT an action. For example "{ code 3 }" is NOT an action.
|
223
|
+
#
|
224
|
+
# keyword_class { code 2 } tSTRING '!' keyword_end { code 3 } %prec "="
|
225
|
+
# Parses one rule group: "lhs: rhs | rhs ... ;" and adds one rule per
# alternative to +grammar+.
#
# The terminating ";" is optional, so the group also ends at the next
# "ident:" token (left unconsumed) or at end of input. A rule's line number
# is the line of its first rhs element; an empty alternative falls back to
# the line of the lhs (first alternative) or of the "|" that introduced it.
#
# @raise [RuntimeError] when the group does not start with "ident:" or is
#   followed by an unexpected token
def parse_grammar_rule(ts, grammar)
  # LHS
  lhs = ts.consume!(T::Ident_Colon) # class:
  lhs.type = T::Ident

  # Line used when the upcoming alternative turns out to be empty.
  fallback_lineno = lhs.line

  loop do
    rhs = parse_grammar_rule_rhs(ts, grammar)
    grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : fallback_lineno)

    case ts.current_type
    when T::Bar
      # |
      fallback_lineno = ts.current_token.line
      ts.next
    when T::Semicolon
      # ;
      ts.next
      break
    when T::Ident_Colon
      # Next lhs can be here because ";" is optional.
      # Do not consume next token.
      break
    when nil
      # end of input can be here when ";" is omitted
      break
    else
      raise "Unexpected token: #{ts.current_token}"
    end
  end
end
|
258
|
+
|
259
|
+
# Collects the right-hand side of a single alternative.
#
# Reads identifiers, character literals, { user code } blocks and an
# optional "%prec symbol" until a terminator is reached: "|", ";", the next
# "ident:" or end of input. The terminator itself is not consumed.
#
# After %prec, identifiers and characters are rejected and at most one user
# code block is accepted.
#
# @return [Array] rhs elements in source order: tokens, plus the value
#   returned by grammar.find_symbol_by_id! for a %prec operand
# @raise [RuntimeError] on a token that cannot appear in a rhs or that
#   violates the %prec restrictions above
def parse_grammar_rule_rhs(ts, grammar)
  rhs = []
  prec_seen = false
  code_after_prec = false

  loop do
    # TODO: String can be here
    case ts.current_type
    when T::Ident
      # keyword_class
      raise "Ident after %prec" if prec_seen

      rhs << ts.current_token
      ts.next
    when T::Char
      # '!'
      raise "Char after %prec" if prec_seen

      rhs << ts.current_token
      ts.next
    when T::P_prec
      # %prec tPLUS
      #
      # See: https://www.gnu.org/software/bison/manual/html_node/Contextual-Precedence.html
      ts.next
      prec_seen = true
      precedence_id = ts.consume!(T::Ident, T::String, T::Char)
      rhs << grammar.find_symbol_by_id!(precedence_id)
    when T::User_code
      # { code } in the middle of rhs
      if prec_seen
        raise "Multiple User_code after %prec" if code_after_prec

        code_after_prec = true
      end

      code = ts.current_token
      grammar.build_references(code)
      rhs << code
      ts.next
    when T::Bar, T::Semicolon, T::Ident_Colon, nil
      # "|" and ";" end this alternative; "ident:" and end of input can be
      # here because ";" is optional.
      break
    else
      raise "Unexpected token: #{ts.current_token}"
    end
  end

  rhs
end
|
320
|
+
end
|
321
|
+
end
|
data/lib/lrama/report.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Report
|
3
|
+
# Optional CPU profiling, backed by the stackprof gem.
module Profile
  # Runs the given block under StackProf in :cpu mode with raw samples and
  # has StackProf write the dump to +out+.
  #
  # stackprof is required lazily, so the gem is only needed when profiling
  # is actually requested.
  #
  # @param out [String] path of the dump file; the default is the location
  #   this method has always written to
  # @yield the code to profile
  def self.report_profile(out: 'tmp/stackprof-cpu-myapp.dump')
    require "stackprof"

    StackProf.run(mode: :cpu, raw: true, out: out) do
      yield
    end
  end
end
|
12
|
+
|
13
|
+
# Mixin that times a block and, when reporting has been switched on via
# Duration.enable, prints the elapsed seconds to standard output.
module Duration
  # Turns duration reporting on for every includer (the flag lives on the
  # module itself).
  def self.enable
    @_report_duration_enabled = true
  end

  # @return [Boolean] whether Duration.enable has been called
  def self.enabled?
    !!@_report_duration_enabled
  end

  # Runs the block and returns its result; the timing line is printed only
  # when reporting is enabled.
  #
  # @param method_name [#to_s] label printed in front of the elapsed time
  # @return [Object] whatever the block returned
  def report_duration(method_name)
    # Monotonic clock: unlike Time.now it cannot jump when the system clock
    # is adjusted, so the difference is always a valid elapsed time.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    result = yield
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    puts sprintf("%s %10.5f s", method_name, elapsed) if Duration.enabled?

    result
  end
end
|
34
|
+
end
|
35
|
+
end
|