lrama 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/test.yaml +72 -0
- data/.gitignore +4 -0
- data/.rspec +2 -0
- data/Gemfile +8 -0
- data/LEGAL.md +26 -0
- data/MIT +21 -0
- data/README.md +32 -0
- data/Rakefile +1 -0
- data/doc/TODO.md +50 -0
- data/exe/lrama +7 -0
- data/lib/lrama/command.rb +129 -0
- data/lib/lrama/context.rb +510 -0
- data/lib/lrama/grammar.rb +850 -0
- data/lib/lrama/lexer.rb +349 -0
- data/lib/lrama/output.rb +268 -0
- data/lib/lrama/parser.rb +321 -0
- data/lib/lrama/report.rb +35 -0
- data/lib/lrama/states.rb +1124 -0
- data/lib/lrama/version.rb +3 -0
- data/lib/lrama.rb +9 -0
- data/lrama.gemspec +22 -0
- data/template/bison/yacc.c +1750 -0
- data/template/bison/yacc.h +112 -0
- metadata +67 -0
data/lib/lrama/parser.rb
ADDED
@@ -0,0 +1,321 @@
+require "lrama/report"
+
+module Lrama
+  # Parser for parse.y, generates a grammar
+  class Parser
+    include Lrama::Report::Duration
+
+    T = Lrama::Lexer::Token
+
+    class TokenScanner
+      def initialize(tokens)
+        @tokens = tokens
+        @index = 0
+      end
+
+      def current_token
+        @tokens[@index]
+      end
+
+      def current_type
+        current_token && current_token.type
+      end
+
+      def next
+        token = current_token
+        @index += 1
+        return token
+      end
+
+      def consume(*token_types)
+        if token_types.include?(current_type)
+          token = current_token
+          self.next
+          return token
+        end
+
+        return nil
+      end
+
+      def consume!(*token_types)
+        consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}")
+      end
+
+      def consume_multi(*token_types)
+        a = []
+
+        while token_types.include?(current_type)
+          a << current_token
+          self.next
+        end
+
+        raise "No token is consumed. #{token_types}" if a.empty?
+
+        return a
+      end
+
+      def eots?
+        current_token.nil?
+      end
+    end
+
+    def initialize(text)
+      @text = text
+    end
+
+    def parse
+      report_duration(:parse) do
+        lexer = Lexer.new(@text)
+        grammar = Grammar.new
+        process_prologue(grammar, lexer)
+        parse_bison_declarations(TokenScanner.new(lexer.bison_declarations_tokens), grammar)
+        parse_grammar_rules(TokenScanner.new(lexer.grammar_rules_tokens), grammar)
+        process_epilogue(grammar, lexer)
+        grammar.prepare
+        grammar.compute_nullable
+        grammar.validate!
+
+        grammar
+      end
+    end
+
+    private
+
+    def process_prologue(grammar, lexer)
+      grammar.prologue_first_lineno = lexer.prologue.first[1] if lexer.prologue.first
+      grammar.prologue = lexer.prologue.map(&:first).join
+    end
+
+    def process_epilogue(grammar, lexer)
+      grammar.epilogue_first_lineno = lexer.epilogue.first[1] if lexer.epilogue.first
+      grammar.epilogue = lexer.epilogue.map(&:first).join
+    end
+
+    def parse_bison_declarations(ts, grammar)
+      precedence_number = 0
+
+      while !ts.eots? do
+        case ts.current_type
+        when T::P_expect
+          ts.next
+          grammar.expect = ts.consume!(T::Number).s_value
+        when T::P_define
+          ts.next
+          # Ignore
+          ts.consume_multi(T::Ident)
+        when T::P_printer
+          lineno = ts.current_token.line
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:printer, code)
+          ident_or_tags = ts.consume_multi(T::Ident, T::Tag)
+          grammar.add_printer(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
+        when T::P_lex_param
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:lex_param, code)
+          grammar.lex_param = code.token_code.s_value
+        when T::P_parse_param
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:parse_param, code)
+          grammar.parse_param = code.token_code.s_value
+        when T::P_initial_action
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:initial_action, code)
+          ts.consume(T::Semicolon)
+          grammar.initial_action = code
+        when T::P_union
+          lineno = ts.current_token.line
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:union, code)
+          ts.consume(T::Semicolon)
+          grammar.set_union(code, lineno)
+        when T::P_token
+          # %token tag? (ident number? string?)+
+          #
+          # * ident can be char, e.g. '\\', '\t', '\13'
+          # * number is a token_id for term
+          #
+          # These are valid token declaration (from CRuby parse.y)
+          #
+          # %token END_OF_INPUT 0 "end-of-input"
+          # %token <id> '\\' "backslash"
+          # %token tSP "escaped space"
+          # %token tUPLUS 132 "unary+"
+          # %token tCOLON3 ":: at EXPR_BEG"
+          # %token tSTRING_DBEG tSTRING_DVAR tLAMBEG tLABEL_END
+          #
+          #
+          # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
+          ts.next
+          opt_tag = ts.consume(T::Tag)
+
+          while (id = ts.consume(T::Ident, T::Char)) do
+            opt_number = ts.consume(T::Number)
+            opt_string = ts.consume(T::String)
+            # Can replace 0 (EOF)
+            grammar.add_term(
+              id: id,
+              alias_name: opt_string && opt_string.s_value,
+              token_id: opt_number && opt_number.s_value,
+              tag: opt_tag,
+              replace: true,
+            )
+          end
+        when T::P_type
+          # %type tag? (ident|char|string)+
+          #
+          # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
+          ts.next
+          opt_tag = ts.consume(T::Tag)
+
+          while (id = ts.consume(T::Ident, T::Char, T::String)) do
+            grammar.add_type(
+              id: id,
+              tag: opt_tag
+            )
+          end
+        when T::P_nonassoc
+          # %nonassoc (ident|char|string)+
+          ts.next
+          while (id = ts.consume(T::Ident, T::Char, T::String)) do
+            sym = grammar.add_term(id: id)
+            grammar.add_nonassoc(sym, precedence_number)
+          end
+          precedence_number += 1
+        when T::P_left
+          # %left (ident|char|string)+
+          ts.next
+          while (id = ts.consume(T::Ident, T::Char, T::String)) do
+            sym = grammar.add_term(id: id)
+            grammar.add_left(sym, precedence_number)
+          end
+          precedence_number += 1
+        when T::P_right
+          # %right (ident|char|string)+
+          ts.next
+          while (id = ts.consume(T::Ident, T::Char, T::String)) do
+            sym = grammar.add_term(id: id)
+            grammar.add_right(sym, precedence_number)
+          end
+          precedence_number += 1
+        when nil
+          # end of input
+          raise "Reach to end of input within declarations"
+        else
+          raise "Unexpected token: #{ts.current_token}"
+        end
+      end
+    end
+
+    def parse_grammar_rules(ts, grammar)
+      while !ts.eots? do
+        parse_grammar_rule(ts, grammar)
+      end
+    end
+
+    # TODO: Take care of %prec of rule.
+    #       If %prec exists, user code before %prec
+    #       is NOT an action. For example "{ code 3 }" is NOT an action.
+    #
+    #   keyword_class { code 2 } tSTRING '!' keyword_end { code 3 } %prec "="
+    def parse_grammar_rule(ts, grammar)
+      # LHS
+      lhs = ts.consume!(T::Ident_Colon) # class:
+      lhs.type = T::Ident
+
+      rhs = parse_grammar_rule_rhs(ts, grammar)
+
+      grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : lhs.line)
+
+      while true do
+        case ts.current_type
+        when T::Bar
+          # |
+          bar_lineno = ts.current_token.line
+          ts.next
+          rhs = parse_grammar_rule_rhs(ts, grammar)
+          grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : bar_lineno)
+        when T::Semicolon
+          # ;
+          ts.next
+          break
+        when T::Ident_Colon
+          # Next lhs can be here because ";" is optional.
+          # Do not consume next token.
+          break
+        when nil
+          # end of input can be here when ";" is omitted
+          break
+        else
+          raise "Unexpected token: #{ts.current_token}"
+        end
+      end
+    end
+
+    def parse_grammar_rule_rhs(ts, grammar)
+      a = []
+      prec_seen = false
+      code_after_prec = false
+
+      while true do
+        # TODO: Srting can be here
+        case ts.current_type
+        when T::Ident
+          # keyword_class
+
+          raise "Ident after %prec" if prec_seen
+          a << ts.current_token
+          ts.next
+        when T::Char
+          # '!'
+
+          raise "Char after %prec" if prec_seen
+          a << ts.current_token
+          ts.next
+        when T::P_prec
+          # %prec tPLUS
+          #
+          # See: https://www.gnu.org/software/bison/manual/html_node/Contextual-Precedence.html
+
+          ts.next
+          prec_seen = true
+          precedence_id = ts.consume!(T::Ident, T::String, T::Char)
+          precedence_sym = grammar.find_symbol_by_id!(precedence_id)
+          a << precedence_sym
+        when T::User_code
+          # { code } in the middle of rhs
+
+          if prec_seen
+            raise "Multiple User_code after %prec" if code_after_prec
+            code_after_prec = true
+          end
+
+          code = ts.current_token
+          grammar.build_references(code)
+          a << code
+          ts.next
+        when T::Bar
+          # |
+          break
+        when T::Semicolon
+          # ;
+          break
+        when T::Ident_Colon
+          # Next lhs can be here because ";" is optional.
+          break
+        when nil
+          # end of input can be here when ";" is omitted
+          break
+        else
+          raise "Unexpected token: #{ts.current_token}"
+        end
+      end
+
+      return a
+    end
+  end
+end
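Parser is the entry point that turns a Bison-style parse.y file into a Grammar object: parse lexes the input, walks the %-declarations and the rules with TokenScanner, then finalizes the result with prepare, compute_nullable and validate!. A minimal usage sketch, assuming lib/lrama.rb (only 9 lines in this release) wires up Lexer, Grammar and Parser, and that the input follows the prologue / declarations / %% / rules / %% / epilogue layout the lexer methods above imply; the grammar text itself is illustrative:

require "lrama"

text = <<~GRAMMAR
  %{
  // prologue, copied verbatim into the generated parser
  %}
  %token END_OF_INPUT 0 "end-of-input"
  %token tNUM
  %left '+'
  %%
  program: tNUM
         | program '+' tNUM
         ;
  %%
GRAMMAR

grammar = Lrama::Parser.new(text).parse  # => Lrama::Grammar

Whether such a minimal grammar satisfies grammar.validate! depends on Grammar internals (data/lib/lrama/grammar.rb) that are outside this hunk.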
data/lib/lrama/report.rb
ADDED
@@ -0,0 +1,35 @@
+module Lrama
+  class Report
+    module Profile
+      def self.report_profile
+        require "stackprof"
+
+        StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do
+          yield
+        end
+      end
+    end
+
+    module Duration
+      def self.enable
+        @_report_duration_enabled = true
+      end
+
+      def self.enabled?
+        !!@_report_duration_enabled
+      end
+
+      def report_duration(method_name)
+        time1 = Time.now.to_f
+        result = yield
+        time2 = Time.now.to_f
+
+        if Duration.enabled?
+          puts sprintf("%s %10.5f s", method_name, time2 - time1)
+        end
+
+        return result
+      end
+    end
+  end
+end
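Report::Duration is the timing hook that Parser mixes in for report_duration(:parse): the wrapper is a no-op unless Duration.enable has been called, in which case each wrapped block prints its wall-clock duration. A small sketch of the pattern, using only the methods shown above (the Demo class is illustrative):

require "lrama/report"

class Demo
  include Lrama::Report::Duration

  def run
    # Prints something like "run    0.10012 s" once Duration is enabled.
    report_duration(:run) { sleep(0.1) }
  end
end

Lrama::Report::Duration.enable
Demo.new.run

With enable never called, report_duration simply returns the block's result without printing anything.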