lrama 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,321 @@
1
+ require "lrama/report"
2
+
3
+ module Lrama
4
+ # Parser for parse.y, generates a grammar
5
+ class Parser
6
+ include Lrama::Report::Duration
7
+
8
+ T = Lrama::Lexer::Token
9
+
10
# Cursor over an array of lexer tokens.
#
# The scanner only ever moves forward. Reading past the last token is
# allowed: current_token and current_type simply answer nil there.
class TokenScanner
  # tokens - indexable list of token objects; each must respond to #type.
  def initialize(tokens)
    @index = 0
    @tokens = tokens
  end

  # Token under the cursor, or nil once the cursor is past the end.
  def current_token
    @tokens[@index]
  end

  # Type of the token under the cursor, or nil when there is no token.
  def current_type
    token = current_token
    token && token.type
  end

  # Advances the cursor by one and returns the token it was on before moving.
  def next
    current_token.tap { @index += 1 }
  end

  # Consumes and returns the current token when its type is one of
  # token_types; otherwise leaves the cursor alone and returns nil.
  def consume(*token_types)
    return nil unless token_types.include?(current_type)

    self.next
  end

  # Same as #consume, but raises RuntimeError instead of returning nil.
  def consume!(*token_types)
    token = consume(*token_types)
    return token if token

    raise "#{token_types} is expected but #{current_type}. #{current_token}"
  end

  # Consumes the longest run of tokens whose types are in token_types and
  # returns them as an array. Raises RuntimeError when the run is empty.
  def consume_multi(*token_types)
    consumed = []
    consumed << self.next while token_types.include?(current_type)

    raise "No token is consumed. #{token_types}" if consumed.empty?

    consumed
  end

  # True when the cursor has moved past the last token ("end of tokens").
  def eots?
    current_token.nil?
  end
end
61
+
62
# Keeps the grammar source for later; no lexing or parsing happens here.
#
# text - the value later handed to Lexer.new by #parse.
def initialize(text)
  @text = text
end
65
+
66
# Lexes @text, feeds the declaration and rule token streams into a fresh
# Grammar, then prepares and validates it.
#
# The whole run is wrapped in report_duration(:parse).
#
# Returns the populated Grammar.
def parse
  report_duration(:parse) do
    lexer = Lexer.new(@text)

    Grammar.new.tap do |grammar|
      process_prologue(grammar, lexer)

      declarations = TokenScanner.new(lexer.bison_declarations_tokens)
      parse_bison_declarations(declarations, grammar)

      rules = TokenScanner.new(lexer.grammar_rules_tokens)
      parse_grammar_rules(rules, grammar)

      process_epilogue(grammar, lexer)

      # Post-processing order matters: prepare, then nullable, then validate.
      grammar.prepare
      grammar.compute_nullable
      grammar.validate!
    end
  end
end
81
+
82
+ private
83
+
84
# Copies the prologue collected by the lexer onto the grammar.
#
# Each entry of lexer.prologue is read as entry.first (text) and entry[1]
# (presumably the line number of that text; confirm against Lexer).
# prologue_first_lineno is only assigned when there is at least one entry.
def process_prologue(grammar, lexer)
  leading_entry = lexer.prologue.first
  grammar.prologue_first_lineno = leading_entry[1] if leading_entry

  grammar.prologue = lexer.prologue.map { |entry| entry.first }.join
end
88
+
89
# Copies the epilogue collected by the lexer onto the grammar.
#
# Each entry of lexer.epilogue is read as entry.first (text) and entry[1]
# (presumably the line number of that text; confirm against Lexer).
# epilogue_first_lineno is only assigned when there is at least one entry.
def process_epilogue(grammar, lexer)
  leading_entry = lexer.epilogue.first
  grammar.epilogue_first_lineno = leading_entry[1] if leading_entry

  grammar.epilogue = lexer.epilogue.map { |entry| entry.first }.join
end
93
+
94
# Walks the declaration tokens and registers every directive on the grammar.
#
# ts      - scanner over the declaration tokens (see #parse, which builds it
#           from lexer.bison_declarations_tokens).
# grammar - object that receives the parsed declarations.
#
# Every %nonassoc / %left / %right directive gets its own precedence number,
# counted from 0 in order of appearance; all symbols listed after one
# directive share that number.
#
# Raises RuntimeError when the current token does not start a known directive.
def parse_bison_declarations(ts, grammar)
  precedence_number = 0

  until ts.eots?
    case ts.current_type
    when T::P_expect
      ts.next
      grammar.expect = ts.consume!(T::Number).s_value
    when T::P_define
      ts.next
      # Ignore
      ts.consume_multi(T::Ident)
    when T::P_printer
      # Line of the directive itself, captured before it is consumed.
      lineno = ts.current_token.line
      code = parse_directive_code(ts, grammar, :printer)
      ident_or_tags = ts.consume_multi(T::Ident, T::Tag)
      grammar.add_printer(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
    when T::P_lex_param
      code = parse_directive_code(ts, grammar, :lex_param)
      grammar.lex_param = code.token_code.s_value
    when T::P_parse_param
      code = parse_directive_code(ts, grammar, :parse_param)
      grammar.parse_param = code.token_code.s_value
    when T::P_initial_action
      code = parse_directive_code(ts, grammar, :initial_action)
      # Trailing ";" is optional.
      ts.consume(T::Semicolon)
      grammar.initial_action = code
    when T::P_union
      lineno = ts.current_token.line
      code = parse_directive_code(ts, grammar, :union)
      # Trailing ";" is optional.
      ts.consume(T::Semicolon)
      grammar.set_union(code, lineno)
    when T::P_token
      # %token tag? (ident number? string?)+
      #
      # * ident can be char, e.g. '\\', '\t', '\13'
      # * number is a token_id for term
      #
      # These are valid token declaration (from CRuby parse.y)
      #
      # %token END_OF_INPUT 0 "end-of-input"
      # %token <id> '\\' "backslash"
      # %token tSP "escaped space"
      # %token tUPLUS 132 "unary+"
      # %token tCOLON3 ":: at EXPR_BEG"
      # %token tSTRING_DBEG tSTRING_DVAR tLAMBEG tLABEL_END
      #
      #
      # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
      ts.next
      opt_tag = ts.consume(T::Tag)

      while (id = ts.consume(T::Ident, T::Char))
        opt_number = ts.consume(T::Number)
        opt_string = ts.consume(T::String)
        # Can replace 0 (EOF)
        grammar.add_term(
          id: id,
          alias_name: opt_string && opt_string.s_value,
          token_id: opt_number && opt_number.s_value,
          tag: opt_tag,
          replace: true,
        )
      end
    when T::P_type
      # %type tag? (ident|char|string)+
      #
      # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
      ts.next
      opt_tag = ts.consume(T::Tag)

      while (id = ts.consume(T::Ident, T::Char, T::String))
        grammar.add_type(
          id: id,
          tag: opt_tag
        )
      end
    when T::P_nonassoc
      # %nonassoc (ident|char|string)+
      each_precedence_symbol(ts, grammar) { |sym| grammar.add_nonassoc(sym, precedence_number) }
      precedence_number += 1
    when T::P_left
      # %left (ident|char|string)+
      each_precedence_symbol(ts, grammar) { |sym| grammar.add_left(sym, precedence_number) }
      precedence_number += 1
    when T::P_right
      # %right (ident|char|string)+
      each_precedence_symbol(ts, grammar) { |sym| grammar.add_right(sym, precedence_number) }
      precedence_number += 1
    when nil
      # end of input
      # Only reachable for a token whose type is nil: the loop condition
      # already stops when there is no token at all.
      raise "Reach to end of input within declarations"
    else
      raise "Unexpected token: #{ts.current_token}"
    end
  end
end

# Skips the directive token under the cursor, requires a User_code token
# right after it, and returns grammar.build_code(kind, that_token).
def parse_directive_code(ts, grammar, kind)
  ts.next
  grammar.build_code(kind, ts.consume!(T::User_code))
end

# Skips the precedence directive under the cursor, then registers each
# following ident/char/string as a term and yields the value returned by
# grammar.add_term for it.
def each_precedence_symbol(ts, grammar)
  ts.next

  while (id = ts.consume(T::Ident, T::Char, T::String))
    yield grammar.add_term(id: id)
  end
end
213
+
214
# Parses one rule group after another until the scanner runs out of tokens.
def parse_grammar_rules(ts, grammar)
  parse_grammar_rule(ts, grammar) until ts.eots?
end
219
+
220
+ # TODO: Take care of %prec of rule.
221
+ # If %prec exists, user code before %prec
222
+ # is NOT an action. For example "{ code 3 }" is NOT an action.
223
+ #
224
+ # keyword_class { code 2 } tSTRING '!' keyword_end { code 3 } %prec "="
225
# Parses "lhs: rhs | rhs ... ;" and adds one rule per alternative.
#
# The Ident_Colon token is retyped to Ident and reused as the lhs of every
# alternative. A rule's lineno is the line of its first rhs element; for an
# empty rhs it falls back to the line of the lhs (first alternative) or of
# the "|" that introduced the alternative.
#
# Raises RuntimeError when an alternative is followed by an unexpected token.
def parse_grammar_rule(ts, grammar)
  # LHS
  lhs = ts.consume!(T::Ident_Colon) # class:
  lhs.type = T::Ident

  fallback_lineno = lhs.line

  while true
    rhs = parse_grammar_rule_rhs(ts, grammar)
    head = rhs.first
    grammar.add_rule(lhs: lhs, rhs: rhs, lineno: head ? head.line : fallback_lineno)

    case ts.current_type
    when T::Bar
      # | starts the next alternative of the same lhs
      fallback_lineno = ts.current_token.line
      ts.next
    when T::Semicolon
      # ;
      ts.next
      break
    when T::Ident_Colon, nil
      # ";" is optional, so the next lhs (left unconsumed) or the end of
      # input also terminates the rule.
      break
    else
      raise "Unexpected token: #{ts.current_token}"
    end
  end
end
258
+
259
# Collects the elements of a single rule alternative.
#
# Stops, without consuming, at "|", ";", the next "lhs:" or end of input.
#
# Returns an array holding, in source order: Ident/Char tokens, User_code
# tokens, and, for "%prec X", whatever grammar.find_symbol_by_id! returns
# for X.
#
# Raises RuntimeError for an Ident or Char after %prec, for a second
# User_code after %prec, and for any token that cannot appear in a rhs.
def parse_grammar_rule_rhs(ts, grammar)
  elements = []
  after_prec = false
  code_after_prec_seen = false

  while true
    # TODO: String can be here
    case ts.current_type
    when T::Ident
      # keyword_class
      raise "Ident after %prec" if after_prec

      elements << ts.next
    when T::Char
      # '!'
      raise "Char after %prec" if after_prec

      elements << ts.next
    when T::P_prec
      # %prec tPLUS
      #
      # See: https://www.gnu.org/software/bison/manual/html_node/Contextual-Precedence.html
      ts.next
      after_prec = true
      precedence_id = ts.consume!(T::Ident, T::String, T::Char)
      elements << grammar.find_symbol_by_id!(precedence_id)
    when T::User_code
      # { code } in the middle of rhs
      if after_prec
        # At most one code block may follow %prec.
        raise "Multiple User_code after %prec" if code_after_prec_seen

        code_after_prec_seen = true
      end

      code = ts.current_token
      grammar.build_references(code)
      elements << code
      ts.next
    when T::Bar, T::Semicolon, T::Ident_Colon, nil
      # "|", ";", the next lhs (";" is optional) or end of input: this
      # alternative is complete. The terminator is left for the caller.
      break
    else
      raise "Unexpected token: #{ts.current_token}"
    end
  end

  elements
end
320
+ end
321
+ end
@@ -0,0 +1,35 @@
1
+ module Lrama
2
+ class Report
3
module Profile
  class << self
    # Runs the given block under StackProf with CPU mode and raw samples,
    # writing to a fixed dump path under tmp/.
    #
    # stackprof is required lazily, so the gem is only needed when this
    # method is actually called.
    def report_profile
      require "stackprof"

      StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') { yield }
    end
  end
end
12
+
13
# Mixin that times a block and, when reporting has been switched on with
# Duration.enable, prints how long it took.
module Duration
  # Turns reporting on for every includer; there is no way to turn it off.
  def self.enable
    @_report_duration_enabled = true
  end

  # True once Duration.enable has been called.
  def self.enabled?
    !!@_report_duration_enabled
  end

  # Yields, then prints "<method_name> <seconds> s" to standard output if
  # reporting is enabled at that point.
  #
  # method_name - label printed in front of the elapsed time.
  #
  # Returns whatever the block returns.
  def report_duration(method_name)
    # Monotonic clock: unlike Time.now it cannot jump when the wall clock is
    # adjusted, so the difference is always a valid elapsed time.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    result = yield
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

    puts format("%s %10.5f s", method_name, elapsed) if Duration.enabled?

    result
  end
end
34
+ end
35
+ end