lrama 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,321 @@
+require "lrama/report"
+
+module Lrama
+  # Parser for parse.y, generates a grammar
+  class Parser
+    include Lrama::Report::Duration
+
+    T = Lrama::Lexer::Token
+
+    class TokenScanner
+      def initialize(tokens)
+        @tokens = tokens
+        @index = 0
+      end
+
+      def current_token
+        @tokens[@index]
+      end
+
+      def current_type
+        current_token && current_token.type
+      end
+
+      def next
+        token = current_token
+        @index += 1
+        return token
+      end
+
+      def consume(*token_types)
+        if token_types.include?(current_type)
+          token = current_token
+          self.next
+          return token
+        end
+
+        return nil
+      end
+
+      def consume!(*token_types)
+        consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}")
+      end
+
+      def consume_multi(*token_types)
+        a = []
+
+        while token_types.include?(current_type)
+          a << current_token
+          self.next
+        end
+
+        raise "No token is consumed. #{token_types}" if a.empty?
+
+        return a
+      end
+
+      def eots?
+        current_token.nil?
+      end
+    end
+
+    def initialize(text)
+      @text = text
+    end
+
+    def parse
+      report_duration(:parse) do
+        lexer = Lexer.new(@text)
+        grammar = Grammar.new
+        process_prologue(grammar, lexer)
+        parse_bison_declarations(TokenScanner.new(lexer.bison_declarations_tokens), grammar)
+        parse_grammar_rules(TokenScanner.new(lexer.grammar_rules_tokens), grammar)
+        process_epilogue(grammar, lexer)
+        grammar.prepare
+        grammar.compute_nullable
+        grammar.validate!
+
+        grammar
+      end
+    end
+
+    private
+
+    def process_prologue(grammar, lexer)
+      grammar.prologue_first_lineno = lexer.prologue.first[1] if lexer.prologue.first
+      grammar.prologue = lexer.prologue.map(&:first).join
+    end
+
+    def process_epilogue(grammar, lexer)
+      grammar.epilogue_first_lineno = lexer.epilogue.first[1] if lexer.epilogue.first
+      grammar.epilogue = lexer.epilogue.map(&:first).join
+    end
+
+    def parse_bison_declarations(ts, grammar)
+      precedence_number = 0
+
+      while !ts.eots? do
+        case ts.current_type
+        when T::P_expect
+          ts.next
+          grammar.expect = ts.consume!(T::Number).s_value
+        when T::P_define
+          ts.next
+          # Ignore
+          ts.consume_multi(T::Ident)
+        when T::P_printer
+          lineno = ts.current_token.line
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:printer, code)
+          ident_or_tags = ts.consume_multi(T::Ident, T::Tag)
+          grammar.add_printer(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
+        when T::P_lex_param
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:lex_param, code)
+          grammar.lex_param = code.token_code.s_value
+        when T::P_parse_param
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:parse_param, code)
+          grammar.parse_param = code.token_code.s_value
+        when T::P_initial_action
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:initial_action, code)
+          ts.consume(T::Semicolon)
+          grammar.initial_action = code
+        when T::P_union
+          lineno = ts.current_token.line
+          ts.next
+          code = ts.consume!(T::User_code)
+          code = grammar.build_code(:union, code)
+          ts.consume(T::Semicolon)
+          grammar.set_union(code, lineno)
+        when T::P_token
+          # %token tag? (ident number? string?)+
+          #
+          # * ident can be char, e.g. '\\', '\t', '\13'
+          # * number is a token_id for term
+          #
+          # These are valid token declarations (from CRuby parse.y)
+          #
+          # %token END_OF_INPUT 0 "end-of-input"
+          # %token <id> '\\' "backslash"
+          # %token tSP "escaped space"
+          # %token tUPLUS 132 "unary+"
+          # %token tCOLON3 ":: at EXPR_BEG"
+          # %token tSTRING_DBEG tSTRING_DVAR tLAMBEG tLABEL_END
+          #
+          #
+          # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
+          ts.next
+          opt_tag = ts.consume(T::Tag)
+
+          while (id = ts.consume(T::Ident, T::Char)) do
+            opt_number = ts.consume(T::Number)
+            opt_string = ts.consume(T::String)
+            # Can replace 0 (EOF)
+            grammar.add_term(
+              id: id,
+              alias_name: opt_string && opt_string.s_value,
+              token_id: opt_number && opt_number.s_value,
+              tag: opt_tag,
+              replace: true,
+            )
+          end
+        when T::P_type
+          # %type tag? (ident|char|string)+
+          #
+          # See: https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
+          ts.next
+          opt_tag = ts.consume(T::Tag)
+
+          while (id = ts.consume(T::Ident, T::Char, T::String)) do
+            grammar.add_type(
+              id: id,
+              tag: opt_tag
+            )
+          end
+        when T::P_nonassoc
+          # %nonassoc (ident|char|string)+
+          ts.next
+          while (id = ts.consume(T::Ident, T::Char, T::String)) do
+            sym = grammar.add_term(id: id)
+            grammar.add_nonassoc(sym, precedence_number)
+          end
+          precedence_number += 1
+        when T::P_left
+          # %left (ident|char|string)+
+          ts.next
+          while (id = ts.consume(T::Ident, T::Char, T::String)) do
+            sym = grammar.add_term(id: id)
+            grammar.add_left(sym, precedence_number)
+          end
+          precedence_number += 1
+        when T::P_right
+          # %right (ident|char|string)+
+          ts.next
+          while (id = ts.consume(T::Ident, T::Char, T::String)) do
+            sym = grammar.add_term(id: id)
+            grammar.add_right(sym, precedence_number)
+          end
+          precedence_number += 1
+        when nil
+          # end of input
+          raise "Reach to end of input within declarations"
+        else
+          raise "Unexpected token: #{ts.current_token}"
+        end
+      end
+    end
+
+    def parse_grammar_rules(ts, grammar)
+      while !ts.eots? do
+        parse_grammar_rule(ts, grammar)
+      end
+    end
+
+    # TODO: Take care of %prec of rule.
+    # If %prec exists, user code before %prec
+    # is NOT an action. For example "{ code 3 }" is NOT an action.
+    #
+    # keyword_class { code 2 } tSTRING '!' keyword_end { code 3 } %prec "="
+    def parse_grammar_rule(ts, grammar)
+      # LHS
+      lhs = ts.consume!(T::Ident_Colon) # class:
+      lhs.type = T::Ident
+
+      rhs = parse_grammar_rule_rhs(ts, grammar)
+
+      grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : lhs.line)
+
+      while true do
+        case ts.current_type
+        when T::Bar
+          # |
+          bar_lineno = ts.current_token.line
+          ts.next
+          rhs = parse_grammar_rule_rhs(ts, grammar)
+          grammar.add_rule(lhs: lhs, rhs: rhs, lineno: rhs.first ? rhs.first.line : bar_lineno)
+        when T::Semicolon
+          # ;
+          ts.next
+          break
+        when T::Ident_Colon
+          # Next lhs can be here because ";" is optional.
+          # Do not consume next token.
+          break
+        when nil
+          # end of input can be here when ";" is omitted
+          break
+        else
+          raise "Unexpected token: #{ts.current_token}"
+        end
+      end
+    end
+
+    def parse_grammar_rule_rhs(ts, grammar)
+      a = []
+      prec_seen = false
+      code_after_prec = false
+
+      while true do
+        # TODO: String can be here
+        case ts.current_type
+        when T::Ident
+          # keyword_class
+
+          raise "Ident after %prec" if prec_seen
+          a << ts.current_token
+          ts.next
+        when T::Char
+          # '!'
+
+          raise "Char after %prec" if prec_seen
+          a << ts.current_token
+          ts.next
+        when T::P_prec
+          # %prec tPLUS
+          #
+          # See: https://www.gnu.org/software/bison/manual/html_node/Contextual-Precedence.html
+
+          ts.next
+          prec_seen = true
+          precedence_id = ts.consume!(T::Ident, T::String, T::Char)
+          precedence_sym = grammar.find_symbol_by_id!(precedence_id)
+          a << precedence_sym
+        when T::User_code
+          # { code } in the middle of rhs
+
+          if prec_seen
+            raise "Multiple User_code after %prec" if code_after_prec
+            code_after_prec = true
+          end
+
+          code = ts.current_token
+          grammar.build_references(code)
+          a << code
+          ts.next
+        when T::Bar
+          # |
+          break
+        when T::Semicolon
+          # ;
+          break
+        when T::Ident_Colon
+          # Next lhs can be here because ";" is optional.
+          break
+        when nil
+          # end of input can be here when ";" is omitted
+          break
+        else
+          raise "Unexpected token: #{ts.current_token}"
+        end
+      end
+
+      return a
+    end
+  end
+end
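
For orientation only (not part of the packaged file above): a minimal sketch of how the Parser might be driven. The require line and the "parse.y" path are illustrative assumptions; whether a given grammar file tokenizes cleanly depends on Lrama::Lexer, which is not shown in this diff.

    require "lrama"

    # "parse.y" is an illustrative path; the file is expected to be a
    # Bison-style grammar with prologue, declarations, rules, and epilogue.
    text = File.read("parse.y")

    # Parser#parse lexes the text, processes the prologue and epilogue,
    # parses the %-declarations and grammar rules, then prepares,
    # computes nullability for, and validates the resulting Grammar.
    grammar = Lrama::Parser.new(text).parse
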
@@ -0,0 +1,35 @@
+module Lrama
+  class Report
+    module Profile
+      def self.report_profile
+        require "stackprof"
+
+        StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do
+          yield
+        end
+      end
+    end
+
+    module Duration
+      def self.enable
+        @_report_duration_enabled = true
+      end
+
+      def self.enabled?
+        !!@_report_duration_enabled
+      end
+
+      def report_duration(method_name)
+        time1 = Time.now.to_f
+        result = yield
+        time2 = Time.now.to_f
+
+        if Duration.enabled?
+          puts sprintf("%s %10.5f s", method_name, time2 - time1)
+        end
+
+        return result
+      end
+    end
+  end
+end