lrama 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,349 @@
1
+ require "strscan"
2
+ require "lrama/report"
3
+
4
module Lrama
  # Lexer for parse.y.
  #
  # The input grammar file is first split into Bison's sections
  # (prologue, bison declarations, grammar rules, epilogue) by +lex_text+,
  # then the declarations and rules sections are tokenized into Token
  # structs by +lex_common+.
  class Lexer
    include Lrama::Report::Duration

    # s_value is semantic value
    Token = Struct.new(:type, :s_value, keyword_init: true) do
      Type = Struct.new(:id, :name, keyword_init: true)

      attr_accessor :line, :column, :referred
      # For User_code
      attr_accessor :references

      def to_s
        "#{super} line: #{line}, column: #{column}"
      end

      @i = 0
      @types = []

      # Defines a token-type constant (e.g. Token::P_expect) holding a
      # Type with a sequential id, and records it in @types.
      def self.define_type(name)
        type = Type.new(id: @i, name: name.to_s)
        const_set(name, type)
        @types << type
        @i += 1
      end

      # Token types
      define_type(:P_expect)         # %expect
      define_type(:P_define)         # %define
      define_type(:P_printer)        # %printer
      define_type(:P_lex_param)      # %lex-param
      define_type(:P_parse_param)    # %parse-param
      define_type(:P_initial_action) # %initial-action
      define_type(:P_union)          # %union
      define_type(:P_token)          # %token
      define_type(:P_type)           # %type
      define_type(:P_nonassoc)       # %nonassoc
      define_type(:P_left)           # %left
      define_type(:P_right)          # %right
      define_type(:P_prec)           # %prec
      define_type(:User_code)        # { ... }
      define_type(:Tag)              # <int>
      define_type(:Number)           # 0
      define_type(:Ident_Colon)      # k_if:, k_if : (spaces can be there)
      define_type(:Ident)            # api.pure, tNUMBER
      define_type(:Semicolon)        # ;
      define_type(:Bar)              # |
      define_type(:String)           # "str"
      define_type(:Char)             # '+'
    end

    # States
    #
    # See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
    Initial = 0
    Prologue = 1
    BisonDeclarations = 2
    GrammarRules = 3
    Epilogue = 4

    attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
                :bison_declarations_tokens, :grammar_rules_tokens

    # text: full content of the grammar file (parse.y).
    # Lexing happens eagerly here; results are exposed via the attr_readers.
    def initialize(text)
      @text = text
      @state = Initial
      # Arrays of [line_text, lineno] pairs, one per section
      @prologue = []
      @bison_declarations = []
      @grammar_rules = []
      @epilogue = []

      # Token streams produced from the two lexable sections
      @bison_declarations_tokens = []
      @grammar_rules_tokens = []

      @debug = false

      report_duration(:lex) do
        lex_text
        lex_bison_declarations_tokens
        lex_grammar_rules_tokens
      end
    end

    private

    # Builds a Token and attaches position info.
    def create_token(type, s_value, line, column)
      t = Token.new(type: type, s_value: s_value)
      t.line = line
      t.column = column

      return t
    end

    # Splits @text into sections driven by the "%{", "%}" and "%%" marker
    # lines, advancing @state as each marker is seen.
    #
    # TODO: Remove this
    def lex_text
      @text.each_line.with_index(1) do |string, lineno|
        case @state
        when Initial
          # Skip until "%{"
          if string == "%{\n"
            @state = Prologue
            @prologue << ["", lineno]
            next
          end
        when Prologue
          # Between "%{" and "%}"
          if string == "%}\n"
            @state = BisonDeclarations
            @prologue << ["", lineno]
            next
          end

          @prologue << [string, lineno]
        when BisonDeclarations
          if string == "%%\n"
            @state = GrammarRules
            next
          end

          @bison_declarations << [string, lineno]
        when GrammarRules
          # Between "%%" and "%%"
          if string == "%%\n"
            @state = Epilogue
            next
          end

          @grammar_rules << [string, lineno]
        when Epilogue
          @epilogue << [string, lineno]
        else
          raise "Unknown state: #{@state}"
        end
      end
    end

    # Tokenizes one section. `lines` is an array of [text, lineno] pairs;
    # tokens are appended to `tokens`. `column` holds the scanner position
    # of the start of the current line, so `ss.pos - column` is the column
    # within the line.
    #
    # See:
    # * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
    # * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
    # * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
    def lex_common(lines, tokens)
      line = lines.first[1]
      column = 0
      ss = StringScanner.new(lines.map(&:first).join)

      while !ss.eos? do
        case
        when ss.scan(/\n/)
          line += 1
          column = ss.pos
        when ss.scan(/\s+/)
          # skip
        when ss.scan(/;/)
          tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
        when ss.scan(/\|/)
          tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
        when ss.scan(/(\d+)/)
          tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
        when ss.scan(/(<[a-zA-Z0-9_]+>)/)
          tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
          tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
          tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
        when ss.scan(/%expect/)
          tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
        when ss.scan(/%define/)
          tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
        when ss.scan(/%printer/)
          tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
        when ss.scan(/%lex-param/)
          tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
        when ss.scan(/%parse-param/)
          tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
        when ss.scan(/%initial-action/)
          tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
        when ss.scan(/%union/)
          tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
        when ss.scan(/%token/)
          tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
        when ss.scan(/%type/)
          tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
        when ss.scan(/%nonassoc/)
          tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
        when ss.scan(/%left/)
          tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
        when ss.scan(/%right/)
          tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
        when ss.scan(/%prec/)
          tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
        when ss.scan(/{/)
          token, line = lex_user_code(ss, line, ss.pos - column, lines)
          tokens << token
        when ss.scan(/"/)
          string, line = lex_string(ss, "\"", line, lines)
          token = create_token(Token::String, string, line, ss.pos - column)
          tokens << token
        when ss.scan(/\/\*/)
          # TODO: Need to keep comment?
          line = lex_comment(ss, line, lines, "")
        when ss.scan(/'(.)'/)
          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
        when ss.scan(/'\\(.)'/) # '\\', '\t'
          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
        when ss.scan(/'\\(\d+)'/) # '\13'
          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
        when ss.scan(/%empty/)
          # skip
        else
          l = line - lines.first[1]
          split = ss.string.split("\n")
          col = ss.pos - split[0...l].join("\n").length
          raise "Parse error (unknown token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
        end
      end
    end

    def lex_bison_declarations_tokens
      lex_common(@bison_declarations, @bison_declarations_tokens)
    end

    # Scans a brace-delimited user code block ("{ ... }"); the opening "{"
    # has already been consumed by the caller. Collects $$/$n/@$/@n
    # references so actions can later be translated.
    # Returns [user_code_token, line].
    def lex_user_code(ss, line, column, lines)
      first_line = line
      first_column = column
      debug("Enter lex_user_code: #{line}")
      brace_count = 1
      str = "{"
      # Array of [type, $n, tag, first column, last column]
      # TODO: Is it better to keep string, like "$$", and use gsub?
      references = []

      while !ss.eos? do
        case
        when ss.scan(/\n/)
          line += 1
        when ss.scan(/"/)
          string, line = lex_string(ss, "\"", line, lines)
          str << string
          next
        when ss.scan(/'/)
          string, line = lex_string(ss, "'", line, lines)
          str << string
          next
        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
          references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
          references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
        when ss.scan(/@\$/) # @$
          references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
        when ss.scan(/@(\d+)/) # @1, @12
          # NOTE: was /@(\d)+/, which captured only the LAST digit of a
          # multi-digit reference ("@12" -> 2). Grouping the whole run of
          # digits fixes multi-digit location references.
          references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
        when ss.scan(/{/)
          brace_count += 1
        when ss.scan(/}/)
          brace_count -= 1

          debug("Return lex_user_code: #{line}")
          if brace_count == 0
            str << ss[0]
            user_code = Token.new(type: Token::User_code, s_value: str.freeze)
            user_code.line = first_line
            user_code.column = first_column
            user_code.references = references
            return [user_code, line]
          end
        when ss.scan(/\/\*/)
          str << ss[0]
          line = lex_comment(ss, line, lines, str)
        else
          # noop, just consume char
          str << ss.getch
          next
        end

        # Append whatever the winning branch matched (branches that
        # already appended use `next` to skip this).
        str << ss[0]
      end

      # Reached end of input but braces do not match
      l = line - lines.first[1]
      raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
    end

    # Scans a string literal up to (and including) `terminator`; the
    # opening quote has already been consumed. Returns [string, line].
    def lex_string(ss, terminator, line, lines)
      debug("Enter lex_string: #{line}")

      str = terminator.dup

      while (c = ss.getch) do
        str << c

        case c
        when "\n"
          line += 1
        when terminator
          debug("Return lex_string: #{line}")
          return [str, line]
        else
          # noop
        end
      end

      # Reached end of input but quotes do not match
      l = line - lines.first[1]
      raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
    end

    # Scans a /* */ style comment body (the opening "/*" has already been
    # consumed), appending the body to `str`. Returns the updated line.
    #
    # TODO: Need to handle // style comment
    def lex_comment(ss, line, lines, str)
      while !ss.eos? do
        case
        when ss.scan(/\n/)
          line += 1
        when ss.scan(/\*\//)
          return line
        else
          str << ss.getch
          next
        end

        str << ss[0]
      end

      # Reached end of input but comment is not closed
      l = line - lines.first[1]
      raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
    end

    def lex_grammar_rules_tokens
      lex_common(@grammar_rules, @grammar_rules_tokens)
    end

    def debug(msg)
      return unless @debug
      puts "#{msg}\n"
    end
  end
end
@@ -0,0 +1,268 @@
1
+ require "erb"
2
+ require "forwardable"
3
+ require "lrama/report"
4
+
5
module Lrama
  # Renders the generated parser (and, optionally, its header file) by
  # evaluating an ERB template with the computed parser context. Exposes
  # the helper methods the templates call (token enums, tables, user
  # actions, etc.).
  class Output
    extend Forwardable
    include Report::Duration

    attr_reader :grammar_file_path, :context, :grammar

    def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
                               :yymaxutok, :yypact_ninf, :yytable_ninf

    def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol

    # out:              IO for the generated parser source
    # output_file_path: path of the generated parser (used for #line)
    # template_name:    template file name under the template dir
    # header_out:       optional IO for the header; when nil and
    #                   header_file_path is set, the file is written directly
    def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:)
      @out = out
      @output_file_path = output_file_path
      @template_name = template_name
      @grammar_file_path = grammar_file_path
      @header_out = header_out
      @header_file_path = header_file_path
      @context = context
      @grammar = grammar
    end

    # Renders the parser into @out, and the header into @header_out (or
    # the file at @header_file_path) when a header was requested.
    def render
      report_duration(:render) do
        tmp = eval_template(template_file, @output_file_path)
        @out << tmp

        if @header_file_path
          tmp = eval_template(header_template_file, @header_file_path)

          if @header_out
            @header_out << tmp
          else
            File.open(@header_file_path, "w+") do |f|
              f << tmp
            end
          end
        end
      end
    end

    # A part of b4_token_enums
    def token_enums
      str = ""

      @context.yytokentype.each do |s_value, token_id, display_name|
        s = sprintf("%s = %d%s", s_value, token_id, token_id == yymaxutok ? "" : ",")

        if display_name
          str << sprintf("    %-30s /* %s */\n", s, display_name)
        else
          str << sprintf("    %s\n", s)
        end
      end

      str
    end

    # b4_symbol_enum
    def symbol_enum
      str = ""

      last_sym_number = @context.yysymbol_kind_t.last[1]
      @context.yysymbol_kind_t.each do |s_value, sym_number, display_name|
        s = sprintf("%s = %d%s", s_value, sym_number, (sym_number == last_sym_number) ? "" : ",")

        if display_name
          str << sprintf("  %-40s /* %s */\n", s, display_name)
        else
          str << sprintf("  %s\n", s)
        end
      end

      str
    end

    def yytranslate
      int_array_to_string(@context.yytranslate)
    end

    def yyrline
      int_array_to_string(@context.yyrline)
    end

    def yytname
      string_array_to_string(@context.yytname) + " YY_NULLPTR"
    end

    # b4_int_type_for: smallest C integer type that can hold every value
    # of the array.
    def int_type_for(ary)
      min = ary.min
      max = ary.max

      case
      when (-127 <= min && min <= 127) && (-127 <= max && max <= 127)
        "yytype_int8"
      when (0 <= min && min <= 255) && (0 <= max && max <= 255)
        "yytype_uint8"
      when (-32767 <= min && min <= 32767) && (-32767 <= max && max <= 32767)
        "yytype_int16"
      when (0 <= min && min <= 65535) && (0 <= max && max <= 65535)
        "yytype_uint16"
      else
        "int"
      end
    end

    # Switch cases for %printer actions, one per symbol with a printer.
    def symbol_actions_for_printer
      str = ""

      @grammar.symbols.each do |sym|
        next unless sym.printer

        str << <<-STR
    case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.printer.lineno} "#{@grammar_file_path}"
         #{sym.printer.translated_code(sym.tag)}
#line [@oline@] [@ofile@]
        break;

    STR
      end

      str
    end

    # b4_user_actions: switch cases for each rule's semantic action.
    def user_actions
      str = ""

      @context.states.rules.each do |rule|
        next unless rule.code

        code = rule.code
        spaces = " " * (code.column - 1)

        str << <<-STR
  case #{rule.id + 1}: /* #{rule.as_comment} */
#line #{code.line} "#{@grammar_file_path}"
#{spaces}#{rule.translated_code}
#line [@oline@] [@ofile@]
    break;

STR
      end

      str << <<-STR

#line [@oline@] [@ofile@]
      STR

      str
    end

    # b4_parse_param
    def parse_param
      # Omit "{}"
      @grammar.parse_param[1..-2]
    end

    # b4_user_formals
    def user_formals
      if @grammar.parse_param
        ", #{parse_param}"
      else
        ""
      end
    end

    # b4_table_value_equals
    def table_value_equals(table, value, literal, symbol)
      if literal < table.min || table.max < literal
        "0"
      else
        "((#{value}) == #{symbol})"
      end
    end

    def template_basename
      File.basename(template_file)
    end

    def aux
      @grammar.aux
    end

    # Formats integers 10 per row, right-aligned, comma-separated.
    def int_array_to_string(ary)
      last = ary.count - 1

      s = ary.each_with_index.each_slice(10).map do |slice|
        str = "  "

        slice.each do |e, i|
          str << sprintf("%6d%s", e, (i == last) ? "" : ",")
        end

        str
      end

      s.join("\n")
    end

    def spec_mapped_header_file
      @header_file_path
    end

    def b4_cpp_guard__b4_spec_mapped_header_file
      if @header_file_path
        "YY_YY_" + @header_file_path.gsub(/[^a-zA-Z_0-9]+/, "_").upcase + "_INCLUDED"
      else
        ""
      end
    end

    private

    # Evaluates an ERB template file and substitutes the [@oline@] /
    # [@ofile@] placeholders with line numbers/path of `ofile`.
    def eval_template(file, ofile)
      # trim_mode: replaces the positional (safe_level, trim_mode) args,
      # deprecated since Ruby 2.6.
      erb = ERB.new(File.read(file), trim_mode: '-')
      erb.filename = file
      tmp = erb.result_with_hash(context: @context, output: self)
      replace_special_variables(tmp, ofile)
    end

    def template_file
      File.join(template_dir, @template_name)
    end

    def header_template_file
      File.join(template_dir, "bison/yacc.h")
    end

    def template_dir
      File.expand_path("../../../template", __FILE__)
    end

    # Formats C string literals for yytname, wrapping lines at ~75 cols.
    def string_array_to_string(ary)
      str = ""
      tmp = " "

      ary.each do |s|
        s = s.gsub('\\', '\\\\\\\\')
        s = s.gsub('"', '\\"')

        if (tmp + s + " \"\",").length > 75
          str << tmp << "\n"
          tmp = "  \"#{s}\","
        else
          tmp << " \"#{s}\","
        end
      end

      str << tmp
    end

    # Replaces [@oline@] with the NEXT line's number (matching C's #line
    # semantics) and [@ofile@] with the quoted output path.
    def replace_special_variables(str, ofile)
      str.each_line.with_index(1).map do |line, i|
        line.gsub!("[@oline@]", (i + 1).to_s)
        line.gsub!("[@ofile@]", "\"#{ofile}\"")
        line
      end.join
    end
  end
end