lrama 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
+ require "strscan"
2
+ require "lrama/report"
3
+
4
+ module Lrama
5
+ # Lexer for parse.y
6
+ class Lexer
7
+ include Lrama::Report::Duration
8
+
9
    # s_value is semantic value
    #
    # Token is the lexer's output unit: a token type (one of the Type
    # constants defined below) plus its semantic value, with the source
    # location attached after construction.
    Token = Struct.new(:type, :s_value, keyword_init: true) do
      Type = Struct.new(:id, :name, keyword_init: true)

      # Source location; filled in by the lexer after Token.new
      attr_accessor :line, :column, :referred
      # For User_code: list of $$/$n/@$/@n references found in the code
      # block (see lex_user_code for the element layout)
      attr_accessor :references

      def to_s
        "#{super} line: #{line}, column: #{column}"
      end

      # Class-level instance variables of the Token struct class:
      # @i is the next type id, @types collects every defined Type.
      @i = 0
      @types = []

      # Defines a token type: creates a Type constant named +name+ with
      # an auto-incremented id and registers it in @types.
      def self.define_type(name)
        type = Type.new(id: @i, name: name.to_s)
        const_set(name, type)
        @types << type
        @i += 1
      end

      # Token types
      define_type(:P_expect)         # %expect
      define_type(:P_define)         # %define
      define_type(:P_printer)        # %printer
      define_type(:P_lex_param)      # %lex-param
      define_type(:P_parse_param)    # %parse-param
      define_type(:P_initial_action) # %initial-action
      define_type(:P_union)          # %union
      define_type(:P_token)          # %token
      define_type(:P_type)           # %type
      define_type(:P_nonassoc)       # %nonassoc
      define_type(:P_left)           # %left
      define_type(:P_right)          # %right
      define_type(:P_prec)           # %prec
      define_type(:User_code)        # { ... }
      define_type(:Tag)              # <int>
      define_type(:Number)           # 0
      define_type(:Ident_Colon)      # k_if:, k_if : (spaces can be there)
      define_type(:Ident)            # api.pure, tNUMBER
      define_type(:Semicolon)        # ;
      define_type(:Bar)              # |
      define_type(:String)           # "str"
      define_type(:Char)             # '+'
    end
55
+
56
    # States of the section scan performed by lex_text.
    #
    # See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
    Initial = 0
    Prologue = 1
    BisonDeclarations = 2
    GrammarRules = 3
    Epilogue = 4

    # Section texts (arrays of [line_text, lineno] pairs) and the token
    # lists produced from the declarations and grammar-rules sections
    attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
      :bison_declarations_tokens, :grammar_rules_tokens
69
+
70
    # Splits +text+ (the whole grammar file) into its sections and lexes
    # the declarations and grammar-rules sections into token lists.
    def initialize(text)
      @text = text
      @state = Initial
      # Each section is an array of [line_text, lineno] pairs
      @prologue = []
      @bison_declarations = []
      @grammar_rules = []
      @epilogue = []

      # Token lists filled by the two lex_*_tokens calls below
      @bison_declarations_tokens = []
      @grammar_rules_tokens = []

      @debug = false

      report_duration(:lex) do
        lex_text
        lex_bison_declarations_tokens
        lex_grammar_rules_tokens
      end
    end
91
+
92
+ private
93
+
94
+ def create_token(type, s_value, line, column)
95
+ t = Token.new(type: type, s_value: s_value)
96
+ t.line = line
97
+ t.column = column
98
+
99
+ return t
100
+ end
101
+
102
    # TODO: Remove this
    #
    # Splits @text line by line into the four grammar-file sections
    # (prologue, bison declarations, grammar rules, epilogue) using a
    # state machine driven by the "%{", "%}" and "%%" marker lines.
    # NOTE(review): markers must be the entire line including the
    # newline ("%{\n" etc.); trailing spaces or a final line without a
    # newline will not match — confirm inputs are well-formed.
    def lex_text
      @text.each_line.with_index(1) do |string, lineno|
        case @state
        when Initial
          # Skip until "%{"
          if string == "%{\n"
            @state = Prologue
            # Blank placeholder keeps line numbers aligned with the file
            @prologue << ["", lineno]
            next
          end
        when Prologue
          # Between "%{" and "%}"
          if string == "%}\n"
            @state = BisonDeclarations
            @prologue << ["", lineno]
            next
          end

          @prologue << [string, lineno]
        when BisonDeclarations
          # Between "%}" and the first "%%"
          if string == "%%\n"
            @state = GrammarRules
            next
          end

          @bison_declarations << [string, lineno]
        when GrammarRules
          # Between "%%" and "%%"
          if string == "%%\n"
            @state = Epilogue
            next
          end

          @grammar_rules << [string, lineno]
        when Epilogue
          # Everything after the second "%%"
          @epilogue << [string, lineno]
        else
          raise "Unknown state: #{@state}"
        end
      end
    end
144
+
145
+ # See:
146
+ # * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
147
+ # * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
148
+ # * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
149
+ def lex_common(lines, tokens)
150
+ line = lines.first[1]
151
+ column = 0
152
+ ss = StringScanner.new(lines.map(&:first).join)
153
+
154
+ while !ss.eos? do
155
+ case
156
+ when ss.scan(/\n/)
157
+ line += 1
158
+ column = ss.pos
159
+ when ss.scan(/\s+/)
160
+ # skip
161
+ when ss.scan(/;/)
162
+ tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
163
+ when ss.scan(/\|/)
164
+ tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
165
+ when ss.scan(/(\d+)/)
166
+ tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
167
+ when ss.scan(/(<[a-zA-Z0-9_]+>)/)
168
+ tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
169
+ when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
170
+ tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
171
+ when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
172
+ tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
173
+ when ss.scan(/%expect/)
174
+ tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
175
+ when ss.scan(/%define/)
176
+ tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
177
+ when ss.scan(/%printer/)
178
+ tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
179
+ when ss.scan(/%lex-param/)
180
+ tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
181
+ when ss.scan(/%parse-param/)
182
+ tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
183
+ when ss.scan(/%initial-action/)
184
+ tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
185
+ when ss.scan(/%union/)
186
+ tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
187
+ when ss.scan(/%token/)
188
+ tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
189
+ when ss.scan(/%type/)
190
+ tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
191
+ when ss.scan(/%nonassoc/)
192
+ tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
193
+ when ss.scan(/%left/)
194
+ tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
195
+ when ss.scan(/%right/)
196
+ tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
197
+ when ss.scan(/%prec/)
198
+ tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
199
+ when ss.scan(/{/)
200
+ token, line = lex_user_code(ss, line, ss.pos - column, lines)
201
+ tokens << token
202
+ when ss.scan(/"/)
203
+ string, line = lex_string(ss, "\"", line, lines)
204
+ token = create_token(Token::String, string, line, ss.pos - column)
205
+ tokens << token
206
+ when ss.scan(/\/\*/)
207
+ # TODO: Need to keep comment?
208
+ line = lex_comment(ss, line, lines, "")
209
+ when ss.scan(/'(.)'/)
210
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
211
+ when ss.scan(/'\\(.)'/) # '\\', '\t'
212
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
213
+ when ss.scan(/'\\(\d+)'/) # '\13'
214
+ tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
215
+ when ss.scan(/%empty/)
216
+ # skip
217
+ else
218
+ l = line - lines.first[1]
219
+ split = ss.string.split("\n")
220
+ col = ss.pos - split[0...l].join("\n").length
221
+ raise "Parse error (unknow token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
222
+ end
223
+ end
224
+ end
225
+
226
    # Lexes the %-declarations section into @bison_declarations_tokens
    def lex_bison_declarations_tokens
      lex_common(@bison_declarations, @bison_declarations_tokens)
    end
229
+
230
+ def lex_user_code(ss, line, column, lines)
231
+ first_line = line
232
+ first_column = column
233
+ debug("Enter lex_user_code: #{line}")
234
+ brace_count = 1
235
+ str = "{"
236
+ # Array of [type, $n, tag, first column, last column]
237
+ # TODO: Is it better to keep string, like "$$", and use gsub?
238
+ references = []
239
+
240
+ while !ss.eos? do
241
+ case
242
+ when ss.scan(/\n/)
243
+ line += 1
244
+ when ss.scan(/"/)
245
+ string, line = lex_string(ss, "\"", line, lines)
246
+ str << string
247
+ next
248
+ when ss.scan(/'/)
249
+ string, line = lex_string(ss, "'", line, lines)
250
+ str << string
251
+ next
252
+ when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
253
+ tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
254
+ references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
255
+ when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
256
+ tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
257
+ references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
258
+ when ss.scan(/@\$/) # @$
259
+ references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
260
+ when ss.scan(/@(\d)+/) # @1
261
+ references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
262
+ when ss.scan(/{/)
263
+ brace_count += 1
264
+ when ss.scan(/}/)
265
+ brace_count -= 1
266
+
267
+ debug("Return lex_user_code: #{line}")
268
+ if brace_count == 0
269
+ str << ss[0]
270
+ user_code = Token.new(type: Token::User_code, s_value: str.freeze)
271
+ user_code.line = first_line
272
+ user_code.column = first_column
273
+ user_code.references = references
274
+ return [user_code, line]
275
+ end
276
+ when ss.scan(/\/\*/)
277
+ str << ss[0]
278
+ line = lex_comment(ss, line, lines, str)
279
+ else
280
+ # noop, just consume char
281
+ str << ss.getch
282
+ next
283
+ end
284
+
285
+ str << ss[0]
286
+ end
287
+
288
+ # Reach to end of input but brace does not match
289
+ l = line - lines.first[1]
290
+ raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
291
+ end
292
+
293
+ def lex_string(ss, terminator, line, lines)
294
+ debug("Enter lex_string: #{line}")
295
+
296
+ str = terminator.dup
297
+
298
+ while (c = ss.getch) do
299
+ str << c
300
+
301
+ case c
302
+ when "\n"
303
+ line += 1
304
+ when terminator
305
+ debug("Return lex_string: #{line}")
306
+ return [str, line]
307
+ else
308
+ # noop
309
+ end
310
+ end
311
+
312
+ # Reach to end of input but quote does not match
313
+ l = line - lines.first[1]
314
+ raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
315
+ end
316
+
317
+ # TODO: Need to handle // style comment
318
+ #
319
+ # /* */ style comment
320
+ def lex_comment(ss, line, lines, str)
321
+ while !ss.eos? do
322
+ case
323
+ when ss.scan(/\n/)
324
+ line += 1
325
+ when ss.scan(/\*\//)
326
+ return line
327
+ else
328
+ str << ss.getch
329
+ next
330
+ end
331
+
332
+ str << ss[0]
333
+ end
334
+
335
+ # Reach to end of input but quote does not match
336
+ l = line - lines.first[1]
337
+ raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
338
+ end
339
+
340
    # Lexes the grammar-rules section into @grammar_rules_tokens
    def lex_grammar_rules_tokens
      lex_common(@grammar_rules, @grammar_rules_tokens)
    end
343
+
344
+ def debug(msg)
345
+ return unless @debug
346
+ puts "#{msg}\n"
347
+ end
348
+ end
349
+ end
@@ -0,0 +1,268 @@
1
+ require "erb"
2
+ require "forwardable"
3
+ require "lrama/report"
4
+
5
+ module Lrama
6
+ class Output
7
+ extend Forwardable
8
+ include Report::Duration
9
+
10
    attr_reader :grammar_file_path, :context, :grammar

    # Table sizes and sentinel values computed by the parser context
    def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
      :yymaxutok, :yypact_ninf, :yytable_ninf

    # Reserved symbols defined on the grammar
    def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
16
+
17
    # @param out [IO] destination for the generated parser source
    # @param output_file_path [String] path of the generated parser file
    # @param template_name [String] template file name under template/
    # @param grammar_file_path [String] path of the input grammar file
    # @param header_out [IO, nil] destination for the generated header;
    #   when nil and header_file_path is set, the file is written directly
    # @param header_file_path [String, nil] path of the generated header
    # @param context [Object] computed parser tables (Lrama context)
    # @param grammar [Object] the parsed grammar
    def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:)
      @out = out
      @output_file_path = output_file_path
      @template_name = template_name
      @grammar_file_path = grammar_file_path
      @header_out = header_out
      @header_file_path = header_file_path
      @context = context
      @grammar = grammar
    end
27
+
28
    # Renders the parser template (and, when a header path is given, the
    # header template) against this output object and writes the results.
    def render
      report_duration(:render) do
        # NOTE(review): ERB.new's positional safe_level/trim_mode args
        # are deprecated since Ruby 2.6 — consider trim_mode: '-' when
        # the supported Ruby range allows it.
        erb = ERB.new(File.read(template_file), nil, '-')
        erb.filename = template_file
        tmp = erb.result_with_hash(context: @context, output: self)
        # Fill in the [@oline@]/[@ofile@] placeholders for the .c output
        tmp = replace_special_variables(tmp, @output_file_path)
        @out << tmp

        if @header_file_path
          erb = ERB.new(File.read(header_template_file), nil, '-')
          erb.filename = header_template_file
          tmp = erb.result_with_hash(context: @context, output: self)
          tmp = replace_special_variables(tmp, @header_file_path)

          # Prefer the injected IO; otherwise write the header file
          if @header_out
            @header_out << tmp
          else
            File.open(@header_file_path, "w+") do |f|
              f << tmp
            end
          end
        end
      end
    end
52
+
53
    # A part of b4_token_enums
    #
    # Renders the yytokentype enum members as "NAME = ID," lines (no
    # trailing comma on the last token, i.e. when its id == yymaxutok),
    # with the display name appended as a C comment when present.
    def token_enums
      str = ""

      @context.yytokentype.each do |s_value, token_id, display_name|
        s = sprintf("%s = %d%s", s_value, token_id, token_id == yymaxutok ? "" : ",")

        if display_name
          str << sprintf("    %-30s /* %s */\n", s, display_name)
        else
          str << sprintf("    %s\n", s)
        end
      end

      str
    end
69
+
70
    # b4_symbol_enum
    #
    # Renders the yysymbol_kind_t enum members; the last symbol number
    # determines where the trailing comma is omitted. Display names are
    # appended as C comments when present.
    def symbol_enum
      str = ""

      last_sym_number = @context.yysymbol_kind_t.last[1]
      @context.yysymbol_kind_t.each do |s_value, sym_number, display_name|
        s = sprintf("%s = %d%s", s_value, sym_number, (sym_number == last_sym_number) ? "" : ",")

        if display_name
          str << sprintf("  %-40s /* %s */\n", s, display_name)
        else
          str << sprintf("  %s\n", s)
        end
      end

      str
    end
87
+
88
    # yytranslate table rendered as C array elements
    def yytranslate
      int_array_to_string(@context.yytranslate)
    end
91
+
92
    # yyrline table (Bison's rule -> source line mapping) as C array elements
    def yyrline
      int_array_to_string(@context.yyrline)
    end
95
+
96
    # yytname table: symbol-name string literals, terminated by YY_NULLPTR
    def yytname
      string_array_to_string(@context.yytname) + " YY_NULLPTR"
    end
99
+
100
+ # b4_int_type_for
101
+ def int_type_for(ary)
102
+ min = ary.min
103
+ max = ary.max
104
+
105
+ case
106
+ when (-127 <= min && min <= 127) && (-127 <= max && max <= 127)
107
+ "yytype_int8"
108
+ when (0 <= min && min <= 255) && (0 <= max && max <= 255)
109
+ "yytype_uint8"
110
+ when (-32767 <= min && min <= 32767) && (-32767 <= max && max <= 32767)
111
+ "yytype_int16"
112
+ when (0 <= min && min <= 65535) && (0 <= max && max <= 65535)
113
+ "yytype_uint16"
114
+ else
115
+ "int"
116
+ end
117
+ end
118
+
119
    # Renders the switch cases of the %printer actions: one "case" per
    # symbol that has a printer, wrapping its translated code between
    # #line markers (the [@oline@]/[@ofile@] placeholders are filled in
    # later by replace_special_variables).
    def symbol_actions_for_printer
      str = ""

      @grammar.symbols.each do |sym|
        next unless sym.printer

        str << <<-STR
    case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.printer.lineno} "#{@grammar_file_path}"
      #{sym.printer.translated_code(sym.tag)}
#line [@oline@] [@ofile@]
      break;

      STR
      end

      str
    end
137
+
138
+ # b4_user_actions
139
+ def user_actions
140
+ str = ""
141
+
142
+ @context.states.rules.each do |rule|
143
+ next unless rule.code
144
+
145
+ rule = rule
146
+ code = rule.code
147
+ spaces = " " * (code.column - 1)
148
+
149
+ str << <<-STR
150
+ case #{rule.id + 1}: /* #{rule.as_comment} */
151
+ #line #{code.line} "#{@grammar_file_path}"
152
+ #{spaces}#{rule.translated_code}
153
+ #line [@oline@] [@ofile@]
154
+ break;
155
+
156
+ STR
157
+ end
158
+
159
+ str << <<-STR
160
+
161
+ #line [@oline@] [@ofile@]
162
+ STR
163
+
164
+ str
165
+ end
166
+
167
    # b4_parse_param
    #
    # The %parse-param declaration text with the surrounding "{}"
    # stripped. NOTE(review): raises NoMethodError when no %parse-param
    # was declared (@grammar.parse_param is nil); callers such as
    # user_formals guard for that — confirm before calling directly.
    def parse_param
      # Omit "{}"
      @grammar.parse_param[1..-2]
    end
172
+
173
+ # b4_user_formals
174
+ def user_formals
175
+ if @grammar.parse_param
176
+ ", #{parse_param}"
177
+ else
178
+ ""
179
+ end
180
+ end
181
+
182
+ # b4_table_value_equals
183
+ def table_value_equals(table, value, literal, symbol)
184
+ if literal < table.min || table.max < literal
185
+ "0"
186
+ else
187
+ "((#{value}) == #{symbol})"
188
+ end
189
+ end
190
+
191
    # Base name of the selected template (e.g. "bison/yacc.c" -> "yacc.c")
    def template_basename
      File.basename(template_file)
    end
194
+
195
    # Auxiliary data carried on the grammar (exposed to the templates)
    def aux
      @grammar.aux
    end
198
+
199
+ def int_array_to_string(ary)
200
+ last = ary.count - 1
201
+
202
+ s = ary.each_with_index.each_slice(10).map do |slice|
203
+ str = " "
204
+
205
+ slice.each do |e, i|
206
+ str << sprintf("%6d%s", e, (i == last) ? "" : ",")
207
+ end
208
+
209
+ str
210
+ end
211
+
212
+ s.join("\n")
213
+ end
214
+
215
    # b4_spec_mapped_header_file: path of the generated header, if any
    def spec_mapped_header_file
      @header_file_path
    end
218
+
219
+ def b4_cpp_guard__b4_spec_mapped_header_file
220
+ if @header_file_path
221
+ "YY_YY_" + @header_file_path.gsub(/[^a-zA-Z_0-9]+/, "_").upcase + "_INCLUDED"
222
+ else
223
+ ""
224
+ end
225
+ end
226
+
227
+ private
228
+
229
    # Absolute path of the main parser template
    def template_file
      File.join(template_dir, @template_name)
    end
232
+
233
    # Absolute path of the header template (always bison/yacc.h)
    def header_template_file
      File.join(template_dir, "bison/yacc.h")
    end
236
+
237
    # Root of the bundled template directory (<gem root>/template)
    def template_dir
      File.expand_path("../../../template", __FILE__)
    end
240
+
241
+ def string_array_to_string(ary)
242
+ str = ""
243
+ tmp = " "
244
+
245
+ ary.each do |s|
246
+ s = s.gsub('\\', '\\\\\\\\')
247
+ s = s.gsub('"', '\\"')
248
+
249
+ if (tmp + s + " \"\",").length > 75
250
+ str << tmp << "\n"
251
+ tmp = " \"#{s}\","
252
+ else
253
+ tmp << " \"#{s}\","
254
+ end
255
+ end
256
+
257
+ str << tmp
258
+ end
259
+
260
+ def replace_special_variables(str, ofile)
261
+ str.each_line.with_index(1).map do |line, i|
262
+ line.gsub!("[@oline@]", (i + 1).to_s)
263
+ line.gsub!("[@ofile@]", "\"#{ofile}\"")
264
+ line
265
+ end.join
266
+ end
267
+ end
268
+ end