rucc 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +55 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +6 -0
  6. data/Gemfile.lock +46 -0
  7. data/LICENCE +21 -0
  8. data/README.md +82 -0
  9. data/Rakefile +2 -0
  10. data/Vagrantfile +10 -0
  11. data/bin/console +10 -0
  12. data/bin/rspec +2 -0
  13. data/bin/setup +8 -0
  14. data/exe/rucc +7 -0
  15. data/include/8cc.h +48 -0
  16. data/include/float.h +44 -0
  17. data/include/iso646.h +20 -0
  18. data/include/rucc.h +2 -0
  19. data/include/stdalign.h +11 -0
  20. data/include/stdarg.h +52 -0
  21. data/include/stdbool.h +11 -0
  22. data/include/stddef.h +15 -0
  23. data/include/stdnoreturn.h +8 -0
  24. data/lib/rucc.rb +8 -0
  25. data/lib/rucc/case.rb +22 -0
  26. data/lib/rucc/decl.rb +9 -0
  27. data/lib/rucc/enc.rb +9 -0
  28. data/lib/rucc/engine.rb +138 -0
  29. data/lib/rucc/file_io.rb +108 -0
  30. data/lib/rucc/file_io_list.rb +56 -0
  31. data/lib/rucc/gen.rb +1602 -0
  32. data/lib/rucc/int_evaluator.rb +114 -0
  33. data/lib/rucc/k.rb +73 -0
  34. data/lib/rucc/keyword.rb +17 -0
  35. data/lib/rucc/kind.rb +43 -0
  36. data/lib/rucc/label_gen.rb +13 -0
  37. data/lib/rucc/lexer.rb +40 -0
  38. data/lib/rucc/lexer/impl.rb +683 -0
  39. data/lib/rucc/lexer/preprocessor.rb +888 -0
  40. data/lib/rucc/lexer/preprocessor/cond_incl.rb +27 -0
  41. data/lib/rucc/lexer/preprocessor/constructor.rb +54 -0
  42. data/lib/rucc/lexer/preprocessor/pragma.rb +31 -0
  43. data/lib/rucc/lexer/preprocessor/special_macro.rb +110 -0
  44. data/lib/rucc/libc.rb +47 -0
  45. data/lib/rucc/m.rb +7 -0
  46. data/lib/rucc/macro.rb +24 -0
  47. data/lib/rucc/node.rb +530 -0
  48. data/lib/rucc/node/conv.rb +33 -0
  49. data/lib/rucc/op.rb +61 -0
  50. data/lib/rucc/operator.rb +13 -0
  51. data/lib/rucc/option.rb +30 -0
  52. data/lib/rucc/parser.rb +961 -0
  53. data/lib/rucc/parser/break.rb +18 -0
  54. data/lib/rucc/parser/builtin.rb +25 -0
  55. data/lib/rucc/parser/continue.rb +18 -0
  56. data/lib/rucc/parser/do.rb +33 -0
  57. data/lib/rucc/parser/ensure.rb +39 -0
  58. data/lib/rucc/parser/enum.rb +64 -0
  59. data/lib/rucc/parser/expr.rb +493 -0
  60. data/lib/rucc/parser/for.rb +71 -0
  61. data/lib/rucc/parser/func.rb +274 -0
  62. data/lib/rucc/parser/func_call.rb +54 -0
  63. data/lib/rucc/parser/goto.rb +29 -0
  64. data/lib/rucc/parser/if.rb +23 -0
  65. data/lib/rucc/parser/initializer.rb +237 -0
  66. data/lib/rucc/parser/label.rb +31 -0
  67. data/lib/rucc/parser/return.rb +16 -0
  68. data/lib/rucc/parser/struct_and_union.rb +280 -0
  69. data/lib/rucc/parser/switch.rb +117 -0
  70. data/lib/rucc/parser/while.rb +29 -0
  71. data/lib/rucc/pos.rb +11 -0
  72. data/lib/rucc/rmap.rb +22 -0
  73. data/lib/rucc/s.rb +9 -0
  74. data/lib/rucc/static_label_gen.rb +15 -0
  75. data/lib/rucc/t.rb +18 -0
  76. data/lib/rucc/tempname_gen.rb +14 -0
  77. data/lib/rucc/token.rb +114 -0
  78. data/lib/rucc/token_gen.rb +68 -0
  79. data/lib/rucc/type.rb +304 -0
  80. data/lib/rucc/type/check.rb +39 -0
  81. data/lib/rucc/type/conv.rb +29 -0
  82. data/lib/rucc/type_info.rb +21 -0
  83. data/lib/rucc/utf.rb +126 -0
  84. data/lib/rucc/util.rb +111 -0
  85. data/lib/rucc/version.rb +3 -0
  86. data/rucc.gemspec +38 -0
  87. metadata +201 -0
@@ -0,0 +1,33 @@
1
module Rucc
  class Node
    #
    # Implicit conversions applied to an expression node before it is used
    # as an operand.
    #
    module Conv
      # Wraps +node+ in the conversion its type calls for, or hands it back
      # untouched when no conversion applies.
      #
      # @param [Node, nil] node
      # @return [Node, nil] nil only when +node+ is nil
      def conv(node)
        return nil if node.nil?

        type = node.ty
        case type.kind
        when Kind::ARRAY
          # C11 6.3.2.1p3: An array of T is converted to a pointer to T.
          Node.ast_uop(AST::CONV, Type.make_ptr_type(type.ptr), node)
        when Kind::FUNC
          # C11 6.3.2.1p4: A function designator is converted to a pointer to the function.
          Node.ast_uop(AST::ADDR, Type.make_ptr_type(type), node)
        when Kind::SHORT, Kind::CHAR, Kind::BOOL
          # C11 6.3.1.1p2: The integer promotions
          Node.ast_conv(Type::INT, node)
        when Kind::INT
          # An int carrying a positive bitsize is converted to plain int as well.
          width = type.bitsize
          width.nil? || width <= 0 ? node : Node.ast_conv(Type::INT, node)
        else
          node
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require "rucc/operator"
2
+
3
module Rucc
  module OP
    # Registry of the operators that appear as keywords in C source,
    # indexed by their spelling.
    # @key [String]
    # @value [Operator]
    @operators = {}

    class << self
      # @return [Hash{String => Operator}]
      attr_reader :operators

      private

      # Builds an Operator, publishes it as the constant OP::<name> and
      # records it in the registry under its spelling.
      #
      # @param [Symbol] name keyword name
      # @param [String] str String representation
      # @return [Operator]
      def op(name, str)
        operator = Operator.new(name, str)
        const_set(name, operator)
        @operators[str] = operator
      end
    end

    # Registered in this order, so the registry keeps the same key order.
    {
      ARROW:  "->",
      A_ADD:  "+=",
      A_AND:  "&=",
      A_DIV:  "/=",
      A_MOD:  "%=",
      A_MUL:  "*=",
      A_OR:   "|=",
      A_SAL:  "<<=",
      A_SAR:  ">>=",
      A_SUB:  "-=",
      A_XOR:  "^=",
      DEC:    "--",
      EQ:     "==",
      GE:     ">=",
      INC:    "++",
      LE:     "<=",
      LOGAND: "&&",
      LOGOR:  "||",
      NE:     "!=",
      SAL:    "<<",
      SAR:    ">>",
    }.each { |name, str| op(name, str) }

    # Not used as keyword in C-code; plain marker strings, not Operator objects.
    SIZEOF     = "OP::SIZEOF"
    CAST       = "OP::CAST"
    SHR        = "OP::SHR"
    SHL        = "OP::SHL"
    A_SHR      = "OP::A_SHR"
    A_SHL      = "OP::A_SHL"
    PRE_INC    = "OP::PRE_INC"
    PRE_DEC    = "OP::PRE_DEC"
    POST_INC   = "OP::POST_INC"
    POST_DEC   = "OP::POST_DEC"
    LABEL_ADDR = "OP::LABEL_ADDR"
  end
end
@@ -0,0 +1,13 @@
1
module Rucc
  # An operator identified by a symbolic name and its textual spelling.
  class Operator
    # @return [String] textual spelling of the operator
    attr_reader :str

    # @param [Symbol] name symbolic name (stored, not exposed)
    # @param [String] str textual spelling
    def initialize(name, str)
      @name, @str = name, str
    end

    # The operator prints as its spelling.
    # @return [String]
    alias to_s str
  end
end
@@ -0,0 +1,30 @@
1
+ require "optparse"
2
+
3
module Rucc
  # Command-line options understood by the compiler driver.
  class Option
    attr_reader :include_path, :dumpasm, :dontlink, :outfile

    def initialize
      @include_path = []             # directories collected from -I
      @dumpasm = @dontlink = false   # set by -S / -c
      @outfile = nil                 # set by -o
    end

    # Parses the recognised switches out of +argv+, recording them on this object.
    #
    # @param [Array<String>] argv handed to OptionParser#parse!, which removes the switches it consumes
    # @return [Array<String>] whatever OptionParser#parse! returns (the remaining arguments)
    def parse!(argv)
      parser = OptionParser.new
      parser.on('-I include_path') { |dir| @include_path.push(dir) }
      parser.on('-S') { @dumpasm = true }
      parser.on('-c') { @dontlink = true }
      parser.on('-o outputfile') { |path| @outfile = path }
      parser.parse!(argv)
    end
  end
end
@@ -0,0 +1,961 @@
1
+ require "rucc/decl"
2
+ require "rucc/label_gen"
3
+ require "rucc/lexer"
4
+ require "rucc/node"
5
+ require "rucc/rmap"
6
+ require "rucc/s"
7
+ require "rucc/static_label_gen"
8
+ require "rucc/tempname_gen"
9
+ require "rucc/type"
10
+ require "rucc/type_info"
11
+ require "rucc/int_evaluator"
12
+
13
+ require "rucc/parser/break"
14
+ require "rucc/parser/builtin"
15
+ require "rucc/parser/continue"
16
+ require "rucc/parser/do"
17
+ require "rucc/parser/ensure"
18
+ require "rucc/parser/enum"
19
+ require "rucc/parser/expr"
20
+ require "rucc/parser/for"
21
+ require "rucc/parser/func"
22
+ require "rucc/parser/func_call"
23
+ require "rucc/parser/goto"
24
+ require "rucc/parser/if"
25
+ require "rucc/parser/initializer"
26
+ require "rucc/parser/label"
27
+ require "rucc/parser/return"
28
+ require "rucc/parser/struct_and_union"
29
+ require "rucc/parser/switch"
30
+ require "rucc/parser/while"
31
+
32
+ module Rucc
33
+ class Parser
34
+ include Break
35
+ include Builtin
36
+ include Continue
37
+ include Do
38
+ include Ensure
39
+ include Enum
40
+ include Expr
41
+ include For
42
+ include Func
43
+ include FuncCall
44
+ include Goto
45
+ include If
46
+ include Initializer
47
+ include Label
48
+ include Return
49
+ include StructAndUnion
50
+ include Switch
51
+ include While
52
+
53
# Sets up an empty parser state around a token source.
#
# @param [Lexer] lexer token source
# @param [LabelGen] label_gen label generator kept for later use (e.g. naming compound literals)
def initialize(lexer, label_gen)
  @lexer = lexer

  # Name generators
  @label_gen = label_gen
  @tempname_gen = TempnameGen.new
  @static_label_gen = StaticLabelGen.new

  # Separate name spaces: the same identifier may be used for a global
  # variable, a local variable, a struct/union/enum tag and a goto label.
  @globalenv = RMap.new    # [RMap]
  @localenv = nil          # [RMap, NilClass]
  @tags = RMap.new         # [RMap, NilClass]
  @labels = nil            # [RMap, NilClass]

  # Node collections
  @toplevels = []          # [Array, NilClass]
  @localvars = @gotos = @cases = nil   # each [Array, NilClass]

  @current_func_type = nil # [Type, NilClass]

  # Jump targets; each is [String, NilClass]
  @defaultcase = @lbreak = @lcontinue = nil

  define_builtin_functions!
end
82
+
83
# Reads top-level constructs until the next token is EOF: function
# definitions, _Static_assert declarations and ordinary declarations.
#
# @return [Array] the accumulated @toplevels list
def read_toplevels
  until peek.kind == T::EOF
    if is_funcdef?
      @toplevels.push(read_funcdef)
    elsif next_token?(K::STATIC_ASSERT)
      read_static_assert
    else
      read_decl(@toplevels, true)
    end
  end
  @toplevels
end
97
+
98
+ private
99
+
100
# Reads one block item: a declaration, a _Static_assert, or a statement.
#
# @param(return) [Array] list receives the nodes produced by the item
# @raise [RuntimeError] when the input ends here
def read_decl_or_stmt(list)
  tok = peek
  raise "premature end of input" if tok.kind == T::EOF

  # TODO(south37) Impl mark_location if necessary
  # mark_location();
  return read_decl(list, false) if is_type?(tok)
  return read_static_assert if next_token?(K::STATIC_ASSERT)

  stmt = read_stmt
  list.push(stmt) if stmt
end
119
+
120
# Reads a declaration: specifiers followed by a comma-separated list of
# declarators, terminated by ';'. Variable declarations are appended to
# +block+; typedefs and static locals are registered elsewhere.
#
# @param(return) [Array] block receives the declaration nodes
# @param [Boolean] isglobal true at file scope
# @return [nil]
def read_decl(block, isglobal)
  basetype, sclass = read_decl_spec_opt
  return if next_token?(';')

  while true
    name = '' # read_declarator appends the declared identifier to this string
    ty = read_declarator(name, Type.copy_incomplete_type(basetype), nil, DECL::BODY)
    ty.isstatic = (sclass == S::STATIC)

    if sclass == S::TYPEDEF
      Node.ast_typedef(ty, name, env)
    elsif ty.isstatic && !isglobal
      ensure_not_void!(ty)
      read_static_local_var(ty, name)
    else
      ensure_not_void!(ty)
      var =
        if isglobal
          Node.ast_gvar(ty, name, @globalenv)
        else
          Node.ast_lvar(ty, name, @localenv, @localvars)
        end
      if next_token?('=')
        block.push Node.ast_decl(var, read_decl_init(ty))
      elsif (sclass != S::EXTERN) && (ty.kind != Kind::FUNC)
        # extern variables and function declarations produce no node
        block.push Node.ast_decl(var, nil)
      end
    end

    return if next_token?(';')
    next if next_token?(',')
    Util.errort!(peek, "';' or ',' are expected, but got #{peek}")
  end
end
153
+
154
# Reads the remainder of `_Static_assert ( constant-expression , string ) ;`
# (the keyword itself has already been consumed by the caller) and reports
# an error when the expression evaluates to zero.
#
# @return [nil]
def read_static_assert
  expect!('(')
  val = read_intexpr
  expect!(',')
  tok = get
  if tok.kind != T::STRING
    Util.errort!(tok, "string expected as the second argument for _Static_assert, but got #{tok}")
  end
  expect!(')')
  expect!(';')
  # The assertion fails when the expression is zero. In Ruby the Integer 0
  # is truthy, so it has to be compared explicitly; a nil/false result is
  # treated as a failure too.
  if !val || val == 0
    Util.errort!(tok, "_Static_assert failure: #{tok.sval}")
  end
end
168
+
169
# Declares a function-local static variable. The variable is registered
# in the local environment under a generated static label, while its
# declaration node goes to the top-level list.
#
# @param [Type] ty
# @param [String] name
def read_static_local_var(ty, name)
  var = Node.ast_static_lvar(ty, name, @localenv, @static_label_gen.next(name))
  init =
    if next_token?('=')
      # The initializer is read with the local environment switched off.
      saved = @localenv
      @localenv = nil
      value = read_decl_init(ty)
      @localenv = saved
      value
    end
  @toplevels.push(Node.ast_decl(var, init))
end
182
+
183
# Emits one char initializer per element of the char array +ty+, taking
# values from +p+ and padding with zeros once +p+ is exhausted.
#
# @param(return) [Array] inits receives the initializer nodes
# @param [Type] ty array type; a len of -1 is replaced by p.size + 1
# @param [String] p
# @param [Integer] off offset of the first element
# @return [nil]
def assign_string(inits, ty, p, off)
  return if ty.len == 0

  # p.size is the size of the ruby string. In C, a string is terminated with
  # a null byte (\0), so the array needs p.size + 1 elements.
  ty.len = ty.size = (p.size + 1) if ty.len == -1

  ty.len.times do |i|
    ch = p[i]
    code = ch ? ch.ord : 0
    inits.push(Node.ast_init(Node.ast_inttype(Type::CHAR, code), Type::CHAR, off + i))
  end
  nil
end
204
+
205
# Reads one statement, dispatching on its first token.
#
# @return [Node, nil] nil is possible for an expression statement
#   (read_decl_or_stmt checks the result before using it)
def read_stmt
  tok = get

  if tok.kind == T::KEYWORD
    case tok.id
    when '{'
      return read_compound_stmt
    when K::IF
      return read_if_stmt
    when K::FOR
      return read_for_stmt
    when K::WHILE
      return read_while_stmt
    when K::DO
      return read_do_stmt
    when K::RETURN
      return read_return_stmt
    when K::SWITCH
      return read_switch_stmt
    when K::CASE
      return read_case_label(tok)
    when K::DEFAULT
      return read_default_label(tok)
    when K::BREAK
      return read_break_stmt(tok)
    when K::CONTINUE
      return read_continue_stmt(tok)
    when K::GOTO
      return read_goto_stmt
    end
  end

  # `identifier :` starts a labeled statement.
  return read_label(tok) if (tok.kind == T::IDENT) && next_token?(':')

  # Otherwise: an optional expression followed by ';'.
  @lexer.unget_token(tok)
  expr = read_expr_opt
  expect!(';')
  expr
end
233
+
234
# Reads the items of a `{ ... }` block (the opening brace has already been
# consumed) inside a fresh local scope nested in the current one.
#
# @return [Node]
def read_compound_stmt
  outer = @localenv
  @localenv = RMap.new(outer)
  items = []
  read_decl_or_stmt(items) until next_token?('}')
  @localenv = outer
  Node.ast_compound_stmt(items)
end
248
+
249
# TODO(south37) Impl
# Placeholder for _Generic selections, which are not implemented.
#
# @return [Node]
# @raise [RuntimeError] always
def read_generic
  raise RuntimeError, "_Generic is not supported!"
end
254
+
255
# Decides whether a numeric token is a floating or an integer constant
# and hands it to the matching reader.
#
# @param [Token] tok
# @return [Node]
def read_number(tok)
  s = tok.sval
  hex = (s[0..1].downcase == "0x")
  # '.' or a 'p' exponent always means floating; 'e' only counts as an
  # exponent marker outside hexadecimal, where it is a digit.
  floating =
    %w[. p P].any? { |mark| s.include?(mark) } ||
    (!hex && %w[e E].any? { |mark| s.include?(mark) })
  floating ? read_float(tok) : read_int(tok)
end
266
+
267
# Recognises the integer suffix (case-insensitive) at the end of a literal.
# Longer suffixes are tried first, and a suffix only counts when at least
# one character precedes it.
#
# @param [String] s
# @return [<Type, Integer>] the suffix type and its length, or [nil, 0] without suffix
def read_int_suffix(s)
  table = [
    [%w[ull llu], Type::ULLONG],
    [%w[ul lu],   Type::ULONG],
    [%w[ll],      Type::LLONG],
    [%w[u],       Type::UINT],
    [%w[l],       Type::LONG],
  ]
  table.each do |spellings, ty|
    len = spellings.first.size
    next unless s.size > len
    return ty, len if spellings.include?(s[-len..-1].downcase)
  end
  return nil, 0
end
292
+
293
# Range limits used when choosing the type of an integer constant.
INT_MAX  = (1 << 31) - 1 # Max of int
UINT_MAX = (1 << 32) - 1 # Max of uint
LONG_MAX = (1 << 63) - 1 # Max of long
296
+
297
# Converts the digits of an integer literal (suffix already removed) to
# an Integer, picking the base from the prefix: 0x -> 16, 0b -> 2,
# leading 0 -> 8, anything else -> 10. An optional sign is accepted.
#
# @param [String] s
# @return [Integer]
def read_int_sval(s)
  digits = s.downcase
  base =
    case digits
    when /^[+-]?0x/ then 16
    when /^[+-]?0b/ then 2
    when /^[+-]?0/  then 8
    else 10
    end
  digits.to_i(base)
end
312
+
313
# Builds the node for an integer constant, choosing its type from the
# suffix if there is one and from the value's magnitude otherwise.
#
# @param [Token] tok
# @return [Node]
def read_int(tok)
  literal = tok.sval
  suffix_type, suffix_len = read_int_suffix(literal)
  digits = literal[0..-(suffix_len + 1)]

  # TODO(south37) Use strtoul instead of original impl for compatibility with C
  value = read_int_sval(digits)
  # if (*end != '\0')
  #   errort(tok, "invalid character '%c': %s", *end, s);

  return Node.ast_inttype(suffix_type, value) if suffix_type

  fits_in = lambda { |max| (value & ~max) == 0 }

  type =
    if digits[0] != '0'
      # C11 6.4.4.1p5: Decimal constant type is int, long, or long long.
      # In 8cc, long and long long are the same size.
      fits_in.call(INT_MAX) ? Type::INT : Type::LONG
    elsif fits_in.call(INT_MAX)
      # Octal or hexadecimal constant type may be unsigned.
      Type::INT
    elsif fits_in.call(UINT_MAX)
      Type::UINT
    elsif fits_in.call(LONG_MAX)
      Type::LONG
    else # TODO(south37) Check for value is in ulong
      Type::ULONG
    end
  Node.ast_inttype(type, value)
end
355
+
356
# Builds the node for a floating constant. The value is computed from the
# whole literal text; the last character selects the type.
#
# @param [Token] tok
# @return [Node]
def read_float(tok)
  s = tok.sval

  # TODO(south37) Impl strtod in Ruby. Use strtod instead of String#to_f
  # double v = strtod(s, &end);
  # if (*end != '\0')
  #   errort(tok, "invalid character '%c': %s", *end, s);

  val = read_float_sval(s)

  # C11 6.4.4.2p4: The default type for flonum is double.
  # An l/L suffix selects long double, an f/F suffix selects float.
  type =
    case s[-1].downcase
    when "l" then Type::LDOUBLE
    when "f" then Type::FLOAT
    else Type::DOUBLE
    end

  Node.ast_floattype(type, val)
end
379
+
380
# Converts the text of a floating constant to a Float.
#
# Decimal literals go through String#to_f. Hexadecimal literals
# (0x<hex>[.<hex>]p<exp>) are decoded by hand: the significand is read as
# base 16 and scaled by 2 to the (decimal) exponent. The integer or the
# fraction part of the significand may be empty ("0x1.p3", "0x.8p1").
# A trailing type suffix after the exponent is ignored.
#
# @param [String] s
# @return [Float]
def read_float_sval(s)
  return s.to_f unless s[0..1].downcase == "0x"

  # Limit 2 keeps an empty exponent as "" rather than dropping it.
  significand, exponent = s.split(/[pP]/, 2)
  # No binary exponent at all: read the text as a plain hex integer.
  return s.to_i(16).to_f if exponent.nil?

  # Limit 2 keeps an empty fraction ("0x1." => ["0x1", ""]); without it the
  # fraction would be nil and the concatenation below would raise.
  whole, fraction = significand.split('.', 2)
  fraction ||= ""
  value = (whole + fraction).to_i(16).to_f / (16 ** fraction.size)

  Math.ldexp(value, exponent.to_i)
end
401
+
402
# Reads a conditional expression and evaluates it with IntEvaluator,
# keeping only the first element of the evaluator's result.
#
# @return [Integer]
def read_intexpr
  value, _rest = IntEvaluator.eval(read_conditional_expr)
  value
end
407
+
408
# Resolves an identifier used in an expression.
#
# @param [String] name
# @return [Node] the variable bound to +name+, or a function designator
def read_var_or_func(name)
  found = env[name]

  unless found.nil?
    return found unless found.ty.kind == Kind::FUNC
    return Node.ast_funcdesg(found.ty, name)
  end

  # Unknown name: only acceptable when it is being called, in which case
  # a function returning int is assumed.
  tok = peek
  Util.errort!(tok, "undefined variable: #{name}") unless Token.is_keyword?(tok, '(')

  ty = Type.make_func_type(Type::INT, [], true, false)
  # TODO(south37) Impl warnt when necessary
  # warnt(tok, "assume returning int: %s()", name);
  Node.ast_funcdesg(ty, name)
end
427
+
428
# Maps a compound-assignment token to the binary operator it applies.
#
# @param [Token] tok
# @return [String, OP, NilClass] nil when +tok+ is not a compound assignment
def get_compound_assign_op(tok)
  return nil unless tok.kind == T::KEYWORD

  case tok.id
  when OP::A_ADD then '+'
  when OP::A_SUB then '-'
  when OP::A_MUL then '*'
  when OP::A_DIV then '/'
  when OP::A_MOD then '%'
  when OP::A_AND then '&'
  when OP::A_OR  then '|'
  when OP::A_XOR then '^'
  when OP::A_SAL then OP::SAL
  when OP::A_SAR then OP::SAR
  when OP::A_SHR then OP::SHR
  else nil
  end
end
450
+
451
# Looks +name+ up in the current environment and returns the aliased type
# when the binding is a typedef.
#
# @param [String] name
# @return [Type, nil]
def get_typedef(name)
  binding_node = env[name]
  return nil unless binding_node && (binding_node.kind == AST::TYPEDEF)

  binding_node.ty
end
461
+
462
# Tells whether +tok+ can start a type name.
#
# @param [Token] tok
# @return [Type, Boolean, nil] for identifiers the typedef'd type (or nil);
#   for keywords whatever Keyword#is_type? answers; false otherwise
def is_type?(tok)
  case tok.kind
  when T::IDENT
    get_typedef(tok.sval)
  when T::KEYWORD
    tok.id.is_a?(Keyword) ? tok.id.is_type? : false
  else
    false
  end
end
469
+
470
+ #
471
+ # Declarator
472
+ #
473
+ # C's syntax for declaration is not only hard to read for humans but also
474
+ # hard to parse for hand-written parsers. Consider the following two cases:
475
+ #
476
+ # A: int *x;
477
+ # B: int *x();
478
+ #
479
+ # A is of type pointer to int, but B is not a pointer type, B is type of
480
+ # function returning a pointer to an integer. The meaning of the first half
481
+ # of the declaration ("int *" part) is different between them.
482
+ #
483
+ # In 8cc, declarations are parsed by two functions: read_declarator
484
+ # and read_declarator_tail. The former function parses the first half of a
485
+ # declaration, and the latter parses the (possibly nonexistent) parentheses
486
+ # of a function or an array.
487
+ #
488
+
489
# C11 6.7.6: Declarators
# TODO(south37) Use return value for rname, params
#
# Reads a (possibly abstract) declarator and returns the resulting type.
#
# @param(return) [String] rname the declared identifier is appended to this string
# @param [Type] basety type built so far
# @param(return) [Array] params passed through to the function-declarator reader
# @param [DECL] ctx declaration context
# @return [Type]
def read_declarator(rname, basety, params, ctx)
  if next_token?('(')
    # '(' is either beginning of grouping parentheses or of a function parameter list.
    # If the next token is a type name, a parameter list must follow.
    return read_declarator_func(basety, params) if is_type?(peek)

    # If not, it's grouping. In that case we have to read from outside.
    # For example, consider int (*)(), which is "pointer to function returning int".
    # We have only read "int" so far. We don't want to pass "int" to
    # a recursive call, or otherwise we would get "pointer to int".
    # Here, we pass a dummy object to get "pointer to <something>" first,
    # continue reading to get "function returning int", and then combine them.
    placeholder = Type.make_stub_type
    inner = read_declarator(rname, placeholder, params, ctx)
    expect!(')')
    placeholder.replace_by!(read_declarator_tail(basety, params))
    return inner
  end

  if next_token?('*')
    skip_type_qualifiers!
    return read_declarator(rname, Type.make_ptr_type(basety), params, ctx)
  end

  tok = get
  if tok.kind == T::IDENT
    Util.errort!(tok, "identifier is not expected, but got #{tok}") if ctx == DECL::CAST
    rname << tok.sval # Write as return value
    return read_declarator_tail(basety, params)
  end

  # No identifier: only allowed where the declarator may be abstract.
  named_contexts = [DECL::BODY, DECL::PARAM]
  if named_contexts.include?(ctx)
    Util.errort!(tok, "identifier, ( or * are expected, but got #{tok}")
  end
  @lexer.unget_token(tok)
  read_declarator_tail(basety, params)
end
537
+
538
# Checks whether the declaration specifiers seen so far can be combined.
# Each argument is nil while the corresponding specifier has not been seen.
#
# @param [TypeInfo, nil] kind base type keyword (void, _Bool, char, int, float, double)
# @param [TypeInfo, nil] size size modifier (short, long, long long)
# @param [TypeInfo, nil] sig signedness (signed, unsigned)
# @param [Object, nil] usertype type coming from a typedef, struct/union/enum or typeof
# @return [Boolean] false when the combination is invalid
def error_check(kind, size, sig, usertype)
  # _Bool together with both a size and a signedness
  return false if (kind == TypeInfo::BOOL) && size && sig
  # short only goes with int
  return false if (size == TypeInfo::SHORT) && kind && (kind != TypeInfo::INT)
  # long only goes with int or double
  return false if (size == TypeInfo::LONG) && kind && (kind != TypeInfo::INT) && (kind != TypeInfo::DOUBLE)
  # signed/unsigned cannot qualify void, float or double
  return false if sig && [TypeInfo::VOID, TypeInfo::FLOAT, TypeInfo::DOUBLE].include?(kind)
  # a user type excludes every built-in specifier
  return false if usertype && (kind || size || sig)

  true
end

# Reports an invalid or missing type specifier at +tok+.
#
# @param [Token] tok
# @raise [RuntimeError]
def read_decl_spec_error!(tok)
  Util.errort!(tok, "type name expected, but got #{tok}")
end

# Reads declaration specifiers (storage class, type specifiers, qualifiers,
# function specifiers, alignment) and builds the type they describe.
# The combination is validated with error_check after every specifier.
#
# @return [<Type, S>] the type and the storage class (nil when none was given)
def read_decl_spec
  sclass = nil
  tok = peek
  read_decl_spec_error!(tok) unless is_type?(tok)

  usertype = nil
  kind = nil
  size = nil
  sig = nil
  align = -1

  while true
    tok = get
    read_decl_spec_error!(tok) if tok.kind == T::EOF

    # An identifier is a type specifier only if it names a typedef and no
    # other base type has been seen yet.
    if kind.nil? && (tok.kind == T::IDENT) && !usertype
      typedef = get_typedef(tok.sval)
      if typedef
        usertype = typedef
        read_decl_spec_error!(tok) unless error_check(kind, size, sig, usertype)
        next
      end
    end

    if tok.kind != T::KEYWORD
      @lexer.unget_token(tok)
      break
    end

    case tok.id
    when K::TYPEDEF
      read_decl_spec_error!(tok) if sclass
      sclass = S::TYPEDEF
    when K::EXTERN
      read_decl_spec_error!(tok) if sclass
      sclass = S::EXTERN
    when K::STATIC
      read_decl_spec_error!(tok) if sclass
      sclass = S::STATIC
    when K::AUTO
      read_decl_spec_error!(tok) if sclass
      sclass = S::AUTO
    when K::REGISTER
      read_decl_spec_error!(tok) if sclass
      sclass = S::REGISTER
    when K::CONST, K::VOLATILE, K::INLINE, K::NORETURN
      # Accepted and ignored
    when K::VOID
      read_decl_spec_error!(tok) if kind
      kind = TypeInfo::VOID
    when K::BOOL
      read_decl_spec_error!(tok) if kind
      kind = TypeInfo::BOOL
    when K::CHAR
      read_decl_spec_error!(tok) if kind
      kind = TypeInfo::CHAR
    when K::INT
      read_decl_spec_error!(tok) if kind
      kind = TypeInfo::INT
    when K::FLOAT
      read_decl_spec_error!(tok) if kind
      kind = TypeInfo::FLOAT
    when K::DOUBLE
      read_decl_spec_error!(tok) if kind
      kind = TypeInfo::DOUBLE
    when K::SIGNED
      read_decl_spec_error!(tok) if sig
      sig = TypeInfo::SIGNED
    when K::UNSIGNED
      read_decl_spec_error!(tok) if sig
      sig = TypeInfo::UNSIGNED
    when K::SHORT
      read_decl_spec_error!(tok) if size
      size = TypeInfo::SHORT
    when K::STRUCT
      read_decl_spec_error!(tok) if usertype
      usertype = read_struct_def
    when K::UNION
      read_decl_spec_error!(tok) if usertype
      usertype = read_union_def
    when K::ENUM
      read_decl_spec_error!(tok) if usertype
      usertype = read_enum_def
    when K::ALIGNAS
      val = read_alignas
      Util.errort!(tok, "negative alignment: #{val}") if val < 0
      # C11 6.7.5p6: an alignment specification of zero has no effect.
      # Otherwise the smallest alignment requested so far is kept.
      align = val if (val != 0) && ((align == -1) || (val < align))
    when K::LONG
      if size.nil?
        size = TypeInfo::LONG
      elsif size == TypeInfo::LONG
        size = TypeInfo::LLONG
      else
        read_decl_spec_error!(tok)
      end
    when K::TYPEOF
      read_decl_spec_error!(tok) if usertype
      usertype = read_typeof
    else
      @lexer.unget_token(tok)
      break # Stop loop
    end

    # Reject invalid combinations (e.g. `short char`) as soon as they appear.
    read_decl_spec_error!(tok) unless error_check(kind, size, sig, usertype)
  end

  return usertype, sclass if usertype

  if (align != -1) && !is_peweroftwo?(align)
    Util.errort!(tok, "alignment must be power of 2, but got #{align}")
  end

  unsigned = (sig == TypeInfo::UNSIGNED)

  # Types fully determined by the base keyword
  ty =
    case kind
    when TypeInfo::VOID   then Type::VOID
    when TypeInfo::BOOL   then Type.make_numtype(Kind::BOOL, false)
    when TypeInfo::CHAR   then Type.make_numtype(Kind::CHAR, unsigned)
    when TypeInfo::FLOAT  then Type.make_numtype(Kind::FLOAT, false)
    when TypeInfo::DOUBLE then Type.make_numtype(((size == TypeInfo::LONG) ? Kind::LDOUBLE : Kind::DOUBLE), false)
    end

  # Everything else is an integer type whose width comes from the size modifier
  ty ||=
    case size
    when TypeInfo::SHORT then Type.make_numtype(Kind::SHORT, unsigned)
    when TypeInfo::LONG  then Type.make_numtype(Kind::LONG, unsigned)
    when TypeInfo::LLONG then Type.make_numtype(Kind::LLONG, unsigned)
    else Type.make_numtype(Kind::INT, unsigned)
    end

  ty.align = align if align != -1
  return ty, sclass
end
734
+
735
# Like read_decl_spec, but tolerates a missing type specifier, in which
# case plain int with no storage class is assumed.
#
# @return [<Type, S>]
def read_decl_spec_opt
  # TODO(south37) Impl warnt
  # warnt(peek(), "type specifier missing, assuming int");
  return Type::INT, nil unless is_type?(peek)

  read_decl_spec
end
744
+
745
# Reads the rest of an array declarator; the '[' has already been consumed.
# An empty `[]` yields length -1.
#
# @param [Type] basety
# @return [Type]
def read_declarator_array(basety)
  len = -1
  unless next_token?(']')
    len = read_intexpr
    expect!(']')
  end

  tok = peek # remembered for the error position
  elem = read_declarator_tail(basety, nil)
  Util.errort!(tok, "array of functions") if elem.kind == Kind::FUNC
  Type.make_array_type(elem, len)
end
761
+
762
# Reads the optional array or function suffix of a declarator.
#
# @param [Type] basety
# @param(return) [Array] params
# @return [Type] +basety+ itself when no suffix follows
def read_declarator_tail(basety, params)
  if next_token?('[')
    read_declarator_array(basety)
  elsif next_token?('(')
    read_declarator_func(basety, params)
  else
    basety
  end
end
774
+
775
# Consumes any run of const / volatile / restrict keywords.
#
# @return [nil]
def skip_type_qualifiers!
  qualifiers = [K::CONST, K::VOLATILE, K::RESTRICT]
  while qualifiers.any? { |qualifier| next_token?(qualifier) }
    # keep consuming
  end
end
780
+
781
# Collects the type of every parameter node.
#
# @param [<Node>] params
# @return [<Type>]
def param_types(params)
  params.map { |param| param.ty }
end
787
+
788
# Reads the parenthesised argument of _Alignas.
#
# @return [Integer]
def read_alignas
  # C11 6.7.5. Valid form of _Alignas is either _Alignas(type-name) or
  # _Alignas(constant-expression).
  expect!('(')
  alignment =
    if is_type?(peek)
      read_cast_type.align
    else
      read_intexpr
    end
  expect!(')')
  alignment
end
797
+
798
# Reads the parenthesised argument of typeof: either a type name or an
# expression whose type is taken.
#
# @return [Type]
def read_typeof
  expect!('(')
  type =
    if is_type?(peek)
      read_cast_type
    else
      read_comma_expr.ty
    end
  expect!(')')
  type
end
805
+
806
# Reads the initializer of a compound literal and materialises the literal
# as a local variable with a generated name.
#
# @param [Type] ty
# @return [Node] the variable node, carrying the initializer in lvarinit
def read_compound_literal(ty)
  name = @label_gen.next
  init = read_decl_init(ty)
  Node.ast_lvar(ty, name, @localenv, @localvars).tap do |var|
    var.lvarinit = init
  end
end
815
+
816
# TODO(south37) Improve performance by cache.
# is_funcdef? returns true if we are at beginning of a function definition.
# The basic idea is that if we see '{' or a type keyword after a closing
# parenthesis of a function parameter list, we were reading a function
# definition. (Usually '{' comes after a closing parenthesis.
# A type keyword is allowed for K&R-style function definitions.)
#
# Every token read while looking ahead is pushed back before returning.
#
# @return [Boolean, Type, nil] truthy for a function definition
# @raise [RuntimeError] when the input ends during the look-ahead
def is_funcdef?
  consumed = []
  result = false

  while true
    tok = get
    consumed << tok

    # Early return
    raise "premature end of input" if tok.kind == T::EOF
    break if Token.is_keyword?(tok, ';')
    next if is_type?(tok)

    if Token.is_keyword?(tok, '(')
      skip_parentheses!(consumed)
      next
    end

    # Check if function definition: an identifier directly followed by '('
    next unless (tok.kind == T::IDENT) && Token.is_keyword?(peek, '(')

    consumed << get
    skip_parentheses!(consumed)

    # (Usually '{' comes after a closing parenthesis.
    # A type keyword is allowed for K&R-style function definitions.
    result = (Token.is_keyword?(peek, '{') || is_type?(peek))
    break
  end

  # Push back in reverse so the lexer yields the tokens in original order.
  consumed.reverse_each { |t| @lexer.unget_token(t) }
  result
end
863
+
864
# Reads tokens up to and including the ')' matching an already consumed
# '(', recursing for nested parentheses.
#
# @param(return) [Array] buf every token read is appended here
# @raise [RuntimeError] when the input ends before the closing parenthesis
def skip_parentheses!(buf)
  while true
    tok = get
    raise "premature end of input" if tok.kind == T::EOF

    buf << tok
    return if Token.is_keyword?(tok, ')')

    skip_parentheses!(buf) if Token.is_keyword?(tok, '(')
  end
end
880
+
881
# Merges the string literals that directly follow +tok+ into it, updating
# its text and encoding in place.
#
# @param [Token] tok first string literal of the run
def concatenate_string(tok)
  enc = tok.enc
  joined = tok.sval.dup

  while peek.kind == T::STRING
    following = @lexer.read_token
    joined << following.sval

    following_enc = following.enc
    both_prefixed = (enc != ENC::NONE) && (following_enc != ENC::NONE)
    if both_prefixed && (enc != following_enc)
      Util.errort!(following, "unsupported non-standard concatenation of string literals: #{following}")
    end
    # The first explicit encoding in the run wins.
    enc = following_enc if enc == ENC::NONE
  end

  tok.sval = joined
  tok.enc = enc
end
899
+
900
# @param [Integer] x
# @return [Boolean] true when +x+ is a positive power of two
def is_peweroftwo?(x)
  # If there's only one bit set in x, the value is a power of 2.
  x > 0 && (x & (x - 1)).zero?
end
910
+
911
# The environment currently in effect: the local one when it is set,
# the global one otherwise.
#
# @return [RMap]
def env
  return @globalenv if @localenv.nil?

  @localenv
end
914
+
915
# Consumes the next token if it is the keyword +kind+; otherwise the token
# is pushed back.
#
# @param [String] kind
# @return [Boolean] whether the token was consumed
def next_token?(kind)
  tok = get_internal
  return true if Token.is_keyword?(tok, kind)

  @lexer.unget_token(tok)
  false
end
925
+
926
# Consumes the next token. Kept as a separate hook so that token tracing
# can be switched on while debugging.
#
# @return [Token]
def get
  # NOTE: Only for debug, print the result of get_internal:
  # print "#{tok} #{tok.file&.name || "(unknown)"} #{tok.line} #{tok.column}\n"
  get_internal
end
935
+
936
# Reads the next token from the lexer, rejecting invalid tokens and
# merging a run of adjacent string literals into the first one.
#
# @return [Token]
def get_internal
  tok = @lexer.read_token
  Util.errort!(tok, "stray character in program: '#{tok.c}'") if tok.kind == T::INVALID
  concatenate_string(tok) if tok.kind == T::STRING && peek.kind == T::STRING
  tok
end
947
+
948
# Looks at the next token without consuming it.
#
# @return [Token]
def peek
  @lexer.peek_token
end
952
+
953
# Consumes the next token and reports an error unless it is the keyword +id+.
#
# @param [String] id
def expect!(id)
  tok = get
  return if Token.is_keyword?(tok, id)

  Util.errort!(tok, "'#{id}' expected, but got #{tok}")
end
960
+ end
961
+ end