rucc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +55 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +6 -0
  6. data/Gemfile.lock +46 -0
  7. data/LICENCE +21 -0
  8. data/README.md +82 -0
  9. data/Rakefile +2 -0
  10. data/Vagrantfile +10 -0
  11. data/bin/console +10 -0
  12. data/bin/rspec +2 -0
  13. data/bin/setup +8 -0
  14. data/exe/rucc +7 -0
  15. data/include/8cc.h +48 -0
  16. data/include/float.h +44 -0
  17. data/include/iso646.h +20 -0
  18. data/include/rucc.h +2 -0
  19. data/include/stdalign.h +11 -0
  20. data/include/stdarg.h +52 -0
  21. data/include/stdbool.h +11 -0
  22. data/include/stddef.h +15 -0
  23. data/include/stdnoreturn.h +8 -0
  24. data/lib/rucc.rb +8 -0
  25. data/lib/rucc/case.rb +22 -0
  26. data/lib/rucc/decl.rb +9 -0
  27. data/lib/rucc/enc.rb +9 -0
  28. data/lib/rucc/engine.rb +138 -0
  29. data/lib/rucc/file_io.rb +108 -0
  30. data/lib/rucc/file_io_list.rb +56 -0
  31. data/lib/rucc/gen.rb +1602 -0
  32. data/lib/rucc/int_evaluator.rb +114 -0
  33. data/lib/rucc/k.rb +73 -0
  34. data/lib/rucc/keyword.rb +17 -0
  35. data/lib/rucc/kind.rb +43 -0
  36. data/lib/rucc/label_gen.rb +13 -0
  37. data/lib/rucc/lexer.rb +40 -0
  38. data/lib/rucc/lexer/impl.rb +683 -0
  39. data/lib/rucc/lexer/preprocessor.rb +888 -0
  40. data/lib/rucc/lexer/preprocessor/cond_incl.rb +27 -0
  41. data/lib/rucc/lexer/preprocessor/constructor.rb +54 -0
  42. data/lib/rucc/lexer/preprocessor/pragma.rb +31 -0
  43. data/lib/rucc/lexer/preprocessor/special_macro.rb +110 -0
  44. data/lib/rucc/libc.rb +47 -0
  45. data/lib/rucc/m.rb +7 -0
  46. data/lib/rucc/macro.rb +24 -0
  47. data/lib/rucc/node.rb +530 -0
  48. data/lib/rucc/node/conv.rb +33 -0
  49. data/lib/rucc/op.rb +61 -0
  50. data/lib/rucc/operator.rb +13 -0
  51. data/lib/rucc/option.rb +30 -0
  52. data/lib/rucc/parser.rb +961 -0
  53. data/lib/rucc/parser/break.rb +18 -0
  54. data/lib/rucc/parser/builtin.rb +25 -0
  55. data/lib/rucc/parser/continue.rb +18 -0
  56. data/lib/rucc/parser/do.rb +33 -0
  57. data/lib/rucc/parser/ensure.rb +39 -0
  58. data/lib/rucc/parser/enum.rb +64 -0
  59. data/lib/rucc/parser/expr.rb +493 -0
  60. data/lib/rucc/parser/for.rb +71 -0
  61. data/lib/rucc/parser/func.rb +274 -0
  62. data/lib/rucc/parser/func_call.rb +54 -0
  63. data/lib/rucc/parser/goto.rb +29 -0
  64. data/lib/rucc/parser/if.rb +23 -0
  65. data/lib/rucc/parser/initializer.rb +237 -0
  66. data/lib/rucc/parser/label.rb +31 -0
  67. data/lib/rucc/parser/return.rb +16 -0
  68. data/lib/rucc/parser/struct_and_union.rb +280 -0
  69. data/lib/rucc/parser/switch.rb +117 -0
  70. data/lib/rucc/parser/while.rb +29 -0
  71. data/lib/rucc/pos.rb +11 -0
  72. data/lib/rucc/rmap.rb +22 -0
  73. data/lib/rucc/s.rb +9 -0
  74. data/lib/rucc/static_label_gen.rb +15 -0
  75. data/lib/rucc/t.rb +18 -0
  76. data/lib/rucc/tempname_gen.rb +14 -0
  77. data/lib/rucc/token.rb +114 -0
  78. data/lib/rucc/token_gen.rb +68 -0
  79. data/lib/rucc/type.rb +304 -0
  80. data/lib/rucc/type/check.rb +39 -0
  81. data/lib/rucc/type/conv.rb +29 -0
  82. data/lib/rucc/type_info.rb +21 -0
  83. data/lib/rucc/utf.rb +126 -0
  84. data/lib/rucc/util.rb +111 -0
  85. data/lib/rucc/version.rb +3 -0
  86. data/rucc.gemspec +38 -0
  87. metadata +201 -0
@@ -0,0 +1,33 @@
1
module Rucc
  class Node
    #
    # Implicit conversions applied to expression nodes
    #
    module Conv
      # Wraps +node+ in the implicit conversion its type calls for.
      #
      # @param [Node, nil] node
      # @return [Node, nil] nil for a nil input, a conversion node when one
      #   applies, otherwise +node+ itself
      def conv(node)
        return nil if node.nil?

        type = node.ty
        case type.kind
        when Kind::ARRAY
          # C11 6.3.2.1p3: An array of T is converted to a pointer to T.
          return Node.ast_uop(AST::CONV, Type.make_ptr_type(type.ptr), node)
        when Kind::FUNC
          # C11 6.3.2.1p4: A function designator is converted to a pointer to the function.
          return Node.ast_uop(AST::ADDR, Type.make_ptr_type(type), node)
        when Kind::SHORT, Kind::CHAR, Kind::BOOL
          # C11 6.3.1.1p2: The integer promotions
          return Node.ast_conv(Type::INT, node)
        when Kind::INT
          # An int with a positive bitsize is converted as well.
          width = type.bitsize
          return Node.ast_conv(Type::INT, node) if !width.nil? && width > 0
        end

        node
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require "rucc/operator"
2
+
3
module Rucc
  module OP
    # Registry of the multi-character operators that appear in C source.
    # Maps the operator's spelling (String) to its Operator instance.
    @operators = {}

    class << self
      # @return [Hash{String => Operator}]
      attr_reader :operators

      private

      # Defines constant +name+ on this module and registers it under +str+.
      #
      # @param [Symbol] name constant name
      # @param [String] str spelling in C source
      # @return [Operator]
      def op(name, str)
        operator = Operator.new(name, str)
        const_set(name, operator)
        @operators[str] = operator
      end
    end

    # Registration order is preserved in the registry.
    [
      [:ARROW,  "->"],
      [:A_ADD,  "+="],
      [:A_AND,  "&="],
      [:A_DIV,  "/="],
      [:A_MOD,  "%="],
      [:A_MUL,  "*="],
      [:A_OR,   "|="],
      [:A_SAL,  "<<="],
      [:A_SAR,  ">>="],
      [:A_SUB,  "-="],
      [:A_XOR,  "^="],
      [:DEC,    "--"],
      [:EQ,     "=="],
      [:GE,     ">="],
      [:INC,    "++"],
      [:LE,     "<="],
      [:LOGAND, "&&"],
      [:LOGOR,  "||"],
      [:NE,     "!="],
      [:SAL,    "<<"],
      [:SAR,    ">>"]
    ].each { |name, str| op(name, str) }

    # Operators that have no spelling of their own in C source; they are
    # plain String markers rather than Operator instances.
    SIZEOF     = "OP::SIZEOF"
    CAST       = "OP::CAST"
    SHR        = "OP::SHR"
    SHL        = "OP::SHL"
    A_SHR      = "OP::A_SHR"
    A_SHL      = "OP::A_SHL"
    PRE_INC    = "OP::PRE_INC"
    PRE_DEC    = "OP::PRE_DEC"
    POST_INC   = "OP::POST_INC"
    POST_DEC   = "OP::POST_DEC"
    LABEL_ADDR = "OP::LABEL_ADDR"
  end
end
@@ -0,0 +1,13 @@
1
module Rucc
  # Pairs an operator's constant name with its spelling in C source.
  class Operator
    # @return [String] spelling of the operator
    attr_reader :str

    # @param [Symbol] name constant name of the operator
    # @param [String] str spelling of the operator
    def initialize(name, str)
      @name, @str = name, str
    end

    # @return [String] spelling of the operator
    def to_s
      str
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require "optparse"
2
+
3
module Rucc
  # Command-line options understood by the compiler driver.
  class Option
    attr_reader :include_path, :dumpasm, :dontlink, :outfile

    def initialize
      @include_path = []
      @dumpasm = @dontlink = false
      @outfile = nil
    end

    # Consumes the recognised switches from +argv+ (destructively) and records
    # them on this object.
    #
    # @param [Array<String>] argv
    # @return [Array<String>] the arguments left over after option parsing
    def parse!(argv)
      parser = OptionParser.new
      parser.on('-I include_path') { |dir| @include_path.push dir }
      parser.on('-S') { @dumpasm = true }
      parser.on('-c') { @dontlink = true }
      parser.on('-o outputfile') { |path| @outfile = path }
      parser.parse!(argv)
    end
  end
end
@@ -0,0 +1,961 @@
1
+ require "rucc/decl"
2
+ require "rucc/label_gen"
3
+ require "rucc/lexer"
4
+ require "rucc/node"
5
+ require "rucc/rmap"
6
+ require "rucc/s"
7
+ require "rucc/static_label_gen"
8
+ require "rucc/tempname_gen"
9
+ require "rucc/type"
10
+ require "rucc/type_info"
11
+ require "rucc/int_evaluator"
12
+
13
+ require "rucc/parser/break"
14
+ require "rucc/parser/builtin"
15
+ require "rucc/parser/continue"
16
+ require "rucc/parser/do"
17
+ require "rucc/parser/ensure"
18
+ require "rucc/parser/enum"
19
+ require "rucc/parser/expr"
20
+ require "rucc/parser/for"
21
+ require "rucc/parser/func"
22
+ require "rucc/parser/func_call"
23
+ require "rucc/parser/goto"
24
+ require "rucc/parser/if"
25
+ require "rucc/parser/initializer"
26
+ require "rucc/parser/label"
27
+ require "rucc/parser/return"
28
+ require "rucc/parser/struct_and_union"
29
+ require "rucc/parser/switch"
30
+ require "rucc/parser/while"
31
+
32
+ module Rucc
33
+ class Parser
34
+ include Break
35
+ include Builtin
36
+ include Continue
37
+ include Do
38
+ include Ensure
39
+ include Enum
40
+ include Expr
41
+ include For
42
+ include Func
43
+ include FuncCall
44
+ include Goto
45
+ include If
46
+ include Initializer
47
+ include Label
48
+ include Return
49
+ include StructAndUnion
50
+ include Switch
51
+ include While
52
+
53
# Sets up an empty parse state around +lexer+ and registers the builtin
# functions.
#
# @param [Lexer] lexer token source
# @param [LabelGen] label_gen generator kept in @label_gen
def initialize(lexer, label_gen)
  @lexer = lexer

  # Name generators
  @label_gen        = label_gen
  @tempname_gen     = TempnameGen.new
  @static_label_gen = StaticLabelGen.new

  # Environments. Global variables, local variables, struct/union/enum tags
  # and goto labels are tracked separately, so the same identifier may be
  # used in each of them.
  @globalenv = RMap.new # [RMap]
  @localenv  = nil      # [RMap, NilClass]
  @tags      = RMap.new # [RMap]
  @labels    = nil      # [RMap, NilClass]

  # Collected nodes
  @toplevels = []  # [Array]
  @localvars = nil # [Array, NilClass]
  @gotos     = nil # [Array, NilClass]
  @cases     = nil # [Array, NilClass]

  @current_func_type = nil # [Type, NilClass]

  # Jump targets of the enclosing switch / loop
  @defaultcase = nil # [String, NilClass]
  @lbreak      = nil # [String, NilClass]
  @lcontinue   = nil # [String, NilClass]

  define_builtin_functions!
end
82
+
83
# Reads function definitions, _Static_assert declarations and ordinary
# declarations until the next token is EOF.
#
# @return [Array] the accumulated top-level nodes (@toplevels)
def read_toplevels
  until peek.kind == T::EOF
    if is_funcdef?
      @toplevels.push(read_funcdef)
    elsif next_token?(K::STATIC_ASSERT)
      read_static_assert
    else
      read_decl(@toplevels, true)
    end
  end
  @toplevels
end
97
+
98
+ private
99
+
100
# Reads one declaration, _Static_assert or statement. Nodes produced are
# appended to +list+.
#
# @param(return) [Array] list receives the parsed nodes
# @raise [RuntimeError] when the input ends here
def read_decl_or_stmt(list)
  tok = peek
  raise "premature end of input" if tok.kind == T::EOF

  # TODO(south37) Impl mark_location if necessary
  # mark_location();
  return read_decl(list, false) if is_type?(tok)
  return read_static_assert if next_token?(K::STATIC_ASSERT)

  stmt = read_stmt
  list.push(stmt) if stmt
end
119
+
120
# Reads a declaration: declaration specifiers followed by a comma-separated
# list of declarators, each with an optional initializer, up to ';'.
# Declaration nodes are appended to +block+.
#
# @param [Array] block receives the declaration nodes
# @param [Boolean] isglobal true when reading at file scope
# @return [nil]
def read_decl(block, isglobal)
  basetype, sclass = read_decl_spec_opt
  return if next_token?(';')

  finished = false
  until finished
    name = '' # filled in by read_declarator
    ty = read_declarator(name, Type.copy_incomplete_type(basetype), nil, DECL::BODY)
    ty.isstatic = (sclass == S::STATIC)

    if sclass == S::TYPEDEF
      Node.ast_typedef(ty, name, env)
    else
      ensure_not_void!(ty)
      if ty.isstatic && !isglobal
        read_static_local_var(ty, name)
      else
        var =
          if isglobal
            Node.ast_gvar(ty, name, @globalenv)
          else
            Node.ast_lvar(ty, name, @localenv, @localvars)
          end
        if next_token?('=')
          block.push Node.ast_decl(var, read_decl_init(ty))
        elsif (sclass != S::EXTERN) && (ty.kind != Kind::FUNC)
          block.push Node.ast_decl(var, nil)
        end
      end
    end

    if next_token?(';')
      finished = true
    elsif !next_token?(',')
      Util.errort!(peek, "';' or ',' are expected, but got #{peek}")
    end
  end
end
153
+
154
# Reads the remainder of `_Static_assert(constant-expression, string-literal);`.
# The callers in this file consume the keyword itself before calling.
#
# Errors are reported through Util.errort! when the second argument is not a
# string literal, or when the asserted expression evaluates to zero.
def read_static_assert
  expect!('(')
  val = read_intexpr
  expect!(',')
  tok = get
  if tok.kind != T::STRING
    Util.errort!(tok, "string expected as the second argument for _Static_assert, but got #{tok}")
  end
  expect!(')')
  expect!(';')

  # Only nil and false are falsy in Ruby, so an Integer 0 has to be tested
  # explicitly; a bare `!val` would never fire for a failing assertion.
  failed = !val || val == 0
  Util.errort!(tok, "_Static_assert failure: #{tok.sval}") if failed
end
168
+
169
# Declares a static local variable. The variable is registered in the local
# environment, while its declaration node is pushed to @toplevels.
#
# @param [Type] ty
# @param [String] name
def read_static_local_var(ty, name)
  var = Node.ast_static_lvar(ty, name, @localenv, @static_label_gen.next(name))
  init =
    if next_token?('=')
      # The initializer is read with @localenv temporarily cleared.
      saved_env = @localenv
      @localenv = nil
      value = read_decl_init(ty)
      @localenv = saved_env
      value
    end
  @toplevels.push(Node.ast_decl(var, init))
end
182
+
183
# Appends one char initializer per array element for a char array that is
# initialised from a string, padding with zeros up to the array length.
#
# @param [Array] inits receives the initializer nodes
# @param [Type] ty array type; a length of -1 is replaced by the string
#   length plus one
# @param [String] p string contents
# @param [Integer] off offset of the first element
# @return [nil]
def assign_string(inits, ty, p, off)
  return if ty.len == 0

  if ty.len == -1
    # p.size is the size of the Ruby string. A C string is terminated with a
    # null byte (\0), so the array needs p.size + 1 elements.
    ty.len = ty.size = (p.size + 1)
  end

  (0...ty.len).each do |index|
    char = p[index]
    code = char ? char.ord : 0 # past the end of the string: zero fill
    inits.push(Node.ast_init(Node.ast_inttype(Type::CHAR, code), Type::CHAR, off + index))
  end
  nil
end
204
+
205
# Reads one statement, dispatching on its leading keyword. Anything that is
# neither a keyword statement nor a label is read as an optional expression
# followed by ';'.
#
# @return [Node, nil] the value of read_expr_opt in the expression case
def read_stmt
  tok = get
  if tok.kind == T::KEYWORD
    case tok.id
    when '{'
      return read_compound_stmt
    when K::IF
      return read_if_stmt
    when K::FOR
      return read_for_stmt
    when K::WHILE
      return read_while_stmt
    when K::DO
      return read_do_stmt
    when K::RETURN
      return read_return_stmt
    when K::SWITCH
      return read_switch_stmt
    when K::CASE
      return read_case_label(tok)
    when K::DEFAULT
      return read_default_label(tok)
    when K::BREAK
      return read_break_stmt(tok)
    when K::CONTINUE
      return read_continue_stmt(tok)
    when K::GOTO
      return read_goto_stmt
    end
  end

  # `identifier :` introduces a label.
  return read_label(tok) if (tok.kind == T::IDENT) && next_token?(':')

  @lexer.unget_token(tok)
  expr = read_expr_opt
  expect!(';')
  expr
end
233
+
234
# Reads the body of a compound statement up to and including the closing
# '}'. The body is read inside a fresh local environment chained to the
# current one.
#
# @return [Node]
def read_compound_stmt
  outer_env = @localenv
  @localenv = RMap.new(outer_env)
  stmts = []
  read_decl_or_stmt(stmts) until next_token?('}')
  @localenv = outer_env
  Node.ast_compound_stmt(stmts)
end
248
+
249
# TODO(south37) Impl
# Placeholder for _Generic selections, which are not implemented.
#
# @raise [RuntimeError] always
def read_generic
  raise RuntimeError, "_Generic is not supported!"
end
254
+
255
# Reads a numeric literal token as either a floating or an integer constant.
#
# The literal is floating when it contains '.', 'p' or 'P', or, for
# non-hexadecimal literals only, 'e' or 'E' (in a hex literal 'e' is a digit).
#
# @param [Token] tok
# @return [Node]
def read_number(tok)
  literal = tok.sval
  floating = [".", "p", "P"].any? { |mark| literal.include?(mark) }
  unless floating
    hex = literal[0..1].downcase == "0x"
    floating = !hex && ["e", "E"].any? { |mark| literal.include?(mark) }
  end

  if floating
    read_float(tok)
  else
    read_int(tok)
  end
end
266
+
267
# Recognises the integer suffix of a literal, case-insensitively. A suffix
# only counts when at least one character precedes it.
#
# @param [String] s the whole literal
# @return [<Type, Integer>] the type named by the suffix and the suffix
#   length, or [nil, 0] when there is no suffix
def read_int_suffix(s)
  # Longest suffixes first, so that e.g. "ull" is not taken for "l".
  candidates = [
    ["ull", Type::ULLONG],
    ["llu", Type::ULLONG],
    ["ul",  Type::ULONG],
    ["lu",  Type::ULONG],
    ["ll",  Type::LLONG],
    ["u",   Type::UINT],
    ["l",   Type::LONG]
  ]
  candidates.each do |suffix, type|
    width = suffix.size
    next unless s.size > width
    return type, width if s[-width..-1].downcase == suffix
  end

  return nil, 0
end
292
+
293
+ INT_MAX = 2 ** 31 - 1 # Max of int
294
+ UINT_MAX = 2 ** 32 - 1 # Max of uint
295
+ LONG_MAX = 2 ** 63 - 1 # Max of long
296
+
297
# Converts the digits of an integer literal (suffix already removed) to an
# Integer, choosing the radix from the prefix: 0x hexadecimal, 0b binary,
# a leading 0 octal, otherwise decimal.
#
# @param [String] s
# @return [Integer]
def read_int_sval(s)
  lowered = s.downcase
  radix =
    case lowered
    when /^[+-]?0x/ then 16
    when /^[+-]?0b/ then 2
    when /^[+-]?0/  then 8
    else 10
    end
  lowered.to_i(radix)
end
312
+
313
# Builds an integer constant node from a numeric literal token.
#
# An explicit suffix decides the type. Otherwise the smallest fitting type is
# chosen: int or long for decimal literals, and int, unsigned int, long or
# unsigned long for octal and hexadecimal ones.
#
# @param [Token] tok
# @return [Node]
def read_int(tok)
  literal = tok.sval
  suffix_type, suffix_len = read_int_suffix(literal)
  digits = literal[0..-(suffix_len + 1)]

  # TODO(south37) Use strtoul instead of original impl for compatibility with C
  value = read_int_sval(digits)
  # if (*end != '\0')
  #   errort(tok, "invalid character '%c': %s", *end, s);

  return Node.ast_inttype(suffix_type, value) if suffix_type

  fits = ->(max) { (value & ~max) == 0 }
  inferred =
    if digits[0] != '0'
      # C11 6.4.4.1p5: Decimal constant type is int, long, or long long.
      # In 8cc, long and long long are the same size.
      fits.call(INT_MAX) ? Type::INT : Type::LONG
    elsif fits.call(INT_MAX)
      # Octal or hexadecimal constant type may be unsigned.
      Type::INT
    elsif fits.call(UINT_MAX)
      Type::UINT
    elsif fits.call(LONG_MAX)
      Type::LONG
    else # TODO(south37) Check for value is in ulong
      Type::ULONG
    end
  Node.ast_inttype(inferred, value)
end
355
+
356
# Builds a floating constant node from a numeric literal token.
#
# @param [Token] tok
# @return [Node]
def read_float(tok)
  literal = tok.sval

  # TODO(south37) Impl strtod in Ruby. Use strtod instead of String#to_f
  # double v = strtod(s, &end);
  # if (*end != '\0')
  #   errort(tok, "invalid character '%c': %s", *end, s);
  value = read_float_sval(literal)

  # C11 6.4.4.2p4: The default type for flonum is double.
  type =
    case literal[-1].downcase
    when "l" then Type::LDOUBLE
    when "f" then Type::FLOAT
    else Type::DOUBLE
    end

  Node.ast_floattype(type, value)
end
379
+
380
# Converts the text of a floating literal to a Float.
#
# Decimal literals go through String#to_f. Hexadecimal literals with a binary
# exponent ("0x1.8p1") are decoded by hand: the hex significand is scaled by
# 16 for each fraction digit and then by 2 to the power of the exponent.
# A hexadecimal literal without an exponent is read as a hex integer.
#
# @param [String] s literal text; a trailing f/l suffix is tolerated because
#   String#to_i and String#to_f ignore trailing characters
# @return [Float]
def read_float_sval(s)
  return s.to_f unless s[0..1].downcase == "0x"
  return s.to_i(16).to_f unless s.include?("p") || s.include?("P")

  significand, exponent = s.split(/[pP]/)
  # The limit of 2 keeps a trailing empty fraction ("0x1." => ["0x1", ""]);
  # a plain split would drop it and leave the fraction nil.
  whole, fraction = significand.split('.', 2)
  fraction ||= "" # no '.' at all
  mantissa = (whole + fraction).to_i(16).to_f / (16 ** fraction.size)
  mantissa * (2 ** exponent.to_i)
end
401
+
402
# Reads a conditional expression and evaluates it as an integer constant.
#
# @return [Integer] first element of the IntEvaluator.eval result
def read_intexpr
  value, _rest = IntEvaluator.eval(read_conditional_expr)
  value
end
407
+
408
# Resolves an identifier used in an expression.
#
# An unknown identifier followed by '(' is taken as a call to an undeclared
# function and is given a function type returning int; any other unknown
# identifier is reported as an error.
#
# @param [String] name
# @return [Node] a function designator for functions, otherwise the node
#   stored in the environment
def read_var_or_func(name)
  found = env[name]
  unless found.nil?
    return Node.ast_funcdesg(found.ty, name) if found.ty.kind == Kind::FUNC
    return found
  end

  tok = peek
  Util.errort!(tok, "undefined variable: #{name}") if !Token.is_keyword?(tok, '(')

  implicit = Type.make_func_type(Type::INT, [], true, false)
  # TODO(south37) Impl warnt when necessary
  # warnt(tok, "assume returning int: %s()", name);
  Node.ast_funcdesg(implicit, name)
end
427
+
428
# Maps a compound-assignment token to the binary operator it applies.
#
# @param [Token] tok
# @return [String, OP, NilClass] the binary operator, or nil when +tok+ is
#   not a compound-assignment operator
def get_compound_assign_op(tok)
  return nil if tok.kind != T::KEYWORD

  case tok.id
  when OP::A_ADD then '+'
  when OP::A_SUB then '-'
  when OP::A_MUL then '*'
  when OP::A_DIV then '/'
  when OP::A_MOD then '%'
  when OP::A_AND then '&'
  when OP::A_OR  then '|'
  when OP::A_XOR then '^'
  when OP::A_SAL then OP::SAL
  when OP::A_SAR then OP::SAR
  when OP::A_SHR then OP::SHR
  end
end
450
+
451
# Looks +name+ up in the current environment as a typedef name.
#
# @param [String] name
# @return [Type, nil] the aliased type, or nil when +name+ is unknown or is
#   not a typedef
def get_typedef(name)
  node = env[name]
  return nil unless node && (node.kind == AST::TYPEDEF)

  node.ty
end
461
+
462
# Tells whether +tok+ can begin a type name.
#
# @param [Token] tok
# @return [Type, Boolean, nil] truthy for a typedef name (the aliased Type
#   itself) or a type keyword; falsy otherwise
def is_type?(tok)
  if tok.kind == T::IDENT
    get_typedef(tok.sval)
  elsif tok.kind != T::KEYWORD
    false
  elsif tok.id.is_a?(Keyword)
    tok.id.is_type?
  else
    false
  end
end
469
+
470
+ #
471
+ # Declarator
472
+ #
473
+ # C's syntax for declaration is not only hard to read for humans but also
474
+ # hard to parse for hand-written parsers. Consider the following two cases:
475
+ #
476
+ # A: int *x;
477
+ # B: int *x();
478
+ #
479
+ # A is of type pointer to int, but B is not a pointer type, B is type of
480
+ # function returning a pointer to an integer. The meaning of the first half
481
+ # of the declaration ("int *" part) is different between them.
482
+ #
483
+ # In 8cc, declarations are parsed by two functions: read_declarator
484
+ # and read_declarator_tail. The former function parses the first half of a
485
+ # declaration, and the latter parses the (possibly nonexistent) parentheses
486
+ # of a function or an array.
487
+ #
488
+
489
# C11 6.7.6: Declarators
# TODO(south37) Use return value for rname, params
#
# Reads the first half of a declarator (grouping parentheses, pointers and
# the declared name) and hands the rest to read_declarator_tail.
#
# @param(return) [String] rname the declared name is appended to this string
# @param [Type] basety type built so far
# @param(return) [Array] params forwarded to the function-declarator reader
# @param [DECL] ctx declarator context; identifiers are rejected for
#   DECL::CAST and required for DECL::BODY and DECL::PARAM
# @return [Type]
def read_declarator(rname, basety, params, ctx)
  if next_token?('(')
    # '(' is either beginning of grouping parentheses or of a function
    # parameter list. If the next token is a type name, a parameter list
    # must follow.
    return read_declarator_func(basety, params) if is_type?(peek)

    # If not, it's grouping. In that case we have to read from outside.
    # For example, consider int (*)(), which is "pointer to function
    # returning int". We have only read "int" so far. We don't want to pass
    # "int" to a recursive call, or otherwise we would get "pointer to int".
    # Here, we pass a dummy object to get "pointer to <something>" first,
    # continue reading to get "function returning int", and then combine them.
    placeholder = Type.make_stub_type
    inner = read_declarator(rname, placeholder, params, ctx)
    expect!(')')
    placeholder.replace_by!(read_declarator_tail(basety, params))
    return inner
  end

  if next_token?('*')
    skip_type_qualifiers!
    return read_declarator(rname, Type.make_ptr_type(basety), params, ctx)
  end

  tok = get
  if tok.kind == T::IDENT
    Util.errort!(tok, "identifier is not expected, but got #{tok}") if ctx == DECL::CAST
    rname << tok.sval # Write as return value
    return read_declarator_tail(basety, params)
  end

  # No identifier: only acceptable for abstract declarators.
  if [DECL::BODY, DECL::PARAM].include?(ctx)
    Util.errort!(tok, "identifier, ( or * are expected, but got #{tok}")
  end
  @lexer.unget_token(tok)
  read_declarator_tail(basety, params)
end
537
+
538
# Checks whether the type specifiers collected so far may be combined
# (C11 6.7.2p2).
#
# @param [TypeInfo, nil] kind base type keyword (void, _Bool, char, int, ...)
# @param [TypeInfo, nil] size short / long / long long
# @param [TypeInfo, nil] sig signed / unsigned
# @param [Type, nil] usertype struct, union, enum, typedef or typeof type
# @return [Boolean] false when the combination is not allowed
def error_check(kind, size, sig, usertype)
  return false if (kind == TypeInfo::BOOL) && size && sig
  # `short` combines with `int` only. kind holds a TypeInfo value, so it has
  # to be compared against TypeInfo::INT (not Type::INT).
  return false if (size == TypeInfo::SHORT) && kind && (kind != TypeInfo::INT)
  # `long` combines with `int` and `double` only.
  return false if (size == TypeInfo::LONG) && kind && (kind != TypeInfo::INT) && (kind != TypeInfo::DOUBLE)
  return false if sig && [TypeInfo::VOID, TypeInfo::FLOAT, TypeInfo::DOUBLE].include?(kind)
  # A user type cannot be mixed with any arithmetic specifier.
  return false if usertype && (kind || size || sig)

  true
end

# Reports +tok+ as something that is not a valid type name here.
#
# @param [Token] tok
# @raise [RuntimeError]
def read_decl_spec_error!(tok)
  Util.errort!(tok, "type name expected, but got #{tok}")
end

# Reads declaration specifiers: storage class, type qualifiers, type
# specifiers, function specifiers and _Alignas. Reading stops, without
# consuming it, at the first token that is not a specifier.
#
# Every specifier is validated against the ones read before it; an invalid
# combination such as `short double` is reported through
# read_decl_spec_error!.
#
# @return [<Type, S>] the resulting type and the storage class (nil if none)
def read_decl_spec
  sclass = nil
  tok = peek
  read_decl_spec_error!(tok) unless is_type?(tok)

  usertype = nil
  kind = nil
  size = nil
  sig = nil
  align = -1

  while true
    tok = get
    read_decl_spec_error!(tok) if tok.kind == T::EOF

    # An identifier is only considered as a typedef name while no type has
    # been seen yet; afterwards it is the declared name.
    if kind.nil? && (tok.kind == T::IDENT) && !usertype
      typedef = get_typedef(tok.sval)
      if typedef
        usertype = typedef
        read_decl_spec_error!(tok) unless error_check(kind, size, sig, usertype)
        next
      end
    end

    if tok.kind != T::KEYWORD
      @lexer.unget_token(tok)
      break
    end

    case tok.id
    when K::TYPEDEF
      if sclass then read_decl_spec_error!(tok) else sclass = S::TYPEDEF end
    when K::EXTERN
      if sclass then read_decl_spec_error!(tok) else sclass = S::EXTERN end
    when K::STATIC
      if sclass then read_decl_spec_error!(tok) else sclass = S::STATIC end
    when K::AUTO
      if sclass then read_decl_spec_error!(tok) else sclass = S::AUTO end
    when K::REGISTER
      if sclass then read_decl_spec_error!(tok) else sclass = S::REGISTER end
    when K::CONST, K::VOLATILE, K::INLINE, K::NORETURN
      # Accepted and ignored
    when K::VOID
      if kind then read_decl_spec_error!(tok) else kind = TypeInfo::VOID end
    when K::BOOL
      if kind then read_decl_spec_error!(tok) else kind = TypeInfo::BOOL end
    when K::CHAR
      if kind then read_decl_spec_error!(tok) else kind = TypeInfo::CHAR end
    when K::INT
      if kind then read_decl_spec_error!(tok) else kind = TypeInfo::INT end
    when K::FLOAT
      if kind then read_decl_spec_error!(tok) else kind = TypeInfo::FLOAT end
    when K::DOUBLE
      if kind then read_decl_spec_error!(tok) else kind = TypeInfo::DOUBLE end
    when K::SIGNED
      if sig then read_decl_spec_error!(tok) else sig = TypeInfo::SIGNED end
    when K::UNSIGNED
      if sig then read_decl_spec_error!(tok) else sig = TypeInfo::UNSIGNED end
    when K::SHORT
      if size then read_decl_spec_error!(tok) else size = TypeInfo::SHORT end
    when K::STRUCT
      if usertype then read_decl_spec_error!(tok) else usertype = read_struct_def end
    when K::UNION
      if usertype then read_decl_spec_error!(tok) else usertype = read_union_def end
    when K::ENUM
      if usertype then read_decl_spec_error!(tok) else usertype = read_enum_def end
    when K::ALIGNAS
      val = read_alignas
      Util.errort!(tok, "negative alignment: #{val}") if val < 0
      # C11 6.7.5p6: _Alignas(0) has no effect. Among several _Alignas
      # specifiers the smallest value is kept, as before.
      align = val if (val != 0) && ((align == -1) || (val < align))
    when K::LONG
      if size.nil?
        size = TypeInfo::LONG
      elsif size == TypeInfo::LONG
        size = TypeInfo::LLONG
      else
        read_decl_spec_error!(tok)
      end
    when K::TYPEOF
      read_decl_spec_error!(tok) if usertype
      usertype = read_typeof
    else
      @lexer.unget_token(tok)
      break # Stop loop
    end

    # Validate the combination before reading the next specifier. The result
    # must be acted upon; ignoring it would accept invalid combinations.
    read_decl_spec_error!(tok) unless error_check(kind, size, sig, usertype)
  end

  return usertype, sclass if usertype

  if (align != -1) && !is_peweroftwo?(align)
    Util.errort!(tok, "alignment must be power of 2, but got #{align}")
  end

  # Types fully determined by the base keyword
  ty =
    case kind
    when TypeInfo::VOID
      Type::VOID
    when TypeInfo::BOOL
      Type.make_numtype(Kind::BOOL, false)
    when TypeInfo::CHAR
      Type.make_numtype(Kind::CHAR, sig == TypeInfo::UNSIGNED)
    when TypeInfo::FLOAT
      Type.make_numtype(Kind::FLOAT, false)
    when TypeInfo::DOUBLE
      Type.make_numtype(((size == TypeInfo::LONG) ? Kind::LDOUBLE : Kind::DOUBLE), false)
    end

  # Otherwise an integer type whose width is given by the size specifier
  if ty.nil?
    int_kind =
      case size
      when TypeInfo::SHORT then Kind::SHORT
      when TypeInfo::LONG  then Kind::LONG
      when TypeInfo::LLONG then Kind::LLONG
      else Kind::INT
      end
    ty = Type.make_numtype(int_kind, sig == TypeInfo::UNSIGNED)
  end

  ty.align = align if align != -1
  return ty, sclass
end
734
+
735
# Reads declaration specifiers when the next token starts a type name;
# otherwise falls back to int with no storage class.
#
# @return [<Type, S>]
def read_decl_spec_opt
  # TODO(south37) Impl warnt
  # warnt(peek(), "type specifier missing, assuming int");
  is_type?(peek) ? read_decl_spec : [Type::INT, nil]
end
744
+
745
# Reads the part of an array declarator that follows '['.
#
# @param [Type] basety element type read so far
# @return [Type] array type; its length is -1 when no size was written
def read_declarator_array(basety)
  len =
    if next_token?(']')
      -1
    else
      count = read_intexpr
      expect!(']')
      count
    end

  tok = peek
  elem = read_declarator_tail(basety, nil)
  Util.errort!(tok, "array of functions") if elem.kind == Kind::FUNC
  Type.make_array_type(elem, len)
end
761
+
762
# Reads the optional array or function suffix of a declarator.
#
# @param [Type] basety
# @param(return) [Array] params forwarded to the function-declarator reader
# @return [Type] +basety+ itself when no suffix follows
def read_declarator_tail(basety, params)
  if next_token?('[')
    read_declarator_array(basety)
  elsif next_token?('(')
    read_declarator_func(basety, params)
  else
    basety
  end
end
774
+
775
# Consumes any run of const / volatile / restrict qualifiers.
#
# @return [nil]
def skip_type_qualifiers!
  qualifiers = [K::CONST, K::VOLATILE, K::RESTRICT]
  nil while qualifiers.any? { |qualifier| next_token?(qualifier) }
end
780
+
781
# Collects the type of every parameter node.
#
# @param [<Node>] params
# @return [<Type>]
def param_types(params)
  params.map { |param| param.ty }
end
787
+
788
# Reads the parenthesised argument of _Alignas.
#
# C11 6.7.5. Valid form of _Alignas is either _Alignas(type-name) or
# _Alignas(constant-expression).
#
# @return [Integer] the alignment of the named type, or the value of the
#   constant expression
def read_alignas
  expect!('(')
  alignment =
    if is_type?(peek)
      read_cast_type.align
    else
      read_intexpr
    end
  expect!(')')
  alignment
end
797
+
798
# Reads the parenthesised argument of typeof, which is either a type name or
# an expression.
#
# @return [Type] the named type, or the type of the expression
def read_typeof
  expect!('(')
  type =
    if is_type?(peek)
      read_cast_type
    else
      read_comma_expr.ty
    end
  expect!(')')
  type
end
805
+
806
# Reads the initializer of a compound literal and materialises the literal
# as a local variable with a generated name.
#
# @param [Type] ty type of the literal
# @return [Node] the local variable node carrying the initializer
def read_compound_literal(ty)
  name = @label_gen.next
  init = read_decl_init(ty)
  Node.ast_lvar(ty, name, @localenv, @localvars).tap { |lvar| lvar.lvarinit = init }
end
815
+
816
# TODO(south37) Improve performance by cache.
#
# Tells whether a function definition starts at the current position. Tokens
# are read ahead and all of them are pushed back before returning.
#
# The basic idea is that if we see '{' or a type keyword after the closing
# parenthesis of a function parameter list, we were reading a function
# definition. (Usually '{' comes after a closing parenthesis. A type keyword
# is allowed for K&R-style function definitions.)
#
# @return [Boolean]
# @raise [RuntimeError] when the input ends during the look-ahead
def is_funcdef?
  consumed = []
  found = false
  while true
    tok = get
    consumed.push(tok)

    raise "premature end of input" if tok.kind == T::EOF
    # A ';' before any parameter list: this is a plain declaration.
    break if Token.is_keyword?(tok, ';')
    next if is_type?(tok)

    if Token.is_keyword?(tok, '(')
      skip_parentheses!(consumed)
      next
    end

    # Only `identifier (` can start the parameter list of a definition.
    next if (tok.kind != T::IDENT) || !Token.is_keyword?(peek, '(')

    consumed.push(get)
    skip_parentheses!(consumed)
    found = (Token.is_keyword?(peek, '{') || is_type?(peek))
    break
  end

  # Push everything back, last token first.
  consumed.reverse_each { |t| @lexer.unget_token(t) }
  found
end
863
+
864
# Reads tokens up to the ')' that closes an already-read '(', recursing
# through nested pairs. Every token read is appended to +buf+.
#
# @param(return) [Array] buf receives the tokens read
# @raise [RuntimeError] when the input ends before the closing ')'
def skip_parentheses!(buf)
  while true
    tok = get
    raise "premature end of input" if tok.kind == T::EOF

    buf.push(tok)
    return if Token.is_keyword?(tok, ')')

    skip_parentheses!(buf) if Token.is_keyword?(tok, '(')
  end
end
880
+
881
# Merges the string literal tokens that follow +tok+ into +tok+ itself.
# The merged literal takes the first encoding that is not ENC::NONE; mixing
# two different such encodings is reported as an error.
#
# @param [Token] tok first string literal; its sval and enc are updated
def concatenate_string(tok)
  enc = tok.enc
  joined = tok.sval.dup
  while peek.kind == T::STRING
    following = @lexer.read_token
    joined << following.sval
    following_enc = following.enc
    if (enc != ENC::NONE) && (following_enc != ENC::NONE) && (enc != following_enc)
      Util.errort!(following, "unsupported non-standard concatenation of string literals: #{following}")
    end
    enc = following_enc if enc == ENC::NONE
  end
  tok.sval = joined
  tok.enc = enc
end
899
+
900
# Tells whether +x+ is a positive power of two.
#
# @param [Integer] x
# @return [Boolean]
def is_peweroftwo?(x)
  return false unless x > 0

  # A power of two has exactly one bit set, so clearing the lowest set bit
  # leaves zero.
  (x & (x - 1)) == 0
end
910
+
911
# The environment identifiers are currently resolved in.
#
# @return [RMap] the local environment when one is set, otherwise the global one
def env
  return @globalenv if @localenv.nil?

  @localenv
end
914
+
915
# Consumes the next token when it is the keyword +kind+; otherwise the token
# is pushed back.
#
# @param [String] kind
# @return [Boolean] true when the token was consumed
def next_token?(kind)
  tok = get_internal
  matched = Token.is_keyword?(tok, kind)
  @lexer.unget_token(tok) unless matched
  matched ? true : false
end
925
+
926
# Consumes and returns the next token. Kept separate from get_internal as
# the place to hook debug output.
#
# @return [Token]
def get
  # NOTE: Only for debug
  # tok = get_internal
  # print "#{tok} #{tok.file&.name || "(unknown)"} #{tok.line} #{tok.column}\n"
  get_internal
end
935
+
936
# Reads the next token from the lexer. Invalid tokens are reported as errors
# and adjacent string literals are merged into one token.
#
# @return [Token]
def get_internal
  tok = @lexer.read_token
  Util.errort!(tok, "stray character in program: '#{tok.c}'") if tok.kind == T::INVALID
  concatenate_string(tok) if tok.kind == T::STRING && peek.kind == T::STRING
  tok
end
947
+
948
# Returns the lexer's peek_token result.
#
# @return [Token]
def peek
  @lexer.peek_token
end
952
+
953
# Consumes the next token and reports an error unless it is the keyword +id+.
#
# @param [String] id the required keyword or punctuator
def expect!(id)
  tok = get
  return if Token.is_keyword?(tok, id)

  Util.errort!(tok, "'#{id}' expected, but got #{tok}")
end
960
+ end
961
+ end