python-cc 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pcc/__init__.py +0 -0
- pcc/__main__.py +3 -0
- pcc/ast/__init__.py +0 -0
- pcc/ast/ast.py +179 -0
- pcc/ast/ast_transforms.py +106 -0
- pcc/ast/c_ast.py +800 -0
- pcc/codegen/__init__.py +0 -0
- pcc/codegen/c_codegen.py +4177 -0
- pcc/evaluater/__init__.py +0 -0
- pcc/evaluater/c_evaluator.py +238 -0
- pcc/generator/__init__.py +0 -0
- pcc/generator/c_generator.py +399 -0
- pcc/lex/__init__.py +0 -0
- pcc/lex/c_lexer.py +495 -0
- pcc/lex/lexer.py +68 -0
- pcc/lex/token.py +24 -0
- pcc/parse/__init__.py +0 -0
- pcc/parse/c_parser.py +1700 -0
- pcc/parse/file_parser.py +82 -0
- pcc/parse/parser.py +300 -0
- pcc/parse/plyparser.py +56 -0
- pcc/pcc.py +38 -0
- pcc/ply/__init__.py +5 -0
- pcc/ply/cpp.py +908 -0
- pcc/ply/ctokens.py +133 -0
- pcc/ply/lex.py +1097 -0
- pcc/ply/yacc.py +3471 -0
- pcc/ply/ygen.py +74 -0
- pcc/preprocessor.py +509 -0
- pcc/project.py +78 -0
- pcc/util.py +121 -0
- python_cc-0.0.2.dist-info/METADATA +182 -0
- python_cc-0.0.2.dist-info/RECORD +36 -0
- python_cc-0.0.2.dist-info/WHEEL +4 -0
- python_cc-0.0.2.dist-info/entry_points.txt +2 -0
- python_cc-0.0.2.dist-info/licenses/LICENSE +25 -0
pcc/codegen/c_codegen.py
ADDED
|
@@ -0,0 +1,4177 @@
|
|
|
1
|
+
import llvmlite.ir as ir
|
|
2
|
+
import re
|
|
3
|
+
import struct
|
|
4
|
+
from collections import ChainMap
|
|
5
|
+
from contextlib import contextmanager
|
|
6
|
+
from llvmlite.ir import IRBuilder
|
|
7
|
+
from ..ast import c_ast as c_ast
|
|
8
|
+
|
|
9
|
+
# Fixed-width integer IR types shared by the whole code generator.
bool_t, int8_t, int32_t, int64_t = (ir.IntType(bits) for bits in (1, 8, 32, 64))

# Pointer aliases; a generic pointer is spelled as a pointer to i8.
voidptr_t = int8_t.as_pointer()
int64ptr_t = int64_t.as_pointer()

# Ready-made truth constants at bit (i1) and byte (i8) width.
true_bit = ir.Constant(bool_t, 1)
false_bit = ir.Constant(bool_t, 0)
true_byte = ir.Constant(int8_t, 1)
false_byte = ir.Constant(int8_t, 0)

cstring = voidptr_t
struct_types = {}

# Libc function signature registry: name -> (return_type, [param_types], var_arg)
# Covers: stdio.h, stdlib.h, string.h, ctype.h, math.h, unistd.h, time.h
_VOID = ir.VoidType()
_float = ir.FloatType()
_double = ir.DoubleType()
_FILE_ptr = voidptr_t  # FILE* modeled as opaque void*
_size_t = int64_t
_time_t = int64_t
|
|
30
|
+
|
|
31
|
+
# Each entry maps a C symbol name to (return_type, [param_types], var_arg).
# Keys appear exactly once: a repeated key in a dict literal silently
# overrides the earlier entry, so the former duplicate rows for
# "__errno_location", "ldexp" and "getc" (all identical to their first
# occurrence) have been dropped.
LIBC_FUNCTIONS = {
    # === stdio.h ===
    "printf": (int32_t, [cstring], True),
    "fprintf": (int32_t, [_FILE_ptr, cstring], True),
    "sprintf": (int32_t, [cstring, cstring], True),
    "snprintf": (int32_t, [cstring, _size_t, cstring], True),
    "vprintf": (int32_t, [cstring, voidptr_t], False),
    "vfprintf": (int32_t, [_FILE_ptr, cstring, voidptr_t], False),
    "vsprintf": (int32_t, [cstring, cstring, voidptr_t], False),
    "vsnprintf": (int32_t, [cstring, _size_t, cstring, voidptr_t], False),
    "scanf": (int32_t, [cstring], True),
    "fscanf": (int32_t, [_FILE_ptr, cstring], True),
    "sscanf": (int32_t, [cstring, cstring], True),
    "fopen": (_FILE_ptr, [cstring, cstring], False),
    "fclose": (int32_t, [_FILE_ptr], False),
    "fread": (_size_t, [voidptr_t, _size_t, _size_t, _FILE_ptr], False),
    "fwrite": (_size_t, [voidptr_t, _size_t, _size_t, _FILE_ptr], False),
    "fseek": (int32_t, [_FILE_ptr, int64_t, int32_t], False),
    "ftell": (int64_t, [_FILE_ptr], False),
    "rewind": (_VOID, [_FILE_ptr], False),
    "feof": (int32_t, [_FILE_ptr], False),
    "ferror": (int32_t, [_FILE_ptr], False),
    "fflush": (int32_t, [_FILE_ptr], False),
    "fgets": (cstring, [cstring, int32_t, _FILE_ptr], False),
    "fputs": (int32_t, [cstring, _FILE_ptr], False),
    "fgetc": (int32_t, [_FILE_ptr], False),
    "fputc": (int32_t, [int32_t, _FILE_ptr], False),
    "getc": (int32_t, [_FILE_ptr], False),
    "getc_unlocked": (int32_t, [_FILE_ptr], False),
    "putc": (int32_t, [int32_t, _FILE_ptr], False),
    "getchar": (int32_t, [], False),
    "putchar": (int32_t, [int32_t], False),
    "ungetc": (int32_t, [int32_t, _FILE_ptr], False),
    "flockfile": (_VOID, [_FILE_ptr], False),
    "funlockfile": (_VOID, [_FILE_ptr], False),
    "puts": (int32_t, [cstring], False),
    "perror": (_VOID, [cstring], False),
    "remove": (int32_t, [cstring], False),
    "rename": (int32_t, [cstring, cstring], False),
    "fseeko": (int32_t, [_FILE_ptr, int64_t, int32_t], False),
    "ftello": (int64_t, [_FILE_ptr], False),
    # === stdlib.h ===
    "malloc": (voidptr_t, [_size_t], False),
    "calloc": (voidptr_t, [_size_t, _size_t], False),
    "realloc": (voidptr_t, [voidptr_t, _size_t], False),
    "free": (_VOID, [voidptr_t], False),
    "exit": (_VOID, [int32_t], False),
    "_Exit": (_VOID, [int32_t], False),
    "abort": (_VOID, [], False),
    "atexit": (int32_t, [voidptr_t], False),
    "abs": (int32_t, [int32_t], False),
    "labs": (int64_t, [int64_t], False),
    "atoi": (int32_t, [cstring], False),
    "atol": (int64_t, [cstring], False),
    "atof": (_double, [cstring], False),
    "strtol": (int64_t, [cstring, voidptr_t, int32_t], False),
    "strtoul": (int64_t, [cstring, voidptr_t, int32_t], False),
    "strtod": (_double, [cstring, voidptr_t], False),
    # strtof returns C `float`, not `double`; declaring it as double would
    # make callers misread the returned register contents.
    "strtof": (_float, [cstring, voidptr_t], False),
    "rand": (int32_t, [], False),
    "srand": (_VOID, [int32_t], False),
    "qsort": (_VOID, [voidptr_t, _size_t, _size_t, voidptr_t], False),
    "bsearch": (voidptr_t, [voidptr_t, voidptr_t, _size_t, _size_t, voidptr_t], False),
    "getenv": (cstring, [cstring], False),
    "system": (int32_t, [cstring], False),
    # === string.h ===
    "strlen": (_size_t, [cstring], False),
    "strcmp": (int32_t, [cstring, cstring], False),
    "strncmp": (int32_t, [cstring, cstring, _size_t], False),
    "strcpy": (cstring, [cstring, cstring], False),
    "strncpy": (cstring, [cstring, cstring, _size_t], False),
    "strcat": (cstring, [cstring, cstring], False),
    "strncat": (cstring, [cstring, cstring, _size_t], False),
    "strchr": (cstring, [cstring, int32_t], False),
    "strrchr": (cstring, [cstring, int32_t], False),
    "strstr": (cstring, [cstring, cstring], False),
    "strpbrk": (cstring, [cstring, cstring], False),
    "strspn": (_size_t, [cstring, cstring], False),
    "strcspn": (_size_t, [cstring, cstring], False),
    "strtok": (cstring, [cstring, cstring], False),
    "memset": (voidptr_t, [voidptr_t, int32_t, _size_t], False),
    "memcpy": (voidptr_t, [voidptr_t, voidptr_t, _size_t], False),
    "memmove": (voidptr_t, [voidptr_t, voidptr_t, _size_t], False),
    "memcmp": (int32_t, [voidptr_t, voidptr_t, _size_t], False),
    "memchr": (voidptr_t, [voidptr_t, int32_t, _size_t], False),
    "strerror": (cstring, [int32_t], False),
    # === ctype.h ===
    "isalpha": (int32_t, [int32_t], False),
    "isdigit": (int32_t, [int32_t], False),
    "isalnum": (int32_t, [int32_t], False),
    "isspace": (int32_t, [int32_t], False),
    "isupper": (int32_t, [int32_t], False),
    "islower": (int32_t, [int32_t], False),
    "isprint": (int32_t, [int32_t], False),
    "ispunct": (int32_t, [int32_t], False),
    "iscntrl": (int32_t, [int32_t], False),
    "isxdigit": (int32_t, [int32_t], False),
    "isgraph": (int32_t, [int32_t], False),
    "toupper": (int32_t, [int32_t], False),
    "tolower": (int32_t, [int32_t], False),
    # === math.h ===
    "sin": (_double, [_double], False),
    "cos": (_double, [_double], False),
    "tan": (_double, [_double], False),
    "asin": (_double, [_double], False),
    "acos": (_double, [_double], False),
    "atan": (_double, [_double], False),
    "atan2": (_double, [_double, _double], False),
    "sinh": (_double, [_double], False),
    "cosh": (_double, [_double], False),
    "tanh": (_double, [_double], False),
    "exp": (_double, [_double], False),
    "exp2": (_double, [_double], False),
    "log": (_double, [_double], False),
    "log2": (_double, [_double], False),
    "log10": (_double, [_double], False),
    "pow": (_double, [_double, _double], False),
    "sqrt": (_double, [_double], False),
    "cbrt": (_double, [_double], False),
    "hypot": (_double, [_double, _double], False),
    "ceil": (_double, [_double], False),
    "floor": (_double, [_double], False),
    "round": (_double, [_double], False),
    "trunc": (_double, [_double], False),
    "fmod": (_double, [_double, _double], False),
    "fabs": (_double, [_double], False),
    "ldexp": (_double, [_double, int32_t], False),
    # === time.h ===
    "time": (_time_t, [voidptr_t], False),
    "clock": (int64_t, [], False),
    "difftime": (_double, [_time_t, _time_t], False),
    "gmtime_r": (voidptr_t, [voidptr_t, voidptr_t], False),
    "localtime_r": (voidptr_t, [voidptr_t, voidptr_t], False),
    # === unistd.h (POSIX) ===
    "sleep": (int32_t, [int32_t], False),
    "usleep": (int32_t, [int32_t], False),
    "read": (int64_t, [int32_t, voidptr_t, _size_t], False),
    "write": (int64_t, [int32_t, voidptr_t, _size_t], False),
    "open": (int32_t, [cstring, int32_t], True),
    "close": (int32_t, [int32_t], False),
    "getpid": (int32_t, [], False),
    "getppid": (int32_t, [], False),
    "isatty": (int32_t, [int32_t], False),
    "mkstemp": (int32_t, [cstring], False),
    # === setjmp.h ===
    "setjmp": (int32_t, [voidptr_t], False),
    "longjmp": (_VOID, [voidptr_t, int32_t], False),
    "_setjmp": (int32_t, [voidptr_t], False),
    "_longjmp": (_VOID, [voidptr_t, int32_t], False),
    # === signal.h ===
    "signal": (voidptr_t, [int32_t, voidptr_t], False),
    "sigaction": (int32_t, [int32_t, voidptr_t, voidptr_t], False),
    "sigemptyset": (int32_t, [voidptr_t], False),
    "raise": (int32_t, [int32_t], False),
    # === errno ===
    "__errno_location": (ir.IntType(32).as_pointer(), [], False),
    # === locale.h ===
    "setlocale": (cstring, [int32_t, cstring], False),
    "localeconv": (voidptr_t, [], False),
    # === misc ===
    "tmpnam": (cstring, [cstring], False),
    "tmpfile": (voidptr_t, [], False),
    "gmtime": (voidptr_t, [voidptr_t], False),
    "localtime": (voidptr_t, [voidptr_t], False),
    "mktime": (_time_t, [voidptr_t], False),
    "strftime": (_size_t, [cstring, _size_t, cstring, voidptr_t], False),
    "ctime": (cstring, [voidptr_t], False),
    "asctime": (cstring, [voidptr_t], False),
    "frexp": (_double, [_double, int32_t.as_pointer()], False),
    # GCC/Clang builtins (no-op stubs)
    "__builtin_va_start": (_VOID, [voidptr_t], False),
    "__builtin_va_end": (_VOID, [voidptr_t], False),
    "__builtin_va_copy": (_VOID, [voidptr_t, voidptr_t], False),
    "__builtin_expect": (int64_t, [int64_t, int64_t], False),
    "__builtin_unreachable": (_VOID, [], False),
    "__builtin_clz": (int32_t, [int32_t], False),
    "__builtin_ctz": (int32_t, [int32_t], False),
    "modf": (_double, [_double, ir.DoubleType().as_pointer()], False),
    "__builtin_va_arg": (voidptr_t, [voidptr_t, int64_t], False),
    "strcoll": (int32_t, [cstring, cstring], False),
    "clearerr": (_VOID, [voidptr_t], False),
    "fileno": (int32_t, [voidptr_t], False),
    "popen": (voidptr_t, [cstring, cstring], False),
    "pclose": (int32_t, [voidptr_t], False),
    "dlopen": (voidptr_t, [cstring, int32_t], False),
    "dlsym": (voidptr_t, [voidptr_t, cstring], False),
    "dlclose": (int32_t, [voidptr_t], False),
    "dlerror": (cstring, [], False),
    "setvbuf": (int32_t, [voidptr_t, cstring, int32_t, _size_t], False),
    "freopen": (voidptr_t, [cstring, cstring, voidptr_t], False),
}
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
class CodegenError(Exception):
    """Exception class exported by the codegen module; adds no state of its own."""
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# 16-bit integer IR type; the type table in get_ir_type_from_names maps C `short` to it.
int16_t = ir.IntType(16)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def get_ir_type(type_str):
    """Return the IR type for a type name or a list of specifier names.

    A plain string is wrapped in a one-element list; any other argument is
    forwarded unchanged to ``get_ir_type_from_names``.
    """
    if isinstance(type_str, str):
        specifiers = [type_str]
    else:
        specifiers = type_str
    return get_ir_type_from_names(specifiers)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _names_to_key(names):
|
|
240
|
+
"""Convert a names list like ['unsigned', 'int'] to a canonical key string."""
|
|
241
|
+
return names[0] if len(names) == 1 else " ".join(sorted(names))
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _is_unsigned_names(names):
|
|
245
|
+
"""Check if a type name list represents an unsigned type."""
|
|
246
|
+
return "unsigned" in names
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
# Known unsigned type names (after typedef resolution)
|
|
250
|
+
_UNSIGNED_TYPE_NAMES = frozenset(
|
|
251
|
+
{
|
|
252
|
+
"char unsigned",
|
|
253
|
+
"int unsigned",
|
|
254
|
+
"unsigned",
|
|
255
|
+
"int short unsigned",
|
|
256
|
+
"short unsigned",
|
|
257
|
+
"int long unsigned",
|
|
258
|
+
"long unsigned",
|
|
259
|
+
"long long unsigned",
|
|
260
|
+
"size_t",
|
|
261
|
+
"uint8_t",
|
|
262
|
+
"uint16_t",
|
|
263
|
+
"uint32_t",
|
|
264
|
+
"uint64_t",
|
|
265
|
+
}
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# Declarations of the placeholder functions named `__pcc_va_arg_<n>`.
_PCC_VAARG_DECL_RE = re.compile(r'^declare .+@"__pcc_va_arg_\d+"\(.+\)\n?', re.M)
# Calls to those placeholders; the named groups feed the `va_arg` rewrite.
_PCC_VAARG_CALL_RE = re.compile(
    r"^(?P<lhs>\s*%\S+)\s*=\s*call\s+"
    r'(?P<rettype>.+?)\s+@"(?P<name>__pcc_va_arg_\d+)"\('
    r'(?P<argtype>.+?)\s+(?P<argval>%".+?"|%\S+)\)$',
    re.M,
)

# alloca/load/bitcast/store whose operand type is plain `void`
# (a following " (" — i.e. a function type — is excluded).
_INVALID_VOID_INSTR_RE = re.compile(
    r"^(?:%\S+\s*=\s*)?(?:alloca|load|bitcast) void(?! \()([,\s]|$)|^store void(?! \()([,\s]|$)"
)

# An unquoted basic-block label occupying a whole (stripped) line.
_LABEL_RE = re.compile(r"^[A-Za-z$._][A-Za-z0-9$._-]*:$")


def postprocess_ir_text(text):
    """Apply textual IR rewrites that llvmlite cannot express directly.

    The passes run in this order:

    1. Regex rewrites: drop ``__pcc_va_arg_<n>`` declarations, turn
       ``bitcast i64/i8 ... to <ptr>`` into ``inttoptr``, replace
       ``ptrtoint [N x i8] ... to i64`` with ``add i64 0, 0``, and rewrite
       calls to the ``__pcc_va_arg_<n>`` placeholders into ``va_arg``.
    2. Line fixups: lines leaking a Python ``<ir.Constant`` repr become
       ``zeroinitializer`` when they are array globals and are dropped
       otherwise; instructions operating on plain ``void`` are dropped.
    3. Duplicate case values inside single-line ``switch i64`` instructions
       are removed, keeping the first occurrence.
    4. Control-flow repair: indented instructions following a terminator are
       dropped until the next label *or the closing brace of the function*,
       consecutive labels are bridged with a ``br``, and a label directly
       followed by ``}`` gets an ``unreachable``.

    Args:
        text: LLVM IR in textual form.

    Returns:
        The rewritten IR, lines joined with ``"\\n"`` (no trailing newline).
    """

    # --- simple regex rewrites ---
    text = _PCC_VAARG_DECL_RE.sub("", text)
    text = re.sub(
        r"bitcast i64 (%\S+) to (i8\*|[^,\n]+\*)", r"inttoptr i64 \1 to \2", text
    )
    text = re.sub(
        r"bitcast i8 (%\S+) to (i8\*|[^,\n]+\*)", r"inttoptr i8 \1 to \2", text
    )
    text = re.sub(r"ptrtoint \[\d+ x i8\] [^\n]+ to i64", "add i64 0, 0", text)

    def repl(match):
        lhs = match.group("lhs")
        rettype = match.group("rettype")
        argtype = match.group("argtype")
        argval = match.group("argval")
        return f"{lhs} = va_arg {argtype} {argval}, {rettype}"

    text = _PCC_VAARG_CALL_RE.sub(repl, text)

    # --- line-level fixups ---
    lines = []
    for line in text.splitlines():
        # Fix Python repr leak in array initializers -> zeroinitializer
        if "<ir.Constant" in line:
            m = re.match(r'(@"[^"]*"\s*=\s*(?:global|constant)\s*\[[^\]]*\]).*', line)
            if m is None:
                continue
            line = m.group(1) + " zeroinitializer"
        # Drop invalid void instructions (alloca void, load void, store void)
        if _INVALID_VOID_INSTR_RE.match(line.strip()):
            continue
        lines.append(line)

    # Deduplicate switch case values
    deduped = []
    for line in lines:
        s = line.strip()
        if s.startswith("switch i64 ") and "[" in line and "]" in line:
            prefix, rest = line.split("[", 1)
            case_text, suffix = rest.rsplit("]", 1)
            cases = re.findall(r'(i64 -?\d+, label %"[^"]*")', case_text)
            if cases:
                seen = set()
                unique = []
                for case in cases:
                    val = re.match(r"i64 (-?\d+)", case).group(1)
                    if val not in seen:
                        seen.add(val)
                        unique.append(case)
                line = prefix + "[" + " ".join(unique) + "]" + suffix
        deduped.append(line)

    # Repair control flow: drop dead code after terminators, bridge empty labels
    def _is_label(s):
        return bool(_LABEL_RE.match(s))

    def _is_terminator(s):
        return s.startswith(("br ", "ret ", "switch ", "unreachable", "resume "))

    repaired = []
    skip_dead = False
    for raw in deduped:
        s = raw.strip()
        if skip_dead:
            # Dead code ends at the next label, and also at the end of the
            # function body: without the "}" check the closing brace and the
            # header of the following function were discarded as well.
            if _is_label(s) or s == "}":
                skip_dead = False
            else:
                continue
        if (
            repaired
            and _is_terminator(repaired[-1].strip())
            and s
            and raw.startswith(" ")
            and not _is_label(s)
        ):
            skip_dead = True
            continue
        if repaired and _is_label(repaired[-1].strip()) and _is_label(s):
            # Previous block is empty: fall through explicitly to this label.
            repaired.append(f' br label %"{s[:-1]}"')
        if s == "}" and repaired and _is_label(repaired[-1].strip()):
            # Trailing empty block: give it a terminator.
            repaired.append(" unreachable")
        repaired.append(raw)

    return "\n".join(repaired)
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def get_ir_type_from_names(names):
    """Get IR type from a list of type specifier names like ['unsigned', 'int'].

    Qualifiers, storage-class words and ``signed`` are discarded first. The
    remaining names are sorted and joined into a key that is looked up in an
    exact-match table; if that misses, the first of ``double``, ``float``,
    ``char``, ``short`` present in the list decides, and anything else
    becomes i64.
    """
    ignored = (
        "const",
        "volatile",
        "register",
        "restrict",
        "inline",
        "_Noreturn",
        "signed",
        "extern",
        "static",
    )
    names = [spec for spec in names if spec not in ignored]
    key = " ".join(sorted(names))

    # Exact matches
    exact = {
        "int": int32_t,
        "char": int8_t,
        "void": ir.VoidType(),
        "double": _double,
        "float": _float,
        "short": int16_t,
        "long": int64_t,
        "int short": int16_t,
        "int long": int64_t,
        "long long": int64_t,
        "int long long": int64_t,
        "char unsigned": int8_t,
        "int unsigned": int32_t,
        "unsigned": int32_t,
        "int short unsigned": int16_t,
        "short unsigned": int16_t,
        "int long unsigned": int64_t,
        "long unsigned": int64_t,
        "long long unsigned": int64_t,
        # size_t, etc.
        "size_t": int64_t,
        "ssize_t": int64_t,
        "ptrdiff_t": int64_t,
        "int8_t": int8_t,
        "int16_t": int16_t,
        "int32_t": int32_t,
        "int64_t": int64_t,
        "uint8_t": int8_t,
        "uint16_t": int16_t,
        "uint32_t": int32_t,
        "uint64_t": int64_t,
    }
    try:
        return exact[key]
    except KeyError:
        pass

    # Keyword fallbacks, checked in priority order.
    fallbacks = (
        ("double", _double),
        ("float", _float),
        ("char", int8_t),
        ("short", int16_t),
    )
    for keyword, ir_type in fallbacks:
        if keyword in names:
            return ir_type

    # Default to i64
    return int64_t
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def get_ir_type_from_node(node):
    """Return the IR type for a declaration-like AST node.

    The node's ``type`` attribute is resolved through ``_resolve_node_type``.
    An ``EllipsisParam`` has no type of its own and yields ``voidptr_t``.
    """
    if not isinstance(node, c_ast.EllipsisParam):
        return _resolve_node_type(node.type)
    # shouldn't be called for "...", but be safe
    return voidptr_t
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _union_member_size(ir_t):
    """Approximate byte size of ``ir_t``; struct padding is not modelled."""
    if isinstance(ir_t, ir.IntType):
        return ir_t.width // 8
    if isinstance(ir_t, ir.FloatType):
        return 4
    if isinstance(ir_t, ir.ArrayType):
        return ir_t.count * _union_member_size(ir_t.element)
    if isinstance(ir_t, ir.LiteralStructType):
        return sum(_union_member_size(elem) for elem in ir_t.elements)
    # Pointers, doubles and anything unrecognised count as 8 bytes.
    return 8


def _union_member_align(ir_t):
    """Approximate alignment of ``ir_t`` in bytes, always one of 1, 2, 4, 8 for the widths used here."""
    if isinstance(ir_t, ir.IntType):
        return min(8, max(1, ir_t.width // 8))
    if isinstance(ir_t, ir.FloatType):
        return 4
    if isinstance(ir_t, ir.ArrayType):
        return _union_member_align(ir_t.element)
    if isinstance(ir_t, ir.LiteralStructType):
        return max((_union_member_align(elem) for elem in ir_t.elements), default=1)
    return 8


def _union_member_ir_type(decl_type):
    """Resolve a union member's type, keeping arrays as real IR array types."""
    if not isinstance(decl_type, c_ast.ArrayDecl):
        return _resolve_node_type(decl_type)

    dims = []
    arr_node = decl_type
    while isinstance(arr_node, c_ast.ArrayDecl):
        # Only literal dimensions are understood; anything else counts as 0.
        dim = 0
        if isinstance(arr_node.dim, c_ast.Constant):
            dim = int(arr_node.dim.value.rstrip("uUlL"), 0)
        dims.append(dim)
        arr_node = arr_node.type

    arr_ir_type = _resolve_node_type(arr_node)
    for dim in reversed(dims):
        arr_ir_type = ir.ArrayType(arr_ir_type, dim)
    return arr_ir_type


def _resolve_node_type(node_type):
    """Resolve an AST type node to an IR type.

    * ``PtrDecl`` to a ``FuncDecl`` becomes a pointer to an IR function type
      (ellipsis and ``void`` parameters are left out of the parameter list);
      any other pointer becomes a pointer to the resolved pointee, with
      ``void *`` mapped to ``voidptr_t``.
    * ``TypeDecl`` of an ``IdentifierType`` goes through ``get_ir_type``.
    * ``TypeDecl`` of a ``Struct``/``Union`` with member declarations builds
      a literal struct type; without declarations (opaque or forward
      declared) it yields ``int8_t``.
    * ``ArrayDecl`` decays to ``voidptr_t``.
    * Everything else falls back to ``int64_t``.

    The literal struct built for a union holds one integer of the widest
    member alignment plus an ``i8`` array padding it to the largest member
    size, and carries ``members``, ``member_types`` and ``is_union``
    attributes.

    This is a module-level function, so union sizing uses the local
    ``_union_member_size``/``_union_member_align`` helpers; the earlier code
    referenced ``self`` here and raised ``NameError`` for every inline union
    with members.
    """
    if isinstance(node_type, c_ast.PtrDecl):
        inner = node_type.type
        if isinstance(inner, c_ast.FuncDecl):
            ret_type = _resolve_node_type(inner.type)
            param_types = []
            if inner.args:
                for p in inner.args.params:
                    if isinstance(p, c_ast.EllipsisParam):
                        continue
                    t = get_ir_type_from_node(p)
                    if not isinstance(t, ir.VoidType):
                        param_types.append(t)
            return ir.FunctionType(ret_type, param_types).as_pointer()
        pointee = _resolve_node_type(inner)
        if isinstance(pointee, ir.VoidType):
            return voidptr_t
        return ir.PointerType(pointee)
    elif isinstance(node_type, c_ast.TypeDecl):
        if isinstance(node_type.type, c_ast.IdentifierType):
            return get_ir_type(node_type.type.names)
        elif isinstance(node_type.type, c_ast.Struct):
            snode = node_type.type
            if snode.decls:
                # Inline struct with declarations — build real type
                member_types = [_resolve_node_type(decl.type) for decl in snode.decls]
                st = ir.LiteralStructType(member_types)
                st.members = [d.name for d in snode.decls]
                st.member_decl_types = [d.type for d in snode.decls]
                return st
            return int8_t  # opaque/forward-declared struct
        elif isinstance(node_type.type, c_ast.Union):
            unode = node_type.type
            if unode.decls:
                # Inline union with declarations — compute max size
                max_size = 0
                max_align = 1
                member_types = {}
                for decl in unode.decls:
                    ir_t = _union_member_ir_type(decl.type)
                    member_types[decl.name] = ir_t
                    max_size = max(max_size, _union_member_size(ir_t))
                    max_align = max(max_align, _union_member_align(ir_t))
                align_map = {8: int64_t, 4: int32_t, 2: int16_t, 1: int8_t}
                align_type = align_map.get(max_align, int64_t)
                pad_size = max_size - max_align
                if pad_size > 0:
                    ut = ir.LiteralStructType(
                        [align_type, ir.ArrayType(int8_t, pad_size)]
                    )
                else:
                    ut = ir.LiteralStructType([align_type])
                ut.members = list(member_types.keys())
                ut.member_types = member_types
                ut.is_union = True
                return ut
            return int8_t  # opaque/forward-declared union
        return int64_t
    elif isinstance(node_type, c_ast.ArrayDecl):
        return voidptr_t  # array params decay to pointer
    return int64_t
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
class LLVMCodeGenerator(object):
|
|
555
|
+
|
|
556
|
+
def __init__(self):
    """Create an empty module targeting the host and reset generator state."""
    import llvmlite.binding as llvm_binding

    module = ir.Module()
    # Set proper data layout for struct padding/alignment
    llvm_binding.initialize_native_target()
    host_triple = llvm_binding.get_default_triple()
    target_machine = llvm_binding.Target.from_default_triple().create_target_machine()
    module.triple = host_triple
    module.data_layout = str(target_machine.target_data)
    self.module = module

    # Builders, scope chain and the function currently being emitted.
    self.builder = None
    self.global_builder: IRBuilder = ir.IRBuilder()
    self.env = ChainMap()
    self.nlabels = 0
    self.function = None
    self.in_global = True

    # Bookkeeping sets and maps, all starting out empty.
    self._declared_libc = set()
    self._unsigned_bindings = set()  # alloca/global ids with unsigned type
    self._unsigned_pointee_bindings = set()
    self._unsigned_return_bindings = set()
    self._expr_ir_types = {}
    self._labels = {}
    self._vaarg_counter = 0
|
|
581
|
+
|
|
582
|
+
def define(self, name, val):
    """Bind ``name`` to ``val`` in ``self.env``."""
    scope = self.env
    scope[name] = val
|
|
584
|
+
|
|
585
|
+
def _mark_unsigned(self, binding):
|
|
586
|
+
"""Mark a concrete IR binding as having unsigned type."""
|
|
587
|
+
if binding is not None:
|
|
588
|
+
self._unsigned_bindings.add(id(binding))
|
|
589
|
+
|
|
590
|
+
def _mark_unsigned_pointee(self, binding):
|
|
591
|
+
"""Mark a pointer/array binding whose immediate pointee is unsigned."""
|
|
592
|
+
if binding is not None:
|
|
593
|
+
self._unsigned_pointee_bindings.add(id(binding))
|
|
594
|
+
|
|
595
|
+
def _mark_unsigned_return(self, binding):
|
|
596
|
+
"""Mark a function or function-pointer binding with unsigned return."""
|
|
597
|
+
if binding is not None:
|
|
598
|
+
self._unsigned_return_bindings.add(id(binding))
|
|
599
|
+
|
|
600
|
+
def _is_unsigned_val(self, val):
|
|
601
|
+
"""Check if a value should use unsigned operations."""
|
|
602
|
+
# Check if the value was produced by an unsigned operation
|
|
603
|
+
return getattr(val, "_is_unsigned", False)
|
|
604
|
+
|
|
605
|
+
def _is_unsigned_binding(self, binding):
|
|
606
|
+
return binding is not None and id(binding) in self._unsigned_bindings
|
|
607
|
+
|
|
608
|
+
def _is_unsigned_pointee_binding(self, binding):
|
|
609
|
+
return binding is not None and id(binding) in self._unsigned_pointee_bindings
|
|
610
|
+
|
|
611
|
+
def _is_unsigned_return_binding(self, binding):
|
|
612
|
+
return binding is not None and id(binding) in self._unsigned_return_bindings
|
|
613
|
+
|
|
614
|
+
def _tag_unsigned(self, val):
|
|
615
|
+
"""Tag an IR value as unsigned."""
|
|
616
|
+
try:
|
|
617
|
+
val._is_unsigned = True
|
|
618
|
+
except (AttributeError, TypeError):
|
|
619
|
+
pass
|
|
620
|
+
return val
|
|
621
|
+
|
|
622
|
+
def _clear_unsigned(self, val):
|
|
623
|
+
"""Clear unsigned metadata from an IR value."""
|
|
624
|
+
try:
|
|
625
|
+
val._is_unsigned = False
|
|
626
|
+
except (AttributeError, TypeError):
|
|
627
|
+
pass
|
|
628
|
+
return val
|
|
629
|
+
|
|
630
|
+
def _tag_unsigned_pointee(self, val):
|
|
631
|
+
try:
|
|
632
|
+
val._pcc_unsigned_pointee = True
|
|
633
|
+
except (AttributeError, TypeError):
|
|
634
|
+
pass
|
|
635
|
+
return val
|
|
636
|
+
|
|
637
|
+
def _is_unsigned_pointee(self, val):
|
|
638
|
+
return getattr(val, "_pcc_unsigned_pointee", False)
|
|
639
|
+
|
|
640
|
+
def _tag_unsigned_return(self, val):
|
|
641
|
+
try:
|
|
642
|
+
val._pcc_unsigned_return = True
|
|
643
|
+
except (AttributeError, TypeError):
|
|
644
|
+
pass
|
|
645
|
+
return val
|
|
646
|
+
|
|
647
|
+
def _is_unsigned_return(self, val):
|
|
648
|
+
return getattr(val, "_pcc_unsigned_return", False)
|
|
649
|
+
|
|
650
|
+
def _set_expr_ir_type(self, node, ir_type):
|
|
651
|
+
if node is not None:
|
|
652
|
+
self._expr_ir_types[id(node)] = ir_type
|
|
653
|
+
|
|
654
|
+
def _get_expr_ir_type(self, node, default=None):
|
|
655
|
+
if node is None:
|
|
656
|
+
return default
|
|
657
|
+
return self._expr_ir_types.get(id(node), getattr(node, "ir_type", default))
|
|
658
|
+
|
|
659
|
+
def _either_unsigned(self, lhs, rhs):
|
|
660
|
+
"""Check if either operand is unsigned (C promotion rules)."""
|
|
661
|
+
return self._is_unsigned_val(lhs) or self._is_unsigned_val(rhs)
|
|
662
|
+
|
|
663
|
+
def _int_to_float(self, val, target_type):
|
|
664
|
+
if self._is_unsigned_val(val):
|
|
665
|
+
return self.builder.uitofp(val, target_type)
|
|
666
|
+
return self.builder.sitofp(val, target_type)
|
|
667
|
+
|
|
668
|
+
def _convert_int_value(self, val, target_type, result_unsigned=None):
    """Convert an integer value to ``target_type`` and set its signedness tag.

    Non-integer conversions are delegated to ``self._implicit_convert``.
    Widening uses ``zext`` when the source is tagged unsigned and ``sext``
    otherwise; narrowing uses ``trunc``; equal widths pass the value
    through. The result is tagged unsigned according to
    ``result_unsigned``, which defaults to the source's own tag.
    """
    source_type = getattr(val, "type", None)
    both_ints = isinstance(source_type, ir.IntType) and isinstance(
        target_type, ir.IntType
    )
    if not both_ints:
        return self._implicit_convert(val, target_type)

    source_unsigned = self._is_unsigned_val(val)
    source_bits = val.type.width
    target_bits = target_type.width
    if source_bits > target_bits:
        result = self.builder.trunc(val, target_type)
    elif source_bits == target_bits:
        result = val
    else:
        widen = self.builder.zext if source_unsigned else self.builder.sext
        result = widen(val, target_type)

    unsigned_result = source_unsigned if result_unsigned is None else result_unsigned
    tagger = self._tag_unsigned if unsigned_result else self._clear_unsigned
    return tagger(result)
|
|
691
|
+
|
|
692
|
+
def _integer_promotion(self, val):
    """Promote integers narrower than 32 bits to ``int32_t``.

    Non-integer values and integers of 32 bits or more are returned as-is.
    An ``i1`` is zero-extended; other narrow integers go through
    ``_convert_int_value``. The promoted result is always marked signed.
    """
    value_type = getattr(val, "type", None)
    if not isinstance(value_type, ir.IntType):
        return val

    bits = val.type.width
    if bits == 1:
        widened = self.builder.zext(val, int32_t)
        return self._clear_unsigned(widened)
    if bits < int32_t.width:
        return self._convert_int_value(val, int32_t, result_unsigned=False)
    return val
|
|
700
|
+
|
|
701
|
+
def _usual_arithmetic_conversion(self, lhs, rhs):
|
|
702
|
+
lhs = self._integer_promotion(lhs)
|
|
703
|
+
rhs = self._integer_promotion(rhs)
|
|
704
|
+
|
|
705
|
+
lhs_unsigned = self._is_unsigned_val(lhs)
|
|
706
|
+
rhs_unsigned = self._is_unsigned_val(rhs)
|
|
707
|
+
lhs_width = lhs.type.width
|
|
708
|
+
rhs_width = rhs.type.width
|
|
709
|
+
|
|
710
|
+
if lhs_unsigned == rhs_unsigned:
|
|
711
|
+
target_type = lhs.type if lhs_width >= rhs_width else rhs.type
|
|
712
|
+
result_unsigned = lhs_unsigned
|
|
713
|
+
elif lhs_unsigned:
|
|
714
|
+
if lhs_width >= rhs_width:
|
|
715
|
+
target_type = lhs.type
|
|
716
|
+
result_unsigned = True
|
|
717
|
+
else:
|
|
718
|
+
target_type = rhs.type
|
|
719
|
+
result_unsigned = False
|
|
720
|
+
else:
|
|
721
|
+
if rhs_width >= lhs_width:
|
|
722
|
+
target_type = rhs.type
|
|
723
|
+
result_unsigned = True
|
|
724
|
+
else:
|
|
725
|
+
target_type = lhs.type
|
|
726
|
+
result_unsigned = False
|
|
727
|
+
|
|
728
|
+
lhs = self._convert_int_value(lhs, target_type, result_unsigned)
|
|
729
|
+
rhs = self._convert_int_value(rhs, target_type, result_unsigned)
|
|
730
|
+
return lhs, rhs, result_unsigned
|
|
731
|
+
|
|
732
|
+
def _shift_operand_conversion(self, lhs, rhs):
    """Prepare the operands of a shift.

    Both sides are integer-promoted independently; the shift count is then
    converted to the promoted type of the left operand (keeping the count's
    own signedness) so both operands share one IR type.

    Returns:
        ``(lhs, rhs, lhs_is_unsigned)``.
    """
    value = self._integer_promotion(lhs)
    count = self._integer_promotion(rhs)
    if value.type != count.type:
        count_is_unsigned = self._is_unsigned_val(count)
        count = self._convert_int_value(
            count, value.type, result_unsigned=count_is_unsigned
        )
    value_is_unsigned = self._is_unsigned_val(value)
    return value, count, value_is_unsigned
|
|
740
|
+
|
|
741
|
+
def _is_floating_ir_type(self, ir_type):
    """Return True when ``ir_type`` is an LLVM ``float`` or ``double`` type."""
    if isinstance(ir_type, ir.FloatType):
        return True
    return isinstance(ir_type, ir.DoubleType)
|
|
743
|
+
|
|
744
|
+
def _common_float_type(self, lhs_type, rhs_type):
    """Pick the floating type two operands should be converted to.

    Returns ``_double`` if either operand type is ``ir.DoubleType``,
    otherwise ``_float``.
    """
    involves_double = any(
        isinstance(candidate, ir.DoubleType) for candidate in (lhs_type, rhs_type)
    )
    return _double if involves_double else _float
|
|
748
|
+
|
|
749
|
+
def _parse_float_constant(self, raw):
|
|
750
|
+
value = raw.rstrip("fFlL")
|
|
751
|
+
if value.lower().startswith("0x") and "p" in value.lower():
|
|
752
|
+
return float.fromhex(value)
|
|
753
|
+
return float(value)
|
|
754
|
+
|
|
755
|
+
def _float_literal_ir_type(self, raw):
    """Return the IR type of a floating literal.

    A literal ending in ``f`` or ``F`` is ``_float``; anything else
    (including ``l``/``L``-suffixed literals) is ``_double``.
    """
    has_float_suffix = raw.endswith(("f", "F"))
    return _float if has_float_suffix else _double
|
|
759
|
+
|
|
760
|
+
def _float_compare(self, op, lhs, rhs, name):
|
|
761
|
+
if op == "!=":
|
|
762
|
+
return self.builder.fcmp_unordered(op, lhs, rhs, name)
|
|
763
|
+
return self.builder.fcmp_ordered(op, lhs, rhs, name)
|
|
764
|
+
|
|
765
|
+
def _safe_global_var(self, ir_type, name, external=False):
|
|
766
|
+
"""Create or reuse a global variable, avoiding DuplicatedNameError."""
|
|
767
|
+
existing = self.module.globals.get(name)
|
|
768
|
+
if existing:
|
|
769
|
+
return existing
|
|
770
|
+
try:
|
|
771
|
+
gv = ir.GlobalVariable(self.module, ir_type, name)
|
|
772
|
+
if external:
|
|
773
|
+
gv.linkage = "external"
|
|
774
|
+
else:
|
|
775
|
+
gv.initializer = ir.Constant(ir_type, None)
|
|
776
|
+
return gv
|
|
777
|
+
except Exception:
|
|
778
|
+
gv = self.module.globals.get(name) or ir.GlobalVariable(
|
|
779
|
+
self.module, ir_type, self.module.get_unique_name(name)
|
|
780
|
+
)
|
|
781
|
+
if external:
|
|
782
|
+
gv.linkage = "external"
|
|
783
|
+
elif getattr(gv, "initializer", None) is None:
|
|
784
|
+
gv.initializer = ir.Constant(ir_type, None)
|
|
785
|
+
return gv
|
|
786
|
+
|
|
787
|
+
# External C globals that are declared lazily the first time they are
# looked up.  Maps symbol name -> IR value type.
# NOTE(review): the double-underscore ``__std*p`` names are presumably the
# platform-specific spellings of the stdio streams -- confirm.
_EXTERN_GLOBAL_VARS = {
    **dict.fromkeys(
        (
            "stdout",
            "stderr",
            "stdin",
            "__stdoutp",
            "__stderrp",
            "__stdinp",
        ),
        voidptr_t,
    ),
    "errno": int32_t,
}
|
|
797
|
+
|
|
798
|
+
def lookup(self, name):
    """Resolve ``name`` in the current environment.

    ``name`` may be a string, an object with a ``name`` attribute, or
    anything else (which is stringified).  Unknown names that appear in
    ``LIBC_FUNCTIONS`` or ``_EXTERN_GLOBAL_VARS`` are declared on demand
    before the lookup.

    Raises:
        KeyError: if the name is still unknown after lazy declaration.
    """
    if isinstance(name, str):
        key = name
    elif hasattr(name, "name"):
        key = name.name
    else:
        key = str(name)

    if key in self.env:
        return self.env[key]

    if key in LIBC_FUNCTIONS:
        self._declare_libc(key)
    elif key in self._EXTERN_GLOBAL_VARS:
        extern_type = self._EXTERN_GLOBAL_VARS[key]
        extern_var = self._safe_global_var(extern_type, key, external=True)
        self.define(key, (extern_type, extern_var))
    return self.env[key]
|
|
809
|
+
|
|
810
|
+
def _declare_libc(self, name):
|
|
811
|
+
"""Lazily declare a libc function on first use."""
|
|
812
|
+
existing = self.module.globals.get(name)
|
|
813
|
+
if existing:
|
|
814
|
+
self.define(name, (None, existing))
|
|
815
|
+
self._declared_libc.add(name)
|
|
816
|
+
return
|
|
817
|
+
ret_type, param_types, var_arg = LIBC_FUNCTIONS[name]
|
|
818
|
+
fnty = ir.FunctionType(ret_type, param_types, var_arg=var_arg)
|
|
819
|
+
try:
|
|
820
|
+
func = ir.Function(self.module, fnty, name=name)
|
|
821
|
+
except Exception:
|
|
822
|
+
func = self.module.globals.get(name)
|
|
823
|
+
if isinstance(func, ir.Function):
|
|
824
|
+
try:
|
|
825
|
+
if name in ("setjmp", "_setjmp"):
|
|
826
|
+
func.attributes.add("returns_twice")
|
|
827
|
+
elif name in ("longjmp", "_longjmp"):
|
|
828
|
+
func.attributes.add("noreturn")
|
|
829
|
+
except Exception:
|
|
830
|
+
pass
|
|
831
|
+
self.define(name, (fnty, func))
|
|
832
|
+
self._declared_libc.add(name)
|
|
833
|
+
|
|
834
|
+
def new_label(self, name):
    """Return a fresh label string built from ``name`` and a running counter.

    The counter ``self.nlabels`` is incremented on every call.
    """
    self.nlabels = self.nlabels + 1
    return f"label_{name}_{self.nlabels}"
|
|
837
|
+
|
|
838
|
+
@contextmanager
def new_scope(self):
    """Context manager that pushes a child symbol scope for its body.

    ``self.env`` is replaced by a child mapping on entry and the enclosing
    environment is restored on exit.  The restore happens in a ``finally``
    clause so that an exception raised inside the ``with`` body cannot
    leave the generator stuck in the inner scope.
    """
    enclosing_env = self.env
    self.env = enclosing_env.new_child()
    try:
        yield
    finally:
        self.env = enclosing_env
|
|
843
|
+
|
|
844
|
+
@contextmanager
def new_function(self):
    """Context manager wrapping the code generation of one function body.

    On entry the current function, builder, environment and label table are
    saved, ``in_global`` is cleared, a child environment is pushed and the
    label table is reset.  On exit (including on error) the saved state is
    restored and ``in_global`` is set back to ``True``.
    """
    saved_state = (self.function, self.builder, self.env, self._labels)
    self.in_global = False
    self.env = self.env.new_child()
    self._labels = {}
    try:
        yield
    finally:
        self.function, self.builder, self.env, self._labels = saved_state
        self.in_global = True
|
|
861
|
+
|
|
862
|
+
def generate_code(self, node):
    """Generate code for ``node`` and make sure the open block is terminated.

    If a builder is active and its current block has no terminator after
    code generation, a ``ret i64 0`` is appended.  Remaining empty blocks
    are handled later by IR post-processing.

    Returns:
        Whatever ``self.codegen(node)`` returned.
    """
    normal = self.codegen(node)

    builder = self.builder
    if builder and not builder.block.is_terminated:
        builder.ret(ir.Constant(ir.IntType(64), 0))

    return normal
|
|
873
|
+
|
|
874
|
+
def create_entry_block_alloca(
    self, name, type_str, size, array_list=None, point_level=0
):
    """Create storage for a variable and bind it in the environment.

    Args:
        name: Variable name; also used as the IR name.
        type_str: Base type, turned into an IR type via ``get_ir_type``.
        size: Not used by this method.
        array_list: Optional list of array dimensions, outermost first.
        point_level: Number of pointer levels to wrap around the type.

    Returns:
        ``(storage, ir_type)`` where ``storage`` is an entry-block alloca
        when inside a function, or a module global otherwise.
    """

    ir_type = get_ir_type(type_str)

    if array_list is not None:
        # Build the array type from the innermost dimension outwards.
        reversed_list = reversed(array_list)
        for dim in reversed_list:
            ir_type = ir.ArrayType(ir_type, dim)
        # Remember the original dimension list on the resulting type.
        ir_type.dim_array = array_list

    if point_level != 0:
        if isinstance(ir_type, ir.VoidType):
            ir_type = int8_t  # void* -> i8*
        for level in range(point_level):
            ir_type = ir.PointerType(ir_type)

    if not self.in_global:
        ret = self._alloca_in_entry(ir_type, name)
        self.define(name, (ir_type, ret))
    else:
        # Global scope: reuse an existing module global of the same name.
        existing = self.module.globals.get(name)
        if existing:
            ret = existing
        else:
            try:
                ret = ir.GlobalVariable(self.module, ir_type, name)
                ret.initializer = ir.Constant(ir_type, None)
            except Exception:
                # Creation failed: fall back to the registered global, or
                # to a new one under a module-unique name.
                ret = self.module.globals.get(name) or ir.GlobalVariable(
                    self.module, ir_type, self.module.get_unique_name(name)
                )
                if hasattr(ret, "initializer") and ret.initializer is None:
                    ret.initializer = ir.Constant(ir_type, None)
        self.define(name, (ir_type, ret))

    return ret, ir_type
|
|
912
|
+
|
|
913
|
+
def _alloca_in_entry(self, ir_type, name):
|
|
914
|
+
if self.function is None:
|
|
915
|
+
return self.builder.alloca(ir_type, size=None, name=name)
|
|
916
|
+
entry_block = self.function.entry_basic_block
|
|
917
|
+
current_block = self.builder.block if self.builder is not None else None
|
|
918
|
+
entry_builder = ir.IRBuilder(entry_block)
|
|
919
|
+
insert_before = None
|
|
920
|
+
for inst in entry_block.instructions:
|
|
921
|
+
if inst.opname not in ("phi", "alloca"):
|
|
922
|
+
insert_before = inst
|
|
923
|
+
break
|
|
924
|
+
if insert_before is not None:
|
|
925
|
+
entry_builder.position_before(insert_before)
|
|
926
|
+
else:
|
|
927
|
+
entry_builder.position_at_end(entry_block)
|
|
928
|
+
ret = entry_builder.alloca(ir_type, size=None, name=name)
|
|
929
|
+
if (
|
|
930
|
+
self.builder is not None
|
|
931
|
+
and current_block is entry_block
|
|
932
|
+
and not current_block.is_terminated
|
|
933
|
+
):
|
|
934
|
+
self.builder.position_at_end(current_block)
|
|
935
|
+
return ret
|
|
936
|
+
|
|
937
|
+
def codegen(self, node):
    """Dispatch ``node`` to the ``codegen_<ClassName>`` method.

    Returns ``(None, None)`` for a ``None`` node or when no handler exists
    for the node's class; otherwise returns the handler's result.
    """
    if node is None:
        return None, None
    handler = getattr(self, "codegen_" + node.__class__.__name__, None)
    return handler(node) if handler is not None else (None, None)
|
|
945
|
+
|
|
946
|
+
def codegen_FileAST(self, node):
    """Generate code for a whole translation unit in two passes.

    First the names of all defined functions are recorded in
    ``self._funcdef_names``.  Pass one then processes type-introducing
    items (struct/union/enum declarations, declarations of struct/union
    type, and typedefs), ignoring any error.  Pass two processes everything
    else; ``DuplicatedNameError``, ``AssertionError``, ``TypeError`` and
    ``KeyError(None)`` skip the offending item, other exceptions propagate.
    """
    self._funcdef_names = {
        ext.decl.name
        for ext in node.ext
        if isinstance(ext, c_ast.FuncDef) and ext.decl
    }

    def declares_type(ext):
        # True for items that must be processed before ordinary code.
        if isinstance(ext, c_ast.Decl):
            decl_type = ext.type
            if isinstance(decl_type, (c_ast.Struct, c_ast.Union, c_ast.Enum)):
                return True
            return isinstance(decl_type, c_ast.TypeDecl) and isinstance(
                decl_type.type, (c_ast.Struct, c_ast.Union)
            )
        return isinstance(ext, c_ast.Typedef)

    # Pass 1: types and typedefs (best effort).
    handled = set()
    for index, ext in enumerate(node.ext):
        if not declares_type(ext):
            continue
        try:
            self.codegen(ext)
        except Exception:
            pass
        handled.add(index)

    # Pass 2: everything that pass 1 did not touch.
    for index, ext in enumerate(node.ext):
        if index in handled:
            continue
        try:
            self.codegen(ext)
        except Exception as exc:
            # Non-fatal errors: skip the problematic declaration/definition
            non_fatal = (
                type(exc).__name__ in ("DuplicatedNameError",)
                or isinstance(exc, (AssertionError, TypeError))
                or (
                    isinstance(exc, KeyError)
                    and exc.args
                    and exc.args[0] is None
                )
            )
            if not non_fatal:
                raise
|
|
987
|
+
|
|
988
|
+
_escape_map = {
|
|
989
|
+
"n": "\n",
|
|
990
|
+
"t": "\t",
|
|
991
|
+
"r": "\r",
|
|
992
|
+
"\\": "\\",
|
|
993
|
+
"0": "\0",
|
|
994
|
+
"'": "'",
|
|
995
|
+
'"': '"',
|
|
996
|
+
"a": "\a",
|
|
997
|
+
"b": "\b",
|
|
998
|
+
"f": "\f",
|
|
999
|
+
"v": "\v",
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
def _process_escapes(self, s):
|
|
1003
|
+
"""Process C escape sequences in a string."""
|
|
1004
|
+
result = []
|
|
1005
|
+
i = 0
|
|
1006
|
+
while i < len(s):
|
|
1007
|
+
if s[i] == "\\" and i + 1 < len(s):
|
|
1008
|
+
if s[i + 1] == "x":
|
|
1009
|
+
j = i + 2
|
|
1010
|
+
hex_digits = []
|
|
1011
|
+
while j < len(s) and s[j] in "0123456789abcdefABCDEF":
|
|
1012
|
+
hex_digits.append(s[j])
|
|
1013
|
+
j += 1
|
|
1014
|
+
if hex_digits:
|
|
1015
|
+
result.append(chr(int("".join(hex_digits), 16) & 0xFF))
|
|
1016
|
+
i = j
|
|
1017
|
+
continue
|
|
1018
|
+
if s[i + 1] in "01234567":
|
|
1019
|
+
j = i + 1
|
|
1020
|
+
oct_digits = []
|
|
1021
|
+
while j < len(s) and len(oct_digits) < 3 and s[j] in "01234567":
|
|
1022
|
+
oct_digits.append(s[j])
|
|
1023
|
+
j += 1
|
|
1024
|
+
result.append(chr(int("".join(oct_digits), 8) & 0xFF))
|
|
1025
|
+
i = j
|
|
1026
|
+
continue
|
|
1027
|
+
esc = self._escape_map.get(s[i + 1])
|
|
1028
|
+
if esc is not None:
|
|
1029
|
+
result.append(esc)
|
|
1030
|
+
i += 2
|
|
1031
|
+
continue
|
|
1032
|
+
result.append(s[i])
|
|
1033
|
+
i += 1
|
|
1034
|
+
return "".join(result)
|
|
1035
|
+
|
|
1036
|
+
@staticmethod
|
|
1037
|
+
def _string_bytes(s):
|
|
1038
|
+
return bytearray((ord(ch) & 0xFF) for ch in s)
|
|
1039
|
+
|
|
1040
|
+
def _char_constant_value(self, raw):
|
|
1041
|
+
if not raw or len(raw) < 2 or raw[0] != "'" or raw[-1] != "'":
|
|
1042
|
+
return 0
|
|
1043
|
+
processed = self._process_escapes(raw[1:-1])
|
|
1044
|
+
if not processed:
|
|
1045
|
+
return 0
|
|
1046
|
+
value = 0
|
|
1047
|
+
for ch in processed:
|
|
1048
|
+
value = (value << 8) | (ord(ch) & 0xFF)
|
|
1049
|
+
return value
|
|
1050
|
+
|
|
1051
|
+
def codegen_Constant(self, node):
    """Lower a literal to an IR constant.

    * ``int``: hexadecimal, octal and decimal text (with optional
      ``u``/``l`` suffixes) becomes an ``i64`` constant, tagged unsigned
      when the literal carries a ``u``/``U`` suffix.
    * ``char``: the low byte of the character constant as ``int8_t``.
    * ``string``: an ``[n x i8]`` array holding the escaped bytes plus a
      terminating NUL.
    * anything else is treated as a floating literal.

    Returns:
        ``(constant, None)``.
    """
    kind = node.type

    if kind == "int":
        # Support hex (0xFF), octal (077), and decimal literals
        literal = node.value
        has_unsigned_suffix = "u" in literal.lower()
        digits = literal.rstrip("uUlL")
        if digits.startswith(("0x", "0X")):
            int_val = int(digits, 16)
        elif digits.startswith("0") and len(digits) > 1 and digits[1:].isdigit():
            int_val = int(digits, 8)
        else:
            int_val = int(digits)
        constant = ir.values.Constant(ir.IntType(64), int_val)
        if has_unsigned_suffix:
            self._tag_unsigned(constant)
        return constant, None

    if kind == "char":
        # char constant like 'a' -> i8
        low_byte = self._char_constant_value(node.value) & 0xFF
        return ir.values.Constant(int8_t, low_byte), None

    if kind == "string":
        unquoted = node.value[1:-1]
        payload = self._string_bytes(self._process_escapes(unquoted) + "\00")
        array_type = ir.ArrayType(ir.IntType(8), len(payload))
        return ir.values.Constant(array_type, payload), None

    float_type = self._float_literal_ir_type(node.value)
    float_value = self._parse_float_constant(node.value)
    return ir.values.Constant(float_type, float_value), None
|
|
1090
|
+
|
|
1091
|
+
def codegen_Assignment(self, node):
    """Lower a simple or compound assignment.

    Both sides are evaluated first (lvalue, then rvalue).  If either yields
    no value an ``i64 0`` placeholder is returned.  For ``=`` the rvalue is
    converted to the type of the destination slot and stored.  For compound
    operators the operands are converted to a common type, the matching
    builder operation is applied and the result is stored back.

    Returns:
        ``(stored_value, lvalue_address)``.
    """

    lv, lv_addr = self.codegen(node.lvalue)
    rv, _ = self.codegen(node.rvalue)
    if lv is None or rv is None:
        return ir.Constant(int64_t, 0), None
    result = None

    # Builder operation for each (operator, operand-kind) pair; the signed
    # integer variants are the defaults and may be overridden below.
    dispatch_type_double = 1
    dispatch_type_int = 0
    dispatch_dict = {
        ("+=", dispatch_type_double): self.builder.fadd,
        ("+=", dispatch_type_int): self.builder.add,
        ("-=", dispatch_type_double): self.builder.fsub,
        ("-=", dispatch_type_int): self.builder.sub,
        ("*=", dispatch_type_double): self.builder.fmul,
        ("*=", dispatch_type_int): self.builder.mul,
        ("/=", dispatch_type_double): self.builder.fdiv,
        ("/=", dispatch_type_int): self.builder.sdiv,
        ("%=", dispatch_type_int): self.builder.srem,
        ("%=", dispatch_type_double): self.builder.frem,
        ("<<=", dispatch_type_int): self.builder.shl,
        (">>=", dispatch_type_int): self.builder.ashr,
        ("&=", dispatch_type_int): self.builder.and_,
        ("|=", dispatch_type_int): self.builder.or_,
        ("^=", dispatch_type_int): self.builder.xor,
    }
    is_unsigned = False
    # Promote mismatched types before compound assignment
    if isinstance(lv.type, ir.IntType) and isinstance(rv.type, ir.IntType):
        if node.op in ("<<=", ">>="):
            lv, rv, is_unsigned = self._shift_operand_conversion(lv, rv)
        else:
            lv, rv, is_unsigned = self._usual_arithmetic_conversion(lv, rv)
        dispatch_type = dispatch_type_int
    elif isinstance(lv.type, ir.IntType) and self._is_floating_ir_type(rv.type):
        lv = self._implicit_convert(lv, rv.type)
        dispatch_type = dispatch_type_double
    elif self._is_floating_ir_type(lv.type) and isinstance(rv.type, ir.IntType):
        rv = self._implicit_convert(rv, lv.type)
        dispatch_type = dispatch_type_double
    elif self._is_floating_ir_type(lv.type) and self._is_floating_ir_type(rv.type):
        if lv.type != rv.type:
            target = self._common_float_type(lv.type, rv.type)
            lv = self._implicit_convert(lv, target)
            rv = self._implicit_convert(rv, target)
        dispatch_type = dispatch_type_double
    else:
        # Any other combination (e.g. pointers) uses the "double" key; the
        # "=" and pointer paths below do not depend on the looked-up handle.
        dispatch_type = dispatch_type_double
    dispatch = (node.op, dispatch_type)
    # NOTE(review): handle is None for combinations missing from the table
    # (e.g. "<<=" on floating operands); calling it then raises TypeError.
    handle = dispatch_dict.get(dispatch)
    # Override to unsigned for /= %= >>= when operands are unsigned
    if dispatch_type == dispatch_type_int and is_unsigned:
        if node.op == "/=":
            handle = self.builder.udiv
        elif node.op == "%=":
            handle = self.builder.urem
        elif node.op == ">>=":
            handle = self.builder.lshr

    if node.op == "=":
        # Type coercion: match rv to the target's pointee type
        if lv_addr and hasattr(lv_addr.type, "pointee"):
            target_type = lv_addr.type.pointee
        else:
            target_type = lv.type
        if rv.type != target_type:
            rv = self._implicit_convert(rv, target_type)
        self._safe_store(rv, lv_addr)
        return rv, lv_addr  # return value for chained assignment
    else:
        # Pointer compound assignment: p += n, p -= n
        if isinstance(lv.type, ir.PointerType) and isinstance(rv.type, ir.IntType):
            # The offset is widened to a signed 64-bit index for the GEP.
            rv = self._integer_promotion(rv)
            rv = self._convert_int_value(rv, int64_t, result_unsigned=False)
            if node.op == "+=":
                addresult = self.builder.gep(lv, [rv], name="ptradd")
            elif node.op == "-=":
                neg = self.builder.neg(rv, "neg")
                addresult = self.builder.gep(lv, [neg], name="ptrsub")
            else:
                addresult = handle(lv, rv, "addtmp")
        else:
            addresult = handle(lv, rv, "addtmp")
        self._safe_store(addresult, lv_addr)
        return addresult, lv_addr
|
|
1177
|
+
|
|
1178
|
+
def codegen_UnaryOp(self, node):
    """Lower a unary operator.

    Handles pre/post increment and decrement, dereference (``*``),
    address-of (``&``), unary ``+``/``-``, logical not (``!``), bitwise
    not (``~``) and ``sizeof``.

    Increment/decrement picks the operation from the operand type:
    pointers step with a GEP, floating values use ``fadd``/``fsub`` (the
    integer ``add``/``sub`` opcodes are not valid on floating operands),
    and integers use ``add``/``sub``.  The updated value of an unsigned
    integer keeps its unsigned tag.

    Returns:
        ``(value, address)``; ``address`` is only set for ``*`` (the
        dereferenced pointer) and for ``&`` of an addressable operand.
    """

    result = None
    result_ptr = None

    if node.op in ("p++", "p--", "++", "--"):
        lv, lv_addr = self.codegen(node.expr)
        if lv is None:
            return ir.Constant(int64_t, 0), None
        is_post = node.op.startswith("p")
        is_inc = "+" in node.op
        if isinstance(lv.type, ir.PointerType):
            delta = ir.Constant(int64_t, 1 if is_inc else -1)
            new_val = self.builder.gep(lv, [delta], name="ptrincdec")
        elif self._is_floating_ir_type(lv.type):
            # float/double operands need the floating-point opcodes.
            one = ir.Constant(lv.type, 1.0)
            new_val = (
                self.builder.fadd(lv, one, "inc")
                if is_inc
                else self.builder.fsub(lv, one, "dec")
            )
        else:
            one = ir.Constant(lv.type, 1)
            new_val = (
                self.builder.add(lv, one, "inc")
                if is_inc
                else self.builder.sub(lv, one, "dec")
            )
        self._safe_store(new_val, lv_addr)
        # Propagate signedness so that e.g. ``++u / 2`` stays unsigned,
        # matching what the "-" and "~" branches do for their results.
        if isinstance(lv.type, ir.IntType) and self._is_unsigned_val(lv):
            self._tag_unsigned(new_val)
        result = lv if is_post else new_val

    elif node.op == "*":
        # Special case: *(T *)__builtin_va_arg(ap, ...) is lowered to a
        # call to a per-use placeholder function returning T.
        if (
            isinstance(node.expr, c_ast.Cast)
            and isinstance(node.expr.expr, c_ast.FuncCall)
            and isinstance(node.expr.expr.name, c_ast.ID)
            and node.expr.expr.name.name == "__builtin_va_arg"
        ):
            target_ptr_type = self._resolve_ast_type(node.expr.to_type.type)
            va_args = (
                node.expr.expr.args.exprs if node.expr.expr.args is not None else []
            )
            if isinstance(target_ptr_type, ir.PointerType) and va_args:
                ap_addr, _ = self.codegen(va_args[0])
                if isinstance(getattr(ap_addr, "type", None), ir.PointerType):
                    self._vaarg_counter += 1
                    name = f"__pcc_va_arg_{self._vaarg_counter}"
                    placeholder = self.module.globals.get(name)
                    if placeholder is None:
                        placeholder = ir.Function(
                            self.module,
                            ir.FunctionType(
                                target_ptr_type.pointee, [ap_addr.type]
                            ),
                            name=name,
                        )
                    result = self.builder.call(
                        placeholder,
                        [ap_addr],
                        name=f"vaargtmp.{self._vaarg_counter}",
                    )
                    return result, None
        name_ir, name_ptr = self.codegen(node.expr)
        if name_ptr is None and isinstance(name_ir.type, ir.ArrayType):
            # An array value without an address must decay to a pointer
            # before it can be loaded from.
            result_ptr = self._decay_array_value_to_pointer(name_ir, "derefarray")
        else:
            result_ptr = name_ir
        result = self._safe_load(result_ptr)
        if self._is_unsigned_pointee(name_ir) or self._is_unsigned_pointee(
            result_ptr
        ):
            self._tag_unsigned(result)

    elif node.op == "&":
        name_ir, name_ptr = self.codegen(node.expr)
        if name_ptr is None:
            # Functions are already first-class pointers in LLVM IR.
            # Taking their address should preserve the function symbol,
            # not turn it into a null pointer.
            result = name_ir
            result_ptr = None
        else:
            result_ptr = name_ptr
            result = result_ptr
            if self._is_unsigned_binding(result_ptr):
                self._tag_unsigned_pointee(result)
            if self._is_unsigned_return_binding(result_ptr):
                self._tag_unsigned_return(result)

    elif node.op == "+":
        operand, _ = self.codegen(node.expr)
        if isinstance(operand.type, ir.IntType):
            operand = self._integer_promotion(operand)
        result = operand  # unary plus is a no-op

    elif node.op == "-":
        operand, _ = self.codegen(node.expr)
        if isinstance(operand.type, ir.IntType):
            operand = self._integer_promotion(operand)
            result = self.builder.neg(operand, "negtmp")
            if self._is_unsigned_val(operand):
                self._tag_unsigned(result)
        else:
            result = self.builder.fneg(operand, "negtmp")

    elif node.op == "!":
        # The i1 comparison result is widened to i64 in every case.
        operand, _ = self.codegen(node.expr)
        if isinstance(operand.type, ir.PointerType):
            null = ir.Constant(operand.type, None)
            cmp = self.builder.icmp_unsigned("==", operand, null, "nottmp")
            result = self.builder.zext(cmp, int64_t, "notres")
        elif isinstance(operand.type, ir.IntType):
            cmp = self.builder.icmp_signed(
                "==", operand, ir.Constant(operand.type, 0), "nottmp"
            )
            result = self.builder.zext(cmp, int64_t, "notres")
        else:
            cmp = self.builder.fcmp_ordered(
                "==", operand, ir.Constant(operand.type, 0.0), "nottmp"
            )
            result = self.builder.zext(cmp, int64_t, "notres")

    elif node.op == "~":
        operand, _ = self.codegen(node.expr)
        if isinstance(operand.type, ir.IntType):
            operand = self._integer_promotion(operand)
            result = self.builder.not_(operand, "invtmp")
            if self._is_unsigned_val(operand):
                self._tag_unsigned(result)

    elif node.op == "sizeof":
        result = self._codegen_sizeof(node.expr)

    return result, result_ptr
|
|
1305
|
+
|
|
1306
|
+
def _codegen_sizeof(self, expr):
    """Return ``sizeof(expr)`` as an unsigned-tagged ``i64`` constant.

    The measured type comes from the type name for ``sizeof(type)``, from
    the environment for a plain identifier, and otherwise from
    ``_get_expr_ir_type`` after generating code for the expression.
    """
    if isinstance(expr, c_ast.Typename):
        measured_type = self._resolve_ast_type(expr.type)
    elif isinstance(expr, c_ast.ID):
        measured_type, _ = self.lookup(expr.name)
    else:
        val, _ = self.codegen(expr)
        measured_type = self._get_expr_ir_type(expr, getattr(val, "type", None))

    byte_count = self._ir_type_size(measured_type)
    return self._tag_unsigned(ir.Constant(int64_t, byte_count))
|
|
1320
|
+
|
|
1321
|
+
def _resolve_type_str(self, type_str, depth=0):
|
|
1322
|
+
"""Resolve typedef'd type names to their base type string."""
|
|
1323
|
+
if depth > 10:
|
|
1324
|
+
return type_str # prevent infinite recursion
|
|
1325
|
+
if isinstance(type_str, list):
|
|
1326
|
+
type_str = type_str[0] if len(type_str) == 1 else type_str
|
|
1327
|
+
if isinstance(type_str, list):
|
|
1328
|
+
return type_str # multi-word type, not a typedef
|
|
1329
|
+
key = f"__typedef_{type_str}"
|
|
1330
|
+
if key in self.env:
|
|
1331
|
+
resolved = self.env[key]
|
|
1332
|
+
if isinstance(resolved, str):
|
|
1333
|
+
# Could be a __struct_ reference or a base type name
|
|
1334
|
+
if resolved.startswith("__struct_"):
|
|
1335
|
+
struct_name = resolved[len("__struct_") :]
|
|
1336
|
+
if struct_name in self.env:
|
|
1337
|
+
return self.env[struct_name][0]
|
|
1338
|
+
return int8_t # opaque
|
|
1339
|
+
# Recursively resolve further typedefs
|
|
1340
|
+
return self._resolve_type_str(resolved, depth + 1)
|
|
1341
|
+
if isinstance(resolved, ir.Type):
|
|
1342
|
+
return resolved
|
|
1343
|
+
# resolved is a list — recursively resolve single-element lists
|
|
1344
|
+
if isinstance(resolved, list) and len(resolved) == 1:
|
|
1345
|
+
return self._resolve_type_str(resolved[0], depth + 1)
|
|
1346
|
+
return resolved
|
|
1347
|
+
return type_str
|
|
1348
|
+
|
|
1349
|
+
def _get_ir_type(self, type_str):
    """Return the IR type for ``type_str`` after resolving typedefs.

    If typedef resolution already produced an ``ir.Type`` it is returned
    directly; otherwise the resolved name is passed to ``get_ir_type``.
    """
    resolved = self._resolve_type_str(type_str)
    return resolved if isinstance(resolved, ir.Type) else get_ir_type(resolved)
|
|
1355
|
+
|
|
1356
|
+
def _is_unsigned_type_names(self, type_str):
    """Tell whether a type name (or list of type words) denotes an unsigned type.

    Lists are checked with ``_is_unsigned_names`` first; a single-element
    list is then treated like its only element, and a longer list is looked
    up in ``_UNSIGNED_TYPE_NAMES`` by its sorted, space-joined spelling.
    Strings are followed through typedef entries in the environment and
    finally compared against ``_UNSIGNED_TYPE_NAMES`` and ``size_t``.
    """
    if not isinstance(type_str, list):
        # String: check typedef chain
        alias_key = f"__typedef_{type_str}"
        if alias_key in self.env:
            aliased = self.env[alias_key]
            if isinstance(aliased, (list, str)):
                return self._is_unsigned_type_names(aliased)
        return type_str in _UNSIGNED_TYPE_NAMES or type_str == "size_t"

    if _is_unsigned_names(type_str):
        return True
    # Single-element list: check typedef chain
    if len(type_str) == 1:
        return self._is_unsigned_type_names(type_str[0])
    canonical = " ".join(sorted(type_str))
    return canonical in _UNSIGNED_TYPE_NAMES
|
|
1375
|
+
|
|
1376
|
+
def _is_unsigned_scalar_decl_type(self, node_type):
    """Return whether ``node_type`` declares an unsigned scalar.

    Only a ``TypeDecl`` wrapping an ``IdentifierType`` can qualify; its
    name list is then checked with ``_is_unsigned_type_names``.
    """
    if isinstance(node_type, c_ast.TypeDecl) and isinstance(
        node_type.type, c_ast.IdentifierType
    ):
        return self._is_unsigned_type_names(node_type.type.names)
    return False
|
|
1383
|
+
|
|
1384
|
+
def _has_unsigned_scalar_pointee(self, node_type):
    """Return whether an array or pointer declarator has an unsigned scalar element.

    Anything that is not an ``ArrayDecl`` or ``PtrDecl`` yields ``False``.
    """
    if not isinstance(node_type, (c_ast.ArrayDecl, c_ast.PtrDecl)):
        return False
    return self._is_unsigned_scalar_decl_type(node_type.type)
|
|
1388
|
+
|
|
1389
|
+
def _func_decl_returns_unsigned(self, node_type):
    """Return whether ``node_type`` is a ``FuncDecl`` with an unsigned scalar return type."""
    if not isinstance(node_type, c_ast.FuncDecl):
        return False
    return self._is_unsigned_scalar_decl_type(node_type.type)
|
|
1393
|
+
|
|
1394
|
+
def _tag_value_from_decl_type(self, value, decl_type):
    """Copy signedness information from a declarator onto an IR value.

    * Integer values are tagged unsigned when the declarator is an
      unsigned scalar, and have the tag cleared for any other ``TypeDecl``.
    * Pointer values are tagged as pointing to unsigned data when the
      declarator's element type is an unsigned scalar.
    * Pointer values are tagged as returning unsigned when the declarator
      is a pointer to a function with an unsigned scalar return type.

    Returns:
        ``value`` itself (``None`` is passed through).
    """
    if value is None:
        return value

    value_type = getattr(value, "type", None)
    is_pointer_value = isinstance(value_type, ir.PointerType)

    if isinstance(value_type, ir.IntType):
        if self._is_unsigned_scalar_decl_type(decl_type):
            self._tag_unsigned(value)
        elif isinstance(decl_type, c_ast.TypeDecl):
            self._clear_unsigned(value)

    if self._has_unsigned_scalar_pointee(decl_type) and is_pointer_value:
        self._tag_unsigned_pointee(value)

    points_to_unsigned_func = isinstance(
        decl_type, c_ast.PtrDecl
    ) and self._func_decl_returns_unsigned(decl_type.type)
    if points_to_unsigned_func and is_pointer_value:
        self._tag_unsigned_return(value)

    return value
|
|
1413
|
+
|
|
1414
|
+
def _build_const_array_init(self, init_list, array_type, elem_ir_type):
    """Build a constant initializer for a global array.

    Args:
        init_list: The ``InitList`` node holding the element initializers.
        array_type: IR type of the array being initialized.
        elem_ir_type: Element type used when ``array_type`` is not an
            ``ir.ArrayType``; also forwarded to nested initializer lists.

    Returns:
        An ``ir.Constant`` of ``array_type``.  Elements that cannot be
        evaluated become zero.  When the list has fewer entries than the
        array has elements, the remainder is zero-filled, as C requires for
        partially initialized aggregates.  If the aggregate constant still
        cannot be built, a zero initializer is returned.
    """
    actual_elem = (
        array_type.element if isinstance(array_type, ir.ArrayType) else elem_ir_type
    )
    values = []
    for expr in init_list.exprs:
        if isinstance(expr, c_ast.InitList):
            values.append(
                self._build_const_array_init(expr, actual_elem, elem_ir_type)
            )
        else:
            try:
                val = self._eval_const_expr(expr)
                c = ir.Constant(actual_elem, val)
                str(c)  # verify serializable
                values.append(c)
            except Exception:
                values.append(ir.Constant(actual_elem, None))

    # Zero-fill the tail so the constant has exactly one value per array
    # element; a short value list would not match the array type.
    if isinstance(array_type, ir.ArrayType):
        missing = array_type.count - len(values)
        if missing > 0:
            values.extend(ir.Constant(actual_elem, None) for _ in range(missing))

    try:
        result = ir.Constant(array_type, values)
        str(result)  # verify
        return result
    except Exception:
        return ir.Constant(array_type, None)
|
|
1440
|
+
|
|
1441
|
+
def _zero_initializer(self, ir_type):
    """Return the zero constant for ``ir_type``.

    Floating types get ``0.0`` and integer types get ``0``; pointers and
    all remaining types are built from ``None``.
    """
    if self._is_floating_ir_type(ir_type):
        zero_value = 0.0
    elif isinstance(ir_type, ir.IntType):
        zero_value = 0
    else:
        zero_value = None
    return ir.Constant(ir_type, zero_value)
|
|
1449
|
+
|
|
1450
|
+
def _make_global_string_constant(self, raw, name_hint="str"):
    """Create an internal constant global holding a NUL-terminated string.

    Args:
        raw: String body without surrounding quotes; C escapes are expanded.
        name_hint: Base for the module-unique global name.

    Returns:
        The new ``ir.GlobalVariable`` of type ``[n x i8]``.
    """
    payload = self._string_bytes(self._process_escapes(raw) + "\00")
    array_type = ir.ArrayType(int8_t, len(payload))
    unique_name = self.module.get_unique_name(name_hint)

    string_global = ir.GlobalVariable(self.module, array_type, unique_name)
    string_global.initializer = ir.Constant(array_type, payload)
    string_global.global_constant = True
    string_global.linkage = "internal"
    return string_global
|
|
1461
|
+
|
|
1462
|
+
def _const_pointer_to_first_elem(self, gv, target_type):
    """Return a constant pointer to element 0 of the global ``gv``.

    The pointer is produced with a ``[0, 0]`` constant GEP and bitcast to
    ``target_type`` when its type differs.
    """
    zero = ir.Constant(ir.IntType(32), 0)
    first_elem = gv.gep([zero, zero])
    if first_elem.type == target_type:
        return first_elem
    return first_elem.bitcast(target_type)
|
|
1466
|
+
|
|
1467
|
+
def _is_little_endian(self):
|
|
1468
|
+
return not str(self.module.data_layout).startswith("E")
|
|
1469
|
+
|
|
1470
|
+
def _zero_bytes(self, size):
    """Return a list of ``size`` separate ``i8 0`` constants."""
    zeros = []
    for _ in range(size):
        zeros.append(ir.Constant(int8_t, 0))
    return zeros
|
|
1472
|
+
|
|
1473
|
+
def _scalar_init_node(self, init_node):
    """Unwrap nested initializer lists down to their first scalar element.

    Returns the first non-``InitList`` node found by repeatedly taking the
    first expression, or ``None`` if an empty list is met on the way.
    """
    current = init_node
    while isinstance(current, c_ast.InitList):
        if not current.exprs:
            return None
        current = current.exprs[0]
    return current
|
|
1479
|
+
|
|
1480
|
+
def _build_pointer_const(self, init_node, ir_type):
    """Build a constant of pointer type ``ir_type`` from an initializer node.

    Handles, in order: brace lists (first element, or null when empty),
    string literals (pointer into a new global), bare identifiers naming a
    function or global (arrays decay to their first element), ``&name``
    expressions, and constant expressions evaluating to 0 (null).
    Returns ``None`` when none of these apply.
    """
    if isinstance(init_node, c_ast.InitList):
        if not init_node.exprs:
            return ir.Constant(ir_type, None)
        return self._build_pointer_const(init_node.exprs[0], ir_type)

    is_string_literal = (
        isinstance(init_node, c_ast.Constant)
        and getattr(init_node, "type", None) == "string"
    )
    if is_string_literal:
        # Strip the surrounding quotes before materialising the literal.
        literal = self._make_global_string_constant(init_node.value[1:-1])
        return self._const_pointer_to_first_elem(literal, ir_type)

    if isinstance(init_node, c_ast.ID):
        try:
            _, target = self.lookup(init_node.name)
        except Exception:
            target = None
        if isinstance(target, ir.Function):
            if target.type == ir_type:
                return target
            return target.bitcast(ir_type)
        if isinstance(target, ir.GlobalVariable):
            if isinstance(target.value_type, ir.ArrayType):
                return self._const_pointer_to_first_elem(target, ir_type)
            if target.type == ir_type:
                return target
            if isinstance(target.type, ir.PointerType):
                return target.bitcast(ir_type)
        # Unresolved identifiers fall through to the constant-expression path.

    is_address_of_name = (
        isinstance(init_node, c_ast.UnaryOp)
        and init_node.op == "&"
        and isinstance(init_node.expr, c_ast.ID)
    )
    if is_address_of_name:
        try:
            _, target = self.lookup(init_node.expr.name)
        except Exception:
            target = None
        if isinstance(target, ir.Function):
            if target.type == ir_type:
                return target
            return target.bitcast(ir_type)
        if isinstance(target, ir.GlobalVariable):
            if target.type == ir_type:
                return target
            if isinstance(target.type, ir.PointerType):
                return target.bitcast(ir_type)

    try:
        if self._eval_const_expr(init_node) == 0:
            return ir.Constant(ir_type, None)
    except Exception:
        # Not a constant expression: report "no constant available".
        pass
    return None
|
|
1528
|
+
|
|
1529
|
+
def _const_int_to_bytes(self, value, byte_width):
|
|
1530
|
+
if byte_width <= 0:
|
|
1531
|
+
return []
|
|
1532
|
+
mask = (1 << (byte_width * 8)) - 1
|
|
1533
|
+
raw = int(value) & mask
|
|
1534
|
+
return [
|
|
1535
|
+
ir.Constant(int8_t, b)
|
|
1536
|
+
for b in raw.to_bytes(
|
|
1537
|
+
byte_width,
|
|
1538
|
+
byteorder="little" if self._is_little_endian() else "big",
|
|
1539
|
+
signed=False,
|
|
1540
|
+
)
|
|
1541
|
+
]
|
|
1542
|
+
|
|
1543
|
+
def _split_int_constant_to_bytes(self, int_const, byte_width):
|
|
1544
|
+
if byte_width <= 0:
|
|
1545
|
+
return []
|
|
1546
|
+
|
|
1547
|
+
raw_const = getattr(int_const, "constant", None)
|
|
1548
|
+
if isinstance(raw_const, int):
|
|
1549
|
+
return self._const_int_to_bytes(raw_const, byte_width)
|
|
1550
|
+
|
|
1551
|
+
int_bits = byte_width * 8
|
|
1552
|
+
if int_const.type.width != int_bits:
|
|
1553
|
+
if int_const.type.width < int_bits:
|
|
1554
|
+
int_const = int_const.zext(ir.IntType(int_bits))
|
|
1555
|
+
else:
|
|
1556
|
+
int_const = int_const.trunc(ir.IntType(int_bits))
|
|
1557
|
+
|
|
1558
|
+
byte_values = []
|
|
1559
|
+
for i in range(byte_width):
|
|
1560
|
+
shift_bits = 8 * (i if self._is_little_endian() else (byte_width - 1 - i))
|
|
1561
|
+
part = int_const
|
|
1562
|
+
if shift_bits:
|
|
1563
|
+
part = part.lshr(ir.Constant(part.type, shift_bits))
|
|
1564
|
+
if part.type.width != 8:
|
|
1565
|
+
part = part.trunc(int8_t)
|
|
1566
|
+
byte_values.append(part)
|
|
1567
|
+
return byte_values
|
|
1568
|
+
|
|
1569
|
+
def _pointer_const_to_bytes(self, ptr_const):
    """Return the byte-wise image of a pointer constant.

    An ``ir.Constant`` whose payload is ``None`` (a null pointer) becomes
    zero bytes; anything else is converted with ``ptrtoint`` to ``i64`` and
    split into bytes.
    """
    width = self._ir_type_size(ptr_const.type)
    is_null = (
        isinstance(ptr_const, ir.Constant)
        and getattr(ptr_const, "constant", None) is None
    )
    if is_null:
        return self._zero_bytes(width)
    as_int = ptr_const.ptrtoint(int64_t)
    return self._split_int_constant_to_bytes(as_int, width)
|
|
1578
|
+
|
|
1579
|
+
def _bytes_to_int_constant(self, byte_values, int_type):
    """Reassemble byte constants into a single constant of ``int_type``.

    Only the first ``int_type.width // 8`` entries are used; missing entries
    are treated as zero. Entries whose ``constant`` attribute is not a
    Python int contribute zero. The result is stored as a signed value.
    """
    bits = int_type.width
    n_bytes = bits // 8
    chunk = list(byte_values[:n_bytes])
    if len(chunk) < n_bytes:
        chunk.extend(self._zero_bytes(n_bytes - len(chunk)))

    combined = 0
    for position, byte_const in enumerate(chunk):
        if self._is_little_endian():
            shift = 8 * position
        else:
            shift = 8 * (n_bytes - 1 - position)
        octet = getattr(byte_const, "constant", 0)
        if not isinstance(octet, int):
            # Symbolic bytes (constant expressions) cannot be folded here.
            octet = 0
        combined |= (octet & 0xFF) << shift

    combined &= (1 << bits) - 1
    if combined & (1 << (bits - 1)):
        # Reinterpret the top bit as the sign bit.
        combined -= 1 << bits
    return ir.Constant(int_type, combined)
|
|
1600
|
+
|
|
1601
|
+
def _const_init_bytes(self, init_node, ir_type):
|
|
1602
|
+
size = self._ir_type_size(ir_type)
|
|
1603
|
+
if init_node is None:
|
|
1604
|
+
return self._zero_bytes(size)
|
|
1605
|
+
|
|
1606
|
+
if getattr(ir_type, "is_union", False):
|
|
1607
|
+
raw = self._zero_bytes(size)
|
|
1608
|
+
member_names = getattr(ir_type, "members", None) or list(
|
|
1609
|
+
ir_type.member_types.keys()
|
|
1610
|
+
)
|
|
1611
|
+
if not member_names:
|
|
1612
|
+
return raw
|
|
1613
|
+
|
|
1614
|
+
first_name = member_names[0]
|
|
1615
|
+
member_type = ir_type.member_types[first_name]
|
|
1616
|
+
member_init = init_node
|
|
1617
|
+
if isinstance(init_node, c_ast.InitList):
|
|
1618
|
+
exprs = init_node.exprs or []
|
|
1619
|
+
if not exprs:
|
|
1620
|
+
member_init = None
|
|
1621
|
+
elif isinstance(member_type, (ir.ArrayType, ir.LiteralStructType)):
|
|
1622
|
+
member_init = (
|
|
1623
|
+
exprs[0]
|
|
1624
|
+
if len(exprs) == 1 and isinstance(exprs[0], c_ast.InitList)
|
|
1625
|
+
else init_node
|
|
1626
|
+
)
|
|
1627
|
+
else:
|
|
1628
|
+
member_init = exprs[0]
|
|
1629
|
+
|
|
1630
|
+
member_bytes = self._const_init_bytes(member_init, member_type)
|
|
1631
|
+
raw[: min(size, len(member_bytes))] = member_bytes[:size]
|
|
1632
|
+
return raw
|
|
1633
|
+
|
|
1634
|
+
if isinstance(ir_type, ir.PointerType):
|
|
1635
|
+
ptr_const = self._build_pointer_const(init_node, ir_type)
|
|
1636
|
+
if ptr_const is None:
|
|
1637
|
+
return self._zero_bytes(size)
|
|
1638
|
+
return self._pointer_const_to_bytes(ptr_const)
|
|
1639
|
+
|
|
1640
|
+
if self._is_floating_ir_type(ir_type):
|
|
1641
|
+
scalar_node = self._scalar_init_node(init_node)
|
|
1642
|
+
if scalar_node is None:
|
|
1643
|
+
value = 0.0
|
|
1644
|
+
elif isinstance(scalar_node, c_ast.Constant):
|
|
1645
|
+
try:
|
|
1646
|
+
value = self._parse_float_constant(scalar_node.value)
|
|
1647
|
+
except ValueError:
|
|
1648
|
+
value = float(self._eval_const_expr(scalar_node))
|
|
1649
|
+
else:
|
|
1650
|
+
value = float(self._eval_const_expr(scalar_node))
|
|
1651
|
+
fmt = "d" if isinstance(ir_type, ir.DoubleType) else "f"
|
|
1652
|
+
packed = struct.pack(
|
|
1653
|
+
("<" if self._is_little_endian() else ">") + fmt,
|
|
1654
|
+
value,
|
|
1655
|
+
)
|
|
1656
|
+
return [ir.Constant(int8_t, b) for b in packed]
|
|
1657
|
+
|
|
1658
|
+
if isinstance(ir_type, ir.IntType):
|
|
1659
|
+
scalar_node = self._scalar_init_node(init_node)
|
|
1660
|
+
if scalar_node is None:
|
|
1661
|
+
return self._zero_bytes(size)
|
|
1662
|
+
return self._const_int_to_bytes(self._eval_const_expr(scalar_node), size)
|
|
1663
|
+
|
|
1664
|
+
if isinstance(ir_type, ir.ArrayType):
|
|
1665
|
+
if (
|
|
1666
|
+
isinstance(init_node, c_ast.Constant)
|
|
1667
|
+
and getattr(init_node, "type", None) == "string"
|
|
1668
|
+
and isinstance(ir_type.element, ir.IntType)
|
|
1669
|
+
and ir_type.element.width == 8
|
|
1670
|
+
):
|
|
1671
|
+
raw = init_node.value[1:-1]
|
|
1672
|
+
processed = self._process_escapes(raw)
|
|
1673
|
+
data = self._string_bytes(processed + "\00")
|
|
1674
|
+
if len(data) < ir_type.count:
|
|
1675
|
+
data.extend(b"\x00" * (ir_type.count - len(data)))
|
|
1676
|
+
else:
|
|
1677
|
+
data = data[: ir_type.count]
|
|
1678
|
+
return [ir.Constant(int8_t, b) for b in data]
|
|
1679
|
+
|
|
1680
|
+
if isinstance(init_node, c_ast.InitList):
|
|
1681
|
+
values = []
|
|
1682
|
+
for i in range(ir_type.count):
|
|
1683
|
+
expr = init_node.exprs[i] if i < len(init_node.exprs) else None
|
|
1684
|
+
values.extend(self._const_init_bytes(expr, ir_type.element))
|
|
1685
|
+
return values
|
|
1686
|
+
|
|
1687
|
+
return self._zero_bytes(size)
|
|
1688
|
+
|
|
1689
|
+
if isinstance(ir_type, ir.LiteralStructType):
|
|
1690
|
+
raw = self._zero_bytes(size)
|
|
1691
|
+
if not isinstance(init_node, c_ast.InitList):
|
|
1692
|
+
return raw
|
|
1693
|
+
|
|
1694
|
+
offset = 0
|
|
1695
|
+
for i, member_type in enumerate(ir_type.elements):
|
|
1696
|
+
align = self._ir_type_align(member_type)
|
|
1697
|
+
offset = (offset + align - 1) & ~(align - 1)
|
|
1698
|
+
expr = init_node.exprs[i] if i < len(init_node.exprs) else None
|
|
1699
|
+
field_bytes = self._const_init_bytes(expr, member_type)
|
|
1700
|
+
field_size = self._ir_type_size(member_type)
|
|
1701
|
+
raw[offset : offset + field_size] = field_bytes[:field_size]
|
|
1702
|
+
offset += field_size
|
|
1703
|
+
return raw
|
|
1704
|
+
|
|
1705
|
+
scalar_node = self._scalar_init_node(init_node)
|
|
1706
|
+
if scalar_node is None:
|
|
1707
|
+
return self._zero_bytes(size)
|
|
1708
|
+
try:
|
|
1709
|
+
return self._const_int_to_bytes(self._eval_const_expr(scalar_node), size)
|
|
1710
|
+
except Exception:
|
|
1711
|
+
return self._zero_bytes(size)
|
|
1712
|
+
|
|
1713
|
+
def _build_const_init(self, init_node, ir_type):
|
|
1714
|
+
if init_node is None:
|
|
1715
|
+
return self._zero_initializer(ir_type)
|
|
1716
|
+
|
|
1717
|
+
if getattr(ir_type, "is_union", False):
|
|
1718
|
+
try:
|
|
1719
|
+
raw = self._const_init_bytes(init_node, ir_type)
|
|
1720
|
+
fields = []
|
|
1721
|
+
head_type = ir_type.elements[0]
|
|
1722
|
+
if not isinstance(head_type, ir.IntType):
|
|
1723
|
+
return self._zero_initializer(ir_type)
|
|
1724
|
+
head_size = self._ir_type_size(head_type)
|
|
1725
|
+
fields.append(self._bytes_to_int_constant(raw[:head_size], head_type))
|
|
1726
|
+
if len(ir_type.elements) > 1:
|
|
1727
|
+
tail_type = ir_type.elements[1]
|
|
1728
|
+
tail_size = self._ir_type_size(tail_type)
|
|
1729
|
+
tail_bytes = raw[head_size : head_size + tail_size]
|
|
1730
|
+
fields.append(ir.Constant(tail_type, tail_bytes))
|
|
1731
|
+
return ir.Constant(ir_type, fields)
|
|
1732
|
+
except Exception:
|
|
1733
|
+
return self._zero_initializer(ir_type)
|
|
1734
|
+
|
|
1735
|
+
if isinstance(ir_type, ir.PointerType):
|
|
1736
|
+
ptr_const = self._build_pointer_const(init_node, ir_type)
|
|
1737
|
+
if ptr_const is not None:
|
|
1738
|
+
return ptr_const
|
|
1739
|
+
return self._zero_initializer(ir_type)
|
|
1740
|
+
|
|
1741
|
+
if isinstance(ir_type, ir.ArrayType):
|
|
1742
|
+
if (
|
|
1743
|
+
isinstance(init_node, c_ast.Constant)
|
|
1744
|
+
and getattr(init_node, "type", None) == "string"
|
|
1745
|
+
):
|
|
1746
|
+
raw = init_node.value[1:-1]
|
|
1747
|
+
processed = self._process_escapes(raw)
|
|
1748
|
+
data = self._string_bytes(processed + "\00")
|
|
1749
|
+
if len(data) < ir_type.count:
|
|
1750
|
+
data.extend(b"\x00" * (ir_type.count - len(data)))
|
|
1751
|
+
else:
|
|
1752
|
+
data = data[: ir_type.count]
|
|
1753
|
+
try:
|
|
1754
|
+
return ir.Constant(ir_type, data)
|
|
1755
|
+
except Exception:
|
|
1756
|
+
return self._zero_initializer(ir_type)
|
|
1757
|
+
|
|
1758
|
+
if isinstance(init_node, c_ast.InitList):
|
|
1759
|
+
values = []
|
|
1760
|
+
for i in range(ir_type.count):
|
|
1761
|
+
expr = init_node.exprs[i] if i < len(init_node.exprs) else None
|
|
1762
|
+
values.append(self._build_const_init(expr, ir_type.element))
|
|
1763
|
+
try:
|
|
1764
|
+
return ir.Constant(ir_type, values)
|
|
1765
|
+
except Exception:
|
|
1766
|
+
return self._zero_initializer(ir_type)
|
|
1767
|
+
|
|
1768
|
+
return self._zero_initializer(ir_type)
|
|
1769
|
+
|
|
1770
|
+
if isinstance(ir_type, ir.LiteralStructType):
|
|
1771
|
+
if isinstance(init_node, c_ast.InitList):
|
|
1772
|
+
values = []
|
|
1773
|
+
for i, member_type in enumerate(ir_type.elements):
|
|
1774
|
+
expr = init_node.exprs[i] if i < len(init_node.exprs) else None
|
|
1775
|
+
values.append(self._build_const_init(expr, member_type))
|
|
1776
|
+
try:
|
|
1777
|
+
return ir.Constant(ir_type, values)
|
|
1778
|
+
except Exception:
|
|
1779
|
+
return self._zero_initializer(ir_type)
|
|
1780
|
+
return self._zero_initializer(ir_type)
|
|
1781
|
+
|
|
1782
|
+
if isinstance(init_node, c_ast.InitList):
|
|
1783
|
+
if init_node.exprs:
|
|
1784
|
+
return self._build_const_init(init_node.exprs[0], ir_type)
|
|
1785
|
+
return self._zero_initializer(ir_type)
|
|
1786
|
+
|
|
1787
|
+
try:
|
|
1788
|
+
val = self._eval_const_expr(init_node)
|
|
1789
|
+
result = ir.Constant(ir_type, val)
|
|
1790
|
+
str(result)
|
|
1791
|
+
return result
|
|
1792
|
+
except Exception:
|
|
1793
|
+
return self._zero_initializer(ir_type)
|
|
1794
|
+
|
|
1795
|
+
def _init_array(self, base_addr, init_list, elem_ir_type, prefix_idx):
    """Store every element of a (possibly nested) InitList into an array.

    ``prefix_idx`` is the list of GEP indices leading to the current
    nesting level; each leaf expression is generated, converted to
    ``elem_ir_type`` and stored at ``base_addr[prefix_idx..., i]``.
    """
    for position, item in enumerate(init_list.exprs):
        path = prefix_idx + [ir.Constant(ir.IntType(32), position)]
        if isinstance(item, c_ast.InitList):
            # Nested braces address one dimension deeper.
            self._init_array(base_addr, item, elem_ir_type, path)
            continue
        generated, _ = self.codegen(item)
        converted = self._implicit_convert(generated, elem_ir_type)
        slot = self.builder.gep(base_addr, path, inbounds=True)
        self._safe_store(converted, slot)
|
|
1806
|
+
|
|
1807
|
+
def _build_array_ir_type(self, array_decl):
    """Translate a chain of ``ArrayDecl`` nodes into a nested ``ir.ArrayType``.

    Dimensions are collected outermost-first (a missing dimension counts as
    0) and recorded on the result as ``dim_array``. A ``void`` element type
    is replaced by ``i8``.
    """
    extents = []
    cursor = array_decl
    while isinstance(cursor, c_ast.ArrayDecl):
        extent = self._eval_dim(cursor.dim) if cursor.dim else 0
        extents.append(extent)
        cursor = cursor.type

    element_type = self._resolve_ast_type(cursor)
    if isinstance(element_type, ir.VoidType):
        element_type = int8_t

    # Wrap from the innermost dimension outwards.
    result = element_type
    for extent in reversed(extents):
        result = ir.ArrayType(result, extent)
    result.dim_array = extents
    return result
|
|
1821
|
+
|
|
1822
|
+
def _resolve_param_type(self, param):
    """Return the IR type of a function parameter declaration.

    Array parameters (declared directly or resolved to an array type) are
    adjusted to a pointer to their element type. A ``void`` parameter
    yields ``None``, which callers treat as "no parameter".
    """
    declared = param.type
    if isinstance(declared, c_ast.ArrayDecl):
        array_type = self._build_array_ir_type(declared)
        return ir.PointerType(array_type.element)

    resolved = self._resolve_ast_type(declared)
    if isinstance(resolved, ir.ArrayType):
        return ir.PointerType(resolved.element)
    if isinstance(resolved, ir.VoidType):
        return None  # void params mean "no params" in C
    return resolved
|
|
1833
|
+
|
|
1834
|
+
def _resolve_ast_type(self, node_type):
    """Map an AST type node to an IR type.

    Pointer declarators resolve their pointee recursively (function
    pointers via ``_build_func_ptr_type``; ``void *`` becomes ``i8*``).
    ``TypeDecl`` nodes dispatch on identifier, struct or union specifiers.
    ``ArrayDecl`` nodes resolve to ``i8*``; every other case is ``i64``.
    """
    if isinstance(node_type, c_ast.PtrDecl):
        target = node_type.type
        if isinstance(target, c_ast.FuncDecl):
            return self._build_func_ptr_type(target)
        pointee = self._resolve_ast_type(target)
        if isinstance(pointee, ir.VoidType):
            return voidptr_t
        return ir.PointerType(pointee)

    if isinstance(node_type, c_ast.TypeDecl):
        specifier = node_type.type
        if isinstance(specifier, c_ast.IdentifierType):
            return self._get_ir_type(specifier.names)
        if isinstance(specifier, c_ast.Struct):
            return self.codegen_Struct(specifier)
        if isinstance(specifier, c_ast.Union):
            return self.codegen_Union(specifier)
        return int64_t

    if isinstance(node_type, c_ast.ArrayDecl):
        return voidptr_t

    return int64_t
|
|
1855
|
+
|
|
1856
|
+
def _eval_dim(self, dim_node):
|
|
1857
|
+
"""Evaluate array dimension (may be a constant or expression)."""
|
|
1858
|
+
if dim_node is None:
|
|
1859
|
+
return 0
|
|
1860
|
+
if isinstance(dim_node, c_ast.Constant):
|
|
1861
|
+
v = dim_node.value.rstrip("uUlL")
|
|
1862
|
+
return int(v, 0) # handles hex/octal/decimal
|
|
1863
|
+
return self._eval_const_expr(dim_node)
|
|
1864
|
+
|
|
1865
|
+
def _build_func_ptr_type(self, func_decl_node):
    """Build an IR function pointer type from a FuncDecl AST node.

    The return type comes from ``self.codegen(func_decl_node)``. Parameters
    given as ``Typename`` are resolved with ``_resolve_ast_type`` (``void``
    is dropped); parameters given as ``Decl`` go through
    ``_resolve_param_type``. A trailing ``...`` marks the resulting
    function type as variadic instead of being silently discarded.
    """
    ret_ir, _ = self.codegen(func_decl_node)
    param_types = []
    is_variadic = False
    if func_decl_node.args:
        for param in func_decl_node.args.params:
            if isinstance(param, c_ast.EllipsisParam):
                # Remember the ellipsis so calls through this pointer may
                # pass extra arguments.
                is_variadic = True
                continue
            if isinstance(param, c_ast.Typename):
                t = self._resolve_ast_type(param.type)
                if not isinstance(t, ir.VoidType):
                    param_types.append(t)
            elif isinstance(param, c_ast.Decl):
                t = self._resolve_param_type(param)
                if t is not None:
                    param_types.append(t)
    func_type = ir.FunctionType(ret_ir, param_types, var_arg=is_variadic)
    return func_type.as_pointer()
|
|
1885
|
+
|
|
1886
|
+
def _safe_load(self, ptr, name=""):
    """Load through ``ptr`` when possible, otherwise return ``ptr`` itself.

    Non-pointer values and pointers to functions are returned unchanged,
    as is ``ptr`` when the builder refuses the load.
    """
    ptr_type = ptr.type
    if not isinstance(ptr_type, ir.PointerType):
        return ptr
    if isinstance(ptr_type.pointee, ir.FunctionType):
        # Function pointers are used as values; there is nothing to load.
        return ptr
    try:
        loaded = self.builder.load(ptr, name=name)
    except Exception:
        return ptr
    return loaded
|
|
1896
|
+
|
|
1897
|
+
def _decay_array_value_to_pointer(self, value, name="arraydecay"):
    """Turn an array-typed value into a pointer to its first element.

    Values that are not of array type are returned untouched. Array
    constants are first placed in a new internal constant global so that
    there is an address to index from.
    """
    if not isinstance(value.type, ir.ArrayType):
        return value

    source = value
    if isinstance(value, ir.values.Constant):
        backing = ir.GlobalVariable(
            self.module, value.type, self.module.get_unique_name("strlit")
        )
        backing.initializer = value
        backing.global_constant = True
        backing.linkage = "internal"
        source = backing

    zero = ir.Constant(ir.IntType(32), 0)
    return self.builder.gep(source, [zero, zero], name=name)
|
|
1912
|
+
|
|
1913
|
+
def _safe_store(self, value, ptr):
|
|
1914
|
+
"""Store value to ptr, auto-converting types if needed."""
|
|
1915
|
+
if value is None or ptr is None:
|
|
1916
|
+
return
|
|
1917
|
+
if isinstance(value.type, ir.VoidType):
|
|
1918
|
+
return # Can't store void
|
|
1919
|
+
if not isinstance(ptr.type, ir.PointerType):
|
|
1920
|
+
return
|
|
1921
|
+
if hasattr(ptr.type, "pointee") and value.type != ptr.type.pointee:
|
|
1922
|
+
value = self._implicit_convert(value, ptr.type.pointee)
|
|
1923
|
+
try:
|
|
1924
|
+
self.builder.store(value, ptr)
|
|
1925
|
+
except (TypeError, Exception):
|
|
1926
|
+
pass
|
|
1927
|
+
|
|
1928
|
+
def _implicit_convert(self, val, target_type):
    """Convert ``val`` to ``target_type`` following C's implicit conversions.

    Covers int<->float, float<->double, integer widening/narrowing (keeping
    the unsigned tag), int<->pointer, pointer<->pointer and array-to-pointer
    decay. A missing or void ``val`` is replaced by a zero of the target
    type (or returned as-is for a void target). Combinations not listed are
    returned unchanged.
    """
    if val is None or isinstance(val.type, ir.VoidType):
        # Can't convert void: substitute a zero of the target type.
        if isinstance(target_type, ir.PointerType):
            return ir.Constant(target_type, None)
        if isinstance(target_type, ir.VoidType):
            return val
        if isinstance(target_type, ir.IntType) or self._is_floating_ir_type(
            target_type
        ):
            return ir.Constant(target_type, 0)
        # Aggregates (arrays/structs) cannot take a scalar 0 payload; a None
        # payload is what _zero_initializer uses for them as well.
        return ir.Constant(target_type, None)

    if val.type == target_type:
        return val

    # int -> float
    if isinstance(val.type, ir.IntType) and self._is_floating_ir_type(target_type):
        return self._int_to_float(val, target_type)

    # float -> int
    if self._is_floating_ir_type(val.type) and isinstance(target_type, ir.IntType):
        return self.builder.fptosi(val, target_type)

    # float <-> double
    if self._is_floating_ir_type(val.type) and self._is_floating_ir_type(
        target_type
    ):
        if isinstance(val.type, ir.FloatType) and isinstance(
            target_type, ir.DoubleType
        ):
            return self.builder.fpext(val, target_type)
        if isinstance(val.type, ir.DoubleType) and isinstance(
            target_type, ir.FloatType
        ):
            return self.builder.fptrunc(val, target_type)
        return val

    # int -> int (wider or narrower)
    if isinstance(val.type, ir.IntType) and isinstance(target_type, ir.IntType):
        if val.type.width < target_type.width:
            if self._is_unsigned_val(val):
                result = self.builder.zext(val, target_type)
                return self._tag_unsigned(result)
            return self.builder.sext(val, target_type)
        elif val.type.width > target_type.width:
            result = self.builder.trunc(val, target_type)
            if self._is_unsigned_val(val):
                return self._tag_unsigned(result)
            return result

    # int -> pointer (e.g., NULL assignment, p = 0)
    if isinstance(val.type, ir.IntType) and isinstance(target_type, ir.PointerType):
        # Go through i8* first, then bitcast to the requested pointer type.
        raw_ptr = self.builder.inttoptr(val, voidptr_t)
        if target_type == voidptr_t:
            return raw_ptr
        return self.builder.bitcast(raw_ptr, target_type)

    # pointer -> int
    if isinstance(val.type, ir.PointerType) and isinstance(target_type, ir.IntType):
        return self.builder.ptrtoint(val, target_type)

    # pointer -> different pointer
    if isinstance(val.type, ir.PointerType) and isinstance(
        target_type, ir.PointerType
    ):
        result = self.builder.bitcast(val, target_type)
        # Carry the signedness tags over to the cast result.
        if self._is_unsigned_pointee(val):
            self._tag_unsigned_pointee(result)
        if self._is_unsigned_return(val):
            self._tag_unsigned_return(result)
        return result

    # array -> pointer (string literal to char*)
    if isinstance(val.type, ir.ArrayType) and isinstance(
        target_type, ir.PointerType
    ):
        ptr = self._decay_array_value_to_pointer(val)
        if ptr.type == target_type:
            return ptr
        return self.builder.bitcast(ptr, target_type)

    return val
|
|
1996
|
+
|
|
1997
|
+
def _extend_call_result(self, result, returns_unsigned=False):
    """Set or clear the unsigned tag on an integer call result.

    Non-integer results are returned untouched; the same ``result`` object
    is returned in every case.
    """
    if isinstance(result.type, ir.IntType):
        if returns_unsigned:
            self._tag_unsigned(result)
        else:
            self._clear_unsigned(result)
    return result
|
|
2005
|
+
|
|
2006
|
+
def _to_bool(self, val, name="cond"):
    """Produce an ``i1`` that is true when ``val`` differs from zero.

    ``i1`` values pass through; other integers are compared with 0,
    pointers with null, and everything else with ``0.0`` using an
    unordered floating-point comparison.
    """
    value_type = val.type
    if isinstance(value_type, ir.IntType):
        if value_type.width == 1:
            return val
        zero = ir.Constant(value_type, 0)
        return self.builder.icmp_signed("!=", val, zero, name)
    if isinstance(value_type, ir.PointerType):
        null = ir.Constant(value_type, None)
        return self.builder.icmp_unsigned("!=", val, null, name)
    return self.builder.fcmp_unordered(
        "!=", val, ir.Constant(value_type, 0.0), name
    )
|
|
2019
|
+
|
|
2020
|
+
def _ir_type_align(self, ir_type):
    """Return natural alignment of an IR type in bytes.

    Integers align to their byte width rounded up to a power of two, with a
    minimum of 1 and a cap of 8. The result is always a non-zero power of
    two because callers round offsets with ``(off + a - 1) & ~(a - 1)``,
    which collapses to 0 for an alignment of 0. Floats align to 4; doubles,
    pointers and unknown types to 8; arrays to their element; literal
    structs to their most-aligned member (1 when empty).
    """
    if isinstance(ir_type, ir.IntType):
        n_bytes = max(1, (ir_type.width + 7) // 8)  # i1 still occupies a byte
        pow2 = 1 << (n_bytes - 1).bit_length()  # 1, 2, 4, 8, ...
        return min(pow2, 8)
    elif isinstance(ir_type, ir.FloatType):
        return 4
    elif isinstance(ir_type, ir.DoubleType):
        return 8
    elif isinstance(ir_type, ir.PointerType):
        return 8
    elif isinstance(ir_type, ir.ArrayType):
        return self._ir_type_align(ir_type.element)
    elif isinstance(ir_type, ir.LiteralStructType):
        if not ir_type.elements:
            return 1
        return max(self._ir_type_align(e) for e in ir_type.elements)
    return 8
|
|
2037
|
+
|
|
2038
|
+
def _ir_type_size(self, ir_type):
    """Compute byte size of an IR type with proper alignment/padding.

    Integer sizes are rounded up to whole bytes, so ``i1`` occupies one
    byte instead of zero. Floats are 4 bytes; doubles, pointers and unknown
    types 8; arrays ``count * element size``; literal structs are laid out
    member by member with alignment padding and tail padding.
    """
    if isinstance(ir_type, ir.IntType):
        return (ir_type.width + 7) // 8
    elif isinstance(ir_type, ir.FloatType):
        return 4
    elif isinstance(ir_type, ir.DoubleType):
        return 8
    elif isinstance(ir_type, ir.PointerType):
        return 8
    elif isinstance(ir_type, ir.ArrayType):
        return int(ir_type.count) * self._ir_type_size(ir_type.element)
    elif isinstance(ir_type, ir.LiteralStructType):
        offset = 0
        for elem in ir_type.elements:
            # Guard against a zero alignment, which would make the round-up
            # mask below reset the running offset to 0.
            align = max(1, self._ir_type_align(elem))
            offset = (offset + align - 1) & ~(align - 1)  # align up
            offset += self._ir_type_size(elem)
        # Tail padding: align to struct's overall alignment
        struct_align = max(1, self._ir_type_align(ir_type))
        offset = (offset + struct_align - 1) & ~(struct_align - 1)
        return offset
    return 8
|
|
2061
|
+
|
|
2062
|
+
def _refine_member_ir_type(self, aggregate_type, member_key, field_type):
|
|
2063
|
+
"""Prefer semantic member types over storage types when available."""
|
|
2064
|
+
semantic_field_type = field_type
|
|
2065
|
+
member_decl_types = getattr(aggregate_type, "member_decl_types", None)
|
|
2066
|
+
decl_type = None
|
|
2067
|
+
|
|
2068
|
+
if isinstance(member_decl_types, dict):
|
|
2069
|
+
decl_type = member_decl_types.get(member_key)
|
|
2070
|
+
elif (
|
|
2071
|
+
isinstance(member_key, int)
|
|
2072
|
+
and member_decl_types is not None
|
|
2073
|
+
and member_key < len(member_decl_types)
|
|
2074
|
+
):
|
|
2075
|
+
decl_type = member_decl_types[member_key]
|
|
2076
|
+
|
|
2077
|
+
if decl_type is None:
|
|
2078
|
+
return semantic_field_type
|
|
2079
|
+
|
|
2080
|
+
try:
|
|
2081
|
+
resolved = self._resolve_ast_type(decl_type)
|
|
2082
|
+
if isinstance(field_type, ir.ArrayType) and isinstance(
|
|
2083
|
+
resolved, ir.PointerType
|
|
2084
|
+
):
|
|
2085
|
+
return semantic_field_type
|
|
2086
|
+
if isinstance(
|
|
2087
|
+
resolved, (ir.ArrayType, ir.LiteralStructType, ir.PointerType)
|
|
2088
|
+
):
|
|
2089
|
+
return resolved
|
|
2090
|
+
except Exception:
|
|
2091
|
+
pass
|
|
2092
|
+
|
|
2093
|
+
return semantic_field_type
|
|
2094
|
+
|
|
2095
|
+
def _get_aggregate_field_info(self, aggregate_type, field_name):
    """Return ``(byte_offset, semantic_ir_type)`` for a struct/union field.

    Union members always sit at offset 0. Struct members are located by
    name in ``aggregate_type.members`` and their offset is computed by
    walking ``aggregate_type.elements`` with alignment padding.

    Raises:
        CodegenError: if the aggregate has no ``members`` attribute or the
            field cannot be found.
    """
    if getattr(aggregate_type, "is_union", False):
        storage_type = aggregate_type.member_types[field_name]
        refined = self._refine_member_ir_type(
            aggregate_type, field_name, storage_type
        )
        return 0, refined

    if not hasattr(aggregate_type, "members"):
        raise CodegenError(f"Aggregate has no named fields: {aggregate_type}")

    # Index of the first member carrying the requested name, if any.
    wanted = next(
        (
            position
            for position, member in enumerate(aggregate_type.members)
            if member == field_name
        ),
        None,
    )
    if wanted is None:
        raise CodegenError(f"Field '{field_name}' not found in aggregate")

    cursor = 0
    for position, element in enumerate(aggregate_type.elements):
        alignment = self._ir_type_align(element)
        cursor = (cursor + alignment - 1) & ~(alignment - 1)
        if position == wanted:
            storage_type = aggregate_type.elements[wanted]
            refined = self._refine_member_ir_type(
                aggregate_type, wanted, storage_type
            )
            return cursor, refined
        cursor += self._ir_type_size(element)

    raise CodegenError(f"Field '{field_name}' not found in aggregate")
|
|
2129
|
+
|
|
2130
|
+
def _eval_offsetof_structref(self, node):
    """Compute ``(byte_offset, ir_type)`` for an ``&((T*)0)->field`` chain.

    A ``Cast`` node is the base of the chain and contributes offset 0 with
    the cast's target type. Each ``StructRef`` adds the offset of its field
    within the (dereferenced, for ``->``) base type.

    Raises:
        CodegenError: if ``node`` is neither a ``StructRef`` nor a ``Cast``.
    """
    if isinstance(node, c_ast.Cast):
        return 0, self._resolve_ast_type(node.to_type.type)

    if not isinstance(node, c_ast.StructRef):
        raise CodegenError(f"Not an offsetof base: {type(node).__name__}")

    outer_offset, outer_type = self._eval_offsetof_structref(node.name)
    container = outer_type
    if node.type == "->" and isinstance(container, ir.PointerType):
        container = container.pointee
    inner_offset, inner_type = self._get_aggregate_field_info(
        container, node.field.name
    )
    return outer_offset + inner_offset, inner_type
|
|
2147
|
+
|
|
2148
|
+
def codegen_Typename(self, node):
    """Emit nothing for a bare type name (it only appears inside sizeof(type))."""
    no_value = None
    no_address = None
    return no_value, no_address
|
|
2151
|
+
|
|
2152
|
+
def codegen_BinaryOp(self, node):
    """Generate IR for a C binary operator expression.

    ``&&`` and ``||`` are delegated to the short-circuit helpers so that the
    right operand is only generated on the path that needs it. Every other
    operator generates both operands and then dispatches on their IR types:
    pointer arithmetic, pointer difference, floating point, or integer.

    Args:
        node: AST node exposing ``op``, ``left`` and ``right``.

    Returns:
        ``(value, None)``. When either operand generates no value the result
        is the constant ``i64 0``. Comparison results are zero-extended to
        ``i64``.

    Raises:
        CodegenError: if ``node.op`` is not a built-in operator and the module
            has no global named ``binary<op>`` to call instead.
    """
    # Short-circuit && and || before evaluating both sides
    if node.op == "&&":
        return self._codegen_short_circuit_and(node)
    if node.op == "||":
        return self._codegen_short_circuit_or(node)

    lhs, _ = self.codegen(node.left)
    rhs, _ = self.codegen(node.right)
    if lhs is None or rhs is None:
        return ir.Constant(int64_t, 0), None

    def is_ptr(value):
        return isinstance(value.type, ir.PointerType)

    def is_int(value):
        return isinstance(value.type, ir.IntType)

    def tagged(result, unsigned):
        # Propagate unsignedness so later '/', '%', '>>' and comparisons on
        # this result pick the unsigned instruction.
        if unsigned:
            self._tag_unsigned(result)
        return result

    # Pointer arithmetic: ptr + int or ptr - int
    if node.op in ("+", "-") and is_ptr(lhs) and is_int(rhs):
        rhs = self._integer_promotion(rhs)
        rhs = self._convert_int_value(rhs, int64_t, result_unsigned=False)
        if node.op == "-":
            rhs = self.builder.neg(rhs, "negidx")
        return self.builder.gep(lhs, [rhs], name="ptradd"), None
    # int + ptr is commutative with ptr + int
    if node.op == "+" and is_ptr(rhs) and is_int(lhs):
        lhs = self._integer_promotion(lhs)
        lhs = self._convert_int_value(lhs, int64_t, result_unsigned=False)
        return self.builder.gep(rhs, [lhs], name="ptradd"), None

    # Promote int/pointer mix (e.g. `p == 0`, `0 - p`) to pointer/pointer.
    # A genuine pointer/pointer pair is left untouched by this step.
    if is_ptr(lhs) and is_int(rhs):
        rhs = self._implicit_convert(rhs, lhs.type)
    elif is_ptr(rhs) and is_int(lhs):
        lhs = self._implicit_convert(lhs, rhs.type)

    # Pointer subtraction: ptr - ptr -> int (element count). This single
    # check covers both an original pointer pair and one produced by the
    # promotion above.
    if node.op == "-" and is_ptr(lhs) and is_ptr(rhs):
        lhs_int = self.builder.ptrtoint(lhs, int64_t)
        rhs_int = self.builder.ptrtoint(rhs, int64_t)
        diff = self.builder.sub(lhs_int, rhs_int, "ptrdiff")
        elem_size = self._ir_type_size(lhs.type.pointee)
        return (
            self.builder.sdiv(
                diff, ir.Constant(int64_t, elem_size), "ptrdiff_elems"
            ),
            None,
        )

    # Bring both operands to a common arithmetic type.
    is_unsigned = False
    if is_int(lhs) and self._is_floating_ir_type(rhs.type):
        lhs = self._implicit_convert(lhs, rhs.type)
    elif self._is_floating_ir_type(lhs.type) and is_int(rhs):
        rhs = self._implicit_convert(rhs, lhs.type)
    elif self._is_floating_ir_type(lhs.type) and self._is_floating_ir_type(
        rhs.type
    ):
        if lhs.type != rhs.type:
            target = self._common_float_type(lhs.type, rhs.type)
            lhs = self._implicit_convert(lhs, target)
            rhs = self._implicit_convert(rhs, target)
    elif is_int(lhs) and is_int(rhs):
        if node.op in ("<<", ">>"):
            lhs, rhs, is_unsigned = self._shift_operand_conversion(lhs, rhs)
        else:
            lhs, rhs, is_unsigned = self._usual_arithmetic_conversion(lhs, rhs)

    # Anything that is not int/int after conversion takes the float path.
    both_int = is_int(lhs) and is_int(rhs)

    if node.op in ("+", "-", "*", "/", "%"):
        if not both_int:
            float_ops = {
                "+": self.builder.fadd,
                "-": self.builder.fsub,
                "*": self.builder.fmul,
                "/": self.builder.fdiv,
                "%": self.builder.frem,
            }
            return float_ops[node.op](lhs, rhs, "tmp"), None
        if is_unsigned and node.op in ("/", "%"):
            # Only division and remainder differ between signed/unsigned.
            emit = self.builder.udiv if node.op == "/" else self.builder.urem
        else:
            int_ops = {
                "+": self.builder.add,
                "-": self.builder.sub,
                "*": self.builder.mul,
                "/": self.builder.sdiv,
                "%": self.builder.srem,
            }
            emit = int_ops[node.op]
        return tagged(emit(lhs, rhs, "tmp"), is_unsigned), None

    if node.op in (">", "<", ">=", "<=", "!=", "=="):
        if is_ptr(lhs) and is_ptr(rhs):
            # Pointers are compared as unsigned 64-bit addresses.
            lhs_i = self.builder.ptrtoint(lhs, int64_t)
            rhs_i = self.builder.ptrtoint(rhs, int64_t)
            cmp = self.builder.icmp_unsigned(node.op, lhs_i, rhs_i, "ptrcmp")
        elif both_int:
            if is_unsigned:
                cmp = self.builder.icmp_unsigned(node.op, lhs, rhs, "cmptmp")
            else:
                cmp = self.builder.icmp_signed(node.op, lhs, rhs, "cmptmp")
        else:
            cmp = self._float_compare(node.op, lhs, rhs, "cmptmp")
        return self.builder.zext(cmp, int64_t, "booltmp"), None

    bitwise_ops = {
        "&": (self.builder.and_, "andtmp"),
        "|": (self.builder.or_, "ortmp"),
        # '^' previously dropped the unsigned tag that '&' and '|' keep.
        "^": (self.builder.xor, "xortmp"),
    }
    if node.op in bitwise_ops:
        emit, result_name = bitwise_ops[node.op]
        return tagged(emit(lhs, rhs, result_name), is_unsigned), None

    if node.op == "<<":
        return tagged(self.builder.shl(lhs, rhs, "shltmp"), is_unsigned), None

    if node.op == ">>":
        if is_unsigned:
            # Logical shift for unsigned operands; the result stays unsigned,
            # matching how '<<' tags its result.
            return tagged(self.builder.lshr(lhs, rhs, "shrtmp"), True), None
        return self.builder.ashr(lhs, rhs, "shrtmp"), None

    # Fall back to a module-level function named "binary<op>".
    func = self.module.globals.get("binary{0}".format(node.op))
    if func is None:
        raise CodegenError(f"Unsupported binary operator: {node.op!r}")
    return self.builder.call(func, [lhs, rhs], "binop"), None
|
|
2315
|
+
|
|
2316
|
+
def _codegen_short_circuit_and(self, node):
    """Generate ``left && right`` without evaluating ``right`` when ``left`` is false.

    The left operand is tested in the current block. Only when it is true does
    control enter ``and_rhs``, where the right operand is generated. Both paths
    join in ``and_merge`` through a phi node.

    Returns:
        ``(phi, None)`` where ``phi`` is an ``i64``: constant 0 from the
        left-false edge, otherwise the zero-extended truth of the right side.
    """
    left_value, _ = self.codegen(node.left)
    left_truth = self._to_bool(left_value, "and_lhs")

    function = self.builder.function
    right_entry = function.append_basic_block("and_rhs")
    join_block = function.append_basic_block("and_merge")
    # Block that holds the conditional branch; it is the phi predecessor for
    # the short-circuit edge.
    left_exit = self.builder.block

    self.builder.cbranch(left_truth, right_entry, join_block)

    self.builder.position_at_end(right_entry)
    right_value, _ = self.codegen(node.right)
    right_truth = self._to_bool(right_value, "and_rhs")
    right_wide = self.builder.zext(right_truth, int64_t, "and_rhs_ext")
    # Generating the right operand may have opened new blocks, so record the
    # block that actually jumps to the join point.
    right_exit = self.builder.block
    self.builder.branch(join_block)

    self.builder.position_at_end(join_block)
    merged = self.builder.phi(int64_t, "and_result")
    for incoming_value, predecessor in (
        (ir.Constant(int64_t, 0), left_exit),
        (right_wide, right_exit),
    ):
        merged.add_incoming(incoming_value, predecessor)
    return merged, None
|
|
2339
|
+
|
|
2340
|
+
def _codegen_short_circuit_or(self, node):
    """Generate ``left || right`` without evaluating ``right`` when ``left`` is true.

    The left operand is tested in the current block. Only when it is false does
    control enter ``or_rhs``, where the right operand is generated. Both paths
    join in ``or_merge`` through a phi node.

    Returns:
        ``(phi, None)`` where ``phi`` is an ``i64``: constant 1 from the
        left-true edge, otherwise the zero-extended truth of the right side.
    """
    left_value, _ = self.codegen(node.left)
    left_truth = self._to_bool(left_value, "or_lhs")

    function = self.builder.function
    right_entry = function.append_basic_block("or_rhs")
    join_block = function.append_basic_block("or_merge")
    # Block that holds the conditional branch; it is the phi predecessor for
    # the short-circuit edge.
    left_exit = self.builder.block

    # True jumps straight to the join point; false falls into the right side.
    self.builder.cbranch(left_truth, join_block, right_entry)

    self.builder.position_at_end(right_entry)
    right_value, _ = self.codegen(node.right)
    right_truth = self._to_bool(right_value, "or_rhs")
    right_wide = self.builder.zext(right_truth, int64_t, "or_rhs_ext")
    # Generating the right operand may have opened new blocks, so record the
    # block that actually jumps to the join point.
    right_exit = self.builder.block
    self.builder.branch(join_block)

    self.builder.position_at_end(join_block)
    merged = self.builder.phi(int64_t, "or_result")
    for incoming_value, predecessor in (
        (ir.Constant(int64_t, 1), left_exit),
        (right_wide, right_exit),
    ):
        merged.add_incoming(incoming_value, predecessor)
    return merged, None
|
|
2363
|
+
|
|
2364
|
+
def codegen_If(self, node):
    """Generate an ``if`` / ``else`` statement.

    Three blocks are created: ``then``, ``else`` and ``ifend``. The ``else``
    block is always emitted; without an else-statement it only jumps to
    ``ifend``. Each arm is generated inside its own ``new_scope()``, and an arm
    that already ended in a terminator gets no extra branch.

    Returns:
        ``(None, None)``; the builder is left positioned at ``ifend``.
    """
    condition, _ = self.codegen(node.cond)
    flag = self._to_bool(condition)

    function = self.builder.function
    true_block = function.append_basic_block("then")
    false_block = function.append_basic_block("else")
    join_block = function.append_basic_block("ifend")

    self.builder.cbranch(flag, true_block, false_block)

    def emit_arm(entry, body, optional):
        # One arm: position, generate the statement (skipped only when the
        # arm is optional and absent), then fall through to the join block.
        with self.new_scope():
            self.builder.position_at_end(entry)
            if not optional or body:
                self.codegen(body)
            if not self.builder.block.is_terminated:
                self.builder.branch(join_block)

    emit_arm(true_block, node.iftrue, False)
    emit_arm(false_block, node.iffalse, True)

    self.builder.position_at_end(join_block)
    return None, None
|
|
2391
|
+
|
|
2392
|
+
def codegen_NoneType(self, node):
    """Handler for an absent (``None``) node: generate nothing.

    Returns:
        The empty ``(None, None)`` pair.
    """
    return (None, None)
|
|
2394
|
+
|
|
2395
|
+
def codegen_For(self, node):
    """Generate a ``for (init; cond; next) stmt`` loop.

    Block layout: ``test`` evaluates the condition, ``loop`` holds the body,
    ``next`` holds the increment expression, and an ``afterloop`` block is the
    exit. ``break`` is bound to the exit block and ``continue`` to ``next``.

    The whole statement is generated inside its own scope so that a
    declaration in the init clause (``for (int i = 0; ...)``) is visible to
    the condition, increment and body but does not leak into the enclosing
    block.

    Returns:
        ``(double 0.0 constant, None)``; the builder is left positioned at the
        exit block.
    """
    # Re-anchor the builder at the end of the current block before emitting
    # the branch into the loop.
    self.builder.position_at_end(self.builder.block)

    # Outer scope: owns anything declared by the init clause.
    with self.new_scope():
        if node.init is not None:
            self.codegen(node.init)

        function = self.builder.function
        test_bb = function.append_basic_block("test")
        loop_bb = function.append_basic_block("loop")
        next_bb = function.append_basic_block("next")

        # The exit block is created detached and appended only after the
        # body has been generated, so it is listed after every block the
        # body creates.
        after_loop_label = self.new_label("afterloop")
        after_bb = ir.Block(function, after_loop_label)

        self.builder.branch(test_bb)
        self.builder.position_at_end(test_bb)

        if node.cond is not None:
            endcond, _ = self.codegen(node.cond)
            cmp = self._to_bool(endcond, "loopcond")
            self.builder.cbranch(cmp, loop_bb, after_bb)
        else:
            # for(;;) - infinite loop, always branch to body
            self.builder.branch(loop_bb)

        # Inner scope: break/continue targets for the body.
        with self.new_scope():
            self.define("break", after_bb)
            self.define("continue", next_bb)
            self.builder.position_at_end(loop_bb)
            self.codegen(node.stmt)
            if not self.builder.block.is_terminated:
                self.builder.branch(next_bb)
            self.builder.position_at_end(next_bb)
            if node.next is not None:
                self.codegen(node.next)
            self.builder.branch(test_bb)

        self.builder.function.basic_blocks.append(after_bb)
        self.builder.position_at_end(after_bb)

    return ir.values.Constant(ir.DoubleType(), 0.0), None
|
|
2441
|
+
|
|
2442
|
+
def codegen_While(self, node):
    """Generate a ``while (cond) stmt`` loop.

    Block layout: ``test`` evaluates the condition, ``loop`` holds the body
    and ``afterloop`` is the exit. ``break`` is bound to the exit block and
    ``continue`` to ``test``.

    Returns:
        ``(double 0.0 constant, None)``, the same ``(value, address)`` pair
        shape returned by the ``for`` and ``do``/``while`` generators, so
        callers that unpack the result of ``self.codegen(...)`` keep working.
        The builder is left positioned at the exit block.
    """
    # Re-anchor the builder at the end of the current block before emitting
    # the branch into the loop.
    self.builder.position_at_end(self.builder.block)

    function = self.builder.function
    test_bb = function.append_basic_block("test")
    loop_bb = function.append_basic_block("loop")
    after_bb = function.append_basic_block("afterloop")

    self.builder.branch(test_bb)
    self.builder.position_at_end(test_bb)
    endcond, _ = self.codegen(node.cond)
    cmp = self._to_bool(endcond, "loopcond")
    self.builder.cbranch(cmp, loop_bb, after_bb)

    with self.new_scope():
        self.define("break", after_bb)
        self.define("continue", test_bb)
        self.builder.position_at_end(loop_bb)
        self.codegen(node.stmt)
        # After the body, loop back to re-test the condition unless the body
        # already ended in a terminator (return/break/continue).
        if not self.builder.block.is_terminated:
            self.builder.branch(test_bb)

    # New code will be inserted into after_bb
    self.builder.position_at_end(after_bb)

    return ir.values.Constant(ir.DoubleType(), 0.0), None
|
|
2473
|
+
|
|
2474
|
+
def codegen_Break(self, node):
    """Generate ``break``: jump to the block registered under ``"break"``.

    The loop and switch generators bind that name with ``self.define`` before
    generating their bodies.

    Returns:
        ``(None, None)``.
    """
    exit_block = self.lookup("break")
    self.builder.branch(exit_block)
    return None, None
|
|
2477
|
+
|
|
2478
|
+
def codegen_Continue(self, node):
    """Generate ``continue``: jump to the block registered under ``"continue"``.

    The loop generators bind that name with ``self.define`` before generating
    their bodies.

    Returns:
        ``(None, None)``.
    """
    resume_block = self.lookup("continue")
    self.builder.branch(resume_block)
    return None, None
|
|
2481
|
+
|
|
2482
|
+
def codegen_DoWhile(self, node):
    """Generate a ``do stmt while (cond);`` loop.

    Block layout: ``dowhile_body`` is entered unconditionally,
    ``dowhile_test`` evaluates the condition and loops back on true, and
    ``dowhile_end`` is the exit. ``break`` is bound to the exit block and
    ``continue`` to the test block.

    Returns:
        ``(double 0.0 constant, None)``; the builder is left positioned at the
        exit block.
    """
    # Re-anchor the builder at the end of the current block.
    self.builder.position_at_end(self.builder.block)

    function = self.builder.function
    body_block = function.append_basic_block("dowhile_body")
    cond_block = function.append_basic_block("dowhile_test")
    exit_block = function.append_basic_block("dowhile_end")

    self.builder.branch(body_block)

    with self.new_scope():
        self.define("break", exit_block)
        self.define("continue", cond_block)
        self.builder.position_at_end(body_block)
        self.codegen(node.stmt)
        # Fall through to the test unless the body already terminated.
        body_still_open = not self.builder.block.is_terminated
        if body_still_open:
            self.builder.branch(cond_block)

    self.builder.position_at_end(cond_block)
    cond_value, _ = self.codegen(node.cond)
    keep_looping = self._to_bool(cond_value, "loopcond")
    self.builder.cbranch(keep_looping, body_block, exit_block)

    self.builder.position_at_end(exit_block)
    zero = ir.values.Constant(ir.DoubleType(), 0.0)
    return zero, None
|
|
2508
|
+
|
|
2509
|
+
def codegen_Switch(self, node):
    """Generate a ``switch`` statement.

    The condition is normalised to ``i64`` (pointers via ``ptrtoint``, floats
    via ``fptosi``, narrower integers via ``_implicit_convert``). Every
    top-level ``case`` / ``default`` label of the switch body gets its own
    block. A label body that does not end in a terminator falls through into
    the next label's block, or into ``switch_end`` after the last one.
    ``break`` is bound to ``switch_end``. Without a ``default`` label the
    default target is ``switch_end``.

    Case values are resolved before the ``switch`` instruction is emitted.
    If constant folding fails and the expression is generated as a fallback,
    any instructions it produces land before the terminator, not after it.
    Case labels whose value is not an ``ir.Constant`` are left out of the
    jump table; their blocks are still generated and reachable by fallthrough.

    Returns:
        ``(None, None)``; the builder is left positioned at ``switch_end``.
    """
    cond_val, _ = self.codegen(node.cond)
    # Switch requires integer condition
    if isinstance(cond_val.type, ir.PointerType):
        cond_val = self.builder.ptrtoint(cond_val, int64_t)
    elif self._is_floating_ir_type(cond_val.type):
        cond_val = self.builder.fptosi(cond_val, int64_t)
    elif isinstance(cond_val.type, ir.IntType) and cond_val.type.width != 64:
        cond_val = self._implicit_convert(cond_val, int64_t)

    after_bb = self.builder.function.append_basic_block("switch_end")

    # Preserve C switch semantics: grouped case labels and fallthrough
    # share code by jumping into the next label block, not directly to
    # the switch epilogue.
    if isinstance(node.stmt, c_ast.Compound):
        switch_items = list(node.stmt.block_items or [])
    elif node.stmt is not None:
        switch_items = [node.stmt]
    else:
        switch_items = []
    labels = [
        item
        for item in switch_items
        if isinstance(item, (c_ast.Case, c_ast.Default))
    ]

    # One block per label, keyed by node identity.
    label_blocks = {}
    default_bb = after_bb
    for item in labels:
        is_default = isinstance(item, c_ast.Default)
        bb = self.builder.function.append_basic_block(
            "switch_default" if is_default else "switch_case"
        )
        label_blocks[id(item)] = bb
        if is_default:
            default_bb = bb

    # Resolve the case constants while the current block is still open.
    resolved_cases = []
    for item in labels:
        if not isinstance(item, c_ast.Case):
            continue
        # Case values must be compile-time constants
        try:
            const_int = self._eval_const_expr(item.expr)
            case_val = ir.Constant(cond_val.type, const_int)
        except Exception:
            # Best-effort fallback: the folder could not handle the
            # expression, so see whether normal codegen yields a constant.
            case_val, _ = self.codegen(item.expr)
        if not isinstance(case_val, ir.Constant):
            # Missing or non-constant case: skip (LLVM requires constants)
            continue
        if case_val.type != cond_val.type:
            case_val = ir.Constant(cond_val.type, case_val.constant)
        resolved_cases.append((case_val, label_blocks[id(item)]))

    switch_inst = self.builder.switch(cond_val, default_bb)
    for case_val, target_bb in resolved_cases:
        switch_inst.add_case(case_val, target_bb)

    with self.new_scope():
        self.define("break", after_bb)

        for idx, item in enumerate(labels):
            self.builder.position_at_end(label_blocks[id(item)])
            for stmt in item.stmts or []:
                self.codegen(stmt)
                if self.builder.block.is_terminated:
                    # Anything after break/return/continue is unreachable.
                    break
            if not self.builder.block.is_terminated:
                # Fall through into the next label, or out of the switch.
                if idx + 1 < len(labels):
                    next_bb = label_blocks[id(labels[idx + 1])]
                else:
                    next_bb = after_bb
                self.builder.branch(next_bb)

    self.builder.position_at_end(after_bb)
    return None, None
|
|
2585
|
+
|
|
2586
|
+
def codegen_TernaryOp(self, node):
    """Generate ``cond ? iftrue : iffalse``.

    Each arm is generated in its own block (``ternary_true`` /
    ``ternary_false``). Both results are converted to one common IR type and
    joined in ``ternary_end``. An arm whose final block is already terminated
    contributes nothing to the join.

    Returns:
        ``(value, None)`` where ``value`` is:
        * a phi node when both arms reach the join block,
        * the single converted arm value when only one arm reaches it,
        * a zero/null constant of the common type when neither does.
    """

    def null_of(ir_type):
        # Zero value used when an arm produced no value at all.
        if isinstance(ir_type, ir.PointerType):
            return ir.Constant(ir_type, None)
        filler = 0.0 if self._is_floating_ir_type(ir_type) else 0
        return ir.Constant(ir_type, filler)

    def unify(first, second):
        # Choose the IR type both arm values are converted to.
        if first is None:
            return int64_t if second is None else second.type
        if second is None:
            return first.type

        first_t, second_t = first.type, second.type
        first_is_ptr = isinstance(first_t, ir.PointerType)
        second_is_ptr = isinstance(second_t, ir.PointerType)

        # An array arm decays to a pointer; prefer an existing pointer type.
        if isinstance(first_t, ir.ArrayType) or isinstance(second_t, ir.ArrayType):
            if first_is_ptr:
                return first_t
            if second_is_ptr:
                return second_t
            array_t = first_t if isinstance(first_t, ir.ArrayType) else second_t
            return ir.PointerType(array_t.element)

        if first_t == second_t:
            return first_t
        if first_is_ptr and second_is_ptr:
            # Distinct pointer types meet at i8*.
            return voidptr_t
        if first_is_ptr and isinstance(second_t, ir.IntType):
            return first_t
        if second_is_ptr and isinstance(first_t, ir.IntType):
            return second_t
        if self._is_floating_ir_type(first_t) or self._is_floating_ir_type(
            second_t
        ):
            return self._common_float_type(first_t, second_t)
        if isinstance(first_t, ir.IntType) and isinstance(second_t, ir.IntType):
            # Wider integer wins; ties keep the true-arm type.
            return first_t if first_t.width >= second_t.width else second_t
        return first_t

    selector, _ = self.codegen(node.cond)
    flag = self._to_bool(selector)

    function = self.builder.function
    true_entry = function.append_basic_block("ternary_true")
    false_entry = function.append_basic_block("ternary_false")
    join_block = function.append_basic_block("ternary_end")

    self.builder.cbranch(flag, true_entry, false_entry)

    self.builder.position_at_end(true_entry)
    true_val, _ = self.codegen(node.iftrue)
    true_exit = self.builder.block

    self.builder.position_at_end(false_entry)
    false_val, _ = self.codegen(node.iffalse)
    false_exit = self.builder.block

    result_type = unify(true_val, false_val)

    # Close each live arm: convert its value inside the arm, record the
    # (predecessor, value) edge, then branch to the join block.
    edges = []
    for arm_exit, arm_value in ((true_exit, true_val), (false_exit, false_val)):
        if not arm_exit.is_terminated:
            self.builder.position_at_end(arm_exit)
            if arm_value is None:
                arm_value = null_of(result_type)
            needs_conversion = arm_value.type != result_type or isinstance(
                arm_value.type, ir.ArrayType
            )
            if needs_conversion:
                arm_value = self._implicit_convert(arm_value, result_type)
            edges.append((self.builder.block, arm_value))
            self.builder.branch(join_block)

    self.builder.position_at_end(join_block)
    if len(edges) == 2:
        merged = self.builder.phi(result_type, "ternary")
        for predecessor, incoming_value in edges:
            merged.add_incoming(incoming_value, predecessor)
        return merged, None
    if edges:
        # A single predecessor needs no phi.
        return edges[0][1], None
    return null_of(result_type), None
|
|
2676
|
+
|
|
2677
|
+
def codegen_Cast(self, node):
    """Generate an explicit C cast ``(type) expr``.

    Three situations are handled in order:

    1. float -> integer: ``fptoui`` for an unsigned destination, otherwise
       ``fptosi``.
    2. source IR type already equals the destination IR type: only the
       signedness bookkeeping changes. When the signedness of an integer has
       to flip, a fresh value is produced with ``add expr, 0`` and the tags
       are applied to that new value rather than to ``expr``.
    3. everything else: ``_implicit_convert`` to the destination type.

    Returns:
        ``(value, None)``, except on the same-type path that reuses ``expr``
        unchanged and returns ``(expr, ptr)`` with the operand's address.

    NOTE(review): this listing lost its indentation; the nesting of the
    same-type branch below was reconstructed and should be confirmed against
    the original file.
    """

    expr, ptr = self.codegen(node.expr)

    dest_ir_type = self._resolve_ast_type(node.to_type.type)
    # Check if casting to unsigned type: only a plain TypeDecl over an
    # IdentifierType is inspected; any other destination counts as not
    # unsigned.
    is_unsigned = False
    if isinstance(node.to_type.type, c_ast.TypeDecl) and isinstance(
        node.to_type.type.type, c_ast.IdentifierType
    ):
        is_unsigned = self._is_unsigned_type_names(node.to_type.type.type.names)
    # Case 1: floating point -> integer.
    if self._is_floating_ir_type(expr.type) and isinstance(
        dest_ir_type, ir.IntType
    ):
        if is_unsigned:
            result = self.builder.fptoui(expr, dest_ir_type)
            self._tag_value_from_decl_type(result, node.to_type.type)
            return result, None
        result = self.builder.fptosi(expr, dest_ir_type)
        self._clear_unsigned(result)
        self._tag_value_from_decl_type(result, node.to_type.type)
        return result, None
    # Case 2: same IR type, only signedness / declared-type tags may change.
    if expr.type == dest_ir_type:
        if isinstance(dest_ir_type, ir.IntType):
            if is_unsigned:
                if self._is_unsigned_val(expr):
                    # Already unsigned: reuse the value as is.
                    self._tag_value_from_decl_type(expr, node.to_type.type)
                    return expr, None
                # signed -> unsigned: make a new value to carry the tag.
                result = self.builder.add(
                    expr, ir.Constant(dest_ir_type, 0), "casttmp"
                )
                self._tag_unsigned(result)
                self._tag_value_from_decl_type(result, node.to_type.type)
                return result, None
            if self._is_unsigned_val(expr):
                # unsigned -> signed: the new value is not tagged unsigned.
                result = self.builder.add(
                    expr, ir.Constant(dest_ir_type, 0), "casttmp"
                )
                self._tag_value_from_decl_type(result, node.to_type.type)
                return result, None
        self._clear_unsigned(expr)
        if is_unsigned:
            self._tag_unsigned(expr)
        self._tag_value_from_decl_type(expr, node.to_type.type)
        return expr, ptr
    # Case 3: a real conversion between different IR types.
    result = self._implicit_convert(expr, dest_ir_type)
    if is_unsigned:
        self._tag_unsigned(result)
    elif isinstance(dest_ir_type, ir.IntType):
        self._clear_unsigned(result)
    self._tag_value_from_decl_type(result, node.to_type.type)
    return result, None
|
|
2729
|
+
|
|
2730
|
+
def codegen_FuncCall(self, node):
    """Generate a function call expression.

    Call targets are handled in this order:

    * ``__builtin_va_start`` / ``__builtin_va_end`` / ``__builtin_va_copy``
      are routed to their dedicated helpers; ``__builtin_va_arg`` yields a
      null ``i8*`` constant here.
    * A callee that is not a plain identifier (e.g. ``s.fn(args)``) is
      generated as an expression and called if it is a pointer to a function
      type.
    * A named callee is resolved with ``self.lookup``. If the binding is not
      an ``ir.Function`` but has a pointer type, it is loaded and called as a
      function pointer. Otherwise it is called directly after
      ``_convert_call_args``.

    Calls to ``void`` functions, and every situation that cannot be called
    (not a function pointer, unknown binding, ``TypeError`` / ``IndexError``
    raised while building the call), return the dummy constant ``i64 0``.

    Returns:
        ``(value, None)`` where ``value`` is the call result passed through
        ``_extend_call_result``, or one of the constants described above.
    """

    callee = None
    if isinstance(node.name, c_ast.ID):
        callee = node.name.name
        # Variadic builtins are handled without a symbol lookup.
        if callee == "__builtin_va_start":
            return self._codegen_builtin_va_start(node)
        if callee == "__builtin_va_end":
            return self._codegen_builtin_va_end(node)
        if callee == "__builtin_va_copy":
            return self._codegen_builtin_va_copy(node)
        if callee == "__builtin_va_arg":
            # Placeholder result: a null i8* constant.
            return ir.Constant(voidptr_t, None), None
    else:
        # Calling function pointer in struct: s.fn(args)
        # Arguments are generated before the callee expression.
        call_args = []
        if node.args:
            call_args = [self.codegen(arg)[0] for arg in node.args.exprs]
        fp_val, _ = self.codegen(node.name)
        if isinstance(fp_val.type, ir.PointerType) and isinstance(
            fp_val.type.pointee, ir.FunctionType
        ):
            # Coerce args to match function pointer param types; arguments
            # beyond the declared parameters are passed through unchanged.
            ftype = fp_val.type.pointee
            coerced = []
            for j, a in enumerate(call_args):
                if j < len(ftype.args):
                    coerced.append(self._coerce_arg(a, ftype.args[j]))
                else:
                    coerced.append(a)
            call_args = coerced
            ret_type = ftype.return_type
            if isinstance(ret_type, ir.VoidType):
                # A void call has no result; no value name is passed.
                self.builder.call(fp_val, call_args)
                return ir.Constant(int64_t, 0), None
            result = self.builder.call(fp_val, call_args, "fpcall")
            return (
                self._extend_call_result(
                    result, returns_unsigned=self._is_unsigned_return(fp_val)
                ),
                None,
            )
        # Not a function pointer — can't call, return dummy
        return ir.Constant(int64_t, 0), None

    # Named callee: the second element of the lookup result is the binding.
    _, callee_func = self.lookup(callee)

    call_args = []
    if node.args:
        call_args = [self.codegen(arg)[0] for arg in node.args.exprs]

    # Function pointer: load the pointer and call through it
    if not isinstance(callee_func, ir.Function):
        if hasattr(callee_func, "type") and isinstance(
            callee_func.type, ir.PointerType
        ):
            loaded = self._safe_load(callee_func, name="fptr")
            # Carry the "returns unsigned" marker from the binding over to
            # the loaded pointer value.
            if self._is_unsigned_return_binding(callee_func):
                self._tag_unsigned_return(loaded)
            # loaded could be a function pointer (ptr to FunctionType)
            # or the alloca's pointee could be a function ptr
            func_val = loaded
            if isinstance(func_val.type, ir.PointerType) and isinstance(
                func_val.type.pointee, ir.FunctionType
            ):
                ftype = func_val.type.pointee
                coerced = [
                    self._coerce_arg(a, ftype.args[j]) if j < len(ftype.args) else a
                    for j, a in enumerate(call_args)
                ]
                ret_type = ftype.return_type
                is_void = isinstance(ret_type, ir.VoidType)
                if is_void:
                    self.builder.call(func_val, coerced)
                    return ir.Constant(int64_t, 0), None
                result = self.builder.call(func_val, coerced, "fpcall")
                return (
                    self._extend_call_result(
                        result, returns_unsigned=self._is_unsigned_return(func_val)
                    ),
                    None,
                )
        return ir.Constant(int64_t, 0), None  # unknown function — return dummy

    # Defensive re-check; the branch above has already returned for every
    # binding that is not an ir.Function.
    if callee_func is None or not isinstance(callee_func, (ir.Function,)):
        return ir.Constant(int64_t, 0), None

    # Convert arguments to match function parameter types
    converted = self._convert_call_args(call_args, callee_func)

    # Call and handle return type
    try:
        is_void = isinstance(callee_func.return_value.type, ir.VoidType)
    except Exception:
        # If the return type cannot be inspected, treat it as non-void.
        is_void = False
    try:
        if is_void:
            self.builder.call(callee_func, converted)
            return ir.Constant(int64_t, 0), None
        result = self.builder.call(callee_func, converted, "calltmp")
    except (TypeError, IndexError):
        # Arg count/type mismatch — return dummy value
        return ir.Constant(int64_t, 0), None

    # Widen small int returns (e.g., i32 from strcmp) to i64
    return (
        self._extend_call_result(
            result, returns_unsigned=self._is_unsigned_return_binding(callee_func)
        ),
        None,
    )
|
|
2841
|
+
|
|
2842
|
+
def _get_or_declare_intrinsic(self, name, ret_type, arg_types):
    """Return the module global called ``name``, declaring it if missing.

    Args:
        name: symbol name, e.g. ``"llvm.va_start"``.
        ret_type: IR return type used when a declaration has to be created.
        arg_types: IR parameter types used when a declaration has to be
            created.

    Returns:
        The existing global when the module already has one under ``name``
        (its signature is not checked). Otherwise a newly declared
        ``ir.Function`` with the requested signature.
    """
    declared = self.module.globals.get(name)
    if declared is None:
        signature = ir.FunctionType(ret_type, arg_types)
        declared = ir.Function(self.module, signature, name=name)
    return declared
|
|
2847
|
+
|
|
2848
|
+
def _codegen_builtin_va_start(self, node):
    """Lower ``__builtin_va_start(ap, ...)`` to a call of ``llvm.va_start``.

    Only the first argument is used. Its generated value must have a pointer
    type and is bitcast to ``i8*`` when it is not already one. The call is
    silently skipped when there are no arguments or the first argument does
    not generate a pointer-typed value.

    Returns:
        ``(i64 0 constant, None)`` in every case.
    """
    operands = node.args.exprs if node.args else None
    if not operands:
        return ir.Constant(int64_t, 0), None

    va_list_ptr, _ = self.codegen(operands[0])
    generated_type = getattr(va_list_ptr, "type", None)
    if not isinstance(generated_type, ir.PointerType):
        return ir.Constant(int64_t, 0), None

    va_start_fn = self._get_or_declare_intrinsic(
        "llvm.va_start", ir.VoidType(), [voidptr_t]
    )
    # The intrinsic is declared as taking an i8*.
    if va_list_ptr.type == voidptr_t:
        raw_ptr = va_list_ptr
    else:
        raw_ptr = self.builder.bitcast(va_list_ptr, voidptr_t, name="vastartarg")
    self.builder.call(va_start_fn, [raw_ptr])
    return ir.Constant(int64_t, 0), None
|
|
2862
|
+
|
|
2863
|
+
def _codegen_builtin_va_end(self, node):
    """Lower ``__builtin_va_end(ap)`` to a call of ``llvm.va_end``.

    Only the first argument is used. Its generated value must have a pointer
    type and is bitcast to ``i8*`` when it is not already one. The call is
    silently skipped when there are no arguments or the first argument does
    not generate a pointer-typed value.

    Returns:
        ``(i64 0 constant, None)`` in every case.
    """
    operands = node.args.exprs if node.args else None
    if not operands:
        return ir.Constant(int64_t, 0), None

    va_list_ptr, _ = self.codegen(operands[0])
    generated_type = getattr(va_list_ptr, "type", None)
    if not isinstance(generated_type, ir.PointerType):
        return ir.Constant(int64_t, 0), None

    va_end_fn = self._get_or_declare_intrinsic(
        "llvm.va_end", ir.VoidType(), [voidptr_t]
    )
    # The intrinsic is declared as taking an i8*.
    if va_list_ptr.type == voidptr_t:
        raw_ptr = va_list_ptr
    else:
        raw_ptr = self.builder.bitcast(va_list_ptr, voidptr_t, name="vaendarg")
    self.builder.call(va_end_fn, [raw_ptr])
    return ir.Constant(int64_t, 0), None
|
|
2877
|
+
|
|
2878
|
+
def _codegen_builtin_va_copy(self, node):
    """Lower a ``va_copy``-style builtin call as a load from src and store to dst.

    The first argument is the destination and the second the source. Both are
    generated (destination first); if either is not pointer-typed nothing
    further is emitted. The loaded value is converted to the destination's
    pointee type when the types differ. Always returns ``(i64 0, None)``.
    """
    done = (ir.Constant(int64_t, 0), None)
    if not node.args or len(node.args.exprs) < 2:
        return done

    dst_expr, src_expr = node.args.exprs[0], node.args.exprs[1]
    dst_addr, _ = self.codegen(dst_expr)
    src_addr, _ = self.codegen(src_expr)
    for operand in (dst_addr, src_addr):
        if not isinstance(getattr(operand, "type", None), ir.PointerType):
            return done

    copied = self._safe_load(src_addr)
    target_type = dst_addr.type.pointee
    if copied.type != target_type:
        copied = self._implicit_convert(copied, target_type)
    self._safe_store(copied, dst_addr)
    return done
|
|
2893
|
+
|
|
2894
|
+
def _convert_call_args(self, call_args, callee_func):
    """Convert call arguments to match function parameter types.

    Arguments that line up with a declared parameter of `callee_func` are
    coerced to that parameter's type; any extra (variadic) arguments get the
    default argument promotions instead. Returns a new list.
    """
    declared_types = [param.type for param in callee_func.args]
    fixed_count = len(declared_types)
    return [
        self._coerce_arg(value, declared_types[position])
        if position < fixed_count
        else self._default_arg_promotion(value)
        for position, value in enumerate(call_args)
    ]
|
|
2907
|
+
|
|
2908
|
+
def _default_arg_promotion(self, arg):
    """Apply C default argument promotions for variadic calls.

    - missing / void value -> ``i64 0``
    - array value          -> pointer to its element type
    - ``float``            -> ``double``
    - integer narrower than 32 bits -> result of ``_integer_promotion``
    - anything else is passed through unchanged
    """
    if arg is None or isinstance(getattr(arg, "type", None), ir.VoidType):
        return ir.Constant(int64_t, 0)

    kind = arg.type
    if isinstance(kind, ir.ArrayType):
        return self._implicit_convert(arg, ir.PointerType(kind.element))
    if isinstance(kind, ir.FloatType):
        return self.builder.fpext(arg, ir.DoubleType())

    is_small_int = isinstance(kind, ir.IntType) and kind.width < int32_t.width
    return self._integer_promotion(arg) if is_small_int else arg
|
|
2919
|
+
|
|
2920
|
+
def _coerce_arg(self, arg, expected):
    """Coerce a single call argument to the parameter type `expected`.

    Args:
        arg: the generated IR value for the argument, or None / a void-typed
            value when the argument expression produced nothing usable.
        expected: the IR type of the corresponding parameter.

    Returns:
        An IR value of type `expected` (or whatever ``_implicit_convert``
        yields for numeric conversions).
    """
    if arg is None or isinstance(getattr(arg, "type", None), ir.VoidType):
        # Substitute a zero of the *parameter's* type.  Previously every
        # non-pointer parameter got an ``i64 0`` placeholder, which has the
        # wrong type for i32 / float / double / aggregate parameters.
        if isinstance(expected, ir.IntType):
            return ir.Constant(expected, 0)
        if isinstance(expected, (ir.FloatType, ir.DoubleType)):
            return ir.Constant(expected, 0.0)
        # Pointers become ``null``; any other type gets its null/zero value.
        return ir.Constant(expected, None)

    if arg.type == expected:
        return arg

    # String literal [N x i8] -> pointer: spill the array into a constant
    # global and pass its address.
    if isinstance(arg.type, ir.ArrayType) and isinstance(expected, ir.PointerType):
        gv = ir.GlobalVariable(
            self.module, arg.type, self.module.get_unique_name("str")
        )
        gv.initializer = arg
        gv.global_constant = True
        return self.builder.bitcast(gv, expected)

    # Pointer -> different pointer: bitcast
    if isinstance(arg.type, ir.PointerType) and isinstance(expected, ir.PointerType):
        return self.builder.bitcast(arg, expected)

    # Numeric conversions
    return self._implicit_convert(arg, expected)
|
|
2945
|
+
|
|
2946
|
+
def codegen_Decl(self, node):
    """Generate code for a C declaration.

    Handles, in order: anonymous declarations, static locals, enums, forward
    function declarations, bare struct/union definitions, scalar and
    aggregate variables, arrays, and pointers (including function pointers).

    Returns:
        ``(None, var_addr)`` for scalar, array and pointer variables, where
        ``var_addr`` is the alloca or global created for the variable;
        whatever ``codegen_Enum`` returns for enum declarations; and
        ``(None, None)`` for everything else.

    Raises:
        Exception: for an array whose element declarator is not a TypeDecl,
            ArrayDecl or PtrDecl.
        NotImplementedError: for a pointer whose pointee declarator is not a
            TypeDecl, PtrDecl or FuncDecl (e.g. pointer-to-array).
    """

    def declare_with_init(name, ir_type):
        # Allocate storage for `name` (alloca or global), bind it in the
        # current scope, then apply node.init or zero-initialise globals.
        if not self.in_global:
            storage = self._alloca_in_entry(ir_type, name)
        else:
            storage = ir.GlobalVariable(self.module, ir_type, name)
        self.define(name, (ir_type, storage))
        if node.init is not None:
            if self.in_global:
                storage.initializer = self._build_const_init(node.init, ir_type)
            else:
                init_val, _ = self.codegen(node.init)
                if init_val is not None:
                    if init_val.type != ir_type:
                        init_val = self._implicit_convert(init_val, ir_type)
                    self._safe_store(init_val, storage)
        elif self.in_global:
            storage.initializer = self._zero_initializer(ir_type)

    type_str = ""

    # Skip anonymous/unnamed declarations, unless they introduce a
    # struct/union/enum/function type (directly or wrapped in a TypeDecl).
    if node.name is None and not isinstance(
        node.type, (c_ast.Struct, c_ast.Union, c_ast.Enum, c_ast.FuncDecl)
    ):
        wraps_tag_type = isinstance(node.type, c_ast.TypeDecl) and isinstance(
            node.type.type, (c_ast.Struct, c_ast.Union, c_ast.Enum)
        )
        if not wraps_tag_type:
            return None, None

    # Static local variables: stored as globals with function-scoped names
    is_static = node.storage and "static" in node.storage
    if is_static and not self.in_global and isinstance(node.type, c_ast.TypeDecl):
        type_str = node.type.type.names
        ir_type = self._get_ir_type(type_str)
        # Create unique global name
        global_name = f"__static_{self.function.name}_{node.name}"
        gv = ir.GlobalVariable(self.module, ir_type, global_name)
        if node.init:
            gv.initializer = self._build_const_init(node.init, ir_type)
        else:
            gv.initializer = self._zero_initializer(ir_type)
        self.define(node.name, (ir_type, gv))
        return None, None

    if isinstance(node.type, c_ast.Enum):
        return self.codegen_Enum(node.type)

    # Forward function declaration: int foo(int x);
    if isinstance(node.type, c_ast.FuncDecl):
        funcname = node.name
        # Skip if already exists (module globals, libc, or env)
        existing = self.module.globals.get(funcname)
        if existing:
            if self._func_decl_returns_unsigned(node.type):
                self._mark_unsigned_return(existing)
            self.define(funcname, (None, existing))
            return None, None
        if funcname in LIBC_FUNCTIONS:
            self._declare_libc(funcname)
            return None, None
        ir_type, _ = self.codegen(node.type)
        arg_types = []
        is_va = False
        if node.type.args:
            for arg in node.type.args.params:
                if isinstance(arg, c_ast.EllipsisParam):
                    is_va = True
                    continue
                t = self._resolve_param_type(arg)
                if t is not None:
                    arg_types.append(t)
        try:
            func = ir.Function(
                self.module,
                ir.FunctionType(ir_type, arg_types, var_arg=is_va),
                name=funcname,
            )
            if self._func_decl_returns_unsigned(node.type):
                self._mark_unsigned_return(func)
            self.define(funcname, (ir_type, func))
        except Exception:
            # Already exists (libc or previous decl)
            existing = self.module.globals.get(funcname)
            if existing:
                if self._func_decl_returns_unsigned(node.type):
                    self._mark_unsigned_return(existing)
                self.define(funcname, (ir_type, existing))
        return None, None

    # Bare struct/union/type definition
    if isinstance(node.type, c_ast.Union):
        if node.name is None:
            self.codegen_Union(node.type)
        return None, None

    if isinstance(node.type, c_ast.Struct) and node.name is None:
        self.codegen_Struct(node.type)
        return None, None

    if isinstance(node.type, c_ast.TypeDecl):
        if isinstance(node.type.type, c_ast.IdentifierType):
            # Check if the type resolves to a struct or pointer via typedef
            resolved = self._resolve_type_str(node.type.type.names)
            if isinstance(
                resolved, (ir.LiteralStructType, ir.PointerType, ir.ArrayType)
            ):
                declare_with_init(node.type.declname, resolved)
                return None, None

        if isinstance(node.type.type, (c_ast.Struct, c_ast.Union)):
            codegen_fn = (
                self.codegen_Union
                if isinstance(node.type.type, c_ast.Union)
                else self.codegen_Struct
            )
            if node.type.type.name is None:
                # Anonymous aggregate: build its type from the inline body.
                struct_type = codegen_fn(node.type.type)
            else:
                # Named aggregate: the type must already be bound in the env.
                struct_type = self.env[node.type.type.name][0]
            declare_with_init(node.type.declname, struct_type)
            return None, None
        else:
            type_str = node.type.type.names
            is_unsigned = self._is_unsigned_type_names(type_str)
            ir_type = self._get_ir_type(type_str)
            type_str = self._resolve_type_str(type_str)
            if isinstance(type_str, ir.Type):
                type_str = "int"  # fallback for alloca name

            if node.init is not None:
                if self.in_global:
                    init_val = self._build_const_init(node.init, ir_type)
                else:
                    init_val, _ = self.codegen(node.init)
            else:
                init_val = self._zero_initializer(ir_type)

            var_addr, _ = self.create_entry_block_alloca(node.name, type_str, 1)
            if is_unsigned:
                self._mark_unsigned(var_addr)

            if self.in_global:
                var_addr.initializer = init_val
            else:
                init_val = self._implicit_convert(init_val, ir_type)
                self._safe_store(init_val, var_addr)

    elif isinstance(node.type, c_ast.ArrayDecl):
        array_list = []  # dimensions, outermost first
        array_node = node.type
        var_addr = None
        var_ir_type = None
        elem_ir_type = None
        while True:
            array_next_type = array_node.type
            if isinstance(array_next_type, c_ast.TypeDecl):
                # Innermost dimension; a missing size (``a[]``) is recorded as 0
                # and inferred from the initializer below.
                dim_val = self._eval_dim(array_node.dim) if array_node.dim else 0
                array_list.append(dim_val)
                elem_ir_type = self._resolve_ast_type(array_next_type)
                break

            elif isinstance(array_next_type, c_ast.ArrayDecl):
                array_list.append(self._eval_dim(array_node.dim))
                array_node = array_next_type
                continue
            elif isinstance(array_next_type, c_ast.PtrDecl):
                # Array of pointers: int *arr[3]
                dim = self._eval_dim(array_node.dim)
                inner = array_next_type.type
                if isinstance(inner, c_ast.TypeDecl):
                    elem_type_str = inner.type.names
                else:
                    elem_type_str = "int"
                elem_ir = ir.PointerType(get_ir_type(elem_type_str))
                elem_ir_type = elem_ir
                arr_ir = ir.ArrayType(elem_ir, dim)
                arr_ir.dim_array = [dim]
                if not self.in_global:
                    var_addr = self._alloca_in_entry(arr_ir, node.name)
                    self.define(node.name, (arr_ir, var_addr))
                else:
                    existing = self.module.globals.get(node.name)
                    if existing:
                        var_addr = existing
                    else:
                        var_addr = ir.GlobalVariable(self.module, arr_ir, node.name)
                    self.define(node.name, (arr_ir, var_addr))
                var_ir_type = arr_ir
                break
            else:
                raise Exception("TODO implement")

        if var_addr is None:
            # Build the nested array type from the innermost dimension outwards.
            var_ir_type = elem_ir_type
            for dim in reversed(array_list):
                var_ir_type = ir.ArrayType(var_ir_type, dim)
            var_ir_type.dim_array = array_list
            if not self.in_global:
                var_addr = self._alloca_in_entry(var_ir_type, node.name)
            else:
                existing = self.module.globals.get(node.name)
                if existing:
                    var_addr = existing
                else:
                    var_addr = ir.GlobalVariable(self.module, var_ir_type, node.name)
            self.define(node.name, (var_ir_type, var_addr))

        if self._has_unsigned_scalar_pointee(node.type):
            self._mark_unsigned_pointee(var_addr)

        # Infer the size of zero-length arrays from the initializer.
        if (
            isinstance(var_ir_type, ir.ArrayType)
            and var_ir_type.count == 0
            and node.init is not None
        ):
            actual_count = None
            if isinstance(node.init, c_ast.InitList):
                actual_count = len(node.init.exprs)
            elif (
                isinstance(node.init, c_ast.Constant)
                and getattr(node.init, "type", None) == "string"
            ):
                raw = node.init.value[1:-1]
                actual_count = len(self._process_escapes(raw)) + 1  # + NUL
            if actual_count is not None and elem_ir_type is not None:
                # Only the outermost dimension is inferred: keep the existing
                # element type (which carries any inner dimensions, as in
                # ``int a[][2] = {...}``) instead of collapsing to the scalar
                # element type.  For one-dimensional arrays this is the same
                # type as before.
                inner_dims = list(getattr(var_ir_type, "dim_array", [0]))[1:]
                var_ir_type = ir.ArrayType(var_ir_type.element, actual_count)
                var_ir_type.dim_array = [actual_count] + inner_dims
                if self.in_global:
                    # The zero-length global already occupies node.name, so the
                    # resized one gets a fresh name and the rename is recorded.
                    new_name = self.module.get_unique_name(node.name)
                    var_addr = ir.GlobalVariable(self.module, var_ir_type, new_name)
                    self.define(node.name, (var_ir_type, var_addr))
                    if not hasattr(self, "_array_renames"):
                        self._array_renames = {}
                    self._array_renames[f'@"{node.name}"'] = f'@"{new_name}"'
                else:
                    var_addr = self._alloca_in_entry(var_ir_type, node.name)
                    self.define(node.name, (var_ir_type, var_addr))

                if self._has_unsigned_scalar_pointee(node.type):
                    self._mark_unsigned_pointee(var_addr)

        # Handle array initialization: int a[3] = {1, 2, 3}; or
        # char s[] = "hi"; or const char *names[] = {"a", helper};
        if node.init is not None:
            if self.in_global:
                try:
                    const_init = self._build_const_init(node.init, var_ir_type)
                    # Rendering the constant surfaces malformed initializers
                    # here, so the zero fallback below can take over.
                    str(const_init)
                    var_addr.initializer = const_init
                except Exception:
                    var_addr.initializer = self._zero_initializer(var_ir_type)
            elif isinstance(node.init, c_ast.InitList):
                self._init_array(
                    var_addr,
                    node.init,
                    elem_ir_type,
                    [ir.Constant(ir.IntType(32), 0)],
                )
            elif (
                isinstance(node.init, c_ast.Constant)
                and getattr(node.init, "type", None) == "string"
                and isinstance(elem_ir_type, ir.IntType)
                and elem_ir_type.width == 8
            ):
                # Local char array from a string literal: store byte by byte,
                # truncated to the array length.
                raw = self._process_escapes(node.init.value[1:-1]) + "\00"
                idx0 = ir.Constant(ir.IntType(32), 0)
                for i, ch in enumerate(raw[: var_ir_type.count]):
                    elem_ptr = self.builder.gep(
                        var_addr,
                        [idx0, ir.Constant(ir.IntType(32), i)],
                        inbounds=True,
                    )
                    self.builder.store(int8_t(ord(ch)), elem_ptr)
        elif self.in_global:
            var_addr.initializer = self._zero_initializer(var_ir_type)

    elif isinstance(node.type, c_ast.PtrDecl):

        point_level = 1  # number of '*' levels seen so far
        sub_node = node.type
        resolved_pointee_type = None

        while True:
            sub_next_type = sub_node.type
            if isinstance(sub_next_type, c_ast.TypeDecl):
                if isinstance(sub_next_type.type, c_ast.Struct):
                    # pointer to struct: struct { int x; } *p
                    resolved_pointee_type = self.codegen_Struct(sub_next_type.type)
                    type_str = "struct"
                elif isinstance(sub_next_type.type, c_ast.Union):
                    resolved_pointee_type = self.codegen_Union(sub_next_type.type)
                    type_str = "union"
                else:
                    type_str = sub_next_type.type.names
                    resolved = self._get_ir_type(type_str)
                    if isinstance(resolved, ir.Type):
                        resolved_pointee_type = resolved
                        if isinstance(resolved, ir.LiteralStructType):
                            type_str = "struct"
                break
            elif isinstance(sub_next_type, c_ast.PtrDecl):
                point_level += 1
                sub_node = sub_next_type
                continue
            elif isinstance(sub_next_type, c_ast.FuncDecl):
                # Function pointer: int (*fp)(int, int)
                func_ir_type = self._build_func_ptr_type(sub_next_type)
                if not self.in_global:
                    var_addr = self._alloca_in_entry(func_ir_type, node.name)
                    self.define(node.name, (func_ir_type, var_addr))
                else:
                    var_addr = ir.GlobalVariable(self.module, func_ir_type, node.name)
                    var_addr.initializer = ir.Constant(func_ir_type, None)
                    self.define(node.name, (func_ir_type, var_addr))
                if self._func_decl_returns_unsigned(sub_next_type):
                    self._mark_unsigned_return(var_addr)
                if node.init is not None:
                    # NOTE(review): this path uses the builder even when
                    # in_global is set -- confirm that is intended.
                    init_val, _ = self.codegen(node.init)
                    # init_val is an ir.Function, bitcast to func ptr type
                    if init_val.type != func_ir_type:
                        init_val = self.builder.bitcast(init_val, func_ir_type)
                    self._safe_store(init_val, var_addr)
                return None, var_addr
            else:
                # Without this branch the loop never advanced for any other
                # pointee declarator (e.g. ``int (*p)[3]``) and spun forever.
                raise NotImplementedError(
                    "unsupported pointer declarator target: "
                    f"{type(sub_next_type).__name__}"
                )

        if resolved_pointee_type is not None:
            ir_type = resolved_pointee_type
            if isinstance(ir_type, ir.VoidType):
                # void* is represented as i8*
                ir_type = int8_t
            for _ in range(point_level):
                ir_type = ir.PointerType(ir_type)
            if not self.in_global:
                var_addr = self._alloca_in_entry(ir_type, node.name)
                self.define(node.name, (ir_type, var_addr))
            else:
                var_addr = ir.GlobalVariable(self.module, ir_type, node.name)
                self.define(node.name, (ir_type, var_addr))
            var_ir_type = ir_type
        else:
            var_addr, var_ir_type = self.create_entry_block_alloca(
                node.name, type_str, 1, point_level=point_level
            )

        if self._has_unsigned_scalar_pointee(node.type):
            self._mark_unsigned_pointee(var_addr)

        if node.init is not None:
            if self.in_global:
                try:
                    const_init = self._build_const_init(node.init, var_ir_type)
                    # Rendering the constant surfaces malformed initializers
                    # here, so the null fallback below can take over.
                    str(const_init)
                    var_addr.initializer = const_init
                except Exception:
                    var_addr.initializer = ir.Constant(var_ir_type, None)
            else:
                init_val, _ = self.codegen(node.init)
                if isinstance(init_val.type, ir.ArrayType) and isinstance(
                    var_ir_type, ir.PointerType
                ):
                    # Array-valued initializer (e.g. a string literal): spill
                    # it to a constant global and point at it.
                    gv = ir.GlobalVariable(
                        self.module,
                        init_val.type,
                        self.module.get_unique_name("str"),
                    )
                    gv.initializer = init_val
                    gv.global_constant = True
                    init_val = self.builder.bitcast(gv, var_ir_type)
                elif init_val.type != var_ir_type:
                    init_val = self._implicit_convert(init_val, var_ir_type)
                self._safe_store(init_val, var_addr)
    else:
        return None, None

    return None, var_addr
|
|
3377
|
+
|
|
3378
|
+
def codegen_ID(self, node):
    """Generate code for an identifier reference.

    Looks the name up, records its declared type on ``node.ir_type`` and
    returns ``(value, address)``:

    - constants (e.g. enum values) and functions: ``(binding, None)``
    - arrays: ``(pointer to first element, binding)``
    - non-pointer bindings: ``(binding, None)``
    - everything else: ``(loaded value, binding)``

    Unsigned markers recorded on the binding are copied onto the result.
    """
    declared_type, binding = self.lookup(node.name)
    node.ir_type = declared_type

    # Enum constants are stored as ir.Constant, not alloca'd
    if isinstance(binding, ir.values.Constant):
        return binding, None

    # Function reference: return function pointer directly
    if isinstance(binding, ir.Function):
        if self._is_unsigned_return_binding(binding):
            self._tag_unsigned_return(binding)
        return binding, None

    # Array types: decay to pointer to first element
    if isinstance(declared_type, ir.ArrayType):
        i32_zero = ir.Constant(ir.IntType(32), 0)
        first_elem = self.builder.gep(
            binding, [i32_zero, i32_zero], name="arraydecay"
        )
        if self._is_unsigned_pointee_binding(binding):
            self._tag_unsigned_pointee(first_elem)
        return first_elem, binding

    # Guard: only load from pointer types
    if not isinstance(binding.type, ir.PointerType):
        return binding, None

    loaded = self._safe_load(binding)
    # Propagate unsigned-ness from variable to loaded value
    propagation = (
        (self._is_unsigned_binding, self._tag_unsigned),
        (self._is_unsigned_pointee_binding, self._tag_unsigned_pointee),
        (self._is_unsigned_return_binding, self._tag_unsigned_return),
    )
    for is_marked, tag in propagation:
        if is_marked(binding):
            tag(loaded)
    return loaded, binding
|
|
3412
|
+
|
|
3413
|
+
def codegen_ArrayRef(self, node):
    """Generate code for a subscript expression ``base[index]``.

    Returns ``(value, address)``. When the selected element is itself an
    array, the element pointer is returned in both positions (array decay).
    ``node.ir_type`` is set to the element type on every path that produces
    an element. If either operand generates no value, ``(i64 0, None)`` is
    returned.
    """
    i64 = ir.IntType(64)
    base_expr = node.name
    index_expr = node.subscript

    base_val, base_addr = self.codegen(base_expr)
    if base_val is None:
        return ir.Constant(int64_t, 0), None

    # A constant array with no address (e.g. a string literal) is spilled to
    # an internal constant global so that it can be indexed.
    if base_addr is None and isinstance(base_val, ir.values.Constant):
        if isinstance(base_val.type, ir.ArrayType):
            literal = ir.GlobalVariable(
                self.module, base_val.type, self.module.get_unique_name("strlit")
            )
            literal.initializer = base_val
            literal.global_constant = True
            literal.linkage = "internal"
            base_addr = literal

    index_val, _ = self.codegen(index_expr)
    if index_val is None:
        return ir.Constant(int64_t, 0), None

    # Normalise the index to i64.
    if isinstance(index_val.type, ir.IntType):
        index_val = self._implicit_convert(index_val, i64)
    else:
        index_val = self.builder.fptoui(index_val, i64)

    def load_element(address, other_source):
        # Load the element and tag it unsigned when either the base value or
        # `other_source` is marked as having an unsigned pointee.
        loaded = self._safe_load(address)
        if self._is_unsigned_pointee(base_val) or self._is_unsigned_pointee(
            other_source
        ):
            self._tag_unsigned(loaded)
        return loaded

    base_type = self._get_expr_ir_type(base_expr) or base_val.type
    base_val_is_pointer = isinstance(base_val.type, ir.PointerType)

    # Pointer subscript: p[i] -> *(p + i)
    if isinstance(base_type, ir.PointerType) and base_val_is_pointer:
        pointee_type = base_type.pointee
        slot = self.builder.gep(base_val, [index_val], name="ptridx")
        slot_type = slot.type
        # If GEP result points to an array, return pointer (array decay)
        if isinstance(slot_type, ir.PointerType) and isinstance(
            slot_type.pointee, ir.ArrayType
        ):
            node.ir_type = slot_type.pointee
            return slot, slot
        loaded = load_element(slot, base_addr)
        node.ir_type = pointee_type
        return loaded, slot

    # Non-array type (opaque struct etc): treat as pointer subscript
    if not isinstance(base_type, ir.ArrayType):
        if base_val_is_pointer:
            raw_ptr = base_val
        else:
            raw_ptr = self.builder.bitcast(base_val, ir.PointerType(int8_t))
        slot = self.builder.gep(raw_ptr, [index_val], name="ptridx")
        loaded = load_element(slot, raw_ptr)
        if isinstance(slot.type, ir.PointerType):
            node.ir_type = slot.type.pointee
        else:
            node.ir_type = base_type
        return loaded, slot

    # Array subscript: a[i] using GEP for correct stride calculation
    element_type = base_type.element

    # If no address pointer, use the base value itself as the base
    if base_addr is None:
        base_addr = base_val

    if isinstance(base_addr.type, ir.PointerType):
        # GEP into the array through its address, with an i32 element index.
        needs_trunc = (
            isinstance(index_val.type, ir.IntType) and index_val.type.width > 32
        )
        if needs_trunc:
            gep_index = self.builder.trunc(index_val, ir.IntType(32))
        else:
            gep_index = index_val
        slot = self.builder.gep(
            base_addr, [ir.Constant(ir.IntType(32), 0), gep_index], name="arridx"
        )
    else:
        # Fallback: byte offset arithmetic (for non-pointer base)
        stride = ir.Constant(i64, self._ir_type_size(element_type))
        offset = self.builder.mul(stride, index_val, "array_add")
        if isinstance(base_addr.type, ir.IntType):
            base_int = base_addr
        else:
            base_int = self.builder.ptrtoint(base_addr, i64)
        addr = self.builder.add(offset, base_int, "addtmp")
        slot = self.builder.inttoptr(addr, ir.PointerType(element_type))

    # If element is sub-array, return pointer (array decay)
    if isinstance(element_type, ir.ArrayType):
        node.ir_type = element_type
        return slot, slot

    loaded = load_element(slot, base_addr)
    node.ir_type = element_type
    return loaded, slot
|
|
3539
|
+
|
|
3540
|
+
def codegen_Return(self, node):
    """Generate code for a ``return`` statement.

    A bare ``return`` emits ``ret void``. Otherwise the expression is
    generated, converted to the current function's return type when its type
    differs, and returned. Always yields ``(None, None)``.
    """
    if node.expr is None:
        self.builder.ret_void()
        return None, None

    value, _ = self.codegen(node.expr)
    # Implicit convert to function return type
    wanted_type = self.function.return_value.type
    if value.type != wanted_type:
        value = self._implicit_convert(value, wanted_type)
    self.builder.ret(value)
    return None, None
|
|
3552
|
+
|
|
3553
|
+
def codegen_Compound(self, node):
    """Generate code for each statement of a compound block, in order.

    Once the current basic block has a terminator (goto/break/continue/
    return), the remaining statements are skipped as unreachable, except
    labels, which are still generated. Always returns ``(None, None)``.
    """
    for stmt in node.block_items or ():
        # Re-read the builder every time: generating a statement may move it.
        builder = self.builder
        unreachable = (
            builder
            and builder.block.is_terminated
            and not isinstance(stmt, c_ast.Label)
        )
        if unreachable:
            continue
        self.codegen(stmt)
    return None, None
|
|
3565
|
+
|
|
3566
|
+
def codegen_FuncDecl(self, node):
    """Resolve the IR type of ``node.type`` and return it as ``(ir_type, None)``."""
    return self._resolve_ast_type(node.type), None
|
|
3569
|
+
|
|
3570
|
+
def codegen_FuncDef(self, node):
    """Generate code for a function definition.

    Reuses an existing declaration of the same name or creates a new
    function, spills each parameter into an entry-block alloca bound under
    its name, generates the body, and appends a default return (void, null,
    0.0 or 0) when the final block is not terminated.

    A second definition of an already-defined function, or a failure to
    create the function, is skipped silently. Always returns ``(None, None)``.
    """
    # The FuncDecl is generated directly here (not via codegen_Decl) to get
    # the return type.
    func_decl = node.decl.type
    ir_type, _ = self.codegen(func_decl)
    funcname = node.decl.name

    if funcname == "main":
        self.return_type = ir_type  # for call in C

    params = func_decl.args.params if func_decl.args else []
    is_var_arg = any(isinstance(p, c_ast.EllipsisParam) for p in params)
    resolved_types = (
        self._resolve_param_type(p)
        for p in params
        if not isinstance(p, c_ast.EllipsisParam)
    )
    # Unresolvable parameters (None) are left out of the signature.
    arg_types = [t for t in resolved_types if t is not None]

    with self.new_function():
        prior = self.module.globals.get(funcname)
        if prior and isinstance(prior, ir.Function):
            if not prior.is_declaration:
                # Already defined — skip this duplicate definition
                return None, None
            self.function = prior
        else:
            try:
                self.function = ir.Function(
                    self.module,
                    ir.FunctionType(ir_type, arg_types, var_arg=is_var_arg),
                    name=funcname,
                )
            except Exception:
                return None, None

        if self._func_decl_returns_unsigned(func_decl):
            self._mark_unsigned_return(self.function)

        self.block = self.function.append_basic_block()
        self.builder = ir.IRBuilder(self.block)

        # Bind the function in the second scope map (when there is one) and
        # in the current scope.
        binding = (ir_type, self.function)
        if len(self.env.maps) > 1:
            self.env.maps[1][funcname] = binding
        self.define(funcname, binding)

        slot = 0  # index into arg_types / self.function.args
        for param in params:
            if isinstance(param, c_ast.EllipsisParam):
                continue
            # Skip void params (f(void) means no params)
            is_void_marker = (
                isinstance(param, c_ast.Typename)
                and isinstance(getattr(param, "type", None), c_ast.TypeDecl)
                and isinstance(param.type.type, c_ast.IdentifierType)
                and param.type.type.names == ["void"]
            )
            if is_void_marker:
                continue
            if slot >= len(arg_types):
                break

            param_type = arg_types[slot]
            pname = param.name if isinstance(param.name, str) else f"arg{slot}"
            var = self._alloca_in_entry(param_type, pname)
            self.define(pname, (param_type, var))
            self._safe_store(self.function.args[slot], var)

            if isinstance(param, c_ast.Decl):
                declared = param.type
                # Track unsigned params
                if (
                    isinstance(declared, c_ast.TypeDecl)
                    and isinstance(declared.type, c_ast.IdentifierType)
                    and self._is_unsigned_type_names(declared.type.names)
                ):
                    self._mark_unsigned(var)
                if self._has_unsigned_scalar_pointee(declared):
                    self._mark_unsigned_pointee(var)
                if isinstance(
                    declared, c_ast.PtrDecl
                ) and self._func_decl_returns_unsigned(declared.type):
                    self._mark_unsigned_return(var)
            slot += 1

        self.codegen(node.body)

        # Add a default return when control can fall off the end of the body.
        if not self.builder.block.is_terminated:
            if isinstance(ir_type, ir.VoidType):
                self.builder.ret_void()
            else:
                if isinstance(ir_type, ir.PointerType):
                    fallback = None
                elif self._is_floating_ir_type(ir_type):
                    fallback = 0.0
                else:
                    fallback = 0
                self.builder.ret(ir.Constant(ir_type, fallback))

        return None, None
|
|
3665
|
+
|
|
3666
|
+
def codegen_Struct(self, node):
    """Build (or look up) the LLVM type for a C ``struct`` node.

    A named struct without a member list is treated as a reference: the
    type already registered under that name is returned, or an opaque
    ``i8`` placeholder is registered and returned when the name is unknown.

    Otherwise one IR type is produced per member declaration and wrapped
    in an ``ir.LiteralStructType``.  The member names and their AST
    declaration types are attached to the returned type as ``members``
    and ``member_decl_types``.  Named structs are registered through
    ``self.define`` as ``(struct_type, None)``.

    Returns the IR type (not a ``(value, address)`` pair).
    """

    def build_array_type(arr_node):
        # Multi-dimensional arrays: a[N][M] -> [N x [M x T]].
        # A missing dimension is represented as length 0.
        dim = self._eval_dim(arr_node.dim) if arr_node.dim else 0
        if isinstance(arr_node.type, c_ast.ArrayDecl):
            inner = build_array_type(arr_node.type)
        else:
            inner = self._resolve_ast_type(arr_node.type)
        return ir.ArrayType(inner, dim)

    # If this is a reference to a named struct without decls, look it up
    if node.name and (node.decls is None or len(node.decls) == 0):
        if node.name in self.env:
            return self.env[node.name][0]
        # Opaque/forward-declared struct: treat as i8 (byte) for pointer use
        opaque = ir.IntType(8)
        self.define(node.name, (opaque, None))
        return opaque

    member_types = []
    member_names = []
    member_decl_types = []
    for decl in node.decls:
        decl_type = decl.type
        if isinstance(decl_type, c_ast.TypeDecl):
            inner = decl_type.type
            if isinstance(inner, c_ast.Struct):
                member_ir = self.codegen_Struct(inner)
            elif isinstance(inner, c_ast.Union):
                member_ir = self.codegen_Union(inner)
            elif isinstance(inner, c_ast.Enum):
                # Enum nodes have no ``names`` attribute; handle them the
                # same way codegen_Typedef does: register the enumerators
                # and store the member as a 64-bit integer.
                self.codegen_Enum(inner)
                member_ir = int64_t
            else:
                member_ir = self._get_ir_type(inner.names)
        elif isinstance(decl_type, c_ast.ArrayDecl):
            member_ir = build_array_type(decl_type)
        elif isinstance(decl_type, c_ast.PtrDecl):
            member_ir = self._resolve_ast_type(decl_type)
        else:
            member_ir = int64_t  # fallback
        member_types.append(member_ir)
        member_names.append(decl.name)
        member_decl_types.append(decl_type)

    # Create the struct type and attach the metadata that member access
    # relies on (field lookup by name, semantic type of each field).
    struct_type = ir.LiteralStructType(member_types)
    struct_type.members = member_names
    struct_type.member_decl_types = member_decl_types

    # Register named structs for later reuse
    if node.name:
        self.define(node.name, (struct_type, None))

    return struct_type
|
|
3722
|
+
|
|
3723
|
+
def codegen_Union(self, node):
    """Model union as a struct with alignment-preserving storage.

    A named union without a member list is treated as a reference: the
    registered type is returned, or (mirroring ``codegen_Struct``) an
    opaque ``i8`` placeholder is registered and returned when the name is
    not known yet.

    Otherwise every member is resolved to an IR type and the union is
    laid out as ``{align_type, [pad x i8]}`` where ``align_type`` is an
    integer chosen from the largest member alignment and the byte array
    pads the storage up to the largest member size.  The returned type
    carries ``members``, ``member_types``, ``member_decl_types`` and
    ``is_union = True``.  Named unions are registered through
    ``self.define`` as ``(union_type, None)``.

    Returns the IR type (not a ``(value, address)`` pair).
    """
    if node.name and (node.decls is None or len(node.decls) == 0):
        if node.name in self.env:
            return self.env[node.name][0]
        # Forward-declared union: same opaque byte placeholder that
        # codegen_Struct uses, instead of failing with KeyError.
        opaque = ir.IntType(8)
        self.define(node.name, (opaque, None))
        return opaque

    member_types = {}
    member_decl_types = {}
    max_size = 0
    max_align = 1
    for decl in node.decls:
        if isinstance(decl.type, c_ast.ArrayDecl):
            ir_t = self._build_array_ir_type(decl.type)
        else:
            ir_t = self._resolve_ast_type(decl.type)
        member_types[decl.name] = ir_t
        member_decl_types[decl.name] = decl.type
        max_size = max(max_size, self._ir_type_size(ir_t))
        max_align = max(max_align, self._ir_type_align(ir_t))

    # Use a struct {align_type, [padding x i8]} to preserve alignment
    # Pick an alignment element: i64 for 8, i32 for 4, i16 for 2, i8 for 1
    align_map = {8: int64_t, 4: int32_t, 2: int16_t, 1: int8_t}
    align_type = align_map.get(max_align, int64_t)
    # Size of the element actually chosen.  When max_align is not in the
    # map the i64 fallback is 8 bytes, so subtracting max_align itself
    # would make the storage smaller than the largest member.
    align_size = align_type.width // 8
    pad_size = max_size - align_size
    if pad_size > 0:
        union_type = ir.LiteralStructType(
            [align_type, ir.ArrayType(int8_t, pad_size)]
        )
    else:
        union_type = ir.LiteralStructType([align_type])
    union_type.members = list(member_types.keys())
    union_type.member_types = member_types
    union_type.member_decl_types = member_decl_types
    union_type.is_union = True

    if node.name:
        self.define(node.name, (union_type, None))

    return union_type
|
|
3767
|
+
|
|
3768
|
+
def codegen_StructRef(self, node):
    """Generate code for a member access, ``base.field`` or ``base->field``.

    The method works in two phases:

    1. Resolve the base expression to ``struct_addr`` (an address of the
       aggregate) and ``struct_type`` (the type that address points to).
       Three base shapes are handled: a nested member access, a plain
       identifier looked up in ``self.env``, and any other expression.
    2. Compute the field address and value from ``struct_addr``:
       unions use a bitcast of the base address, types without a
       ``members`` attribute are read as a 64-bit integer at the base
       address, and regular structs use a GEP on the field index.

    In every path the field's IR type is recorded on ``node`` through
    ``self._set_expr_ir_type``.

    Returns:
        ``(value, address)``.  For array fields ``value`` is a pointer to
        the first element; otherwise it is the result of
        ``self._safe_load`` on the field address.

    Raises:
        Exception: the identifier base's env entry is not pointer-typed.
        RuntimeError: the field name is not in ``struct_type.members``.
    """

    if isinstance(node.name, c_ast.StructRef):
        # Base is itself a member access: evaluate it first.
        inner_val, inner_addr = self.codegen_StructRef(node.name)
        if node.type == "->":
            # Chain: (a->b)->c — need to use the VALUE of a->b as pointer base
            # inner_val is the loaded field value (a pointer to next struct)
            base = inner_val
            semantic_base_type = self._get_expr_ir_type(node.name)
            if (
                isinstance(semantic_base_type, ir.PointerType)
                and base.type != semantic_base_type
            ):
                # Best effort: retype the pointer to the recorded semantic
                # type; on failure the original value is kept.
                try:
                    base = self.builder.bitcast(base, semantic_base_type)
                except Exception:
                    pass
            struct_type = (
                base.type.pointee if hasattr(base.type, "pointee") else int8_t
            )
            struct_addr = base
        else:
            # Chain: (a->b).c — use the ADDRESS of a->b as struct base
            semantic_base_type = self._get_expr_ir_type(node.name)
            if semantic_base_type is not None:
                expected_addr_type = ir.PointerType(semantic_base_type)
                if inner_addr.type != expected_addr_type:
                    # Best effort retype, same as above.
                    try:
                        inner_addr = self.builder.bitcast(
                            inner_addr, expected_addr_type
                        )
                    except Exception:
                        pass
            struct_type = (
                inner_addr.type.pointee
                if hasattr(inner_addr.type, "pointee")
                else int8_t
            )
            struct_addr = inner_addr
    elif isinstance(node.name, c_ast.ID):
        # Base is a named variable: element [1] of its env entry is used
        # as the variable's address.
        struct_instance_addr = self.env[node.name.name][1]
        if not isinstance(struct_instance_addr.type, ir.PointerType):
            raise Exception("Invalid struct reference")

        if node.type == "->":
            # The variable holds a pointer: load it and use it as the base.
            ptr_val = self._safe_load(struct_instance_addr)
            struct_type = (
                ptr_val.type.pointee if hasattr(ptr_val.type, "pointee") else int8_t
            )
            struct_addr = ptr_val
        else:
            # The variable is the aggregate itself: its address is the base.
            struct_type = (
                struct_instance_addr.type.pointee
                if hasattr(struct_instance_addr.type, "pointee")
                else int8_t
            )
            struct_addr = struct_instance_addr
    else:
        # Cast/UnaryOp/other expression as struct base: ((Type*)ptr)->field
        val, addr = self.codegen(node.name)
        semantic_base_type = self._get_expr_ir_type(node.name)
        if node.type == "->":
            # The expression's value is the pointer to the aggregate.
            struct_addr = val
            if (
                isinstance(semantic_base_type, ir.PointerType)
                and struct_addr.type != semantic_base_type
            ):
                try:
                    struct_addr = self.builder.bitcast(
                        struct_addr, semantic_base_type
                    )
                except Exception:
                    pass
            struct_type = (
                struct_addr.type.pointee
                if hasattr(struct_addr.type, "pointee")
                else int8_t
            )
        else:
            # Prefer the expression's address; fall back to its value when
            # no address was produced.
            struct_addr = addr if addr else val
            if addr is not None and semantic_base_type is not None:
                expected_addr_type = ir.PointerType(semantic_base_type)
                if struct_addr.type != expected_addr_type:
                    try:
                        struct_addr = self.builder.bitcast(
                            struct_addr, expected_addr_type
                        )
                    except Exception:
                        pass
                struct_type = (
                    struct_addr.type.pointee
                    if hasattr(struct_addr.type, "pointee")
                    else int8_t
                )
            else:
                # No usable address or no recorded type: take the recorded
                # type if any, else the value's own type when it carries
                # member metadata, else a plain byte.
                struct_type = (
                    semantic_base_type
                    if semantic_base_type is not None
                    else (val.type if hasattr(val.type, "members") else int8_t)
                )

    # Union access: all fields share offset 0, use bitcast
    if getattr(struct_type, "is_union", False):
        member_ir_type = struct_type.member_types[node.field.name]
        semantic_field_type = member_ir_type
        member_decl_types = getattr(struct_type, "member_decl_types", None)
        decl_type = None
        if member_decl_types and node.field.name in member_decl_types:
            decl_type = member_decl_types[node.field.name]
            try:
                resolved = self._resolve_ast_type(decl_type)
                if isinstance(member_ir_type, ir.ArrayType) and isinstance(
                    resolved, ir.PointerType
                ):
                    # Stored type is an array but the AST resolved to a
                    # pointer: keep the array type.
                    pass
                elif isinstance(
                    resolved, (ir.ArrayType, ir.LiteralStructType, ir.PointerType)
                ):
                    semantic_field_type = resolved
            except Exception:
                pass
        ptr = self.builder.bitcast(struct_addr, ir.PointerType(semantic_field_type))
        if isinstance(semantic_field_type, ir.ArrayType):
            # Array member: the value is a pointer to element 0.
            elem_ptr = self.builder.gep(
                ptr,
                [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
                name="unionarraydecay",
            )
            if decl_type is not None:
                self._tag_value_from_decl_type(elem_ptr, decl_type)
            self._set_expr_ir_type(node, semantic_field_type)
            return elem_ptr, ptr
        val = self._safe_load(ptr)
        if decl_type is not None:
            self._tag_value_from_decl_type(val, decl_type)
        self._set_expr_ir_type(node, semantic_field_type)
        return val, ptr

    # Opaque struct (no members) — treat as byte-offset access
    if not hasattr(struct_type, "members"):
        # The field name is not consulted here: a 64-bit integer is loaded
        # from the base address and the i8* view is returned as the address.
        ptr = self.builder.bitcast(struct_addr, voidptr_t)
        val = self._safe_load(self.builder.bitcast(ptr, ir.PointerType(int64_t)))
        self._set_expr_ir_type(node, int64_t)
        return val, ptr

    # Locate the field by name; its position is the GEP index.
    field_index = None
    for i, field in enumerate(struct_type.members):
        if field == node.field.name:
            field_index = i
            break

    if field_index is None:
        raise RuntimeError(f"Field '{node.field.name}' not found in struct")

    field_addr = self.builder.gep(
        struct_addr,
        [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), field_index)],
        inbounds=True,
    )

    field_type = struct_type.elements[field_index]
    semantic_field_type = field_type
    member_decl_types = getattr(struct_type, "member_decl_types", None)
    decl_type = None
    if member_decl_types and field_index < len(member_decl_types):
        decl_type = member_decl_types[field_index]
        try:
            resolved = self._resolve_ast_type(decl_type)
            # Only use semantic type if it's more specific (pointer/struct),
            # not if it decayed an array to pointer
            if isinstance(field_type, ir.ArrayType) and isinstance(
                resolved, ir.PointerType
            ):
                pass  # keep original array type
            elif isinstance(resolved, (ir.LiteralStructType, ir.PointerType)):
                semantic_field_type = resolved
        except Exception:
            pass

    # Retype the field address to the semantic field type when they differ;
    # if the bitcast fails the untyped GEP result is used as is.
    typed_field_addr = field_addr
    target_ptr_type = ir.PointerType(semantic_field_type)
    if field_addr.type != target_ptr_type:
        try:
            typed_field_addr = self.builder.bitcast(field_addr, target_ptr_type)
        except Exception:
            typed_field_addr = field_addr

    if isinstance(semantic_field_type, ir.ArrayType):
        # Array field: decay to pointer to first element
        elem_ptr = self.builder.gep(
            typed_field_addr,
            [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
            name="arraydecay",
        )
        if decl_type is not None:
            self._tag_value_from_decl_type(elem_ptr, decl_type)
        self._set_expr_ir_type(node, semantic_field_type)
        return elem_ptr, typed_field_addr

    field_value = self._safe_load(typed_field_addr)
    if decl_type is not None:
        self._tag_value_from_decl_type(field_value, decl_type)
    self._set_expr_ir_type(node, semantic_field_type)
    return field_value, typed_field_addr
|
|
3972
|
+
|
|
3973
|
+
def codegen_EmptyStatement(self, node):
    """Emit nothing for an empty statement; there is no value and no address."""
    no_value, no_address = None, None
    return no_value, no_address
|
|
3975
|
+
|
|
3976
|
+
def codegen_ExprList(self, node):
    """Generate code for a comma expression.

    Every sub-expression is generated in order; the ``(value, address)``
    pair of the last one is returned.  When the IR type looked up for the
    last sub-expression is not ``None`` it is recorded on ``node`` as well.
    An empty list yields ``(None, None)``.
    """
    value = address = None
    final_expr = None
    for final_expr in node.exprs:
        value, address = self.codegen(final_expr)

    if final_expr is None:
        # Nothing was evaluated, so there is no type to propagate.
        return value, address

    fallback_type = getattr(value, "type", None)
    inferred_type = self._get_expr_ir_type(final_expr, fallback_type)
    if inferred_type is not None:
        self._set_expr_ir_type(node, inferred_type)
    return value, address
|
|
3991
|
+
|
|
3992
|
+
def codegen_Label(self, node):
    """Generate code for a labelled statement.

    The label's basic block is taken from ``self._labels`` (it may have
    been created by an earlier ``goto``) or appended to the current
    function and registered.  The current block branches into it unless
    it is already terminated, the builder moves to the end of the label
    block, and the labelled statement, if any, is generated there.
    """
    key = f"label_{node.name}"
    try:
        target_block = self._labels[key]
    except KeyError:
        # First time this label is seen: create and remember its block.
        target_block = self.builder.function.append_basic_block(key)
        self._labels[key] = target_block

    builder = self.builder
    if not builder.block.is_terminated:
        builder.branch(target_block)
    builder.position_at_end(target_block)

    if node.stmt:
        self.codegen(node.stmt)
    return None, None
|
|
4006
|
+
|
|
4007
|
+
def codegen_Goto(self, node):
    """Generate code for ``goto label``.

    Branches from the current block to the label's basic block.  The
    block is taken from ``self._labels`` or, for a forward reference,
    appended to the current function and registered so that the later
    label definition reuses it.

    When the current block is already terminated the ``goto`` is
    unreachable; nothing is emitted, because adding a second terminator
    to a block is not allowed (``codegen_Label`` applies the same guard).
    """
    builder = self.builder
    if builder.block.is_terminated:
        return None, None

    label_name = f"label_{node.name}"
    if label_name in self._labels:
        target_bb = self._labels[label_name]
    else:
        # Forward reference: create the block now
        target_bb = builder.function.append_basic_block(label_name)
        self._labels[label_name] = target_bb
    builder.branch(target_bb)
    return None, None
|
|
4017
|
+
|
|
4018
|
+
def codegen_Enum(self, node):
    """Register every enumerator of an ``enum`` as a 64-bit constant.

    Enumerators count up from 0.  An enumerator with an explicit value
    expression restarts the count at that expression's compile-time
    value.  An enum without an enumerator list defines nothing.
    """
    if not node.values:
        return None, None

    next_value = 0
    for item in node.values.enumerators:
        if item.value:
            # Explicit initializer: evaluate it and continue from there.
            next_value = self._eval_const_expr(item.value)
        constant = ir.Constant(int64_t, next_value)
        self.define(item.name, (int64_t, constant))
        next_value += 1
    return None, None
|
|
4030
|
+
|
|
4031
|
+
def _eval_const_expr(self, node):
    """Evaluate a constant expression at compile time (for enum values).

    Supported nodes:

    * ``Constant``: string constants evaluate to 0; character, hex,
      octal and decimal literals are parsed after stripping ``u``/``l``
      suffixes; anything that is not an integer literal evaluates to 0.
    * ``UnaryOp``: ``sizeof``, ``&`` applied to a member access (offsetof
      pattern), and ``-``, ``+``, ``~``, ``!``.
    * ``BinaryOp``: arithmetic, shifts, bitwise, comparison and logical
      operators.  ``/`` and ``%`` follow C semantics (truncation toward
      zero; the remainder takes the sign of the dividend).
    * ``TernaryOp``, ``ID`` (integer constants found in ``self.env``,
      otherwise 0), ``Cast`` (the cast itself is ignored) and
      ``Typename`` (0).

    Returns:
        The integer value of the expression.

    Raises:
        CodegenError: the node kind (or unary operator) is not supported.
        ZeroDivisionError: division or remainder by a zero constant.
        KeyError: a binary operator that is not in the table below.
    """
    if isinstance(node, c_ast.Constant):
        if node.type == "string":
            return 0  # string constants can't be int-evaluated
        v = node.value.rstrip("uUlL")
        if v.startswith("'"):
            return self._char_constant_value(v)
        if v.startswith("0x") or v.startswith("0X"):
            return int(v, 16)
        elif v.startswith("0") and len(v) > 1 and v[1:].isdigit():
            return int(v, 8)
        try:
            return int(v)
        except ValueError:
            return 0
    elif isinstance(node, c_ast.UnaryOp):
        if node.op == "sizeof":
            if isinstance(node.expr, c_ast.Typename):
                ir_t = self._resolve_ast_type(node.expr.type)
                return self._ir_type_size(ir_t)
            if isinstance(node.expr, c_ast.Constant) and node.expr.type == "string":
                # sizeof "literal": length of the encoded bytes incl. NUL.
                raw = node.expr.value[1:-1]
                processed = self._process_escapes(raw)
                return len(self._string_bytes(processed + "\00"))
            # The operand's value is not needed, but evaluating it keeps
            # the existing behaviour of rejecting operands this evaluator
            # cannot handle.
            self._eval_const_expr(node.expr)
            return 8  # default sizeof for expressions
        if node.op == "&" and isinstance(node.expr, c_ast.StructRef):
            offset, _ = self._eval_offsetof_structref(node.expr)
            return offset
        val = self._eval_const_expr(node.expr)
        if node.op == "-":
            return -val
        elif node.op == "+":
            return val
        elif node.op == "~":
            return ~val
        elif node.op == "!":
            return 0 if val else 1
        # Any other unary operator falls through to the error below.
    elif isinstance(node, c_ast.BinaryOp):
        l = self._eval_const_expr(node.left)
        r = self._eval_const_expr(node.right)

        def c_div(a, b):
            # C integer division truncates toward zero, whereas Python's
            # ``//`` floors: -7 / 2 is -3 in C but -7 // 2 is -4.
            quotient = abs(a) // abs(b)
            return quotient if (a < 0) == (b < 0) else -quotient

        def c_rem(a, b):
            # C guarantees (a / b) * b + a % b == a, so the remainder has
            # the sign of the dividend: -7 % 2 is -1 in C.
            return a - b * c_div(a, b)

        ops = {
            "+": lambda a, b: a + b,
            "-": lambda a, b: a - b,
            "*": lambda a, b: a * b,
            "/": c_div,
            "%": c_rem,
            "<<": lambda a, b: a << b,
            ">>": lambda a, b: a >> b,
            "&": lambda a, b: a & b,
            "|": lambda a, b: a | b,
            "^": lambda a, b: a ^ b,
            "==": lambda a, b: int(a == b),
            "!=": lambda a, b: int(a != b),
            "<": lambda a, b: int(a < b),
            "<=": lambda a, b: int(a <= b),
            ">": lambda a, b: int(a > b),
            ">=": lambda a, b: int(a >= b),
            "&&": lambda a, b: int(bool(a) and bool(b)),
            "||": lambda a, b: int(bool(a) or bool(b)),
        }
        return ops[node.op](l, r)
    elif isinstance(node, c_ast.TernaryOp):
        cond = self._eval_const_expr(node.cond)
        if cond:
            return self._eval_const_expr(node.iftrue)
        return self._eval_const_expr(node.iffalse)
    elif isinstance(node, c_ast.ID):
        # Try to look up as enum constant or defined value
        if node.name in self.env:
            _, val = self.env[node.name]
            if isinstance(val, ir.values.Constant) and isinstance(
                val.type, ir.IntType
            ):
                return int(val.constant)
        return 0  # unknown identifier defaults to 0
    elif isinstance(node, c_ast.Cast):
        return self._eval_const_expr(node.expr)
    elif isinstance(node, c_ast.Typename):
        return 0
    raise CodegenError(f"Not a constant expression: {type(node).__name__}")
|
|
4113
|
+
|
|
4114
|
+
def codegen_InitList(self, node):
    """Generate code for an initializer list used as an expression.

    Only the first element is generated and its result returned; an
    empty list yields a 64-bit zero constant with no address.
    """
    initializers = node.exprs
    if not initializers:
        return ir.Constant(int64_t, 0), None
    return self.codegen(initializers[0])
|
|
4119
|
+
|
|
4120
|
+
def codegen_DeclList(self, node):
    """Generate code for each declaration of the list, in source order."""
    emit = self.codegen
    for declaration in node.decls:
        emit(declaration)
    return None, None
|
|
4124
|
+
|
|
4125
|
+
def codegen_Typedef(self, node):
    """Register a ``typedef`` under the key ``__typedef_<name>``.

    What is stored depends on the aliased type:

    * plain identifier type: the list of type-name tokens;
    * named struct/union: the string ``__struct_<tag>`` (the aggregate is
      generated first so the tag is registered);
    * anonymous struct/union: the generated IR type;
    * enum: ``int64_t`` (the enumerators are registered as a side effect);
    * array: the IR array type;
    * pointer: a function-pointer type, a pointer to the resolved base
      type (``void *`` becomes ``voidptr_t``), or, for any other pointee
      such as another pointer or an array, whatever
      ``self._resolve_ast_type`` yields for the whole declarator.

    Other declarator kinds are ignored.  Always returns ``(None, None)``.
    """
    # typedef int myint; / typedef int* intptr; / typedef struct{...} Name;
    alias = f"__typedef_{node.name}"
    if isinstance(node.type, c_ast.TypeDecl):
        aliased = node.type.type
        if isinstance(aliased, c_ast.IdentifierType):
            self.define(alias, aliased.names)
        elif isinstance(aliased, c_ast.Struct):
            if aliased.name:
                # Named struct: store reference to struct name for lazy resolution
                self.codegen_Struct(aliased)  # ensure it's registered
                self.define(alias, f"__struct_{aliased.name}")
            else:
                self.define(alias, self.codegen_Struct(aliased))
        elif isinstance(aliased, c_ast.Union):
            if aliased.name:
                self.codegen_Union(aliased)
                self.define(alias, f"__struct_{aliased.name}")
            else:
                self.define(alias, self.codegen_Union(aliased))
        elif isinstance(aliased, c_ast.Enum):
            # typedef enum { A, B, C } MyEnum;
            self.codegen_Enum(aliased)
            self.define(alias, int64_t)
    elif isinstance(node.type, c_ast.ArrayDecl):
        self.define(alias, self._build_array_ir_type(node.type))
    elif isinstance(node.type, c_ast.PtrDecl):
        inner = node.type.type
        if isinstance(inner, c_ast.FuncDecl):
            self.define(alias, self._build_func_ptr_type(inner))
        elif isinstance(inner, c_ast.TypeDecl):
            if isinstance(inner.type, c_ast.IdentifierType):
                base_ir = self._get_ir_type(inner.type.names)
            elif isinstance(inner.type, c_ast.Struct):
                base_ir = self.codegen_Struct(inner.type)
            elif isinstance(inner.type, c_ast.Union):
                base_ir = self.codegen_Union(inner.type)
            else:
                # NOTE(review): this branch calls the bare name
                # ``get_ir_type`` while the branch above uses
                # ``self._get_ir_type`` — confirm this is intended.
                base_ir = get_ir_type(
                    inner.type.names if hasattr(inner.type, "names") else ["int"]
                )
            if isinstance(base_ir, ir.VoidType):
                # A pointer to void is represented as i8*.
                ptr_type = voidptr_t
            else:
                ptr_type = ir.PointerType(base_ir)
            self.define(alias, ptr_type)
        else:
            # typedef T **name; / pointer-to-array: previously no alias was
            # registered at all.  Resolve the whole declarator the same way
            # pointer-typed struct members are resolved.
            self.define(alias, self._resolve_ast_type(node.type))
    return None, None
|