python-cc 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4177 @@
1
+ import llvmlite.ir as ir
2
+ import re
3
+ import struct
4
+ from collections import ChainMap
5
+ from contextlib import contextmanager
6
+ from llvmlite.ir import IRBuilder
7
+ from ..ast import c_ast as c_ast
8
+
9
# Shared LLVM IR type objects and constants used throughout the module.
bool_t = ir.IntType(1)
int8_t = ir.IntType(8)
int32_t = ir.IntType(32)
int64_t = ir.IntType(64)
voidptr_t = int8_t.as_pointer()
int64ptr_t = int64_t.as_pointer()
true_bit = bool_t(1)
false_bit = bool_t(0)
true_byte = int8_t(1)
false_byte = int8_t(0)
cstring = voidptr_t
struct_types = {}

# Libc function signature registry: name -> (return_type, [param_types], var_arg)
# Covers: stdio.h, stdlib.h, string.h, ctype.h, math.h, unistd.h, time.h
_VOID = ir.VoidType()
_float = ir.FloatType()
_double = ir.DoubleType()
_FILE_ptr = voidptr_t  # FILE* modeled as opaque void*
_size_t = int64_t
_time_t = int64_t

# NOTE: every key appears exactly once.  A Python dict literal silently keeps
# only the last value of a repeated key, so duplicates hide inconsistencies.
LIBC_FUNCTIONS = {
    # === stdio.h ===
    "printf": (int32_t, [cstring], True),
    "fprintf": (int32_t, [_FILE_ptr, cstring], True),
    "sprintf": (int32_t, [cstring, cstring], True),
    "snprintf": (int32_t, [cstring, _size_t, cstring], True),
    "vprintf": (int32_t, [cstring, voidptr_t], False),
    "vfprintf": (int32_t, [_FILE_ptr, cstring, voidptr_t], False),
    "vsprintf": (int32_t, [cstring, cstring, voidptr_t], False),
    "vsnprintf": (int32_t, [cstring, _size_t, cstring, voidptr_t], False),
    "scanf": (int32_t, [cstring], True),
    "fscanf": (int32_t, [_FILE_ptr, cstring], True),
    "sscanf": (int32_t, [cstring, cstring], True),
    "fopen": (_FILE_ptr, [cstring, cstring], False),
    "fclose": (int32_t, [_FILE_ptr], False),
    "fread": (_size_t, [voidptr_t, _size_t, _size_t, _FILE_ptr], False),
    "fwrite": (_size_t, [voidptr_t, _size_t, _size_t, _FILE_ptr], False),
    "fseek": (int32_t, [_FILE_ptr, int64_t, int32_t], False),
    "ftell": (int64_t, [_FILE_ptr], False),
    "rewind": (_VOID, [_FILE_ptr], False),
    "feof": (int32_t, [_FILE_ptr], False),
    "ferror": (int32_t, [_FILE_ptr], False),
    "fflush": (int32_t, [_FILE_ptr], False),
    "fgets": (cstring, [cstring, int32_t, _FILE_ptr], False),
    "fputs": (int32_t, [cstring, _FILE_ptr], False),
    "fgetc": (int32_t, [_FILE_ptr], False),
    "fputc": (int32_t, [int32_t, _FILE_ptr], False),
    "getc": (int32_t, [_FILE_ptr], False),
    "getc_unlocked": (int32_t, [_FILE_ptr], False),
    "putc": (int32_t, [int32_t, _FILE_ptr], False),
    "getchar": (int32_t, [], False),
    "putchar": (int32_t, [int32_t], False),
    "ungetc": (int32_t, [int32_t, _FILE_ptr], False),
    "flockfile": (_VOID, [_FILE_ptr], False),
    "funlockfile": (_VOID, [_FILE_ptr], False),
    "puts": (int32_t, [cstring], False),
    "perror": (_VOID, [cstring], False),
    "remove": (int32_t, [cstring], False),
    "rename": (int32_t, [cstring, cstring], False),
    "fseeko": (int32_t, [_FILE_ptr, int64_t, int32_t], False),
    "ftello": (int64_t, [_FILE_ptr], False),
    # === stdlib.h ===
    "malloc": (voidptr_t, [_size_t], False),
    "calloc": (voidptr_t, [_size_t, _size_t], False),
    "realloc": (voidptr_t, [voidptr_t, _size_t], False),
    "free": (_VOID, [voidptr_t], False),
    "exit": (_VOID, [int32_t], False),
    "_Exit": (_VOID, [int32_t], False),
    "abort": (_VOID, [], False),
    "atexit": (int32_t, [voidptr_t], False),
    "abs": (int32_t, [int32_t], False),
    "labs": (int64_t, [int64_t], False),
    "atoi": (int32_t, [cstring], False),
    "atol": (int64_t, [cstring], False),
    "atof": (_double, [cstring], False),
    "strtol": (int64_t, [cstring, voidptr_t, int32_t], False),
    "strtoul": (int64_t, [cstring, voidptr_t, int32_t], False),
    "strtod": (_double, [cstring, voidptr_t], False),
    # strtof returns float in C; declaring it as double would misread the
    # value at the call site.
    "strtof": (_float, [cstring, voidptr_t], False),
    "rand": (int32_t, [], False),
    "srand": (_VOID, [int32_t], False),
    "qsort": (_VOID, [voidptr_t, _size_t, _size_t, voidptr_t], False),
    "bsearch": (voidptr_t, [voidptr_t, voidptr_t, _size_t, _size_t, voidptr_t], False),
    "getenv": (cstring, [cstring], False),
    "system": (int32_t, [cstring], False),
    # === string.h ===
    "strlen": (_size_t, [cstring], False),
    "strcmp": (int32_t, [cstring, cstring], False),
    "strncmp": (int32_t, [cstring, cstring, _size_t], False),
    "strcpy": (cstring, [cstring, cstring], False),
    "strncpy": (cstring, [cstring, cstring, _size_t], False),
    "strcat": (cstring, [cstring, cstring], False),
    "strncat": (cstring, [cstring, cstring, _size_t], False),
    "strchr": (cstring, [cstring, int32_t], False),
    "strrchr": (cstring, [cstring, int32_t], False),
    "strstr": (cstring, [cstring, cstring], False),
    "strpbrk": (cstring, [cstring, cstring], False),
    "strspn": (_size_t, [cstring, cstring], False),
    "strcspn": (_size_t, [cstring, cstring], False),
    "strtok": (cstring, [cstring, cstring], False),
    "memset": (voidptr_t, [voidptr_t, int32_t, _size_t], False),
    "memcpy": (voidptr_t, [voidptr_t, voidptr_t, _size_t], False),
    "memmove": (voidptr_t, [voidptr_t, voidptr_t, _size_t], False),
    "memcmp": (int32_t, [voidptr_t, voidptr_t, _size_t], False),
    "memchr": (voidptr_t, [voidptr_t, int32_t, _size_t], False),
    "strerror": (cstring, [int32_t], False),
    # === ctype.h ===
    "isalpha": (int32_t, [int32_t], False),
    "isdigit": (int32_t, [int32_t], False),
    "isalnum": (int32_t, [int32_t], False),
    "isspace": (int32_t, [int32_t], False),
    "isupper": (int32_t, [int32_t], False),
    "islower": (int32_t, [int32_t], False),
    "isprint": (int32_t, [int32_t], False),
    "ispunct": (int32_t, [int32_t], False),
    "iscntrl": (int32_t, [int32_t], False),
    "isxdigit": (int32_t, [int32_t], False),
    "isgraph": (int32_t, [int32_t], False),
    "toupper": (int32_t, [int32_t], False),
    "tolower": (int32_t, [int32_t], False),
    # === math.h ===
    "sin": (_double, [_double], False),
    "cos": (_double, [_double], False),
    "tan": (_double, [_double], False),
    "asin": (_double, [_double], False),
    "acos": (_double, [_double], False),
    "atan": (_double, [_double], False),
    "atan2": (_double, [_double, _double], False),
    "sinh": (_double, [_double], False),
    "cosh": (_double, [_double], False),
    "tanh": (_double, [_double], False),
    "exp": (_double, [_double], False),
    "exp2": (_double, [_double], False),
    "log": (_double, [_double], False),
    "log2": (_double, [_double], False),
    "log10": (_double, [_double], False),
    "pow": (_double, [_double, _double], False),
    "sqrt": (_double, [_double], False),
    "cbrt": (_double, [_double], False),
    "hypot": (_double, [_double, _double], False),
    "ceil": (_double, [_double], False),
    "floor": (_double, [_double], False),
    "round": (_double, [_double], False),
    "trunc": (_double, [_double], False),
    "fmod": (_double, [_double, _double], False),
    "fabs": (_double, [_double], False),
    "ldexp": (_double, [_double, int32_t], False),
    # === time.h ===
    "time": (_time_t, [voidptr_t], False),
    "clock": (int64_t, [], False),
    "difftime": (_double, [_time_t, _time_t], False),
    "gmtime_r": (voidptr_t, [voidptr_t, voidptr_t], False),
    "localtime_r": (voidptr_t, [voidptr_t, voidptr_t], False),
    # === unistd.h (POSIX) ===
    "sleep": (int32_t, [int32_t], False),
    "usleep": (int32_t, [int32_t], False),
    "read": (int64_t, [int32_t, voidptr_t, _size_t], False),
    "write": (int64_t, [int32_t, voidptr_t, _size_t], False),
    "open": (int32_t, [cstring, int32_t], True),
    "close": (int32_t, [int32_t], False),
    "getpid": (int32_t, [], False),
    "getppid": (int32_t, [], False),
    "isatty": (int32_t, [int32_t], False),
    "mkstemp": (int32_t, [cstring], False),
    # === setjmp.h ===
    "setjmp": (int32_t, [voidptr_t], False),
    "longjmp": (_VOID, [voidptr_t, int32_t], False),
    "_setjmp": (int32_t, [voidptr_t], False),
    "_longjmp": (_VOID, [voidptr_t, int32_t], False),
    # === signal.h ===
    "signal": (voidptr_t, [int32_t, voidptr_t], False),
    "sigaction": (int32_t, [int32_t, voidptr_t, voidptr_t], False),
    "sigemptyset": (int32_t, [voidptr_t], False),
    "raise": (int32_t, [int32_t], False),
    # === errno ===
    "__errno_location": (int32_t.as_pointer(), [], False),
    # === locale.h ===
    "setlocale": (cstring, [int32_t, cstring], False),
    "localeconv": (voidptr_t, [], False),
    # === misc ===
    "tmpnam": (cstring, [cstring], False),
    "tmpfile": (voidptr_t, [], False),
    "gmtime": (voidptr_t, [voidptr_t], False),
    "localtime": (voidptr_t, [voidptr_t], False),
    "mktime": (_time_t, [voidptr_t], False),
    "strftime": (_size_t, [cstring, _size_t, cstring, voidptr_t], False),
    "ctime": (cstring, [voidptr_t], False),
    "asctime": (cstring, [voidptr_t], False),
    "frexp": (_double, [_double, int32_t.as_pointer()], False),
    # GCC/Clang builtins (no-op stubs)
    "__builtin_va_start": (_VOID, [voidptr_t], False),
    "__builtin_va_end": (_VOID, [voidptr_t], False),
    "__builtin_va_copy": (_VOID, [voidptr_t, voidptr_t], False),
    "__builtin_expect": (int64_t, [int64_t, int64_t], False),
    "__builtin_unreachable": (_VOID, [], False),
    "__builtin_clz": (int32_t, [int32_t], False),
    "__builtin_ctz": (int32_t, [int32_t], False),
    "modf": (_double, [_double, _double.as_pointer()], False),
    "__builtin_va_arg": (voidptr_t, [voidptr_t, int64_t], False),
    "strcoll": (int32_t, [cstring, cstring], False),
    "clearerr": (_VOID, [voidptr_t], False),
    "fileno": (int32_t, [voidptr_t], False),
    "popen": (voidptr_t, [cstring, cstring], False),
    "pclose": (int32_t, [voidptr_t], False),
    "dlopen": (voidptr_t, [cstring, int32_t], False),
    "dlsym": (voidptr_t, [voidptr_t, cstring], False),
    "dlclose": (int32_t, [voidptr_t], False),
    "dlerror": (cstring, [], False),
    "setvbuf": (int32_t, [voidptr_t, cstring, int32_t, _size_t], False),
    "freopen": (voidptr_t, [cstring, cstring, voidptr_t], False),
}
225
+
226
+
227
class CodegenError(Exception):
    """Code-generator exception type; adds nothing on top of ``Exception``."""
229
+
230
+
231
# 16-bit integer IR type.
int16_t = ir.IntType(16)


def get_ir_type(type_str):
    """Return the IR type for a type name or a list of type specifier names.

    A plain string is wrapped into a one-element list; any other value is
    forwarded unchanged to ``get_ir_type_from_names``.
    """
    names = type_str if not isinstance(type_str, str) else [type_str]
    return get_ir_type_from_names(names)
237
+
238
+
239
+ def _names_to_key(names):
240
+ """Convert a names list like ['unsigned', 'int'] to a canonical key string."""
241
+ return names[0] if len(names) == 1 else " ".join(sorted(names))
242
+
243
+
244
+ def _is_unsigned_names(names):
245
+ """Check if a type name list represents an unsigned type."""
246
+ return "unsigned" in names
247
+
248
+
249
+ # Known unsigned type names (after typedef resolution)
250
+ _UNSIGNED_TYPE_NAMES = frozenset(
251
+ {
252
+ "char unsigned",
253
+ "int unsigned",
254
+ "unsigned",
255
+ "int short unsigned",
256
+ "short unsigned",
257
+ "int long unsigned",
258
+ "long unsigned",
259
+ "long long unsigned",
260
+ "size_t",
261
+ "uint8_t",
262
+ "uint16_t",
263
+ "uint32_t",
264
+ "uint64_t",
265
+ }
266
+ )
267
+
268
+
269
# Declaration lines of the placeholder ``__pcc_va_arg_N`` functions.
_PCC_VAARG_DECL_RE = re.compile(r'^declare .+@"__pcc_va_arg_\d+"\(.+\)\n?', re.M)
# Calls to those placeholders; rewritten into ``va_arg`` instructions.
_PCC_VAARG_CALL_RE = re.compile(
    r"^(?P<lhs>\s*%\S+)\s*=\s*call\s+"
    r'(?P<rettype>.+?)\s+@"(?P<name>__pcc_va_arg_\d+)"\('
    r'(?P<argtype>.+?)\s+(?P<argval>%".+?"|%\S+)\)$',
    re.M,
)


# alloca/load/bitcast/store whose operand type is plain ``void``.
_INVALID_VOID_INSTR_RE = re.compile(
    r"^(?:%\S+\s*=\s*)?(?:alloca|load|bitcast) void(?! \()([,\s]|$)|^store void(?! \()([,\s]|$)"
)

# An unquoted basic-block label line such as ``entry:``.
_LABEL_RE = re.compile(r"^[A-Za-z$._][A-Za-z0-9$._-]*:$")


def postprocess_ir_text(text):
    """Apply textual IR rewrites that llvmlite cannot express directly.

    Steps, in order:

    1. Remove ``__pcc_va_arg_N`` declarations, turn integer-to-pointer
       ``bitcast`` into ``inttoptr``, replace ``ptrtoint`` of an ``i8`` array
       value with ``add i64 0, 0`` and rewrite ``__pcc_va_arg_N`` calls into
       ``va_arg`` instructions.
    2. Replace array initializers that contain a leaked ``<ir.Constant`` repr
       with ``zeroinitializer`` (other lines containing the repr are dropped)
       and drop instructions operating on plain ``void``.
    3. Deduplicate the case values of single-line ``switch i64`` instructions,
       keeping the first occurrence of each value.
    4. Repair control flow: drop instructions that follow a terminator inside
       a block, bridge consecutive labels with a ``br`` and terminate a
       trailing empty block with ``unreachable``.

    Args:
        text: Textual LLVM IR.

    Returns:
        The rewritten IR, lines joined with ``"\\n"`` (no trailing newline).
    """

    # --- simple regex rewrites ---
    text = _PCC_VAARG_DECL_RE.sub("", text)
    text = re.sub(
        r"bitcast i64 (%\S+) to (i8\*|[^,\n]+\*)", r"inttoptr i64 \1 to \2", text
    )
    text = re.sub(
        r"bitcast i8 (%\S+) to (i8\*|[^,\n]+\*)", r"inttoptr i8 \1 to \2", text
    )
    text = re.sub(r"ptrtoint \[\d+ x i8\] [^\n]+ to i64", "add i64 0, 0", text)

    def repl(match):
        lhs = match.group("lhs")
        rettype = match.group("rettype")
        argtype = match.group("argtype")
        argval = match.group("argval")
        return f"{lhs} = va_arg {argtype} {argval}, {rettype}"

    text = _PCC_VAARG_CALL_RE.sub(repl, text)

    # --- line-level fixups ---
    array_global_re = re.compile(
        r'(@"[^"]*"\s*=\s*(?:global|constant)\s*\[[^\]]*\]).*'
    )
    lines = []
    for line in text.splitlines():
        # Fix Python repr leak in array initializers -> zeroinitializer
        if "<ir.Constant" in line:
            m = array_global_re.match(line)
            if m is None:
                continue
            line = m.group(1) + " zeroinitializer"
        # Drop invalid void instructions (alloca void, load void, store void)
        if _INVALID_VOID_INSTR_RE.match(line.strip()):
            continue
        lines.append(line)

    # Deduplicate switch case values
    deduped = []
    for line in lines:
        s = line.strip()
        if s.startswith("switch i64 ") and "[" in line and "]" in line:
            prefix, rest = line.split("[", 1)
            case_text, suffix = rest.rsplit("]", 1)
            seen = set()
            unique = []
            found_any = False
            for case in re.finditer(r'i64 (-?\d+), label %"[^"]*"', case_text):
                found_any = True
                val = case.group(1)
                if val not in seen:
                    seen.add(val)
                    unique.append(case.group(0))
            # Lines whose case list has an unexpected shape are left untouched.
            if found_any:
                line = prefix + "[" + " ".join(unique) + "]" + suffix
        deduped.append(line)

    # Repair control flow: drop dead code after terminators, bridge empty labels
    def _is_label(s):
        return bool(_LABEL_RE.match(s))

    def _is_terminator(s):
        return s.startswith(("br ", "ret ", "switch ", "unreachable", "resume "))

    repaired = []
    skip_dead = False
    for raw in deduped:
        s = raw.strip()
        if skip_dead:
            # Dead code ends at the next label *or* at the function's closing
            # brace.  Without the brace check, dead instructions at the end of
            # a function swallowed the "}" and the following function header.
            if _is_label(s) or s == "}":
                skip_dead = False
            else:
                continue
        if (
            repaired
            and _is_terminator(repaired[-1].strip())
            and s
            and raw.startswith(" ")
            and not _is_label(s)
        ):
            # An indented instruction right after a terminator is unreachable.
            skip_dead = True
            continue
        if repaired and _is_label(repaired[-1].strip()) and _is_label(s):
            # Empty block: fall through to the label that follows it.
            repaired.append(f' br label %"{s[:-1]}"')
        if s == "}" and repaired and _is_label(repaired[-1].strip()):
            # Function ends on an empty block: give it a terminator.
            repaired.append(" unreachable")
        repaired.append(raw)

    return "\n".join(repaired)
374
+
375
+
376
def get_ir_type_from_names(names):
    """Map a list of type specifier names (e.g. ``['unsigned', 'int']``) to an IR type.

    Qualifiers, storage classes and ``signed`` are dropped first.  The
    remaining names are sorted and space-joined and looked up in a table of
    exact spellings; if that misses, the result is chosen by the first of
    ``double``, ``float``, ``char``, ``short`` found in the list, and finally
    defaults to ``i64``.
    """
    ignored = (
        "const",
        "volatile",
        "register",
        "restrict",
        "inline",
        "_Noreturn",
        "signed",
        "extern",
        "static",
    )
    names = [spec for spec in names if spec not in ignored]
    key = " ".join(sorted(names))

    if key == "void":
        return ir.VoidType()

    # Exact spellings, grouped by the IR type they map to.
    exact = {}
    for ir_type, spellings in (
        (int8_t, ("char", "char unsigned", "int8_t", "uint8_t")),
        (
            int16_t,
            (
                "short",
                "int short",
                "int short unsigned",
                "short unsigned",
                "int16_t",
                "uint16_t",
            ),
        ),
        (int32_t, ("int", "int unsigned", "unsigned", "int32_t", "uint32_t")),
        (
            int64_t,
            (
                "long",
                "int long",
                "long long",
                "int long long",
                "int long unsigned",
                "long unsigned",
                "long long unsigned",
                # size_t, etc.
                "size_t",
                "ssize_t",
                "ptrdiff_t",
                "int64_t",
                "uint64_t",
            ),
        ),
        (_double, ("double",)),
        (_float, ("float",)),
    ):
        exact.update(dict.fromkeys(spellings, ir_type))

    if key in exact:
        return exact[key]

    # Keyword fallbacks, checked in priority order.
    for keyword, fallback in (
        ("double", _double),
        ("float", _float),
        ("char", int8_t),
        ("short", int16_t),
    ):
        if keyword in names:
            return fallback
    # Default to i64
    return int64_t
446
+
447
+
448
def get_ir_type_from_node(node):
    """Return the IR type of a declaration-like AST node.

    ``EllipsisParam`` nodes yield ``voidptr_t`` as a defensive fallback; every
    other node is resolved through its ``type`` attribute.
    """
    if not isinstance(node, c_ast.EllipsisParam):
        return _resolve_node_type(node.type)
    # shouldn't be called for "...", but be safe
    return voidptr_t
453
+
454
+
455
def _union_member_layout(ir_type):
    """Estimate ``(size, alignment)`` in bytes of a union member's IR type.

    Pointers are taken to be 8 bytes (the value the original code hard-coded).
    Literal structs are laid out member by member with natural alignment.
    Unknown types fall back to ``(8, 8)``.
    """
    if isinstance(ir_type, ir.IntType):
        size = max(1, (ir_type.width + 7) // 8)
        align = 1
        while align < size and align < 8:
            align *= 2
        return size, align
    if isinstance(ir_type, ir.FloatType):
        return 4, 4
    if isinstance(ir_type, (ir.DoubleType, ir.PointerType)):
        return 8, 8
    if isinstance(ir_type, ir.ArrayType):
        elem_size, elem_align = _union_member_layout(ir_type.element)
        return elem_size * ir_type.count, elem_align
    if isinstance(ir_type, ir.LiteralStructType):
        offset = 0
        struct_align = 1
        for element in ir_type.elements:
            size, align = _union_member_layout(element)
            offset = (offset + align - 1) // align * align  # pad to alignment
            offset += size
            struct_align = max(struct_align, align)
        total = (offset + struct_align - 1) // struct_align * struct_align
        return total, struct_align
    return 8, 8


def _union_member_ir_type(decl_type):
    """Resolve a union member's type, keeping arrays as IR array types.

    Array dimensions that are not integer constants are treated as 0.
    """
    if not isinstance(decl_type, c_ast.ArrayDecl):
        return _resolve_node_type(decl_type)
    dims = []
    arr_node = decl_type
    while isinstance(arr_node, c_ast.ArrayDecl):
        dim = 0
        if isinstance(arr_node.dim, c_ast.Constant):
            dim = int(arr_node.dim.value.rstrip("uUlL"), 0)
        dims.append(dim)
        arr_node = arr_node.type
    arr_ir_type = _resolve_node_type(arr_node)
    for dim in reversed(dims):
        arr_ir_type = ir.ArrayType(arr_ir_type, dim)
    return arr_ir_type


def _resolve_node_type(node_type):
    """Resolve an AST type node to an IR type.

    * ``PtrDecl``: pointer to the resolved pointee; ``void*`` becomes
      ``voidptr_t``; a pointer to ``FuncDecl`` becomes a function pointer
      (ellipsis and ``void`` parameters are skipped).
    * ``TypeDecl``: identifier types go through ``get_ir_type``; inline
      structs/unions with declarations become literal struct types carrying
      ``members`` metadata; structs/unions without declarations become
      ``int8_t``; anything else ``int64_t``.
    * ``ArrayDecl``: ``voidptr_t`` (array parameters decay to pointers).
    * Any other node: ``int64_t``.
    """
    if isinstance(node_type, c_ast.PtrDecl):
        inner = node_type.type
        if isinstance(inner, c_ast.FuncDecl):
            ret_type = _resolve_node_type(inner.type)
            param_types = []
            if inner.args:
                for p in inner.args.params:
                    if isinstance(p, c_ast.EllipsisParam):
                        continue
                    t = get_ir_type_from_node(p)
                    if not isinstance(t, ir.VoidType):
                        param_types.append(t)
            return ir.FunctionType(ret_type, param_types).as_pointer()
        pointee = _resolve_node_type(inner)
        if isinstance(pointee, ir.VoidType):
            return voidptr_t
        return ir.PointerType(pointee)
    elif isinstance(node_type, c_ast.TypeDecl):
        if isinstance(node_type.type, c_ast.IdentifierType):
            return get_ir_type(node_type.type.names)
        elif isinstance(node_type.type, c_ast.Struct):
            snode = node_type.type
            if snode.decls:
                # Inline struct with declarations -- build real type
                member_types = [_resolve_node_type(d.type) for d in snode.decls]
                st = ir.LiteralStructType(member_types)
                st.members = [d.name for d in snode.decls]
                st.member_decl_types = [d.type for d in snode.decls]
                return st
            return int8_t  # opaque/forward-declared struct
        elif isinstance(node_type.type, c_ast.Union):
            unode = node_type.type
            if unode.decls:
                # Inline union with declarations -- size it after the largest
                # member and align it after the most strictly aligned one.
                max_size = 0
                max_align = 1
                member_types = {}
                for decl in unode.decls:
                    ir_t = _union_member_ir_type(decl.type)
                    member_types[decl.name] = ir_t
                    # This is a module-level function: there is no ``self``
                    # here, so layout is computed by a module-level helper
                    # (the previous ``self._ir_type_*`` calls raised NameError).
                    sz, al = _union_member_layout(ir_t)
                    if sz > max_size:
                        max_size = sz
                    if al > max_align:
                        max_align = al
                align_map = {8: int64_t, 4: int32_t, 2: int16_t, 1: int8_t}
                align_type = align_map.get(max_align, int64_t)
                # Storage is one aligned scalar plus byte padding up to the
                # size of the largest member.
                pad_size = max_size - max_align
                if pad_size > 0:
                    ut = ir.LiteralStructType(
                        [align_type, ir.ArrayType(int8_t, pad_size)]
                    )
                else:
                    ut = ir.LiteralStructType([align_type])
                ut.members = list(member_types.keys())
                ut.member_types = member_types
                ut.is_union = True
                return ut
            return int8_t  # opaque/forward-declared union
        return int64_t
    elif isinstance(node_type, c_ast.ArrayDecl):
        return voidptr_t  # array params decay to pointer
    return int64_t
552
+
553
+
554
+ class LLVMCodeGenerator(object):
555
+
556
+ def __init__(self):
557
+ self.module = ir.Module()
558
+ # Set proper data layout for struct padding/alignment
559
+ import llvmlite.binding as _llvm
560
+
561
+ _llvm.initialize_native_target()
562
+ _triple = _llvm.get_default_triple()
563
+ _tm = _llvm.Target.from_default_triple().create_target_machine()
564
+ self.module.triple = _triple
565
+ self.module.data_layout = str(_tm.target_data)
566
+
567
+ #
568
+ self.builder = None
569
+ self.global_builder: IRBuilder = ir.IRBuilder()
570
+ self.env = ChainMap()
571
+ self.nlabels = 0
572
+ self.function = None
573
+ self.in_global = True
574
+ self._declared_libc = set()
575
+ self._unsigned_bindings = set() # alloca/global ids with unsigned type
576
+ self._unsigned_pointee_bindings = set()
577
+ self._unsigned_return_bindings = set()
578
+ self._expr_ir_types = {}
579
+ self._labels = {}
580
+ self._vaarg_counter = 0
581
+
582
+ def define(self, name, val):
583
+ self.env[name] = val
584
+
585
+ def _mark_unsigned(self, binding):
586
+ """Mark a concrete IR binding as having unsigned type."""
587
+ if binding is not None:
588
+ self._unsigned_bindings.add(id(binding))
589
+
590
+ def _mark_unsigned_pointee(self, binding):
591
+ """Mark a pointer/array binding whose immediate pointee is unsigned."""
592
+ if binding is not None:
593
+ self._unsigned_pointee_bindings.add(id(binding))
594
+
595
+ def _mark_unsigned_return(self, binding):
596
+ """Mark a function or function-pointer binding with unsigned return."""
597
+ if binding is not None:
598
+ self._unsigned_return_bindings.add(id(binding))
599
+
600
+ def _is_unsigned_val(self, val):
601
+ """Check if a value should use unsigned operations."""
602
+ # Check if the value was produced by an unsigned operation
603
+ return getattr(val, "_is_unsigned", False)
604
+
605
+ def _is_unsigned_binding(self, binding):
606
+ return binding is not None and id(binding) in self._unsigned_bindings
607
+
608
+ def _is_unsigned_pointee_binding(self, binding):
609
+ return binding is not None and id(binding) in self._unsigned_pointee_bindings
610
+
611
+ def _is_unsigned_return_binding(self, binding):
612
+ return binding is not None and id(binding) in self._unsigned_return_bindings
613
+
614
+ def _tag_unsigned(self, val):
615
+ """Tag an IR value as unsigned."""
616
+ try:
617
+ val._is_unsigned = True
618
+ except (AttributeError, TypeError):
619
+ pass
620
+ return val
621
+
622
+ def _clear_unsigned(self, val):
623
+ """Clear unsigned metadata from an IR value."""
624
+ try:
625
+ val._is_unsigned = False
626
+ except (AttributeError, TypeError):
627
+ pass
628
+ return val
629
+
630
+ def _tag_unsigned_pointee(self, val):
631
+ try:
632
+ val._pcc_unsigned_pointee = True
633
+ except (AttributeError, TypeError):
634
+ pass
635
+ return val
636
+
637
+ def _is_unsigned_pointee(self, val):
638
+ return getattr(val, "_pcc_unsigned_pointee", False)
639
+
640
+ def _tag_unsigned_return(self, val):
641
+ try:
642
+ val._pcc_unsigned_return = True
643
+ except (AttributeError, TypeError):
644
+ pass
645
+ return val
646
+
647
+ def _is_unsigned_return(self, val):
648
+ return getattr(val, "_pcc_unsigned_return", False)
649
+
650
+ def _set_expr_ir_type(self, node, ir_type):
651
+ if node is not None:
652
+ self._expr_ir_types[id(node)] = ir_type
653
+
654
+ def _get_expr_ir_type(self, node, default=None):
655
+ if node is None:
656
+ return default
657
+ return self._expr_ir_types.get(id(node), getattr(node, "ir_type", default))
658
+
659
+ def _either_unsigned(self, lhs, rhs):
660
+ """Check if either operand is unsigned (C promotion rules)."""
661
+ return self._is_unsigned_val(lhs) or self._is_unsigned_val(rhs)
662
+
663
+ def _int_to_float(self, val, target_type):
664
+ if self._is_unsigned_val(val):
665
+ return self.builder.uitofp(val, target_type)
666
+ return self.builder.sitofp(val, target_type)
667
+
668
+ def _convert_int_value(self, val, target_type, result_unsigned=None):
669
+ if not (
670
+ isinstance(getattr(val, "type", None), ir.IntType)
671
+ and isinstance(target_type, ir.IntType)
672
+ ):
673
+ return self._implicit_convert(val, target_type)
674
+
675
+ source_unsigned = self._is_unsigned_val(val)
676
+ if val.type.width < target_type.width:
677
+ if source_unsigned:
678
+ result = self.builder.zext(val, target_type)
679
+ else:
680
+ result = self.builder.sext(val, target_type)
681
+ elif val.type.width > target_type.width:
682
+ result = self.builder.trunc(val, target_type)
683
+ else:
684
+ result = val
685
+
686
+ if result_unsigned is None:
687
+ result_unsigned = source_unsigned
688
+ if result_unsigned:
689
+ return self._tag_unsigned(result)
690
+ return self._clear_unsigned(result)
691
+
692
+ def _integer_promotion(self, val):
693
+ if not isinstance(getattr(val, "type", None), ir.IntType):
694
+ return val
695
+ if val.type.width == 1:
696
+ return self._clear_unsigned(self.builder.zext(val, int32_t))
697
+ if val.type.width < int32_t.width:
698
+ return self._convert_int_value(val, int32_t, result_unsigned=False)
699
+ return val
700
+
701
+ def _usual_arithmetic_conversion(self, lhs, rhs):
702
+ lhs = self._integer_promotion(lhs)
703
+ rhs = self._integer_promotion(rhs)
704
+
705
+ lhs_unsigned = self._is_unsigned_val(lhs)
706
+ rhs_unsigned = self._is_unsigned_val(rhs)
707
+ lhs_width = lhs.type.width
708
+ rhs_width = rhs.type.width
709
+
710
+ if lhs_unsigned == rhs_unsigned:
711
+ target_type = lhs.type if lhs_width >= rhs_width else rhs.type
712
+ result_unsigned = lhs_unsigned
713
+ elif lhs_unsigned:
714
+ if lhs_width >= rhs_width:
715
+ target_type = lhs.type
716
+ result_unsigned = True
717
+ else:
718
+ target_type = rhs.type
719
+ result_unsigned = False
720
+ else:
721
+ if rhs_width >= lhs_width:
722
+ target_type = rhs.type
723
+ result_unsigned = True
724
+ else:
725
+ target_type = lhs.type
726
+ result_unsigned = False
727
+
728
+ lhs = self._convert_int_value(lhs, target_type, result_unsigned)
729
+ rhs = self._convert_int_value(rhs, target_type, result_unsigned)
730
+ return lhs, rhs, result_unsigned
731
+
732
+ def _shift_operand_conversion(self, lhs, rhs):
733
+ lhs = self._integer_promotion(lhs)
734
+ rhs = self._integer_promotion(rhs)
735
+ if lhs.type != rhs.type:
736
+ rhs = self._convert_int_value(
737
+ rhs, lhs.type, result_unsigned=self._is_unsigned_val(rhs)
738
+ )
739
+ return lhs, rhs, self._is_unsigned_val(lhs)
740
+
741
+ def _is_floating_ir_type(self, ir_type):
742
+ return isinstance(ir_type, (ir.FloatType, ir.DoubleType))
743
+
744
+ def _common_float_type(self, lhs_type, rhs_type):
745
+ if isinstance(lhs_type, ir.DoubleType) or isinstance(rhs_type, ir.DoubleType):
746
+ return _double
747
+ return _float
748
+
749
+ def _parse_float_constant(self, raw):
750
+ value = raw.rstrip("fFlL")
751
+ if value.lower().startswith("0x") and "p" in value.lower():
752
+ return float.fromhex(value)
753
+ return float(value)
754
+
755
+ def _float_literal_ir_type(self, raw):
756
+ if raw.endswith(("f", "F")):
757
+ return _float
758
+ return _double
759
+
760
+ def _float_compare(self, op, lhs, rhs, name):
761
+ if op == "!=":
762
+ return self.builder.fcmp_unordered(op, lhs, rhs, name)
763
+ return self.builder.fcmp_ordered(op, lhs, rhs, name)
764
+
765
+ def _safe_global_var(self, ir_type, name, external=False):
766
+ """Create or reuse a global variable, avoiding DuplicatedNameError."""
767
+ existing = self.module.globals.get(name)
768
+ if existing:
769
+ return existing
770
+ try:
771
+ gv = ir.GlobalVariable(self.module, ir_type, name)
772
+ if external:
773
+ gv.linkage = "external"
774
+ else:
775
+ gv.initializer = ir.Constant(ir_type, None)
776
+ return gv
777
+ except Exception:
778
+ gv = self.module.globals.get(name) or ir.GlobalVariable(
779
+ self.module, ir_type, self.module.get_unique_name(name)
780
+ )
781
+ if external:
782
+ gv.linkage = "external"
783
+ elif getattr(gv, "initializer", None) is None:
784
+ gv.initializer = ir.Constant(ir_type, None)
785
+ return gv
786
+
787
+ # External C globals lazily declared on first use.
788
+ _EXTERN_GLOBAL_VARS = {
789
+ "stdout": voidptr_t,
790
+ "stderr": voidptr_t,
791
+ "stdin": voidptr_t,
792
+ "__stdoutp": voidptr_t,
793
+ "__stderrp": voidptr_t,
794
+ "__stdinp": voidptr_t,
795
+ "errno": int32_t,
796
+ }
797
+
798
+ def lookup(self, name):
799
+ if not isinstance(name, str):
800
+ name = name.name if hasattr(name, "name") else str(name)
801
+ if name not in self.env:
802
+ if name in LIBC_FUNCTIONS:
803
+ self._declare_libc(name)
804
+ elif name in self._EXTERN_GLOBAL_VARS:
805
+ gv_type = self._EXTERN_GLOBAL_VARS[name]
806
+ gv = self._safe_global_var(gv_type, name, external=True)
807
+ self.define(name, (gv_type, gv))
808
+ return self.env[name]
809
+
810
+ def _declare_libc(self, name):
811
+ """Lazily declare a libc function on first use."""
812
+ existing = self.module.globals.get(name)
813
+ if existing:
814
+ self.define(name, (None, existing))
815
+ self._declared_libc.add(name)
816
+ return
817
+ ret_type, param_types, var_arg = LIBC_FUNCTIONS[name]
818
+ fnty = ir.FunctionType(ret_type, param_types, var_arg=var_arg)
819
+ try:
820
+ func = ir.Function(self.module, fnty, name=name)
821
+ except Exception:
822
+ func = self.module.globals.get(name)
823
+ if isinstance(func, ir.Function):
824
+ try:
825
+ if name in ("setjmp", "_setjmp"):
826
+ func.attributes.add("returns_twice")
827
+ elif name in ("longjmp", "_longjmp"):
828
+ func.attributes.add("noreturn")
829
+ except Exception:
830
+ pass
831
+ self.define(name, (fnty, func))
832
+ self._declared_libc.add(name)
833
+
834
+ def new_label(self, name):
835
+ self.nlabels += 1
836
+ return f"label_{name}_{self.nlabels}"
837
+
838
+ @contextmanager
839
+ def new_scope(self):
840
+ self.env = self.env.new_child()
841
+ yield
842
+ self.env = self.env.parents
843
+
844
+ @contextmanager
845
+ def new_function(self):
846
+ oldfunc = self.function
847
+ oldbuilder = self.builder
848
+ oldenv = self.env
849
+ oldlabels = self._labels
850
+ self.in_global = False
851
+ self.env = self.env.new_child()
852
+ self._labels = {}
853
+ try:
854
+ yield
855
+ finally:
856
+ self.function = oldfunc
857
+ self.builder = oldbuilder
858
+ self.env = oldenv
859
+ self._labels = oldlabels
860
+ self.in_global = True
861
+
862
+ def generate_code(self, node):
863
+ normal = self.codegen(node)
864
+
865
+ # for else end have no instruction
866
+ if self.builder:
867
+ if not self.builder.block.is_terminated:
868
+ self.builder.ret(ir.Constant(ir.IntType(64), int(0)))
869
+
870
+ pass # empty block fixes done in IR post-processing
871
+
872
+ return normal
873
+
874
def create_entry_block_alloca(
    self, name, type_str, size, array_list=None, point_level=0
):
    """Create storage for ``name`` and register it via ``self.define``.

    The IR type starts from ``get_ir_type(type_str)``, is wrapped in one array
    type per entry of ``array_list`` (last entry innermost) and then in
    ``point_level`` pointer levels (``void`` is replaced by i8 before pointers
    are applied).  Outside global context the storage comes from
    ``self._alloca_in_entry``; in global context an existing module global of
    that name is reused, otherwise a null-initialized global variable is
    created.  ``size`` is accepted but not used here.

    Returns ``(storage, ir_type)``.
    """
    var_type = get_ir_type(type_str)

    if array_list is not None:
        # Wrap from the last dimension outwards.
        for extent in reversed(array_list):
            var_type = ir.ArrayType(var_type, extent)
        var_type.dim_array = array_list

    if point_level != 0:
        if isinstance(var_type, ir.VoidType):
            var_type = int8_t  # void* -> i8*
        for _ in range(point_level):
            var_type = ir.PointerType(var_type)

    if self.in_global:
        slot = self.module.globals.get(name)
        if not slot:
            try:
                slot = ir.GlobalVariable(self.module, var_type, name)
                slot.initializer = ir.Constant(var_type, None)
            except Exception:
                # Creation failed: fall back to whatever the module holds
                # under this name, or to a uniquely renamed global.
                slot = self.module.globals.get(name) or ir.GlobalVariable(
                    self.module, var_type, self.module.get_unique_name(name)
                )
                if hasattr(slot, "initializer") and slot.initializer is None:
                    slot.initializer = ir.Constant(var_type, None)
        self.define(name, (var_type, slot))
    else:
        slot = self._alloca_in_entry(var_type, name)
        self.define(name, (var_type, slot))

    return slot, var_type
912
+
913
def _alloca_in_entry(self, ir_type, name):
    """Emit an ``alloca`` for ``ir_type`` in the current function's entry block.

    Without a current function the alloca is emitted through ``self.builder``
    at its current position.  Otherwise a temporary builder places it before
    the first entry-block instruction that is neither ``phi`` nor ``alloca``
    (or at the end of the block when there is none).  If ``self.builder`` is
    working in that same, still unterminated entry block, it is re-positioned
    at the block's end afterwards.
    """
    if self.function is None:
        return self.builder.alloca(ir_type, size=None, name=name)

    entry = self.function.entry_basic_block
    active_block = None if self.builder is None else self.builder.block
    scratch = ir.IRBuilder(entry)

    anchor = next(
        (inst for inst in entry.instructions if inst.opname not in ("phi", "alloca")),
        None,
    )
    if anchor is None:
        scratch.position_at_end(entry)
    else:
        scratch.position_before(anchor)

    slot = scratch.alloca(ir_type, size=None, name=name)

    builder_in_open_entry = (
        self.builder is not None
        and active_block is entry
        and not active_block.is_terminated
    )
    if builder_in_open_entry:
        self.builder.position_at_end(active_block)
    return slot
936
+
937
def codegen(self, node):
    """Dispatch ``node`` to the method named ``codegen_<NodeClassName>``.

    Returns ``(None, None)`` when ``node`` is None or no such method exists;
    otherwise returns whatever the handler returns.
    """
    if node is not None:
        visitor = getattr(self, "codegen_" + node.__class__.__name__, None)
        if visitor is not None:
            return visitor(node)
    return None, None
945
+
946
def codegen_FileAST(self, node):
    """Generate code for every top-level entry of a translation unit.

    Records the names of all function definitions in ``self._funcdef_names``,
    then walks ``node.ext`` twice: first struct/union/enum declarations and
    typedefs (errors ignored), then everything else.  In the second pass
    ``DuplicatedNameError``, ``AssertionError``, ``TypeError`` and a
    ``KeyError`` whose key is None cause the entry to be skipped; any other
    exception propagates.
    """
    self._funcdef_names = {
        ext.decl.name
        for ext in node.ext
        if isinstance(ext, c_ast.FuncDef) and ext.decl
    }

    # Pass 1: type definitions and typedefs.
    handled_early = set()
    for index, ext in enumerate(node.ext):
        if isinstance(ext, c_ast.Decl):
            declared = ext.type
            early = isinstance(
                declared, (c_ast.Struct, c_ast.Union, c_ast.Enum)
            ) or (
                isinstance(declared, c_ast.TypeDecl)
                and isinstance(declared.type, (c_ast.Struct, c_ast.Union))
            )
        else:
            early = isinstance(ext, c_ast.Typedef)
        if not early:
            continue
        try:
            self.codegen(ext)
        except Exception:
            # Best effort: a failing type definition is dropped silently.
            pass
        handled_early.add(index)

    # Pass 2: everything not handled above.
    for index, ext in enumerate(node.ext):
        if index in handled_early:
            continue
        try:
            self.codegen(ext)
        except Exception as exc:
            # Non-fatal errors: skip the problematic declaration/definition.
            skippable = (
                type(exc).__name__ in ("DuplicatedNameError",)
                or isinstance(exc, (AssertionError, TypeError))
                or (isinstance(exc, KeyError) and exc.args and exc.args[0] is None)
            )
            if not skippable:
                raise
987
+
988
# Single-character C escape sequences, keyed by the character that follows the
# backslash.  Hex (\x..) and octal (\ooo) escapes are decoded separately by
# _process_escapes.  "?" is the standard \? escape; "e" is the ESC escape
# accepted by GCC and Clang as an extension.
_escape_map = {
    "n": "\n",
    "t": "\t",
    "r": "\r",
    "\\": "\\",
    "0": "\0",
    "'": "'",
    '"': '"',
    "a": "\a",
    "b": "\b",
    "f": "\f",
    "v": "\v",
    "?": "?",
    "e": "\x1b",
}
1001
+
1002
def _process_escapes(self, s):
    """Process C escape sequences in a string.

    Handles ``\\x`` followed by hex digits, a backslash followed by one to
    three octal digits (both reduced to their low 8 bits), and the
    single-character escapes listed in ``self._escape_map``.  A backslash that
    starts none of these, or that is the final character, is kept literally.
    """
    hex_alphabet = "0123456789abcdefABCDEF"
    oct_alphabet = "01234567"
    pieces = []
    pos = 0
    end = len(s)
    while pos < end:
        ch = s[pos]
        if ch != "\\" or pos + 1 >= end:
            pieces.append(ch)
            pos += 1
            continue

        marker = s[pos + 1]
        if marker == "x":
            stop = pos + 2
            while stop < end and s[stop] in hex_alphabet:
                stop += 1
            if stop > pos + 2:
                pieces.append(chr(int(s[pos + 2 : stop], 16) & 0xFF))
                pos = stop
                continue
        if marker in oct_alphabet:
            stop = pos + 1
            while stop < end and stop - pos <= 3 and s[stop] in oct_alphabet:
                stop += 1
            pieces.append(chr(int(s[pos + 1 : stop], 8) & 0xFF))
            pos = stop
            continue

        mapped = self._escape_map.get(marker)
        if mapped is not None:
            pieces.append(mapped)
            pos += 2
            continue

        # Unknown escape: keep the backslash; the next character is handled
        # by the following iteration.
        pieces.append(ch)
        pos += 1
    return "".join(pieces)
1035
+
1036
@staticmethod
def _string_bytes(s):
    """Return ``s`` as a bytearray holding the low 8 bits of each code point."""
    low_bytes = [ord(ch) & 0xFF for ch in s]
    return bytearray(low_bytes)
1039
+
1040
def _char_constant_value(self, raw):
    """Return the integer value of a single-quoted character literal.

    Anything that is not wrapped in single quotes, or whose body is empty
    after escape processing, yields 0.  Multi-character bodies are packed
    most-significant first, one byte (low 8 bits) per character.
    """
    quoted = bool(raw) and len(raw) >= 2 and raw[0] == "'" and raw[-1] == "'"
    if not quoted:
        return 0
    body = self._process_escapes(raw[1:-1])
    return int.from_bytes(bytes(ord(ch) & 0xFF for ch in body), "big")
1050
+
1051
def codegen_Constant(self, node):
    """Lower a literal constant node to an IR constant.

    * ``int``: the ``u``/``U``/``l``/``L`` suffix is stripped and the digits
      are read as hexadecimal (``0x``), binary (``0b``), octal (leading ``0``)
      or decimal.  The result is an i64 constant, passed to
      ``self._tag_unsigned`` when the literal carried a ``u``/``U`` suffix.
    * ``char``: an i8 constant from ``self._char_constant_value``.
    * ``string``: an i8 array constant of the escape-processed text plus a
      terminating NUL.
    * anything else is treated as a floating literal.

    Returns ``(constant, None)``.
    """
    if node.type == "int":
        raw = node.value
        is_unsigned = "u" in raw.lower()
        digits = raw.rstrip("uUlL")
        prefix = digits[:2].lower()
        if prefix == "0x":
            int_val = int(digits, 16)
        elif prefix == "0b":
            # Binary literals (C23 / GNU extension); int() would otherwise
            # reject the prefix in base 10.
            int_val = int(digits, 2)
        elif digits.startswith("0") and len(digits) > 1 and digits[1:].isdigit():
            int_val = int(digits, 8)
        else:
            int_val = int(digits)
        result = ir.values.Constant(ir.IntType(64), int_val)
        if is_unsigned:
            self._tag_unsigned(result)
        return result, None

    if node.type == "char":
        # char constant like 'a' -> i8
        char_val = self._char_constant_value(node.value) & 0xFF
        return ir.values.Constant(int8_t, char_val), None

    if node.type == "string":
        # Drop the surrounding quotes, decode escapes, append the NUL.
        processed = self._process_escapes(node.value[1:-1])
        data = self._string_bytes(processed + "\00")
        array = ir.ArrayType(ir.IntType(8), len(data))
        return ir.values.Constant(array, data), None

    ir_type = self._float_literal_ir_type(node.value)
    return (
        ir.values.Constant(ir_type, self._parse_float_constant(node.value)),
        None,
    )
1090
+
1091
def codegen_Assignment(self, node):
    """Generate IR for ``=`` and the compound assignment operators.

    Both sides are generated first; if either yields no value the result is
    ``(i64 0, None)``.  Operands are then brought to a common type, and the
    operation is either a plain store (``=``, after converting the right side
    to the destination's pointee type) or a read-modify-write whose result is
    stored back.  Pointer ``+=``/``-=`` with an integer is lowered to ``gep``.

    Returns ``(stored_value, lvalue_address)``.
    """
    lv, lv_addr = self.codegen(node.lvalue)
    rv, _ = self.codegen(node.rvalue)
    if lv is None or rv is None:
        return ir.Constant(int64_t, 0), None

    as_float = 1
    as_int = 0
    emit = self.builder
    ops = {
        ("+=", as_float): emit.fadd,
        ("+=", as_int): emit.add,
        ("-=", as_float): emit.fsub,
        ("-=", as_int): emit.sub,
        ("*=", as_float): emit.fmul,
        ("*=", as_int): emit.mul,
        ("/=", as_float): emit.fdiv,
        ("/=", as_int): emit.sdiv,
        ("%=", as_int): emit.srem,
        ("%=", as_float): emit.frem,
        ("<<=", as_int): emit.shl,
        (">>=", as_int): emit.ashr,
        ("&=", as_int): emit.and_,
        ("|=", as_int): emit.or_,
        ("^=", as_int): emit.xor,
    }

    # Promote mismatched operand types before the operation.
    is_unsigned = False
    if isinstance(lv.type, ir.IntType) and isinstance(rv.type, ir.IntType):
        if node.op in ("<<=", ">>="):
            lv, rv, is_unsigned = self._shift_operand_conversion(lv, rv)
        else:
            lv, rv, is_unsigned = self._usual_arithmetic_conversion(lv, rv)
        kind = as_int
    elif isinstance(lv.type, ir.IntType) and self._is_floating_ir_type(rv.type):
        lv = self._implicit_convert(lv, rv.type)
        kind = as_float
    elif self._is_floating_ir_type(lv.type) and isinstance(rv.type, ir.IntType):
        rv = self._implicit_convert(rv, lv.type)
        kind = as_float
    elif self._is_floating_ir_type(lv.type) and self._is_floating_ir_type(rv.type):
        if lv.type != rv.type:
            common = self._common_float_type(lv.type, rv.type)
            lv = self._implicit_convert(lv, common)
            rv = self._implicit_convert(rv, common)
        kind = as_float
    else:
        kind = as_float

    handle = ops.get((node.op, kind))
    # Unsigned operands need the unsigned forms of /=, %= and >>=.
    if kind == as_int and is_unsigned:
        if node.op == "/=":
            handle = self.builder.udiv
        elif node.op == "%=":
            handle = self.builder.urem
        elif node.op == ">>=":
            handle = self.builder.lshr

    if node.op == "=":
        # Coerce the right side to the type stored at the destination.
        if lv_addr and hasattr(lv_addr.type, "pointee"):
            target_type = lv_addr.type.pointee
        else:
            target_type = lv.type
        if rv.type != target_type:
            rv = self._implicit_convert(rv, target_type)
        self._safe_store(rv, lv_addr)
        return rv, lv_addr  # value is returned for chained assignment

    pointer_step = isinstance(lv.type, ir.PointerType) and isinstance(
        rv.type, ir.IntType
    )
    if pointer_step:
        # Pointer compound assignment: p += n, p -= n
        rv = self._integer_promotion(rv)
        rv = self._convert_int_value(rv, int64_t, result_unsigned=False)
        if node.op == "+=":
            updated = self.builder.gep(lv, [rv], name="ptradd")
        elif node.op == "-=":
            negated = self.builder.neg(rv, "neg")
            updated = self.builder.gep(lv, [negated], name="ptrsub")
        else:
            updated = handle(lv, rv, "addtmp")
    else:
        updated = handle(lv, rv, "addtmp")
    self._safe_store(updated, lv_addr)
    return updated, lv_addr
1177
+
1178
def codegen_UnaryOp(self, node):
    """Generate IR for a unary operator.

    Handles ``++``/``--`` (prefix and postfix), ``*``, ``&``, unary ``+`` and
    ``-``, ``!``, ``~`` and ``sizeof``.  Any other operator yields
    ``(None, None)``.

    Returns ``(value, address)``; ``address`` is only set for ``*`` (the
    pointer that was loaded from) and ``&`` of something that has an address.

    Increment/decrement of a floating-point operand uses ``fadd``/``fsub``;
    integer ``add``/``sub`` is only used for integer operands, because the
    integer instructions are not valid on floating-point values.
    """
    result = None
    result_ptr = None

    if node.op in ("p++", "p--", "++", "--"):
        lv, lv_addr = self.codegen(node.expr)
        if lv is None:
            return ir.Constant(int64_t, 0), None
        is_post = node.op.startswith("p")
        is_inc = "+" in node.op
        if isinstance(lv.type, ir.PointerType):
            # Pointers step by one element via gep.
            delta = ir.Constant(int64_t, 1 if is_inc else -1)
            new_val = self.builder.gep(lv, [delta], name="ptrincdec")
        elif self._is_floating_ir_type(lv.type):
            one = ir.Constant(lv.type, 1.0)
            new_val = (
                self.builder.fadd(lv, one, "inc")
                if is_inc
                else self.builder.fsub(lv, one, "dec")
            )
        else:
            one = ir.Constant(lv.type, 1)
            new_val = (
                self.builder.add(lv, one, "inc")
                if is_inc
                else self.builder.sub(lv, one, "dec")
            )
        self._safe_store(new_val, lv_addr)
        # Postfix yields the old value, prefix the updated one.
        result = lv if is_post else new_val

    elif node.op == "*":
        if (
            isinstance(node.expr, c_ast.Cast)
            and isinstance(node.expr.expr, c_ast.FuncCall)
            and isinstance(node.expr.expr.name, c_ast.ID)
            and node.expr.expr.name.name == "__builtin_va_arg"
        ):
            # *(T *)__builtin_va_arg(ap, ...): emit a call to a uniquely
            # named placeholder function that returns T.
            target_ptr_type = self._resolve_ast_type(node.expr.to_type.type)
            va_args = (
                node.expr.expr.args.exprs if node.expr.expr.args is not None else []
            )
            if isinstance(target_ptr_type, ir.PointerType) and va_args:
                ap_addr, _ = self.codegen(va_args[0])
                if isinstance(getattr(ap_addr, "type", None), ir.PointerType):
                    self._vaarg_counter += 1
                    name = f"__pcc_va_arg_{self._vaarg_counter}"
                    placeholder = self.module.globals.get(name)
                    if placeholder is None:
                        placeholder = ir.Function(
                            self.module,
                            ir.FunctionType(
                                target_ptr_type.pointee, [ap_addr.type]
                            ),
                            name=name,
                        )
                    result = self.builder.call(
                        placeholder,
                        [ap_addr],
                        name=f"vaargtmp.{self._vaarg_counter}",
                    )
                    return result, None
        name_ir, name_ptr = self.codegen(node.expr)
        if name_ptr is None and isinstance(name_ir.type, ir.ArrayType):
            # An array value without an address is decayed to a pointer first.
            result_ptr = self._decay_array_value_to_pointer(name_ir, "derefarray")
        else:
            result_ptr = name_ir
        result = self._safe_load(result_ptr)
        if self._is_unsigned_pointee(name_ir) or self._is_unsigned_pointee(
            result_ptr
        ):
            self._tag_unsigned(result)

    elif node.op == "&":
        name_ir, name_ptr = self.codegen(node.expr)
        if name_ptr is None:
            # Functions are already first-class pointers in LLVM IR.
            # Taking their address should preserve the function symbol,
            # not turn it into a null pointer.
            result = name_ir
            result_ptr = None
        else:
            result_ptr = name_ptr
            result = result_ptr
            if self._is_unsigned_binding(result_ptr):
                self._tag_unsigned_pointee(result)
            if self._is_unsigned_return_binding(result_ptr):
                self._tag_unsigned_return(result)

    elif node.op == "+":
        operand, _ = self.codegen(node.expr)
        if isinstance(operand.type, ir.IntType):
            operand = self._integer_promotion(operand)
        result = operand  # unary plus is a no-op apart from promotion

    elif node.op == "-":
        operand, _ = self.codegen(node.expr)
        if isinstance(operand.type, ir.IntType):
            operand = self._integer_promotion(operand)
            result = self.builder.neg(operand, "negtmp")
            if self._is_unsigned_val(operand):
                self._tag_unsigned(result)
        else:
            result = self.builder.fneg(operand, "negtmp")

    elif node.op == "!":
        # Compare against zero/null and widen the i1 result to i64.
        operand, _ = self.codegen(node.expr)
        if isinstance(operand.type, ir.PointerType):
            null = ir.Constant(operand.type, None)
            cmp = self.builder.icmp_unsigned("==", operand, null, "nottmp")
            result = self.builder.zext(cmp, int64_t, "notres")
        elif isinstance(operand.type, ir.IntType):
            cmp = self.builder.icmp_signed(
                "==", operand, ir.Constant(operand.type, 0), "nottmp"
            )
            result = self.builder.zext(cmp, int64_t, "notres")
        else:
            cmp = self.builder.fcmp_ordered(
                "==", operand, ir.Constant(operand.type, 0.0), "nottmp"
            )
            result = self.builder.zext(cmp, int64_t, "notres")

    elif node.op == "~":
        operand, _ = self.codegen(node.expr)
        if isinstance(operand.type, ir.IntType):
            operand = self._integer_promotion(operand)
        result = self.builder.not_(operand, "invtmp")
        if self._is_unsigned_val(operand):
            self._tag_unsigned(result)

    elif node.op == "sizeof":
        result = self._codegen_sizeof(node.expr)

    return result, result_ptr
1305
+
1306
def _codegen_sizeof(self, expr):
    """Return sizeof as an i64 constant (always unsigned in C).

    A type name is resolved directly, an identifier is sized from the type
    recorded by ``self.lookup``, and any other expression is generated and
    sized from ``self._get_expr_ir_type``.
    """
    if isinstance(expr, c_ast.Typename):
        measured = self._resolve_ast_type(expr.type)
    elif isinstance(expr, c_ast.ID):
        measured, _ = self.lookup(expr.name)
    else:
        val, _ = self.codegen(expr)
        measured = self._get_expr_ir_type(expr, getattr(val, "type", None))
    byte_count = self._ir_type_size(measured)
    return self._tag_unsigned(ir.Constant(int64_t, byte_count))
1320
+
1321
def _resolve_type_str(self, type_str, depth=0):
    """Resolve typedef'd type names to their base type string.

    A one-element list is unwrapped; a multi-word list is returned unchanged.
    Typedefs are looked up in ``self.env`` under ``__typedef_<name>`` and
    followed recursively, giving up after 10 levels.  The result may be a
    name, a list of names, or an IR type.
    """
    if depth > 10:
        return type_str  # prevent infinite recursion
    if isinstance(type_str, list) and len(type_str) == 1:
        type_str = type_str[0]
    if isinstance(type_str, list):
        return type_str  # multi-word type, not a typedef

    key = f"__typedef_{type_str}"
    if key not in self.env:
        return type_str

    resolved = self.env[key]
    if isinstance(resolved, str):
        struct_prefix = "__struct_"
        if not resolved.startswith(struct_prefix):
            # Another name: keep following the typedef chain.
            return self._resolve_type_str(resolved, depth + 1)
        struct_name = resolved[len(struct_prefix) :]
        if struct_name in self.env:
            return self.env[struct_name][0]
        return int8_t  # opaque
    if isinstance(resolved, ir.Type):
        return resolved
    if isinstance(resolved, list) and len(resolved) == 1:
        return self._resolve_type_str(resolved[0], depth + 1)
    return resolved
1348
+
1349
def _get_ir_type(self, type_str):
    """Get IR type, resolving typedefs.

    If typedef resolution already produced an IR type it is returned as is;
    otherwise the resolved name is passed to ``get_ir_type``.
    """
    candidate = self._resolve_type_str(type_str)
    return candidate if isinstance(candidate, ir.Type) else get_ir_type(candidate)
1355
+
1356
def _is_unsigned_type_names(self, type_str):
    """Check if a type name (or list of names) resolves to an unsigned type.

    Lists are checked with ``_is_unsigned_names`` first; a one-element list is
    then checked as a single name, and longer lists by their sorted,
    space-joined spelling.  Single names are followed through
    ``__typedef_<name>`` entries in ``self.env`` and finally compared against
    ``_UNSIGNED_TYPE_NAMES`` and ``"size_t"``.
    """
    if isinstance(type_str, list):
        if _is_unsigned_names(type_str):
            return True
        if len(type_str) == 1:
            # Single-element list: check the typedef chain of that name.
            return self._is_unsigned_type_names(type_str[0])
        return " ".join(sorted(type_str)) in _UNSIGNED_TYPE_NAMES

    key = f"__typedef_{type_str}"
    if key in self.env:
        resolved = self.env[key]
        if isinstance(resolved, (list, str)):
            return self._is_unsigned_type_names(resolved)
    return type_str in _UNSIGNED_TYPE_NAMES or type_str == "size_t"
1375
+
1376
def _is_unsigned_scalar_decl_type(self, node_type):
    """Return whether ``node_type`` is a TypeDecl of an unsigned identifier type.

    Anything other than a ``TypeDecl`` wrapping an ``IdentifierType`` gives
    False.
    """
    if isinstance(node_type, c_ast.TypeDecl) and isinstance(
        node_type.type, c_ast.IdentifierType
    ):
        return self._is_unsigned_type_names(node_type.type.names)
    return False
1383
+
1384
def _has_unsigned_scalar_pointee(self, node_type):
    """Return whether an array/pointer declarator wraps an unsigned scalar.

    Only ``ArrayDecl`` and ``PtrDecl`` nodes are considered; everything else
    gives False.
    """
    if not isinstance(node_type, (c_ast.ArrayDecl, c_ast.PtrDecl)):
        return False
    return self._is_unsigned_scalar_decl_type(node_type.type)
1388
+
1389
def _func_decl_returns_unsigned(self, node_type):
    """Return whether ``node_type`` is a FuncDecl with an unsigned scalar return type."""
    if not isinstance(node_type, c_ast.FuncDecl):
        return False
    return self._is_unsigned_scalar_decl_type(node_type.type)
1393
+
1394
def _tag_value_from_decl_type(self, value, decl_type):
    """Apply the signedness tags implied by ``decl_type`` to ``value``.

    Integer values are tagged unsigned for an unsigned scalar declaration and
    cleared for any other ``TypeDecl``.  Pointer values receive the
    unsigned-pointee tag when the declaration points to / is an array of an
    unsigned scalar, and the unsigned-return tag when it is a pointer to a
    function returning one.  ``value`` is returned (None passes through).
    """
    if value is None:
        return value

    value_type = getattr(value, "type", None)
    is_pointer = isinstance(value_type, ir.PointerType)

    if isinstance(value_type, ir.IntType):
        if self._is_unsigned_scalar_decl_type(decl_type):
            self._tag_unsigned(value)
        elif isinstance(decl_type, c_ast.TypeDecl):
            self._clear_unsigned(value)

    if self._has_unsigned_scalar_pointee(decl_type) and is_pointer:
        self._tag_unsigned_pointee(value)

    if (
        isinstance(decl_type, c_ast.PtrDecl)
        and self._func_decl_returns_unsigned(decl_type.type)
        and is_pointer
    ):
        self._tag_unsigned_return(value)
    return value
1413
+
1414
def _build_const_array_init(self, init_list, array_type, elem_ir_type):
    """Build a constant initializer for a global array.

    Nested init lists are built recursively against the array's element type.
    A scalar entry that cannot be evaluated or rendered becomes a null/zero
    constant of the element type, and if the whole array constant cannot be
    rendered a null/zero constant of ``array_type`` is returned instead.
    """
    if isinstance(array_type, ir.ArrayType):
        slot_type = array_type.element
    else:
        slot_type = elem_ir_type

    def scalar_entry(expr):
        try:
            folded = ir.Constant(slot_type, self._eval_const_expr(expr))
            str(folded)  # verify serializable
            return folded
        except Exception:
            return ir.Constant(slot_type, None)

    entries = []
    for expr in init_list.exprs:
        if isinstance(expr, c_ast.InitList):
            entries.append(self._build_const_array_init(expr, slot_type, elem_ir_type))
        else:
            entries.append(scalar_entry(expr))

    try:
        whole = ir.Constant(array_type, entries)
        str(whole)  # verify
    except Exception:
        return ir.Constant(array_type, None)
    return whole
1440
+
1441
def _zero_initializer(self, ir_type):
    """Return the zero constant for ``ir_type``.

    Floating types get ``0.0`` and integer types ``0``; pointers and every
    other type get a constant built from ``None``.
    """
    if not isinstance(ir_type, ir.PointerType):
        if self._is_floating_ir_type(ir_type):
            return ir.Constant(ir_type, 0.0)
        if isinstance(ir_type, ir.IntType):
            return ir.Constant(ir_type, 0)
    return ir.Constant(ir_type, None)
1449
+
1450
def _make_global_string_constant(self, raw, name_hint="str"):
    """Create an internal, constant global i8 array holding a C string.

    ``raw`` is escape-processed and NUL-terminated; the global gets a unique
    name derived from ``name_hint``.  Returns the new global variable.
    """
    payload = self._string_bytes(self._process_escapes(raw) + "\00")
    storage_type = ir.ArrayType(int8_t, len(payload))
    unique_name = self.module.get_unique_name(name_hint)

    literal = ir.GlobalVariable(self.module, storage_type, unique_name)
    literal.initializer = ir.Constant(storage_type, payload)
    literal.global_constant = True
    literal.linkage = "internal"
    return literal
1461
+
1462
def _const_pointer_to_first_elem(self, gv, target_type):
    """Return a constant pointer to element 0 of ``gv`` typed as ``target_type``.

    The pointer comes from a ``[0, 0]`` constant gep and is bitcast only when
    its type differs from ``target_type``.
    """
    zero = ir.Constant(ir.IntType(32), 0)
    first = gv.gep([zero, zero])
    if first.type == target_type:
        return first
    return first.bitcast(target_type)
1466
+
1467
def _is_little_endian(self):
    """Return True unless the module's data layout string starts with ``E``."""
    layout = str(self.module.data_layout)
    return layout[:1] != "E"
1469
+
1470
def _zero_bytes(self, size):
    """Return a list of ``size`` separate i8 zero constants."""
    zeros = []
    for _ in range(size):
        zeros.append(ir.Constant(int8_t, 0))
    return zeros
1472
+
1473
def _scalar_init_node(self, init_node):
    """Return the first non-list initializer inside nested init lists.

    Follows the first entry of each ``InitList``; an empty list on the way
    gives None.  A non-list node is returned unchanged.
    """
    current = init_node
    while isinstance(current, c_ast.InitList):
        if not current.exprs:
            return None
        current = current.exprs[0]
    return current
1479
+
1480
def _build_pointer_const(self, init_node, ir_type):
    """Build a constant of pointer type ``ir_type`` from an initializer node.

    Supported forms: an init list (first entry, or null when empty), a string
    literal (pointer to a new global string), an identifier or ``&identifier``
    naming a function or global variable, and a constant expression equal to
    zero (null pointer).  Returns None when no constant can be formed.
    """
    no_match = object()

    def resolve(identifier):
        # Unknown names are treated as "no symbol".
        try:
            _, found = self.lookup(identifier)
        except Exception:
            found = None
        return found

    def retarget(sym, decay_arrays):
        # Return sym adapted to ir_type, or no_match when sym is unusable.
        if isinstance(sym, ir.Function):
            return sym if sym.type == ir_type else sym.bitcast(ir_type)
        if isinstance(sym, ir.GlobalVariable):
            if decay_arrays and isinstance(sym.value_type, ir.ArrayType):
                return self._const_pointer_to_first_elem(sym, ir_type)
            if sym.type == ir_type:
                return sym
            if isinstance(sym.type, ir.PointerType):
                return sym.bitcast(ir_type)
        return no_match

    if isinstance(init_node, c_ast.InitList):
        if not init_node.exprs:
            return ir.Constant(ir_type, None)
        return self._build_pointer_const(init_node.exprs[0], ir_type)

    is_string = (
        isinstance(init_node, c_ast.Constant)
        and getattr(init_node, "type", None) == "string"
    )
    if is_string:
        literal = self._make_global_string_constant(init_node.value[1:-1])
        return self._const_pointer_to_first_elem(literal, ir_type)

    if isinstance(init_node, c_ast.ID):
        # A bare array name decays to a pointer to its first element.
        adapted = retarget(resolve(init_node.name), True)
        if adapted is not no_match:
            return adapted

    takes_address = (
        isinstance(init_node, c_ast.UnaryOp)
        and init_node.op == "&"
        and isinstance(init_node.expr, c_ast.ID)
    )
    if takes_address:
        adapted = retarget(resolve(init_node.expr.name), False)
        if adapted is not no_match:
            return adapted

    # Last resort: a constant expression that evaluates to zero is null.
    try:
        if self._eval_const_expr(init_node) == 0:
            return ir.Constant(ir_type, None)
    except Exception:
        return None
    return None
1528
+
1529
def _const_int_to_bytes(self, value, byte_width):
    """Encode ``value`` as ``byte_width`` i8 constants in target byte order.

    The value is wrapped to ``byte_width * 8`` bits first.  A non-positive
    width gives an empty list.
    """
    if byte_width <= 0:
        return []
    wrapped = int(value) & ((1 << (byte_width * 8)) - 1)
    order = "little" if self._is_little_endian() else "big"
    encoded = wrapped.to_bytes(byte_width, byteorder=order, signed=False)
    return [ir.Constant(int8_t, octet) for octet in encoded]
1542
+
1543
def _split_int_constant_to_bytes(self, int_const, byte_width):
    """Split an integer constant into ``byte_width`` i8 constants.

    A constant carrying a plain Python int is encoded directly.  Otherwise the
    constant is resized to ``byte_width * 8`` bits and each byte is extracted
    with constant ``lshr``/``trunc`` expressions, in target byte order.
    """
    if byte_width <= 0:
        return []

    literal = getattr(int_const, "constant", None)
    if isinstance(literal, int):
        return self._const_int_to_bytes(literal, byte_width)

    wanted_bits = byte_width * 8
    have_bits = int_const.type.width
    if have_bits < wanted_bits:
        int_const = int_const.zext(ir.IntType(wanted_bits))
    elif have_bits > wanted_bits:
        int_const = int_const.trunc(ir.IntType(wanted_bits))

    little = self._is_little_endian()
    pieces = []
    for position in range(byte_width):
        shift = 8 * (position if little else byte_width - 1 - position)
        piece = int_const
        if shift:
            piece = piece.lshr(ir.Constant(piece.type, shift))
        if piece.type.width != 8:
            piece = piece.trunc(int8_t)
        pieces.append(piece)
    return pieces
1568
+
1569
def _pointer_const_to_bytes(self, ptr_const):
    """Return the byte image of a pointer constant as i8 constants.

    A constant whose payload is None (null) becomes all zero bytes; anything
    else is converted with ``ptrtoint`` to i64 and split into bytes.
    """
    is_null = (
        isinstance(ptr_const, ir.Constant)
        and getattr(ptr_const, "constant", None) is None
    )
    if is_null:
        return self._zero_bytes(self._ir_type_size(ptr_const.type))
    address = ptr_const.ptrtoint(int64_t)
    return self._split_int_constant_to_bytes(
        address, self._ir_type_size(ptr_const.type)
    )
1578
+
1579
def _bytes_to_int_constant(self, byte_values, int_type):
    """Assemble i8 constants into one constant of ``int_type``.

    Uses the first ``int_type.width // 8`` entries (zero-padded if fewer are
    given), combines them in target byte order, and re-interprets the result
    as a signed value of ``int_type.width`` bits.  Entries whose ``constant``
    attribute is not a Python int count as zero.
    """
    width_bits = int_type.width
    byte_width = width_bits // 8

    chunk = list(byte_values[:byte_width])
    shortfall = byte_width - len(chunk)
    if shortfall > 0:
        chunk.extend(self._zero_bytes(shortfall))

    total = 0
    for position, byte_val in enumerate(chunk):
        slot = position if self._is_little_endian() else byte_width - 1 - position
        octet = getattr(byte_val, "constant", 0)
        if not isinstance(octet, int):
            octet = 0
        total |= (octet & 0xFF) << (8 * slot)

    total &= (1 << width_bits) - 1
    if total & (1 << (width_bits - 1)):
        # Top bit set: express as the negative two's-complement value.
        total -= 1 << width_bits
    return ir.Constant(int_type, total)
1600
+
1601
def _const_init_bytes(self, init_node, ir_type):
    """Return the byte image of a constant initializer as a list of i8 constants.

    Handles, in this order: no initializer (all zero), union types (only the
    first member is initialized), pointers, floating types, integers, arrays
    (string literal for i8 arrays, or init list) and literal structs (members
    placed at offsets aligned with ``self._ir_type_align``).  Any other type
    is treated as an integer, falling back to zero bytes on failure.
    """
    size = self._ir_type_size(ir_type)
    if init_node is None:
        return self._zero_bytes(size)

    if getattr(ir_type, "is_union", False):
        image = self._zero_bytes(size)
        names = getattr(ir_type, "members", None) or list(
            ir_type.member_types.keys()
        )
        if not names:
            return image

        lead_type = ir_type.member_types[names[0]]
        lead_init = init_node
        if isinstance(init_node, c_ast.InitList):
            items = init_node.exprs or []
            if not items:
                lead_init = None
            elif not isinstance(lead_type, (ir.ArrayType, ir.LiteralStructType)):
                lead_init = items[0]
            elif len(items) == 1 and isinstance(items[0], c_ast.InitList):
                # { { ... } }: the inner braces belong to the aggregate member.
                lead_init = items[0]
            else:
                lead_init = init_node

        lead_bytes = self._const_init_bytes(lead_init, lead_type)
        image[: min(size, len(lead_bytes))] = lead_bytes[:size]
        return image

    if isinstance(ir_type, ir.PointerType):
        ptr_const = self._build_pointer_const(init_node, ir_type)
        if ptr_const is None:
            return self._zero_bytes(size)
        return self._pointer_const_to_bytes(ptr_const)

    if self._is_floating_ir_type(ir_type):
        scalar = self._scalar_init_node(init_node)
        if scalar is None:
            number = 0.0
        elif isinstance(scalar, c_ast.Constant):
            try:
                number = self._parse_float_constant(scalar.value)
            except ValueError:
                number = float(self._eval_const_expr(scalar))
        else:
            number = float(self._eval_const_expr(scalar))
        code = "d" if isinstance(ir_type, ir.DoubleType) else "f"
        order = "<" if self._is_little_endian() else ">"
        return [ir.Constant(int8_t, b) for b in struct.pack(order + code, number)]

    if isinstance(ir_type, ir.IntType):
        scalar = self._scalar_init_node(init_node)
        if scalar is None:
            return self._zero_bytes(size)
        return self._const_int_to_bytes(self._eval_const_expr(scalar), size)

    if isinstance(ir_type, ir.ArrayType):
        is_char_string = (
            isinstance(init_node, c_ast.Constant)
            and getattr(init_node, "type", None) == "string"
            and isinstance(ir_type.element, ir.IntType)
            and ir_type.element.width == 8
        )
        if is_char_string:
            text = self._process_escapes(init_node.value[1:-1])
            data = self._string_bytes(text + "\00")
            # Pad with NULs or cut to exactly the array length.
            if len(data) < ir_type.count:
                data.extend(b"\x00" * (ir_type.count - len(data)))
            else:
                data = data[: ir_type.count]
            return [ir.Constant(int8_t, b) for b in data]

        if isinstance(init_node, c_ast.InitList):
            image = []
            for i in range(ir_type.count):
                expr = init_node.exprs[i] if i < len(init_node.exprs) else None
                image.extend(self._const_init_bytes(expr, ir_type.element))
            return image

        return self._zero_bytes(size)

    if isinstance(ir_type, ir.LiteralStructType):
        image = self._zero_bytes(size)
        if not isinstance(init_node, c_ast.InitList):
            return image

        offset = 0
        for i, member_type in enumerate(ir_type.elements):
            align = self._ir_type_align(member_type)
            offset = (offset + align - 1) & ~(align - 1)  # round up to alignment
            expr = init_node.exprs[i] if i < len(init_node.exprs) else None
            field_bytes = self._const_init_bytes(expr, member_type)
            field_size = self._ir_type_size(member_type)
            image[offset : offset + field_size] = field_bytes[:field_size]
            offset += field_size
        return image

    scalar = self._scalar_init_node(init_node)
    if scalar is None:
        return self._zero_bytes(size)
    try:
        return self._const_int_to_bytes(self._eval_const_expr(scalar), size)
    except Exception:
        return self._zero_bytes(size)
1712
+
1713
def _build_const_init(self, init_node, ir_type):
    """Build a typed constant initializer for ``ir_type`` from ``init_node``.

    Unions are built from their byte image (only when the first IR element is
    an integer); pointers, arrays, literal structs and scalars are built
    directly.  Whenever a constant cannot be formed, the result of
    ``self._zero_initializer(ir_type)`` is returned instead.
    """
    if init_node is None:
        return self._zero_initializer(ir_type)

    if getattr(ir_type, "is_union", False):
        try:
            image = self._const_init_bytes(init_node, ir_type)
            head_type = ir_type.elements[0]
            if not isinstance(head_type, ir.IntType):
                return self._zero_initializer(ir_type)
            head_size = self._ir_type_size(head_type)
            parts = [self._bytes_to_int_constant(image[:head_size], head_type)]
            if len(ir_type.elements) > 1:
                # Remaining bytes fill the second element of the union's layout.
                tail_type = ir_type.elements[1]
                tail_size = self._ir_type_size(tail_type)
                tail = image[head_size : head_size + tail_size]
                parts.append(ir.Constant(tail_type, tail))
            return ir.Constant(ir_type, parts)
        except Exception:
            return self._zero_initializer(ir_type)

    if isinstance(ir_type, ir.PointerType):
        ptr_const = self._build_pointer_const(init_node, ir_type)
        if ptr_const is None:
            return self._zero_initializer(ir_type)
        return ptr_const

    if isinstance(ir_type, ir.ArrayType):
        is_string = (
            isinstance(init_node, c_ast.Constant)
            and getattr(init_node, "type", None) == "string"
        )
        if is_string:
            text = self._process_escapes(init_node.value[1:-1])
            data = self._string_bytes(text + "\00")
            # Pad with NULs or cut to exactly the array length.
            if len(data) < ir_type.count:
                data.extend(b"\x00" * (ir_type.count - len(data)))
            else:
                data = data[: ir_type.count]
            try:
                return ir.Constant(ir_type, data)
            except Exception:
                return self._zero_initializer(ir_type)

        if isinstance(init_node, c_ast.InitList):
            items = []
            for i in range(ir_type.count):
                expr = init_node.exprs[i] if i < len(init_node.exprs) else None
                items.append(self._build_const_init(expr, ir_type.element))
            try:
                return ir.Constant(ir_type, items)
            except Exception:
                return self._zero_initializer(ir_type)

        return self._zero_initializer(ir_type)

    if isinstance(ir_type, ir.LiteralStructType):
        if not isinstance(init_node, c_ast.InitList):
            return self._zero_initializer(ir_type)
        items = []
        for i, member_type in enumerate(ir_type.elements):
            expr = init_node.exprs[i] if i < len(init_node.exprs) else None
            items.append(self._build_const_init(expr, member_type))
        try:
            return ir.Constant(ir_type, items)
        except Exception:
            return self._zero_initializer(ir_type)

    if isinstance(init_node, c_ast.InitList):
        # Braces around a scalar: use the first entry.
        if not init_node.exprs:
            return self._zero_initializer(ir_type)
        return self._build_const_init(init_node.exprs[0], ir_type)

    try:
        folded = ir.Constant(ir_type, self._eval_const_expr(init_node))
        str(folded)  # make sure the constant can be rendered
    except Exception:
        return self._zero_initializer(ir_type)
    return folded
1794
+
1795
+ def _init_array(self, base_addr, init_list, elem_ir_type, prefix_idx):
1796
+ """Recursively initialize array elements from an InitList."""
1797
+ for i, expr in enumerate(init_list.exprs):
1798
+ idx = prefix_idx + [ir.Constant(ir.IntType(32), i)]
1799
+ if isinstance(expr, c_ast.InitList):
1800
+ self._init_array(base_addr, expr, elem_ir_type, idx)
1801
+ else:
1802
+ val, _ = self.codegen(expr)
1803
+ val = self._implicit_convert(val, elem_ir_type)
1804
+ elem_ptr = self.builder.gep(base_addr, idx, inbounds=True)
1805
+ self._safe_store(val, elem_ptr)
1806
+
1807
+ def _build_array_ir_type(self, array_decl):
1808
+ dims = []
1809
+ node = array_decl
1810
+ while isinstance(node, c_ast.ArrayDecl):
1811
+ dims.append(self._eval_dim(node.dim) if node.dim else 0)
1812
+ node = node.type
1813
+ elem_ir_type = self._resolve_ast_type(node)
1814
+ if isinstance(elem_ir_type, ir.VoidType):
1815
+ elem_ir_type = int8_t
1816
+ arr_ir_type = elem_ir_type
1817
+ for dim in reversed(dims):
1818
+ arr_ir_type = ir.ArrayType(arr_ir_type, dim)
1819
+ arr_ir_type.dim_array = dims
1820
+ return arr_ir_type
1821
+
1822
+ def _resolve_param_type(self, param):
1823
+ """Resolve a function parameter type, handling typedefs and pointers."""
1824
+ if isinstance(param.type, c_ast.ArrayDecl):
1825
+ arr_type = self._build_array_ir_type(param.type)
1826
+ return ir.PointerType(arr_type.element)
1827
+ t = self._resolve_ast_type(param.type)
1828
+ if isinstance(t, ir.ArrayType):
1829
+ return ir.PointerType(t.element)
1830
+ if isinstance(t, ir.VoidType):
1831
+ return None # void params mean "no params" in C
1832
+ return t
1833
+
1834
+ def _resolve_ast_type(self, node_type):
1835
+ """Recursively resolve an AST type to IR type, with typedef support."""
1836
+ if isinstance(node_type, c_ast.PtrDecl):
1837
+ inner = node_type.type
1838
+ if isinstance(inner, c_ast.FuncDecl):
1839
+ return self._build_func_ptr_type(inner)
1840
+ pointee = self._resolve_ast_type(inner)
1841
+ if isinstance(pointee, ir.VoidType):
1842
+ return voidptr_t
1843
+ return ir.PointerType(pointee)
1844
+ elif isinstance(node_type, c_ast.TypeDecl):
1845
+ if isinstance(node_type.type, c_ast.IdentifierType):
1846
+ return self._get_ir_type(node_type.type.names)
1847
+ elif isinstance(node_type.type, c_ast.Struct):
1848
+ return self.codegen_Struct(node_type.type)
1849
+ elif isinstance(node_type.type, c_ast.Union):
1850
+ return self.codegen_Union(node_type.type)
1851
+ return int64_t
1852
+ elif isinstance(node_type, c_ast.ArrayDecl):
1853
+ return voidptr_t
1854
+ return int64_t
1855
+
1856
+ def _eval_dim(self, dim_node):
1857
+ """Evaluate array dimension (may be a constant or expression)."""
1858
+ if dim_node is None:
1859
+ return 0
1860
+ if isinstance(dim_node, c_ast.Constant):
1861
+ v = dim_node.value.rstrip("uUlL")
1862
+ return int(v, 0) # handles hex/octal/decimal
1863
+ return self._eval_const_expr(dim_node)
1864
+
1865
+ def _build_func_ptr_type(self, func_decl_node):
1866
+ """Build an IR function pointer type from a FuncDecl AST node."""
1867
+ ret_ir, _ = self.codegen(func_decl_node)
1868
+ param_types = []
1869
+ if func_decl_node.args:
1870
+ for param in func_decl_node.args.params:
1871
+ if isinstance(param, c_ast.EllipsisParam):
1872
+ continue
1873
+ if isinstance(param, c_ast.Typename):
1874
+ t = self._resolve_ast_type(param.type)
1875
+ if not isinstance(t, ir.VoidType):
1876
+ param_types.append(t)
1877
+ elif isinstance(param, c_ast.Decl):
1878
+ t = self._resolve_param_type(param)
1879
+ if t is not None:
1880
+ param_types.append(t)
1881
+ if isinstance(ret_ir, ir.VoidType):
1882
+ ret_ir = ir.VoidType()
1883
+ func_type = ir.FunctionType(ret_ir, param_types)
1884
+ return func_type.as_pointer()
1885
+
1886
+ def _safe_load(self, ptr, name=""):
1887
+ """Load from ptr, guard against non-pointer types."""
1888
+ if not isinstance(ptr.type, ir.PointerType):
1889
+ return ptr
1890
+ if isinstance(ptr.type.pointee, ir.FunctionType):
1891
+ return ptr # function pointers are first-class as pointers
1892
+ try:
1893
+ return self.builder.load(ptr, name=name)
1894
+ except Exception:
1895
+ return ptr
1896
+
1897
+ def _decay_array_value_to_pointer(self, value, name="arraydecay"):
1898
+ """Convert an array value (including string literals) to &value[0]."""
1899
+ if not isinstance(value.type, ir.ArrayType):
1900
+ return value
1901
+ base = value
1902
+ if isinstance(value, ir.values.Constant):
1903
+ gv = ir.GlobalVariable(
1904
+ self.module, value.type, self.module.get_unique_name("strlit")
1905
+ )
1906
+ gv.initializer = value
1907
+ gv.global_constant = True
1908
+ gv.linkage = "internal"
1909
+ base = gv
1910
+ idx0 = ir.Constant(ir.IntType(32), 0)
1911
+ return self.builder.gep(base, [idx0, idx0], name=name)
1912
+
1913
+ def _safe_store(self, value, ptr):
1914
+ """Store value to ptr, auto-converting types if needed."""
1915
+ if value is None or ptr is None:
1916
+ return
1917
+ if isinstance(value.type, ir.VoidType):
1918
+ return # Can't store void
1919
+ if not isinstance(ptr.type, ir.PointerType):
1920
+ return
1921
+ if hasattr(ptr.type, "pointee") and value.type != ptr.type.pointee:
1922
+ value = self._implicit_convert(value, ptr.type.pointee)
1923
+ try:
1924
+ self.builder.store(value, ptr)
1925
+ except (TypeError, Exception):
1926
+ pass
1927
+
1928
    def _implicit_convert(self, val, target_type):
        """Convert val to target_type if needed (implicit C promotion/truncation).

        The checks run in order and the first matching rule wins:

        1. ``val`` is None or void: return a null pointer / zero constant of
           ``target_type`` (or ``val`` itself when the target is void too).
        2. Types already equal: return ``val`` unchanged.
        3. int <-> float and float <-> double conversions.
        4. int -> int widening (zero- or sign-extended depending on
           ``_is_unsigned_val``) or truncation.
        5. int -> pointer, pointer -> int, pointer -> pointer (bitcast).
        6. array -> pointer via ``_decay_array_value_to_pointer``.

        Anything not covered is returned unchanged.
        """
        if val is None or isinstance(val.type, ir.VoidType):
            # Can't convert void — return a zero of target type
            if isinstance(target_type, ir.PointerType):
                return ir.Constant(target_type, None)
            elif isinstance(target_type, ir.VoidType):
                return val
            return ir.Constant(target_type, 0)
        if val.type == target_type:
            return val
        if isinstance(val.type, ir.IntType) and self._is_floating_ir_type(target_type):
            return self._int_to_float(val, target_type)
        if self._is_floating_ir_type(val.type) and isinstance(target_type, ir.IntType):
            # Always a signed float-to-int conversion.
            return self.builder.fptosi(val, target_type)
        if self._is_floating_ir_type(val.type) and self._is_floating_ir_type(
            target_type
        ):
            if isinstance(val.type, ir.FloatType) and isinstance(
                target_type, ir.DoubleType
            ):
                return self.builder.fpext(val, target_type)
            if isinstance(val.type, ir.DoubleType) and isinstance(
                target_type, ir.FloatType
            ):
                return self.builder.fptrunc(val, target_type)
            # Any other floating pair is left as it is.
            return val
        # int -> int (wider or narrower)
        if isinstance(val.type, ir.IntType) and isinstance(target_type, ir.IntType):
            if val.type.width < target_type.width:
                if self._is_unsigned_val(val):
                    # Unsigned sources are zero-extended and stay tagged unsigned.
                    result = self.builder.zext(val, target_type)
                    return self._tag_unsigned(result)
                return self.builder.sext(val, target_type)
            elif val.type.width > target_type.width:
                result = self.builder.trunc(val, target_type)
                if self._is_unsigned_val(val):
                    return self._tag_unsigned(result)
                return result
        # int -> pointer (e.g., NULL assignment, p = 0)
        if isinstance(val.type, ir.IntType) and isinstance(target_type, ir.PointerType):
            # inttoptr only works for simple pointer types, not function pointers
            # so go through voidptr_t first and bitcast to the real target.
            raw_ptr = self.builder.inttoptr(val, voidptr_t)
            if target_type == voidptr_t:
                return raw_ptr
            return self.builder.bitcast(raw_ptr, target_type)
        # pointer -> int
        if isinstance(val.type, ir.PointerType) and isinstance(target_type, ir.IntType):
            return self.builder.ptrtoint(val, target_type)
        # pointer -> different pointer
        if isinstance(val.type, ir.PointerType) and isinstance(
            target_type, ir.PointerType
        ):
            result = self.builder.bitcast(val, target_type)
            # Carry the unsigned-pointee / unsigned-return tags over to the cast.
            if self._is_unsigned_pointee(val):
                self._tag_unsigned_pointee(result)
            if self._is_unsigned_return(val):
                self._tag_unsigned_return(result)
            return result
        # array -> pointer (string literal to char*)
        if isinstance(val.type, ir.ArrayType) and isinstance(
            target_type, ir.PointerType
        ):
            ptr = self._decay_array_value_to_pointer(val)
            if ptr.type == target_type:
                return ptr
            return self.builder.bitcast(ptr, target_type)
        # No applicable rule: hand the value back untouched.
        return val
1996
+
1997
+ def _extend_call_result(self, result, returns_unsigned=False):
1998
+ if not isinstance(result.type, ir.IntType):
1999
+ return result
2000
+ if returns_unsigned:
2001
+ self._tag_unsigned(result)
2002
+ else:
2003
+ self._clear_unsigned(result)
2004
+ return result
2005
+
2006
+ def _to_bool(self, val, name="cond"):
2007
+ """Convert any value to an i1 boolean (!=0)."""
2008
+ if isinstance(val.type, ir.IntType):
2009
+ if val.type.width == 1:
2010
+ return val
2011
+ return self.builder.icmp_signed("!=", val, ir.Constant(val.type, 0), name)
2012
+ elif isinstance(val.type, ir.PointerType):
2013
+ null = ir.Constant(val.type, None)
2014
+ return self.builder.icmp_unsigned("!=", val, null, name)
2015
+ else:
2016
+ return self.builder.fcmp_unordered(
2017
+ "!=", val, ir.Constant(val.type, 0.0), name
2018
+ )
2019
+
2020
+ def _ir_type_align(self, ir_type):
2021
+ """Return natural alignment of an IR type in bytes."""
2022
+ if isinstance(ir_type, ir.IntType):
2023
+ return min(ir_type.width // 8, 8)
2024
+ elif isinstance(ir_type, ir.FloatType):
2025
+ return 4
2026
+ elif isinstance(ir_type, ir.DoubleType):
2027
+ return 8
2028
+ elif isinstance(ir_type, ir.PointerType):
2029
+ return 8
2030
+ elif isinstance(ir_type, ir.ArrayType):
2031
+ return self._ir_type_align(ir_type.element)
2032
+ elif isinstance(ir_type, ir.LiteralStructType):
2033
+ if not ir_type.elements:
2034
+ return 1
2035
+ return max(self._ir_type_align(e) for e in ir_type.elements)
2036
+ return 8
2037
+
2038
+ def _ir_type_size(self, ir_type):
2039
+ """Compute byte size of an IR type with proper alignment/padding."""
2040
+ if isinstance(ir_type, ir.IntType):
2041
+ return ir_type.width // 8
2042
+ elif isinstance(ir_type, ir.FloatType):
2043
+ return 4
2044
+ elif isinstance(ir_type, ir.DoubleType):
2045
+ return 8
2046
+ elif isinstance(ir_type, ir.PointerType):
2047
+ return 8
2048
+ elif isinstance(ir_type, ir.ArrayType):
2049
+ return int(ir_type.count) * self._ir_type_size(ir_type.element)
2050
+ elif isinstance(ir_type, ir.LiteralStructType):
2051
+ offset = 0
2052
+ for elem in ir_type.elements:
2053
+ align = self._ir_type_align(elem)
2054
+ offset = (offset + align - 1) & ~(align - 1) # align up
2055
+ offset += self._ir_type_size(elem)
2056
+ # Tail padding: align to struct's overall alignment
2057
+ struct_align = self._ir_type_align(ir_type)
2058
+ offset = (offset + struct_align - 1) & ~(struct_align - 1)
2059
+ return offset
2060
+ return 8
2061
+
2062
+ def _refine_member_ir_type(self, aggregate_type, member_key, field_type):
2063
+ """Prefer semantic member types over storage types when available."""
2064
+ semantic_field_type = field_type
2065
+ member_decl_types = getattr(aggregate_type, "member_decl_types", None)
2066
+ decl_type = None
2067
+
2068
+ if isinstance(member_decl_types, dict):
2069
+ decl_type = member_decl_types.get(member_key)
2070
+ elif (
2071
+ isinstance(member_key, int)
2072
+ and member_decl_types is not None
2073
+ and member_key < len(member_decl_types)
2074
+ ):
2075
+ decl_type = member_decl_types[member_key]
2076
+
2077
+ if decl_type is None:
2078
+ return semantic_field_type
2079
+
2080
+ try:
2081
+ resolved = self._resolve_ast_type(decl_type)
2082
+ if isinstance(field_type, ir.ArrayType) and isinstance(
2083
+ resolved, ir.PointerType
2084
+ ):
2085
+ return semantic_field_type
2086
+ if isinstance(
2087
+ resolved, (ir.ArrayType, ir.LiteralStructType, ir.PointerType)
2088
+ ):
2089
+ return resolved
2090
+ except Exception:
2091
+ pass
2092
+
2093
+ return semantic_field_type
2094
+
2095
+ def _get_aggregate_field_info(self, aggregate_type, field_name):
2096
+ """Return byte offset and semantic IR type for a struct/union field."""
2097
+ if getattr(aggregate_type, "is_union", False):
2098
+ field_type = aggregate_type.member_types[field_name]
2099
+ semantic_field_type = self._refine_member_ir_type(
2100
+ aggregate_type, field_name, field_type
2101
+ )
2102
+ return 0, semantic_field_type
2103
+
2104
+ if not hasattr(aggregate_type, "members"):
2105
+ raise CodegenError(f"Aggregate has no named fields: {aggregate_type}")
2106
+
2107
+ field_index = None
2108
+ for i, member in enumerate(aggregate_type.members):
2109
+ if member == field_name:
2110
+ field_index = i
2111
+ break
2112
+
2113
+ if field_index is None:
2114
+ raise CodegenError(f"Field '{field_name}' not found in aggregate")
2115
+
2116
+ offset = 0
2117
+ for i, elem in enumerate(aggregate_type.elements):
2118
+ align = self._ir_type_align(elem)
2119
+ offset = (offset + align - 1) & ~(align - 1)
2120
+ if i == field_index:
2121
+ field_type = aggregate_type.elements[field_index]
2122
+ semantic_field_type = self._refine_member_ir_type(
2123
+ aggregate_type, field_index, field_type
2124
+ )
2125
+ return offset, semantic_field_type
2126
+ offset += self._ir_type_size(elem)
2127
+
2128
+ raise CodegenError(f"Field '{field_name}' not found in aggregate")
2129
+
2130
+ def _eval_offsetof_structref(self, node):
2131
+ """Evaluate offsetof-like expressions expanded as &((T*)0)->field."""
2132
+ if isinstance(node, c_ast.StructRef):
2133
+ base_offset, base_type = self._eval_offsetof_structref(node.name)
2134
+ aggregate_type = base_type
2135
+ if node.type == "->" and isinstance(aggregate_type, ir.PointerType):
2136
+ aggregate_type = aggregate_type.pointee
2137
+ field_offset, field_type = self._get_aggregate_field_info(
2138
+ aggregate_type, node.field.name
2139
+ )
2140
+ return base_offset + field_offset, field_type
2141
+
2142
+ if isinstance(node, c_ast.Cast):
2143
+ target_type = self._resolve_ast_type(node.to_type.type)
2144
+ return 0, target_type
2145
+
2146
+ raise CodegenError(f"Not an offsetof base: {type(node).__name__}")
2147
+
2148
+ def codegen_Typename(self, node):
2149
+ # Used inside sizeof(type) — not directly code-generated
2150
+ return None, None
2151
+
2152
+ def codegen_BinaryOp(self, node):
2153
+ # Short-circuit && and || before evaluating both sides
2154
+ if node.op == "&&":
2155
+ return self._codegen_short_circuit_and(node)
2156
+ elif node.op == "||":
2157
+ return self._codegen_short_circuit_or(node)
2158
+
2159
+ lhs, _ = self.codegen(node.left)
2160
+ rhs, _ = self.codegen(node.right)
2161
+ if lhs is None or rhs is None:
2162
+ return ir.Constant(int64_t, 0), None
2163
+
2164
+ # Pointer arithmetic: ptr + int or ptr - int
2165
+ if (
2166
+ node.op in ("+", "-")
2167
+ and isinstance(lhs.type, ir.PointerType)
2168
+ and isinstance(rhs.type, ir.IntType)
2169
+ ):
2170
+ rhs = self._integer_promotion(rhs)
2171
+ rhs = self._convert_int_value(rhs, int64_t, result_unsigned=False)
2172
+ if node.op == "-":
2173
+ rhs = self.builder.neg(rhs, "negidx")
2174
+ return self.builder.gep(lhs, [rhs], name="ptradd"), None
2175
+ if (
2176
+ node.op == "+"
2177
+ and isinstance(rhs.type, ir.PointerType)
2178
+ and isinstance(lhs.type, ir.IntType)
2179
+ ):
2180
+ lhs = self._integer_promotion(lhs)
2181
+ lhs = self._convert_int_value(lhs, int64_t, result_unsigned=False)
2182
+ return self.builder.gep(rhs, [lhs], name="ptradd"), None
2183
+
2184
+ # Pointer subtraction: ptr - ptr -> int (element count)
2185
+ if (
2186
+ node.op == "-"
2187
+ and isinstance(lhs.type, ir.PointerType)
2188
+ and isinstance(rhs.type, ir.PointerType)
2189
+ ):
2190
+ lhs_int = self.builder.ptrtoint(lhs, int64_t)
2191
+ rhs_int = self.builder.ptrtoint(rhs, int64_t)
2192
+ diff = self.builder.sub(lhs_int, rhs_int, "ptrdiff")
2193
+ elem_size = self._ir_type_size(lhs.type.pointee)
2194
+ return (
2195
+ self.builder.sdiv(
2196
+ diff, ir.Constant(int64_t, elem_size), "ptrdiff_elems"
2197
+ ),
2198
+ None,
2199
+ )
2200
+
2201
+ # Promote int/pointer mix
2202
+ if isinstance(lhs.type, ir.PointerType) and isinstance(rhs.type, ir.IntType):
2203
+ rhs = self._implicit_convert(rhs, lhs.type)
2204
+ elif isinstance(rhs.type, ir.PointerType) and isinstance(lhs.type, ir.IntType):
2205
+ lhs = self._implicit_convert(lhs, rhs.type)
2206
+
2207
+ # Promotion above can turn int/pointer into ptr/ptr; handle subtraction
2208
+ if (
2209
+ node.op == "-"
2210
+ and isinstance(lhs.type, ir.PointerType)
2211
+ and isinstance(rhs.type, ir.PointerType)
2212
+ ):
2213
+ lhs_int = self.builder.ptrtoint(lhs, int64_t)
2214
+ rhs_int = self.builder.ptrtoint(rhs, int64_t)
2215
+ diff = self.builder.sub(lhs_int, rhs_int, "ptrdiff")
2216
+ elem_size = self._ir_type_size(lhs.type.pointee)
2217
+ return (
2218
+ self.builder.sdiv(
2219
+ diff, ir.Constant(int64_t, elem_size), "ptrdiff_elems"
2220
+ ),
2221
+ None,
2222
+ )
2223
+
2224
+ is_unsigned = False
2225
+ if isinstance(lhs.type, ir.IntType) and self._is_floating_ir_type(rhs.type):
2226
+ lhs = self._implicit_convert(lhs, rhs.type)
2227
+ elif self._is_floating_ir_type(lhs.type) and isinstance(rhs.type, ir.IntType):
2228
+ rhs = self._implicit_convert(rhs, lhs.type)
2229
+ elif self._is_floating_ir_type(lhs.type) and self._is_floating_ir_type(
2230
+ rhs.type
2231
+ ):
2232
+ if lhs.type != rhs.type:
2233
+ target = self._common_float_type(lhs.type, rhs.type)
2234
+ lhs = self._implicit_convert(lhs, target)
2235
+ rhs = self._implicit_convert(rhs, target)
2236
+ elif isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType):
2237
+ if node.op in ("<<", ">>"):
2238
+ lhs, rhs, is_unsigned = self._shift_operand_conversion(lhs, rhs)
2239
+ else:
2240
+ lhs, rhs, is_unsigned = self._usual_arithmetic_conversion(lhs, rhs)
2241
+
2242
+ dispatch_type_double = 1
2243
+ dispatch_type_int = 0
2244
+
2245
+ if isinstance(lhs.type, ir.IntType) and isinstance(rhs.type, ir.IntType):
2246
+ dispatch_type = dispatch_type_int
2247
+ else:
2248
+ dispatch_type = dispatch_type_double
2249
+
2250
+ if node.op in ["+", "-", "*", "/", "%"]:
2251
+ if dispatch_type == dispatch_type_double:
2252
+ ops = {
2253
+ "+": self.builder.fadd,
2254
+ "-": self.builder.fsub,
2255
+ "*": self.builder.fmul,
2256
+ "/": self.builder.fdiv,
2257
+ "%": self.builder.frem,
2258
+ }
2259
+ return ops[node.op](lhs, rhs, "tmp"), None
2260
+ else:
2261
+ if node.op in ("/", "%") and is_unsigned:
2262
+ op = self.builder.udiv if node.op == "/" else self.builder.urem
2263
+ else:
2264
+ ops = {
2265
+ "+": self.builder.add,
2266
+ "-": self.builder.sub,
2267
+ "*": self.builder.mul,
2268
+ "/": self.builder.sdiv,
2269
+ "%": self.builder.srem,
2270
+ }
2271
+ op = ops[node.op]
2272
+ result = op(lhs, rhs, "tmp")
2273
+ if is_unsigned:
2274
+ self._tag_unsigned(result)
2275
+ return result, None
2276
+ elif node.op in [">", "<", ">=", "<=", "!=", "=="]:
2277
+ if isinstance(lhs.type, ir.PointerType) and isinstance(
2278
+ rhs.type, ir.PointerType
2279
+ ):
2280
+ lhs_i = self.builder.ptrtoint(lhs, int64_t)
2281
+ rhs_i = self.builder.ptrtoint(rhs, int64_t)
2282
+ cmp = self.builder.icmp_unsigned(node.op, lhs_i, rhs_i, "ptrcmp")
2283
+ elif dispatch_type == dispatch_type_int:
2284
+ if is_unsigned:
2285
+ cmp = self.builder.icmp_unsigned(node.op, lhs, rhs, "cmptmp")
2286
+ else:
2287
+ cmp = self.builder.icmp_signed(node.op, lhs, rhs, "cmptmp")
2288
+ else:
2289
+ cmp = self._float_compare(node.op, lhs, rhs, "cmptmp")
2290
+ return self.builder.zext(cmp, int64_t, "booltmp"), None
2291
+ elif node.op == "&":
2292
+ result = self.builder.and_(lhs, rhs, "andtmp")
2293
+ if is_unsigned:
2294
+ self._tag_unsigned(result)
2295
+ return result, None
2296
+ elif node.op == "|":
2297
+ result = self.builder.or_(lhs, rhs, "ortmp")
2298
+ if is_unsigned:
2299
+ self._tag_unsigned(result)
2300
+ return result, None
2301
+ elif node.op == "^":
2302
+ return self.builder.xor(lhs, rhs, "xortmp"), None
2303
+ elif node.op == "<<":
2304
+ result = self.builder.shl(lhs, rhs, "shltmp")
2305
+ if is_unsigned:
2306
+ self._tag_unsigned(result)
2307
+ return result, None
2308
+ elif node.op == ">>":
2309
+ if is_unsigned:
2310
+ return self.builder.lshr(lhs, rhs, "shrtmp"), None
2311
+ return self.builder.ashr(lhs, rhs, "shrtmp"), None
2312
+ else:
2313
+ func = self.module.globals.get("binary{0}".format(node.op))
2314
+ return self.builder.call(func, [lhs, rhs], "binop"), None
2315
+
2316
+ def _codegen_short_circuit_and(self, node):
2317
+ """Short-circuit &&: if lhs is false, skip rhs."""
2318
+ lhs, _ = self.codegen(node.left)
2319
+ lhs_bool = self._to_bool(lhs, "and_lhs")
2320
+
2321
+ rhs_bb = self.builder.function.append_basic_block("and_rhs")
2322
+ merge_bb = self.builder.function.append_basic_block("and_merge")
2323
+ lhs_bb = self.builder.block
2324
+
2325
+ self.builder.cbranch(lhs_bool, rhs_bb, merge_bb)
2326
+
2327
+ self.builder.position_at_end(rhs_bb)
2328
+ rhs, _ = self.codegen(node.right)
2329
+ rhs_bool = self._to_bool(rhs, "and_rhs")
2330
+ rhs_result = self.builder.zext(rhs_bool, int64_t, "and_rhs_ext")
2331
+ rhs_bb_end = self.builder.block
2332
+ self.builder.branch(merge_bb)
2333
+
2334
+ self.builder.position_at_end(merge_bb)
2335
+ phi = self.builder.phi(int64_t, "and_result")
2336
+ phi.add_incoming(ir.Constant(int64_t, 0), lhs_bb)
2337
+ phi.add_incoming(rhs_result, rhs_bb_end)
2338
+ return phi, None
2339
+
2340
+ def _codegen_short_circuit_or(self, node):
2341
+ """Short-circuit ||: if lhs is true, skip rhs."""
2342
+ lhs, _ = self.codegen(node.left)
2343
+ lhs_bool = self._to_bool(lhs, "or_lhs")
2344
+
2345
+ rhs_bb = self.builder.function.append_basic_block("or_rhs")
2346
+ merge_bb = self.builder.function.append_basic_block("or_merge")
2347
+ lhs_bb = self.builder.block
2348
+
2349
+ self.builder.cbranch(lhs_bool, merge_bb, rhs_bb)
2350
+
2351
+ self.builder.position_at_end(rhs_bb)
2352
+ rhs, _ = self.codegen(node.right)
2353
+ rhs_bool = self._to_bool(rhs, "or_rhs")
2354
+ rhs_result = self.builder.zext(rhs_bool, int64_t, "or_rhs_ext")
2355
+ rhs_bb_end = self.builder.block
2356
+ self.builder.branch(merge_bb)
2357
+
2358
+ self.builder.position_at_end(merge_bb)
2359
+ phi = self.builder.phi(int64_t, "or_result")
2360
+ phi.add_incoming(ir.Constant(int64_t, 1), lhs_bb)
2361
+ phi.add_incoming(rhs_result, rhs_bb_end)
2362
+ return phi, None
2363
+
2364
+ def codegen_If(self, node):
2365
+
2366
+ cond_val, _ = self.codegen(node.cond)
2367
+ cmp = self._to_bool(cond_val)
2368
+
2369
+ then_bb = self.builder.function.append_basic_block("then")
2370
+ else_bb = self.builder.function.append_basic_block("else")
2371
+ merge_bb = self.builder.function.append_basic_block("ifend")
2372
+
2373
+ self.builder.cbranch(cmp, then_bb, else_bb)
2374
+
2375
+ with self.new_scope():
2376
+ self.builder.position_at_end(then_bb)
2377
+ self.codegen(node.iftrue)
2378
+ if not self.builder.block.is_terminated:
2379
+ self.builder.branch(merge_bb)
2380
+
2381
+ with self.new_scope():
2382
+ self.builder.position_at_end(else_bb)
2383
+ if node.iffalse:
2384
+ self.codegen(node.iffalse)
2385
+ if not self.builder.block.is_terminated:
2386
+ self.builder.branch(merge_bb)
2387
+ self.builder.position_at_end(merge_bb)
2388
+ # self.builder.block = merge_bb
2389
+
2390
+ return None, None
2391
+
2392
+ def codegen_NoneType(self, node):
2393
+ return None, None
2394
+
2395
+ def codegen_For(self, node):
2396
+
2397
+ saved_block = self.builder.block
2398
+ self.builder.position_at_end(saved_block) # why the save_block at the end
2399
+
2400
+ if node.init is not None:
2401
+ self.codegen(node.init)
2402
+
2403
+ # The builder is what? loop is a block which begin with loop
2404
+ test_bb = self.builder.function.append_basic_block("test")
2405
+ loop_bb = self.builder.function.append_basic_block("loop")
2406
+ next_bb = self.builder.function.append_basic_block("next")
2407
+
2408
+ # append by name nor just add it
2409
+ after_loop_label = self.new_label("afterloop")
2410
+ after_bb = ir.Block(self.builder.function, after_loop_label)
2411
+ # self.builder.function.append_basic_block('afterloop')
2412
+
2413
+ self.builder.branch(test_bb)
2414
+ self.builder.position_at_end(test_bb)
2415
+
2416
+ if node.cond is not None:
2417
+ endcond, _ = self.codegen(node.cond)
2418
+ cmp = self._to_bool(endcond, "loopcond")
2419
+ self.builder.cbranch(cmp, loop_bb, after_bb)
2420
+ else:
2421
+ # for(;;) - infinite loop, always branch to body
2422
+ self.builder.branch(loop_bb)
2423
+
2424
+ with self.new_scope():
2425
+ self.define("break", after_bb)
2426
+ self.define("continue", next_bb)
2427
+ self.builder.position_at_end(loop_bb)
2428
+ body_val, _ = self.codegen(node.stmt) # if was ready codegen
2429
+ if not self.builder.block.is_terminated:
2430
+ self.builder.branch(next_bb)
2431
+ self.builder.position_at_end(next_bb)
2432
+ if node.next is not None:
2433
+ self.codegen(node.next)
2434
+ self.builder.branch(test_bb)
2435
+ # this append_basic_blook change the label
2436
+ # after_bb = self.builder.function.append_basic_block(after_loop_label)
2437
+ self.builder.function.basic_blocks.append(after_bb)
2438
+ self.builder.position_at_end(after_bb)
2439
+
2440
+ return ir.values.Constant(ir.DoubleType(), 0.0), None
2441
+
2442
+ def codegen_While(self, node):
2443
+
2444
+ saved_block = self.builder.block
2445
+ id_name = node.__class__.__name__
2446
+ self.builder.position_at_end(saved_block)
2447
+ # The builder is what? loop is a block which begin with loop
2448
+ test_bb = self.builder.function.append_basic_block(
2449
+ "test"
2450
+ ) # just create some block need to be filled
2451
+ loop_bb = self.builder.function.append_basic_block("loop")
2452
+ after_bb = self.builder.function.append_basic_block("afterloop")
2453
+
2454
+ self.builder.branch(test_bb)
2455
+ self.builder.position_at_start(test_bb)
2456
+ endcond, _ = self.codegen(node.cond)
2457
+ cmp = self._to_bool(endcond, "loopcond")
2458
+ self.builder.cbranch(cmp, loop_bb, after_bb)
2459
+
2460
+ with self.new_scope():
2461
+ self.define("break", after_bb)
2462
+ self.define("continue", test_bb)
2463
+ self.builder.position_at_end(loop_bb)
2464
+ body_val, _ = self.codegen(node.stmt)
2465
+ # after eval body we need to goto test_bb
2466
+ # New code will be inserted into after_bb
2467
+ if not self.builder.block.is_terminated:
2468
+ self.builder.branch(test_bb)
2469
+ self.builder.position_at_end(after_bb)
2470
+
2471
+ # The 'for' expression always returns 0
2472
+ return ir.values.Constant(ir.DoubleType(), 0.0)
2473
+
2474
+ def codegen_Break(self, node):
2475
+ self.builder.branch(self.lookup("break"))
2476
+ return None, None
2477
+
2478
+ def codegen_Continue(self, node):
2479
+ self.builder.branch(self.lookup("continue"))
2480
+ return None, None
2481
+
2482
+ def codegen_DoWhile(self, node):
2483
+
2484
+ saved_block = self.builder.block
2485
+ self.builder.position_at_end(saved_block)
2486
+
2487
+ loop_bb = self.builder.function.append_basic_block("dowhile_body")
2488
+ test_bb = self.builder.function.append_basic_block("dowhile_test")
2489
+ after_bb = self.builder.function.append_basic_block("dowhile_end")
2490
+
2491
+ self.builder.branch(loop_bb)
2492
+
2493
+ with self.new_scope():
2494
+ self.define("break", after_bb)
2495
+ self.define("continue", test_bb)
2496
+ self.builder.position_at_end(loop_bb)
2497
+ self.codegen(node.stmt)
2498
+ if not self.builder.block.is_terminated:
2499
+ self.builder.branch(test_bb)
2500
+
2501
+ self.builder.position_at_end(test_bb)
2502
+ endcond, _ = self.codegen(node.cond)
2503
+ cmp = self._to_bool(endcond, "loopcond")
2504
+ self.builder.cbranch(cmp, loop_bb, after_bb)
2505
+
2506
+ self.builder.position_at_end(after_bb)
2507
+ return ir.values.Constant(ir.DoubleType(), 0.0), None
2508
+
2509
    def codegen_Switch(self, node):
        """Lower a ``switch`` statement to an LLVM ``switch`` instruction.

        The condition is normalized to i64. One basic block is created per
        ``case``/``default`` label found directly in the switch body; the
        default target is the ``default`` block when present, otherwise the
        end block. Label bodies are emitted in source order and a body that
        does not terminate falls through into the next label's block.
        ``break`` is bound to the end block. Returns ``(None, None)``.
        """

        cond_val, _ = self.codegen(node.cond)
        # Switch requires integer condition
        if isinstance(cond_val.type, ir.PointerType):
            cond_val = self.builder.ptrtoint(cond_val, int64_t)
        elif self._is_floating_ir_type(cond_val.type):
            cond_val = self.builder.fptosi(cond_val, int64_t)
        elif isinstance(cond_val.type, ir.IntType) and cond_val.type.width != 64:
            cond_val = self._implicit_convert(cond_val, int64_t)

        after_bb = self.builder.function.append_basic_block("switch_end")

        # Preserve C switch semantics: grouped case labels and fallthrough
        # share code by jumping into the next label block, not directly to
        # the switch epilogue.
        if isinstance(node.stmt, c_ast.Compound):
            switch_items = list(node.stmt.block_items or [])
        elif node.stmt is not None:
            switch_items = [node.stmt]
        else:
            switch_items = []
        # Only items that are themselves Case/Default nodes are considered.
        labels = [
            item
            for item in switch_items
            if isinstance(item, (c_ast.Case, c_ast.Default))
        ]

        # Blocks are keyed by id(item), i.e. by AST node identity.
        label_blocks = {}
        default_bb = after_bb
        for item in labels:
            bb_name = (
                "switch_default" if isinstance(item, c_ast.Default) else "switch_case"
            )
            bb = self.builder.function.append_basic_block(bb_name)
            label_blocks[id(item)] = bb
            if isinstance(item, c_ast.Default):
                default_bb = bb

        switch_inst = self.builder.switch(cond_val, default_bb)

        with self.new_scope():
            self.define("break", after_bb)

            # First pass: register every case constant with the switch.
            for item in labels:
                if not isinstance(item, c_ast.Case):
                    continue
                # Case values must be compile-time constants
                try:
                    const_int = self._eval_const_expr(item.expr)
                    case_val = ir.Constant(cond_val.type, const_int)
                except Exception:
                    # Fall back to generating the expression and accept it
                    # only if it turns out to be an IR constant.
                    case_val, _ = self.codegen(item.expr)
                    if case_val is None:
                        continue
                    if not isinstance(case_val, ir.Constant):
                        # Non-constant case: skip (LLVM requires constants)
                        continue
                    if case_val.type != cond_val.type:
                        case_val = ir.Constant(cond_val.type, case_val.constant)
                switch_inst.add_case(case_val, label_blocks[id(item)])

            # Second pass: emit each label's statements into its block.
            for idx, item in enumerate(labels):
                self.builder.position_at_end(label_blocks[id(item)])
                for stmt in item.stmts or []:
                    self.codegen(stmt)
                    # Stop once the block is terminated (e.g. by a break).
                    if self.builder.block.is_terminated:
                        break
                if not self.builder.block.is_terminated:
                    # Fall through to the next label, or leave the switch
                    # after the last one.
                    next_bb = after_bb
                    if idx + 1 < len(labels):
                        next_bb = label_blocks[id(labels[idx + 1])]
                    self.builder.branch(next_bb)

        self.builder.position_at_end(after_bb)
        return None, None
2585
+
2586
def codegen_TernaryOp(self, node):
    """Generate IR for the C conditional operator ``cond ? iftrue : iffalse``.

    The condition is reduced to a boolean with ``self._to_bool`` and each arm
    is emitted into its own basic block.  Every arm whose final block is still
    open is converted to a common type and branched to a merge block, where a
    phi node selects the result when both arms arrive.

    Returns:
        ``(value, None)`` where ``value`` is the phi node, the single arm
        value that reaches the merge block, or a zero constant of the common
        type when neither arm reaches it.
    """
    condition, _ = self.codegen(node.cond)
    flag = self._to_bool(condition)

    function = self.builder.function
    true_bb = function.append_basic_block("ternary_true")
    false_bb = function.append_basic_block("ternary_false")
    join_bb = function.append_basic_block("ternary_end")
    self.builder.cbranch(flag, true_bb, false_bb)

    # Emit both arms, remembering the block each one *ends* in: an arm may
    # itself create new blocks (nested ternaries, short-circuit operators).
    self.builder.position_at_end(true_bb)
    true_val, _ = self.codegen(node.iftrue)
    true_tail = self.builder.block

    self.builder.position_at_end(false_bb)
    false_val, _ = self.codegen(node.iffalse)
    false_tail = self.builder.block

    def null_of(ty):
        """Return the zero/null constant of ``ty``."""
        if isinstance(ty, ir.PointerType):
            literal = None
        elif self._is_floating_ir_type(ty):
            literal = 0.0
        else:
            literal = 0
        return ir.Constant(ty, literal)

    def common_type(a, b):
        """Pick the IR type both arm values are converted to."""
        if a is None or b is None:
            survivor = b if a is None else a
            return int64_t if survivor is None else survivor.type

        ta, tb = a.type, b.type
        a_ptr = isinstance(ta, ir.PointerType)
        b_ptr = isinstance(tb, ir.PointerType)

        # An array arm decays to a pointer; prefer the other arm's pointer
        # type when it has one.
        if isinstance(ta, ir.ArrayType) or isinstance(tb, ir.ArrayType):
            if a_ptr:
                return ta
            if b_ptr:
                return tb
            array_ty = ta if isinstance(ta, ir.ArrayType) else tb
            return ir.PointerType(array_ty.element)

        if ta == tb:
            return ta
        if a_ptr and b_ptr:
            # Distinct pointer types meet at the generic byte pointer.
            return voidptr_t

        a_int = isinstance(ta, ir.IntType)
        b_int = isinstance(tb, ir.IntType)
        if a_ptr and b_int:
            return ta
        if b_ptr and a_int:
            return tb
        if self._is_floating_ir_type(ta) or self._is_floating_ir_type(tb):
            return self._common_float_type(ta, tb)
        if a_int and b_int:
            # Wider integer wins; the first arm wins a tie.
            return ta if ta.width >= tb.width else tb
        return ta

    target = common_type(true_val, false_val)

    incoming = []
    arms = ((true_tail, true_val), (false_tail, false_val))
    for tail_block, arm_value in arms:
        if tail_block.is_terminated:
            # The arm already left the function or jumped elsewhere.
            continue
        self.builder.position_at_end(tail_block)
        value = null_of(target) if arm_value is None else arm_value
        if isinstance(value.type, ir.ArrayType) or value.type != target:
            value = self._implicit_convert(value, target)
        incoming.append((self.builder.block, value))
        self.builder.branch(join_bb)

    self.builder.position_at_end(join_bb)
    if len(incoming) >= 2:
        phi = self.builder.phi(target, "ternary")
        for predecessor, value in incoming:
            phi.add_incoming(value, predecessor)
        return phi, None
    if incoming:
        return incoming[0][1], None
    return null_of(target), None
2676
+
2677
def codegen_Cast(self, node):
    """Generate IR for an explicit C cast ``(type)expr``.

    The destination IR type comes from ``self._resolve_ast_type``.  A cast is
    treated as unsigned only when the destination is a plain ``TypeDecl`` of
    an ``IdentifierType`` whose names ``self._is_unsigned_type_names``
    accepts.

    Returns:
        ``(value, ptr)``.  ``ptr`` is the operand's address only when the
        operand itself is returned unchanged by a same-type cast that is not
        an int-to-unsigned cast; every other path returns ``None`` for it.
    """
    expr, ptr = self.codegen(node.expr)

    decl_type = node.to_type.type
    dest_ir_type = self._resolve_ast_type(decl_type)

    # Check if casting to unsigned type
    is_unsigned = False
    if isinstance(decl_type, c_ast.TypeDecl) and isinstance(
        decl_type.type, c_ast.IdentifierType
    ):
        is_unsigned = self._is_unsigned_type_names(decl_type.type.names)

    # Floating point -> integer.
    if self._is_floating_ir_type(expr.type) and isinstance(
        dest_ir_type, ir.IntType
    ):
        if is_unsigned:
            result = self.builder.fptoui(expr, dest_ir_type)
            # Tag the result like every other unsigned-cast path does; this
            # branch previously skipped the call.
            self._tag_unsigned(result)
            self._tag_value_from_decl_type(result, decl_type)
            return result, None
        result = self.builder.fptosi(expr, dest_ir_type)
        self._clear_unsigned(result)
        self._tag_value_from_decl_type(result, decl_type)
        return result, None

    # Same IR type: only the signedness tag can change.
    if expr.type == dest_ir_type:
        if isinstance(dest_ir_type, ir.IntType):
            if is_unsigned:
                if self._is_unsigned_val(expr):
                    self._tag_value_from_decl_type(expr, decl_type)
                    return expr, None
                # "add 0" creates a fresh value so the operand's own tag is
                # left untouched.
                result = self.builder.add(
                    expr, ir.Constant(dest_ir_type, 0), "casttmp"
                )
                self._tag_unsigned(result)
                self._tag_value_from_decl_type(result, decl_type)
                return result, None
            if self._is_unsigned_val(expr):
                # unsigned -> signed: fresh value, not tagged unsigned here.
                result = self.builder.add(
                    expr, ir.Constant(dest_ir_type, 0), "casttmp"
                )
                self._tag_value_from_decl_type(result, decl_type)
                return result, None
        self._clear_unsigned(expr)
        if is_unsigned:
            self._tag_unsigned(expr)
        self._tag_value_from_decl_type(expr, decl_type)
        return expr, ptr

    # Different IR types: defer to the general conversion helper.
    result = self._implicit_convert(expr, dest_ir_type)
    if is_unsigned:
        self._tag_unsigned(result)
    elif isinstance(dest_ir_type, ir.IntType):
        self._clear_unsigned(result)
    self._tag_value_from_decl_type(result, decl_type)
    return result, None
2729
+
2730
def codegen_FuncCall(self, node):
    """Generate IR for a C function call expression.

    Three call shapes are handled:

    * the callee is not a plain identifier (e.g. ``s.fn(args)``): the callee
      expression is evaluated and called through the resulting pointer;
    * the callee is an identifier bound to a pointer (a function-pointer
      variable): the pointer is loaded and called;
    * the callee is an identifier bound to an ``ir.Function``: direct call.

    The ``__builtin_va_*`` names are dispatched to their own generators.

    Returns:
        ``(value, None)``.  Void calls, and calls that cannot be emitted,
        yield the placeholder constant ``i64 0``.
    """
    if not isinstance(node.name, c_ast.ID):
        # Calling function pointer in struct: s.fn(args)
        # Arguments are evaluated before the callee expression.
        call_args = []
        if node.args:
            call_args = [self.codegen(arg)[0] for arg in node.args.exprs]
        fp_val, _ = self.codegen(node.name)
        return self._emit_indirect_call(fp_val, call_args)

    callee = node.name.name
    if callee == "__builtin_va_start":
        return self._codegen_builtin_va_start(node)
    if callee == "__builtin_va_end":
        return self._codegen_builtin_va_end(node)
    if callee == "__builtin_va_copy":
        return self._codegen_builtin_va_copy(node)
    if callee == "__builtin_va_arg":
        return ir.Constant(voidptr_t, None), None

    _, callee_func = self.lookup(callee)

    call_args = []
    if node.args:
        call_args = [self.codegen(arg)[0] for arg in node.args.exprs]

    if not isinstance(callee_func, ir.Function):
        # Function pointer variable: load the pointer and call through it.
        if isinstance(getattr(callee_func, "type", None), ir.PointerType):
            loaded = self._safe_load(callee_func, name="fptr")
            if self._is_unsigned_return_binding(callee_func):
                self._tag_unsigned_return(loaded)
            return self._emit_indirect_call(loaded, call_args)
        return ir.Constant(int64_t, 0), None  # unknown function — return dummy

    # Convert arguments to match function parameter types
    converted = self._convert_call_args(call_args, callee_func)

    # Call and handle return type
    try:
        is_void = isinstance(callee_func.return_value.type, ir.VoidType)
    except Exception:
        is_void = False
    try:
        if is_void:
            self.builder.call(callee_func, converted)
            return ir.Constant(int64_t, 0), None
        result = self.builder.call(callee_func, converted, "calltmp")
    except (TypeError, IndexError):
        # Arg count/type mismatch — return dummy value
        return ir.Constant(int64_t, 0), None

    # Widen small int returns (e.g., i32 from strcmp) to i64
    return (
        self._extend_call_result(
            result, returns_unsigned=self._is_unsigned_return_binding(callee_func)
        ),
        None,
    )


def _emit_indirect_call(self, func_val, call_args):
    """Call through ``func_val`` if it is a pointer to a function type.

    Arguments covered by the function type's parameter list are coerced with
    ``self._coerce_arg``; surplus arguments get the default argument
    promotions, matching what ``_convert_call_args`` does for direct calls.
    An argument count/type mismatch reported by the builder yields the
    placeholder value, as the direct-call path does.

    Returns:
        ``(value, None)``; ``i64 0`` for void calls, for values that are not
        function pointers, and for calls the builder rejects.
    """
    ptr_type = getattr(func_val, "type", None)
    if not (
        isinstance(ptr_type, ir.PointerType)
        and isinstance(ptr_type.pointee, ir.FunctionType)
    ):
        # Not a function pointer — can't call, return dummy
        return ir.Constant(int64_t, 0), None

    ftype = ptr_type.pointee
    fixed = ftype.args
    prepared = [
        self._coerce_arg(arg, fixed[pos])
        if pos < len(fixed)
        else self._default_arg_promotion(arg)
        for pos, arg in enumerate(call_args)
    ]

    try:
        if isinstance(ftype.return_type, ir.VoidType):
            self.builder.call(func_val, prepared)
            return ir.Constant(int64_t, 0), None
        result = self.builder.call(func_val, prepared, "fpcall")
    except (TypeError, IndexError):
        # Arg count/type mismatch — return dummy value
        return ir.Constant(int64_t, 0), None

    return (
        self._extend_call_result(
            result, returns_unsigned=self._is_unsigned_return(func_val)
        ),
        None,
    )
2841
+
2842
+ def _get_or_declare_intrinsic(self, name, ret_type, arg_types):
2843
+ existing = self.module.globals.get(name)
2844
+ if existing is not None:
2845
+ return existing
2846
+ return ir.Function(self.module, ir.FunctionType(ret_type, arg_types), name=name)
2847
+
2848
def _codegen_builtin_va_start(self, node):
    """Lower ``__builtin_va_start(ap, ...)`` to a call of ``llvm.va_start``.

    Only the first argument is used.  Nothing is emitted when the call has no
    arguments or when the first argument does not evaluate to a pointer.

    Returns:
        Always ``(i64 0, None)``.
    """
    operands = node.args.exprs if node.args else None
    if operands:
        va_list_ptr, _ = self.codegen(operands[0])
        if isinstance(getattr(va_list_ptr, "type", None), ir.PointerType):
            va_start = self._get_or_declare_intrinsic(
                "llvm.va_start", ir.VoidType(), [voidptr_t]
            )
            # The intrinsic is declared as taking a byte pointer.
            if va_list_ptr.type != voidptr_t:
                va_list_ptr = self.builder.bitcast(
                    va_list_ptr, voidptr_t, name="vastartarg"
                )
            self.builder.call(va_start, [va_list_ptr])
    return ir.Constant(int64_t, 0), None
2862
+
2863
def _codegen_builtin_va_end(self, node):
    """Lower ``__builtin_va_end(ap)`` to a call of ``llvm.va_end``.

    Nothing is emitted when the call has no arguments or when the first
    argument does not evaluate to a pointer.

    Returns:
        Always ``(i64 0, None)``.
    """
    placeholder = (ir.Constant(int64_t, 0), None)

    arg_list = node.args
    if not arg_list or not arg_list.exprs:
        return placeholder

    va_list_ptr, _ = self.codegen(arg_list.exprs[0])
    if not isinstance(getattr(va_list_ptr, "type", None), ir.PointerType):
        return placeholder

    va_end = self._get_or_declare_intrinsic(
        "llvm.va_end", ir.VoidType(), [voidptr_t]
    )
    needs_cast = va_list_ptr.type != voidptr_t
    if needs_cast:
        # The intrinsic is declared as taking a byte pointer.
        va_list_ptr = self.builder.bitcast(va_list_ptr, voidptr_t, name="vaendarg")
    self.builder.call(va_end, [va_list_ptr])
    return placeholder
2877
+
2878
def _codegen_builtin_va_copy(self, node):
    """Lower ``__builtin_va_copy(dst, src)`` as a load from src and a store to dst.

    The value loaded through the source pointer is converted to the
    destination's pointee type when the two differ.  Nothing is emitted when
    fewer than two arguments are given or when either argument does not
    evaluate to a pointer.

    Returns:
        Always ``(i64 0, None)``.
    """
    placeholder = (ir.Constant(int64_t, 0), None)

    if not node.args or len(node.args.exprs) < 2:
        return placeholder

    # Destination is evaluated first, then source.
    destination, _ = self.codegen(node.args.exprs[0])
    source, _ = self.codegen(node.args.exprs[1])
    for operand in (destination, source):
        if not isinstance(getattr(operand, "type", None), ir.PointerType):
            return placeholder

    copied = self._safe_load(source)
    slot_type = destination.type.pointee
    if copied.type != slot_type:
        copied = self._implicit_convert(copied, slot_type)
    self._safe_store(copied, destination)
    return placeholder
2893
+
2894
+ def _convert_call_args(self, call_args, callee_func):
2895
+ """Convert call arguments to match function parameter types."""
2896
+ converted = []
2897
+ param_types = [p.type for p in callee_func.args]
2898
+
2899
+ for i, arg in enumerate(call_args):
2900
+ if i < len(param_types):
2901
+ expected = param_types[i]
2902
+ arg = self._coerce_arg(arg, expected)
2903
+ else:
2904
+ arg = self._default_arg_promotion(arg)
2905
+ converted.append(arg)
2906
+ return converted
2907
+
2908
def _default_arg_promotion(self, arg):
    """Apply the C default argument promotions to one variadic argument.

    * a missing or void value becomes the constant ``i64 0``;
    * an array value is converted to a pointer to its element type;
    * ``float`` is extended to ``double``;
    * integers narrower than 32 bits go through ``self._integer_promotion``;
    * anything else is passed through unchanged.
    """
    if arg is None or isinstance(getattr(arg, "type", None), ir.VoidType):
        return ir.Constant(int64_t, 0)

    arg_type = arg.type
    if isinstance(arg_type, ir.ArrayType):
        decayed = ir.PointerType(arg_type.element)
        return self._implicit_convert(arg, decayed)
    if isinstance(arg_type, ir.FloatType):
        return self.builder.fpext(arg, ir.DoubleType())

    is_small_int = (
        isinstance(arg_type, ir.IntType) and arg_type.width < int32_t.width
    )
    return self._integer_promotion(arg) if is_small_int else arg
2919
+
2920
def _coerce_arg(self, arg, expected):
    """Coerce a single call argument to the parameter type ``expected``.

    Args:
        arg: the generated argument value, or ``None`` / a void-typed value
            when the argument expression produced nothing usable.
        expected: the IR type of the parameter being filled.

    Returns:
        A value of type ``expected`` where a conversion is known: the
        argument itself when the types already match, a bitcast for
        array-to-pointer and pointer-to-pointer cases, and otherwise whatever
        ``self._implicit_convert`` produces.
    """
    if arg is None or isinstance(getattr(arg, "type", None), ir.VoidType):
        # No usable value: substitute a zero of the *expected* type so the
        # call stays well-typed.  Previously every non-pointer parameter got
        # an i64 zero, which mismatches i32/double/... parameters.
        if isinstance(expected, ir.PointerType):
            return ir.Constant(expected, None)
        if isinstance(expected, ir.IntType):
            return ir.Constant(expected, 0)
        if self._is_floating_ir_type(expected):
            return ir.Constant(expected, 0.0)
        return ir.Constant(int64_t, 0)

    if arg.type == expected:
        return arg

    if isinstance(expected, ir.PointerType):
        # String literal [N x i8] -> pointer: materialise the constant as a
        # module-level global and pass its address.
        if isinstance(arg.type, ir.ArrayType):
            gv = ir.GlobalVariable(
                self.module, arg.type, self.module.get_unique_name("str")
            )
            gv.initializer = arg
            gv.global_constant = True
            return self.builder.bitcast(gv, expected)
        # Pointer -> different pointer: bitcast
        if isinstance(arg.type, ir.PointerType):
            return self.builder.bitcast(arg, expected)

    # Numeric conversions
    return self._implicit_convert(arg, expected)
2945
+
2946
def codegen_Decl(self, node):
    """Generate IR for one C declaration.

    Handles, in order: unnamed declarations, function-local ``static``
    variables of a plain ``TypeDecl`` type, bare enum definitions, function
    prototypes, bare struct/union definitions, and then variables whose
    declarator is a ``TypeDecl`` (scalar, typedef'd aggregate, struct/union),
    an ``ArrayDecl`` or a ``PtrDecl`` (including function pointers).
    Variables become allocas inside a function and ``ir.GlobalVariable``s at
    file scope (``self.in_global``).

    Returns:
        ``(None, var_addr)`` for scalar, array and pointer variables,
        ``(None, None)`` for every other handled declaration, and whatever
        ``self.codegen_Enum`` returns for a bare enum.

    Raises:
        Exception: for an array whose element declarator is not supported.
        NotImplementedError: for a pointer whose pointee declarator is not
            supported (for example a pointer to an array).
    """
    type_str = ""

    # Skip anonymous/unnamed declarations
    if node.name is None and not isinstance(
        node.type, (c_ast.Struct, c_ast.Union, c_ast.Enum, c_ast.FuncDecl)
    ):
        if not (
            isinstance(node.type, c_ast.TypeDecl)
            and isinstance(node.type.type, (c_ast.Struct, c_ast.Union, c_ast.Enum))
        ):
            return None, None

    # Static local variables: stored as globals with function-scoped names
    is_static = node.storage and "static" in node.storage
    if is_static and not self.in_global and isinstance(node.type, c_ast.TypeDecl):
        type_str = node.type.type.names
        ir_type = self._get_ir_type(type_str)
        # Create unique global name
        global_name = f"__static_{self.function.name}_{node.name}"
        gv = ir.GlobalVariable(self.module, ir_type, global_name)
        if node.init:
            gv.initializer = self._build_const_init(node.init, ir_type)
        else:
            gv.initializer = self._zero_initializer(ir_type)
        self.define(node.name, (ir_type, gv))
        return None, None

    if isinstance(node.type, c_ast.Enum):
        return self.codegen_Enum(node.type)

    # Forward function declaration: int foo(int x);
    if isinstance(node.type, c_ast.FuncDecl):
        funcname = node.name
        # Skip if already exists (module globals, libc, or env)
        existing = self.module.globals.get(funcname)
        if existing:
            if self._func_decl_returns_unsigned(node.type):
                self._mark_unsigned_return(existing)
            self.define(funcname, (None, existing))
            return None, None
        if funcname in LIBC_FUNCTIONS:
            self._declare_libc(funcname)
            return None, None
        ir_type, _ = self.codegen(node.type)
        arg_types = []
        is_va = False
        if node.type.args:
            for arg in node.type.args.params:
                if isinstance(arg, c_ast.EllipsisParam):
                    is_va = True
                    continue
                t = self._resolve_param_type(arg)
                if t is not None:
                    arg_types.append(t)
        try:
            func = ir.Function(
                self.module,
                ir.FunctionType(ir_type, arg_types, var_arg=is_va),
                name=funcname,
            )
            if self._func_decl_returns_unsigned(node.type):
                self._mark_unsigned_return(func)
            self.define(funcname, (ir_type, func))
        except Exception:
            # Already exists (libc or previous decl)
            existing = self.module.globals.get(funcname)
            if existing:
                if self._func_decl_returns_unsigned(node.type):
                    self._mark_unsigned_return(existing)
                self.define(funcname, (ir_type, existing))
        return None, None

    # Bare struct/union/type definition
    if isinstance(node.type, c_ast.Union):
        if node.name is None:
            self.codegen_Union(node.type)
        return None, None

    if isinstance(node.type, c_ast.Struct) and node.name is None:
        self.codegen_Struct(node.type)
        return None, None

    if isinstance(node.type, c_ast.TypeDecl):
        if isinstance(node.type.type, c_ast.IdentifierType):
            # Check if the type resolves to a struct or pointer via typedef
            resolved = self._resolve_type_str(node.type.type.names)
            if isinstance(
                resolved, (ir.LiteralStructType, ir.PointerType, ir.ArrayType)
            ):
                return self._declare_aggregate_var(
                    node, node.type.declname, resolved
                )

        if isinstance(node.type.type, (c_ast.Struct, c_ast.Union)):
            name = node.type.declname
            codegen_fn = (
                self.codegen_Union
                if isinstance(node.type.type, c_ast.Union)
                else self.codegen_Struct
            )
            if node.type.type.name is None:
                # Anonymous struct/union: build its type on the spot.
                struct_type = codegen_fn(node.type.type)
            else:
                # Named struct/union: its type is looked up in the environment.
                struct_type = self.env[node.type.type.name][0]
            return self._declare_aggregate_var(node, name, struct_type)
        else:
            type_str = node.type.type.names
            is_unsigned = self._is_unsigned_type_names(type_str)
            ir_type = self._get_ir_type(type_str)
            type_str = self._resolve_type_str(type_str)
            if isinstance(type_str, ir.Type):
                type_str = "int"  # fallback for alloca name

            if node.init is not None:
                if self.in_global:
                    init_val = self._build_const_init(node.init, ir_type)
                else:
                    init_val, _ = self.codegen(node.init)
            else:
                init_val = self._zero_initializer(ir_type)

            var_addr, var_ir_type = self.create_entry_block_alloca(
                node.name, type_str, 1
            )
            if is_unsigned:
                self._mark_unsigned(var_addr)

            if self.in_global:
                var_addr.initializer = init_val
            else:
                init_val = self._implicit_convert(init_val, ir_type)
                self._safe_store(init_val, var_addr)

    elif isinstance(node.type, c_ast.ArrayDecl):
        array_list = []
        array_node = node.type
        var_addr = None
        var_ir_type = None
        elem_ir_type = None
        # Walk the nested ArrayDecl chain, collecting one dimension per level.
        while True:
            array_next_type = array_node.type
            if isinstance(array_next_type, c_ast.TypeDecl):
                # An omitted innermost dimension is recorded as 0 and may be
                # inferred from the initializer below.
                dim_val = self._eval_dim(array_node.dim) if array_node.dim else 0
                array_list.append(dim_val)
                elem_ir_type = self._resolve_ast_type(array_next_type)
                break

            elif isinstance(array_next_type, c_ast.ArrayDecl):
                array_list.append(self._eval_dim(array_node.dim))
                array_node = array_next_type
                continue
            elif isinstance(array_next_type, c_ast.PtrDecl):
                # Array of pointers: int *arr[3]
                dim = self._eval_dim(array_node.dim)
                inner = array_next_type.type
                if isinstance(inner, c_ast.TypeDecl):
                    elem_type_str = inner.type.names
                else:
                    elem_type_str = "int"
                elem_ir = ir.PointerType(get_ir_type(elem_type_str))
                elem_ir_type = elem_ir
                arr_ir = ir.ArrayType(elem_ir, dim)
                arr_ir.dim_array = [dim]
                if not self.in_global:
                    var_addr = self._alloca_in_entry(arr_ir, node.name)
                    self.define(node.name, (arr_ir, var_addr))
                else:
                    existing = self.module.globals.get(node.name)
                    if existing:
                        var_addr = existing
                    else:
                        var_addr = ir.GlobalVariable(self.module, arr_ir, node.name)
                    self.define(node.name, (arr_ir, var_addr))
                var_ir_type = arr_ir
                break
            else:
                raise Exception("TODO implement")

        if var_addr is None:
            # Build the nested array type from the innermost dimension out.
            var_ir_type = elem_ir_type
            for dim in reversed(array_list):
                var_ir_type = ir.ArrayType(var_ir_type, dim)
            var_ir_type.dim_array = array_list
            if not self.in_global:
                var_addr = self._alloca_in_entry(var_ir_type, node.name)
            else:
                existing = self.module.globals.get(node.name)
                if existing:
                    var_addr = existing
                else:
                    var_addr = ir.GlobalVariable(
                        self.module, var_ir_type, node.name
                    )
            self.define(node.name, (var_ir_type, var_addr))

        if self._has_unsigned_scalar_pointee(node.type):
            self._mark_unsigned_pointee(var_addr)

        # Infer the size of zero-length arrays from the initializer.
        if (
            isinstance(var_ir_type, ir.ArrayType)
            and var_ir_type.count == 0
            and node.init is not None
        ):
            actual_count = None
            if isinstance(node.init, c_ast.InitList):
                actual_count = len(node.init.exprs)
            elif (
                isinstance(node.init, c_ast.Constant)
                and getattr(node.init, "type", None) == "string"
            ):
                raw = node.init.value[1:-1]
                # +1 for the terminating NUL
                actual_count = len(self._process_escapes(raw)) + 1
            if actual_count is not None and elem_ir_type is not None:
                var_ir_type = ir.ArrayType(elem_ir_type, actual_count)
                var_ir_type.dim_array = [actual_count]
                if self.in_global:
                    # A correctly sized global is created under a fresh name
                    # and the old -> new reference is recorded in
                    # self._array_renames.
                    new_name = self.module.get_unique_name(node.name)
                    var_addr = ir.GlobalVariable(self.module, var_ir_type, new_name)
                    self.define(node.name, (var_ir_type, var_addr))
                    if not hasattr(self, "_array_renames"):
                        self._array_renames = {}
                    self._array_renames[f'@"{node.name}"'] = f'@"{new_name}"'
                else:
                    var_addr = self._alloca_in_entry(var_ir_type, node.name)
                    self.define(node.name, (var_ir_type, var_addr))

                if self._has_unsigned_scalar_pointee(node.type):
                    self._mark_unsigned_pointee(var_addr)

        # Handle array initialization: int a[3] = {1, 2, 3}; or
        # char s[] = "hi"; or const char *names[] = {"a", helper};
        if node.init is not None:
            if self.in_global:
                try:
                    const_init = self._build_const_init(node.init, var_ir_type)
                    # Rendering the constant surfaces a malformed initializer
                    # here, where it can fall back to zero.
                    str(const_init)
                    var_addr.initializer = const_init
                except Exception:
                    var_addr.initializer = self._zero_initializer(var_ir_type)
            elif isinstance(node.init, c_ast.InitList):
                self._init_array(
                    var_addr,
                    node.init,
                    elem_ir_type,
                    [ir.Constant(ir.IntType(32), 0)],
                )
            elif (
                isinstance(node.init, c_ast.Constant)
                and getattr(node.init, "type", None) == "string"
                and isinstance(elem_ir_type, ir.IntType)
                and elem_ir_type.width == 8
            ):
                raw = self._process_escapes(node.init.value[1:-1]) + "\00"
                if len(raw) < var_ir_type.count:
                    # C zero-initialises the elements the literal does not
                    # cover; clear the whole array before copying characters.
                    self.builder.store(
                        self._zero_initializer(var_ir_type), var_addr
                    )
                idx0 = ir.Constant(ir.IntType(32), 0)
                # Characters beyond the array's capacity are dropped.
                for i, ch in enumerate(raw[: var_ir_type.count]):
                    elem_ptr = self.builder.gep(
                        var_addr,
                        [idx0, ir.Constant(ir.IntType(32), i)],
                        inbounds=True,
                    )
                    self.builder.store(int8_t(ord(ch)), elem_ptr)
        elif self.in_global:
            var_addr.initializer = self._zero_initializer(var_ir_type)

    elif isinstance(node.type, c_ast.PtrDecl):

        point_level = 1
        sub_node = node.type
        resolved_pointee_type = None

        # Walk nested PtrDecl nodes, counting indirection levels, until the
        # pointee declarator is reached.
        while True:
            sub_next_type = sub_node.type
            if isinstance(sub_next_type, c_ast.TypeDecl):
                if isinstance(sub_next_type.type, c_ast.Struct):
                    # pointer to struct: struct { int x; } *p
                    resolved_pointee_type = self.codegen_Struct(sub_next_type.type)
                    type_str = "struct"
                elif isinstance(sub_next_type.type, c_ast.Union):
                    resolved_pointee_type = self.codegen_Union(sub_next_type.type)
                    type_str = "union"
                else:
                    type_str = sub_next_type.type.names
                    resolved = self._get_ir_type(type_str)
                    if isinstance(resolved, ir.Type):
                        resolved_pointee_type = resolved
                    if isinstance(resolved, ir.LiteralStructType):
                        type_str = "struct"
                break
            elif isinstance(sub_next_type, c_ast.PtrDecl):
                point_level += 1
                sub_node = sub_next_type
                continue
            elif isinstance(sub_next_type, c_ast.FuncDecl):
                # Function pointer: int (*fp)(int, int)
                func_ir_type = self._build_func_ptr_type(sub_next_type)
                if not self.in_global:
                    var_addr = self._alloca_in_entry(func_ir_type, node.name)
                    self.define(node.name, (func_ir_type, var_addr))
                else:
                    var_addr = ir.GlobalVariable(
                        self.module, func_ir_type, node.name
                    )
                    var_addr.initializer = ir.Constant(func_ir_type, None)
                    self.define(node.name, (func_ir_type, var_addr))
                if self._func_decl_returns_unsigned(sub_next_type):
                    self._mark_unsigned_return(var_addr)
                if node.init is not None:
                    init_val, _ = self.codegen(node.init)
                    # init_val is an ir.Function, bitcast to func ptr type
                    if init_val.type != func_ir_type:
                        init_val = self.builder.bitcast(init_val, func_ir_type)
                    self._safe_store(init_val, var_addr)
                return None, var_addr
            else:
                # Any other pointee declarator (e.g. pointer to array,
                # ``int (*p)[3]``) never advanced ``sub_node`` and used to
                # spin in this loop forever; fail loudly instead.
                raise NotImplementedError(
                    "unsupported pointer declarator "
                    f"{type(sub_next_type).__name__} for {node.name!r}"
                )

        if resolved_pointee_type is not None:
            ir_type = resolved_pointee_type
            if isinstance(ir_type, ir.VoidType):
                # void* is represented as i8*
                ir_type = int8_t
            for _ in range(point_level):
                ir_type = ir.PointerType(ir_type)
            if not self.in_global:
                var_addr = self._alloca_in_entry(ir_type, node.name)
                self.define(node.name, (ir_type, var_addr))
            else:
                var_addr = ir.GlobalVariable(self.module, ir_type, node.name)
                self.define(node.name, (ir_type, var_addr))
            var_ir_type = ir_type
        else:
            var_addr, var_ir_type = self.create_entry_block_alloca(
                node.name, type_str, 1, point_level=point_level
            )

        if self._has_unsigned_scalar_pointee(node.type):
            self._mark_unsigned_pointee(var_addr)

        if node.init is not None:
            if self.in_global:
                try:
                    const_init = self._build_const_init(node.init, var_ir_type)
                    # Rendering the constant surfaces a malformed initializer
                    # here, where it can fall back to null.
                    str(const_init)
                    var_addr.initializer = const_init
                except Exception:
                    var_addr.initializer = ir.Constant(var_ir_type, None)
            else:
                init_val, _ = self.codegen(node.init)
                if isinstance(init_val.type, ir.ArrayType) and isinstance(
                    var_ir_type, ir.PointerType
                ):
                    # Array constant (string literal) initialising a pointer:
                    # place it in a constant global and store its address.
                    gv = ir.GlobalVariable(
                        self.module,
                        init_val.type,
                        self.module.get_unique_name("str"),
                    )
                    gv.initializer = init_val
                    gv.global_constant = True
                    init_val = self.builder.bitcast(gv, var_ir_type)
                elif init_val.type != var_ir_type:
                    init_val = self._implicit_convert(init_val, var_ir_type)
                self._safe_store(init_val, var_addr)
    else:
        return None, None

    return None, var_addr


def _declare_aggregate_var(self, node, name, ir_type):
    """Allocate, register and initialise a variable whose IR type is known.

    Shared by the typedef'd-aggregate and struct/union paths of
    ``codegen_Decl``.  Storage is an alloca inside a function and a global
    variable at file scope.  A file-scope variable gets a constant
    initializer built from ``node.init``, or a zero initializer when there is
    none; a local one is initialised by storing the generated value, if any.

    Returns:
        Always ``(None, None)``.
    """
    if self.in_global:
        storage = ir.GlobalVariable(self.module, ir_type, name)
    else:
        storage = self._alloca_in_entry(ir_type, name)
    self.define(name, (ir_type, storage))

    if node.init is not None:
        if self.in_global:
            storage.initializer = self._build_const_init(node.init, ir_type)
        else:
            init_val, _ = self.codegen(node.init)
            if init_val is not None:
                if init_val.type != ir_type:
                    init_val = self._implicit_convert(init_val, ir_type)
                self._safe_store(init_val, storage)
    elif self.in_global:
        storage.initializer = self._zero_initializer(ir_type)
    return None, None
3377
+
3378
def codegen_ID(self, node):
    """Generate IR for a reference to the identifier ``node.name``.

    The looked-up type is recorded on ``node.ir_type``.

    Returns:
        ``(value, address)``:

        * constants (such as enum constants) and functions are returned
          as-is with no address;
        * an array binding yields a pointer to its first element together
          with the array's own address;
        * a binding that is not a pointer is returned as-is with no address;
        * anything else is loaded, the binding's unsigned markers are copied
          onto the loaded value, and the binding is returned as the address.
    """
    bound_type, binding = self.lookup(node.name)
    node.ir_type = bound_type

    # Enum constants are stored as ir.Constant, not alloca'd
    if isinstance(binding, ir.values.Constant):
        return binding, None

    # Function reference: return function pointer directly
    if isinstance(binding, ir.Function):
        if self._is_unsigned_return_binding(binding):
            self._tag_unsigned_return(binding)
        return binding, None

    # Array types: decay to pointer to first element
    if isinstance(bound_type, ir.ArrayType):
        indices = [ir.Constant(ir.IntType(32), 0) for _ in range(2)]
        first_elem = self.builder.gep(binding, indices, name="arraydecay")
        if self._is_unsigned_pointee_binding(binding):
            self._tag_unsigned_pointee(first_elem)
        return first_elem, binding

    if isinstance(binding.type, ir.PointerType):
        loaded = self._safe_load(binding)
        # Propagate unsigned-ness from variable to loaded value
        propagations = (
            (self._is_unsigned_binding, self._tag_unsigned),
            (self._is_unsigned_pointee_binding, self._tag_unsigned_pointee),
            (self._is_unsigned_return_binding, self._tag_unsigned_return),
        )
        for is_marked, tag in propagations:
            if is_marked(binding):
                tag(loaded)
        return loaded, binding

    # Guard: only load from pointer types
    return binding, None
3412
+
3413
def codegen_ArrayRef(self, node):
    """Emit IR for the subscript expression ``node.name[node.subscript]``.

    Strategies are tried in this order:

    1. pointer base  -> single-index GEP on the pointer value;
    2. non-array base -> index through the value as a pointer (bitcast to
       ``i8*`` first when the value is not already a pointer);
    3. array base    -> ``[0, idx]`` GEP on the array address, or integer
       address arithmetic when no pointer-typed address is available.

    Returns:
        A ``(value, address)`` tuple.  ``value`` is the loaded element, or
        the element pointer itself when the element is an array (decay).
        ``(i64 0, None)`` is returned when the base or the subscript
        produced no IR value.

    Side effects:
        Sets ``node.ir_type`` on every path that produces an element.
    """

    name = node.name
    subscript = node.subscript
    name_ir, name_ptr = self.codegen(name)
    if name_ir is None:
        return ir.Constant(int64_t, 0), None
    # An address-less array constant (the "strlit" prefix suggests a string
    # literal) is placed in an internal constant global so it can be indexed.
    if (
        name_ptr is None
        and isinstance(name_ir, ir.values.Constant)
        and isinstance(name_ir.type, ir.ArrayType)
    ):
        gv = ir.GlobalVariable(
            self.module, name_ir.type, self.module.get_unique_name("strlit")
        )
        gv.initializer = name_ir
        gv.global_constant = True
        gv.linkage = "internal"
        name_ptr = gv
    subscript_ir, subscript_ptr = self.codegen(subscript)
    if subscript_ir is None:
        return ir.Constant(int64_t, 0), None

    # Normalize the index to i64.  Every non-integer index goes through
    # fptoui, so this assumes it is a floating-point value.
    if isinstance(subscript_ir.type, ir.IntType):
        subscript_ir = self._implicit_convert(subscript_ir, ir.IntType(64))
    else:
        subscript_ir = self.builder.fptoui(subscript_ir, ir.IntType(64))

    # Pointer subscript: p[i] -> *(p + i)
    # The recorded expression type wins over the raw IR type when present.
    name_type = self._get_expr_ir_type(name) or name_ir.type
    if isinstance(name_type, ir.PointerType) and isinstance(
        name_ir.type, ir.PointerType
    ):
        value_ir_type = name_type.pointee
        elem_ptr = self.builder.gep(name_ir, [subscript_ir], name="ptridx")
        # If GEP result points to an array, return pointer (array decay)
        if isinstance(elem_ptr.type, ir.PointerType) and isinstance(
            elem_ptr.type.pointee, ir.ArrayType
        ):
            node.ir_type = elem_ptr.type.pointee
            return elem_ptr, elem_ptr
        value_result = self._safe_load(elem_ptr)
        # Propagate unsignedness from the base to the loaded element.
        if self._is_unsigned_pointee(name_ir) or self._is_unsigned_pointee(
            name_ptr
        ):
            self._tag_unsigned(value_result)
        node.ir_type = value_ir_type
        return value_result, elem_ptr

    # Non-array type (opaque struct etc): treat as pointer subscript
    if not isinstance(name_type, ir.ArrayType):
        ptr = (
            self.builder.bitcast(name_ir, ir.PointerType(int8_t))
            if not isinstance(name_ir.type, ir.PointerType)
            else name_ir
        )
        elem_ptr = self.builder.gep(ptr, [subscript_ir], name="ptridx")
        value_result = self._safe_load(elem_ptr)
        if self._is_unsigned_pointee(name_ir) or self._is_unsigned_pointee(ptr):
            self._tag_unsigned(value_result)
        node.ir_type = (
            elem_ptr.type.pointee
            if isinstance(elem_ptr.type, ir.PointerType)
            else name_type
        )
        return value_result, elem_ptr

    # Array subscript: a[i] using GEP for correct stride calculation
    value_ir_type = name_type.element

    # If no address pointer, use name_ir as base
    if name_ptr is None:
        name_ptr = name_ir
    # NOTE(review): name_ir was already checked for None above, so this guard
    # looks unreachable — confirm before removing.
    if name_ptr is None:
        return ir.Constant(int64_t, 0), None

    # GEP requires a pointer base; if name_ptr is a pointer to array, use GEP
    if isinstance(name_ptr.type, ir.PointerType):
        zero = ir.Constant(ir.IntType(32), 0)
        # The i64 index is narrowed back to i32 for the [0, idx] GEP.
        idx = (
            self.builder.trunc(subscript_ir, ir.IntType(32))
            if isinstance(subscript_ir.type, ir.IntType)
            and subscript_ir.type.width > 32
            else subscript_ir
        )
        elem_ptr = self.builder.gep(name_ptr, [zero, idx], name="arridx")

        # If element is sub-array, return pointer (array decay)
        if isinstance(value_ir_type, ir.ArrayType):
            node.ir_type = value_ir_type
            return elem_ptr, elem_ptr
        else:
            value_result = self._safe_load(elem_ptr)
            if self._is_unsigned_pointee(name_ir) or self._is_unsigned_pointee(
                name_ptr
            ):
                self._tag_unsigned(value_result)
            node.ir_type = value_ir_type
            return value_result, elem_ptr

    # Fallback: byte offset arithmetic (for non-pointer base)
    # address = base + sizeof(element) * index, computed on i64 values.
    elem_size = self._ir_type_size(value_ir_type)
    stride = ir.Constant(ir.IntType(64), elem_size)
    offset = self.builder.mul(stride, subscript_ir, "array_add")
    base_int = (
        self.builder.ptrtoint(name_ptr, ir.IntType(64))
        if isinstance(name_ptr.type, ir.PointerType)
        else (
            name_ptr
            if isinstance(name_ptr.type, ir.IntType)
            else self.builder.ptrtoint(name_ptr, ir.IntType(64))
        )
    )
    addr = self.builder.add(offset, base_int, "addtmp")
    value_ptr = self.builder.inttoptr(addr, ir.PointerType(value_ir_type))
    if isinstance(value_ir_type, ir.ArrayType):
        node.ir_type = value_ir_type
        return value_ptr, value_ptr
    else:
        value_result = self._safe_load(value_ptr)
        if self._is_unsigned_pointee(name_ir) or self._is_unsigned_pointee(
            name_ptr
        ):
            self._tag_unsigned(value_result)
        node.ir_type = value_ir_type
        return value_result, value_ptr
3539
+
3540
def codegen_Return(self, node):
    """Emit the terminator for a C ``return`` statement.

    The operand (if any) is always evaluated so that its side effects are
    emitted.  The instruction is then chosen from the *function's* return
    type rather than from the presence of an operand:

    * void function            -> ``ret void`` (operand value discarded);
    * non-void, usable operand -> operand implicitly converted, then ``ret``;
    * non-void, no usable operand (bare ``return;`` or an operand that
      produced no value) -> the same zero default that codegen_FuncDef
      emits for a fall-through.

    Returns:
        ``(None, None)``; a return statement yields no value.
    """
    func_ret_type = self.function.return_value.type

    retval = None
    if node.expr is not None:
        retval, _ = self.codegen(node.expr)

    if isinstance(func_ret_type, ir.VoidType):
        # e.g. ``return f();`` inside a void function: a value must not be
        # handed to ``ret`` here.
        self.builder.ret_void()
        return None, None

    if retval is None or isinstance(retval.type, ir.VoidType):
        # Nothing usable to return from a non-void function; ``ret void``
        # would not match the function signature, so return a typed zero.
        if isinstance(func_ret_type, ir.PointerType):
            retval = ir.Constant(func_ret_type, None)
        elif self._is_floating_ir_type(func_ret_type):
            retval = ir.Constant(func_ret_type, 0.0)
        else:
            retval = ir.Constant(func_ret_type, 0)
    elif retval.type != func_ret_type:
        # Implicit convert to function return type
        retval = self._implicit_convert(retval, func_ret_type)

    self.builder.ret(retval)
    return None, None
3552
+
3553
def codegen_Compound(self, node):
    """Generate code for each statement of a ``{ ... }`` block, in order.

    Once the current basic block has a terminator (goto/break/continue/
    return), the remaining statements are unreachable and are skipped,
    except labels, which are still generated.

    Returns:
        ``(None, None)``; a compound statement yields no value.
    """
    for item in node.block_items or ():
        block_closed = bool(self.builder) and self.builder.block.is_terminated
        if block_closed and not isinstance(item, c_ast.Label):
            # Unreachable statement after a terminator.
            continue
        self.codegen(item)
    return None, None
3565
+
3566
def codegen_FuncDecl(self, node):
    """Resolve the IR type of ``node.type`` for a function declarator.

    Returns:
        ``(ir_type, None)``; a declarator has no address.
    """
    return self._resolve_ast_type(node.type), None
3569
+
3570
def codegen_FuncDef(self, node):
    """Emit an LLVM function for a C function definition.

    Steps: resolve the return and parameter types, create the
    ``ir.Function`` (or reuse an existing declaration of the same name),
    store each incoming argument into a slot obtained from
    ``_alloca_in_entry`` and bind it to the parameter name, generate the
    body, and append a default ``ret`` if the last block is unterminated.

    Returns:
        ``(None, None)`` always.  No body is emitted when a function of this
        name is already defined or when constructing the ``ir.Function``
        raises.
    """

    # Original note: "deep level func have deep level / we don't want
    # funcdecl in codegen_decl too".  The declarator is resolved here
    # directly — presumably to keep it out of codegen_Decl; TODO confirm.
    ir_type, _ = self.codegen(node.decl.type)
    funcname = node.decl.name

    if funcname == "main":
        self.return_type = ir_type  # for call in C

    # Collect parameter IR types; "..." only sets the var-arg flag, and
    # parameters that resolve to None (e.g. ``void``) are left out.
    arg_types = []
    is_var_arg = False
    if node.decl.type.args:
        for arg_type in node.decl.type.args.params:
            if isinstance(arg_type, c_ast.EllipsisParam):
                is_var_arg = True
                continue
            t = self._resolve_param_type(arg_type)
            if t is not None:
                arg_types.append(t)

    with self.new_function():

        existing = self.module.globals.get(funcname)
        if existing and isinstance(existing, ir.Function):
            if existing.is_declaration:
                # Fill in the body of a previously declared prototype.
                self.function = existing
            else:
                # Already defined — skip this duplicate definition
                return None, None
        else:
            try:
                self.function = ir.Function(
                    self.module,
                    ir.FunctionType(ir_type, arg_types, var_arg=is_var_arg),
                    name=funcname,
                )
            except Exception:
                # Best effort: any failure to create the function drops the
                # definition silently.
                return None, None
        if self._func_decl_returns_unsigned(node.decl.type):
            self._mark_unsigned_return(self.function)
        self.block = self.function.append_basic_block()
        self.builder = ir.IRBuilder(self.block)
        # Register the function in the second env layer as well as the
        # current one — presumably so it stays visible after the function
        # scope is left; verify against new_function().
        if len(self.env.maps) > 1:
            self.env.maps[1][funcname] = (ir_type, self.function)
        self.define(funcname, (ir_type, self.function))
        if node.decl.type.args:
            # param_idx walks arg_types / function.args; it only advances
            # for parameters that were actually bound.
            param_idx = 0
            for p in node.decl.type.args.params:
                if isinstance(p, c_ast.EllipsisParam):
                    continue
                # Skip void params (f(void) means no params)
                if isinstance(p, c_ast.Typename) and isinstance(
                    getattr(p, "type", None), c_ast.TypeDecl
                ):
                    if isinstance(
                        p.type.type, c_ast.IdentifierType
                    ) and p.type.type.names == ["void"]:
                        continue
                if param_idx >= len(arg_types):
                    break
                arg_type = arg_types[param_idx]
                # Unnamed parameters get a synthetic name.
                pname = p.name if isinstance(p.name, str) else f"arg{param_idx}"
                var = self._alloca_in_entry(arg_type, pname)
                self.define(pname, (arg_type, var))
                self._safe_store(self.function.args[param_idx], var)
                # Track unsigned params
                if isinstance(p, c_ast.Decl) and isinstance(
                    getattr(p, "type", None), c_ast.TypeDecl
                ):
                    if isinstance(p.type.type, c_ast.IdentifierType):
                        if self._is_unsigned_type_names(p.type.type.names):
                            self._mark_unsigned(var)
                if isinstance(p, c_ast.Decl):
                    if self._has_unsigned_scalar_pointee(p.type):
                        self._mark_unsigned_pointee(var)
                    # Pointer parameter whose target declarator returns
                    # unsigned (a function-pointer parameter, presumably).
                    if isinstance(
                        p.type, c_ast.PtrDecl
                    ) and self._func_decl_returns_unsigned(p.type.type):
                        self._mark_unsigned_return(var)
                param_idx += 1

        self.codegen(node.body)

        # Control can fall off the end of the body: terminate the last
        # block with a default return matching the return type.
        if not self.builder.block.is_terminated:
            if isinstance(ir_type, ir.VoidType):
                self.builder.ret_void()
            elif isinstance(ir_type, ir.PointerType):
                self.builder.ret(ir.Constant(ir_type, None))
            elif self._is_floating_ir_type(ir_type):
                self.builder.ret(ir.Constant(ir_type, 0.0))
            else:
                self.builder.ret(ir.Constant(ir_type, 0))

        return None, None
3665
+
3666
def codegen_Struct(self, node):
    """Build (or look up) the LLVM type for a C ``struct`` specifier.

    A named struct without a member list is a reference: the registered
    type is returned, or an ``i8`` placeholder is registered for a
    forward-declared/opaque struct.  Otherwise a literal struct type is
    built from the members and annotated with:

    * ``members``           - member names, in declaration order;
    * ``member_decl_types`` - the AST type node of each member.

    Enum-typed members are stored as ``i64`` (the same representation
    codegen_Typedef uses for enums) and their enumerators are registered.

    Returns:
        The IR type for the struct (not a ``(value, address)`` pair).
    """
    # If this is a reference to a named struct without decls, look it up
    if node.name and not node.decls:
        if node.name in self.env:
            return self.env[node.name][0]
        # Opaque/forward-declared struct: treat as i8 (byte) for pointer use
        opaque = ir.IntType(8)
        self.define(node.name, (opaque, None))
        return opaque

    def _build_array_type(arr_node):
        # Handle multi-dimensional arrays: a[N][M] -> [N x [M x T]]
        dim = self._eval_dim(arr_node.dim) if arr_node.dim else 0
        if isinstance(arr_node.type, c_ast.ArrayDecl):
            inner = _build_array_type(arr_node.type)
        else:
            inner = self._resolve_ast_type(arr_node.type)
        return ir.ArrayType(inner, dim)

    member_types = []
    member_names = []
    member_decl_types = []
    for decl in node.decls or ():
        decl_type = decl.type
        # Type specifier under a plain (non-pointer, non-array) declarator.
        spec = decl_type.type if isinstance(decl_type, c_ast.TypeDecl) else None
        if isinstance(spec, c_ast.Struct):
            member_ir = self.codegen_Struct(spec)
        elif isinstance(spec, c_ast.Union):
            member_ir = self.codegen_Union(spec)
        elif isinstance(decl_type, c_ast.ArrayDecl):
            member_ir = _build_array_type(decl_type)
        elif isinstance(decl_type, c_ast.PtrDecl):
            member_ir = self._resolve_ast_type(decl_type)
        elif isinstance(spec, c_ast.Enum):
            # Enum nodes carry no ``names`` list; register any enumerators
            # declared inline and store the member as i64.
            self.codegen_Enum(spec)
            member_ir = int64_t
        elif hasattr(spec, "names"):
            member_ir = self._get_ir_type(spec.names)
        else:
            member_ir = int64_t  # fallback
        member_types.append(member_ir)
        member_names.append(decl.name)
        member_decl_types.append(decl_type)

    # Create the struct type
    struct_type = ir.LiteralStructType(member_types)
    struct_type.members = member_names
    struct_type.member_decl_types = member_decl_types

    # Register named structs for later reuse
    if node.name:
        self.define(node.name, (struct_type, None))

    return struct_type
3722
+
3723
def codegen_Union(self, node):
    """Model a C ``union`` as a struct with alignment-preserving storage.

    The storage is ``{align_type, [pad x i8]}``: one integer carrying the
    strictest member alignment, followed by enough bytes to reach the size
    of the largest member.  The returned type is annotated with:

    * ``members``           - member names, in declaration order;
    * ``member_types``      - name -> member IR type;
    * ``member_decl_types`` - name -> member AST type node;
    * ``is_union``          - ``True``.

    A named union without a member list is a reference: the registered type
    is returned, or an ``i8`` placeholder is registered for a
    forward-declared union (same policy as codegen_Struct).

    Returns:
        The IR type for the union (not a ``(value, address)`` pair).
    """
    if node.name and not node.decls:
        if node.name in self.env:
            return self.env[node.name][0]
        # Forward-declared union: treat as i8 (byte) for pointer use
        opaque = ir.IntType(8)
        self.define(node.name, (opaque, None))
        return opaque

    member_types = {}
    member_decl_types = {}
    max_size = 0
    max_align = 1
    for decl in node.decls or ():
        if isinstance(decl.type, c_ast.ArrayDecl):
            ir_t = self._build_array_ir_type(decl.type)
        else:
            ir_t = self._resolve_ast_type(decl.type)
        member_types[decl.name] = ir_t
        member_decl_types[decl.name] = decl.type
        max_size = max(max_size, self._ir_type_size(ir_t))
        max_align = max(max_align, self._ir_type_align(ir_t))

    # Use a struct {align_type, [padding x i8]} to preserve alignment
    # Pick an alignment element: i64 for 8, i32 for 4, i16 for 2, i8 for 1
    align_map = {8: int64_t, 4: int32_t, 2: ir.IntType(16), 1: int8_t}
    if max_align in align_map:
        align_type = align_map[max_align]
        align_size = max_align
    else:
        # Unlisted alignment: fall back to i64 and account for the 8 bytes
        # it really occupies, so the padding still reaches max_size.
        align_type = int64_t
        align_size = 8
    pad_size = max_size - align_size
    if pad_size > 0:
        union_type = ir.LiteralStructType(
            [align_type, ir.ArrayType(int8_t, pad_size)]
        )
    else:
        union_type = ir.LiteralStructType([align_type])
    union_type.members = list(member_types.keys())
    union_type.member_types = member_types
    union_type.member_decl_types = member_decl_types
    union_type.is_union = True

    if node.name:
        self.define(node.name, (union_type, None))

    return union_type
3767
+
3768
def codegen_StructRef(self, node):
    """Emit IR for a member access ``base.field`` or ``base->field``.

    The method first determines ``struct_addr`` (pointer to the aggregate)
    and ``struct_type`` (its IR type) from the kind of base expression,
    then accesses the member:

    * union   -> bitcast of the union address to the member's type;
    * no ``members`` attribute (opaque) -> the first 8 bytes read as i64;
    * struct  -> inbounds GEP to the field index.

    Array members decay: the value returned is a pointer to element 0.

    Returns:
        A ``(value, address)`` tuple for the member.

    Raises:
        Exception: an identifier base whose bound address is not a pointer.
        RuntimeError: the field name is not a member of the struct.

    Side effects:
        Records the member's type on ``node`` via ``_set_expr_ir_type``.
    """

    if isinstance(node.name, c_ast.StructRef):
        inner_val, inner_addr = self.codegen_StructRef(node.name)
        if node.type == "->":
            # Chain: (a->b)->c — need to use the VALUE of a->b as pointer base
            # inner_val is the loaded field value (a pointer to next struct)
            base = inner_val
            semantic_base_type = self._get_expr_ir_type(node.name)
            if (
                isinstance(semantic_base_type, ir.PointerType)
                and base.type != semantic_base_type
            ):
                # Best effort: a failed cast keeps the untyped base.
                try:
                    base = self.builder.bitcast(base, semantic_base_type)
                except Exception:
                    pass
            struct_type = (
                base.type.pointee if hasattr(base.type, "pointee") else int8_t
            )
            struct_addr = base
        else:
            # Chain: (a->b).c — use the ADDRESS of a->b as struct base
            semantic_base_type = self._get_expr_ir_type(node.name)
            if semantic_base_type is not None:
                expected_addr_type = ir.PointerType(semantic_base_type)
                if inner_addr.type != expected_addr_type:
                    try:
                        inner_addr = self.builder.bitcast(
                            inner_addr, expected_addr_type
                        )
                    except Exception:
                        pass
            struct_type = (
                inner_addr.type.pointee
                if hasattr(inner_addr.type, "pointee")
                else int8_t
            )
            struct_addr = inner_addr
    elif isinstance(node.name, c_ast.ID):
        # Plain identifier base: use the address bound in the environment.
        struct_instance_addr = self.env[node.name.name][1]
        if not isinstance(struct_instance_addr.type, ir.PointerType):
            raise Exception("Invalid struct reference")

        if node.type == "->":
            # The variable holds a pointer: load it to get the struct address.
            ptr_val = self._safe_load(struct_instance_addr)
            struct_type = (
                ptr_val.type.pointee if hasattr(ptr_val.type, "pointee") else int8_t
            )
            struct_addr = ptr_val
        else:
            struct_type = (
                struct_instance_addr.type.pointee
                if hasattr(struct_instance_addr.type, "pointee")
                else int8_t
            )
            struct_addr = struct_instance_addr
    else:
        # Cast/UnaryOp/other expression as struct base: ((Type*)ptr)->field
        val, addr = self.codegen(node.name)
        semantic_base_type = self._get_expr_ir_type(node.name)
        if node.type == "->":
            struct_addr = val
            if (
                isinstance(semantic_base_type, ir.PointerType)
                and struct_addr.type != semantic_base_type
            ):
                try:
                    struct_addr = self.builder.bitcast(
                        struct_addr, semantic_base_type
                    )
                except Exception:
                    pass
            struct_type = (
                struct_addr.type.pointee
                if hasattr(struct_addr.type, "pointee")
                else int8_t
            )
        else:
            # Prefer the expression's address; fall back to its value.
            struct_addr = addr if addr else val
            if addr is not None and semantic_base_type is not None:
                expected_addr_type = ir.PointerType(semantic_base_type)
                if struct_addr.type != expected_addr_type:
                    try:
                        struct_addr = self.builder.bitcast(
                            struct_addr, expected_addr_type
                        )
                    except Exception:
                        pass
                struct_type = (
                    struct_addr.type.pointee
                    if hasattr(struct_addr.type, "pointee")
                    else int8_t
                )
            else:
                struct_type = (
                    semantic_base_type
                    if semantic_base_type is not None
                    else (val.type if hasattr(val.type, "members") else int8_t)
                )

    # Union access: all fields share offset 0, use bitcast
    if getattr(struct_type, "is_union", False):
        member_ir_type = struct_type.member_types[node.field.name]
        semantic_field_type = member_ir_type
        member_decl_types = getattr(struct_type, "member_decl_types", None)
        decl_type = None
        if member_decl_types and node.field.name in member_decl_types:
            decl_type = member_decl_types[node.field.name]
            # Re-resolve the declared type; failures keep the stored type.
            try:
                resolved = self._resolve_ast_type(decl_type)
                if isinstance(member_ir_type, ir.ArrayType) and isinstance(
                    resolved, ir.PointerType
                ):
                    # The resolver decayed an array to a pointer: keep the
                    # stored array type.
                    pass
                elif isinstance(
                    resolved, (ir.ArrayType, ir.LiteralStructType, ir.PointerType)
                ):
                    semantic_field_type = resolved
            except Exception:
                pass
        ptr = self.builder.bitcast(struct_addr, ir.PointerType(semantic_field_type))
        if isinstance(semantic_field_type, ir.ArrayType):
            # Array member: the value is a pointer to its first element.
            elem_ptr = self.builder.gep(
                ptr,
                [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
                name="unionarraydecay",
            )
            if decl_type is not None:
                self._tag_value_from_decl_type(elem_ptr, decl_type)
            self._set_expr_ir_type(node, semantic_field_type)
            return elem_ptr, ptr
        val = self._safe_load(ptr)
        if decl_type is not None:
            self._tag_value_from_decl_type(val, decl_type)
        self._set_expr_ir_type(node, semantic_field_type)
        return val, ptr

    # Opaque struct (no members) — treat as byte-offset access
    # The field name is not consulted here: the load always reads an i64
    # from the start of the object.
    if not hasattr(struct_type, "members"):
        ptr = self.builder.bitcast(struct_addr, voidptr_t)
        val = self._safe_load(self.builder.bitcast(ptr, ir.PointerType(int64_t)))
        self._set_expr_ir_type(node, int64_t)
        return val, ptr

    # Locate the member by name (first match).
    field_index = None
    for i, field in enumerate(struct_type.members):
        if field == node.field.name:
            field_index = i
            break

    if field_index is None:
        raise RuntimeError(f"Field '{node.field.name}' not found in struct")

    field_addr = self.builder.gep(
        struct_addr,
        [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), field_index)],
        inbounds=True,
    )

    field_type = struct_type.elements[field_index]
    semantic_field_type = field_type
    member_decl_types = getattr(struct_type, "member_decl_types", None)
    decl_type = None
    if member_decl_types and field_index < len(member_decl_types):
        decl_type = member_decl_types[field_index]
        try:
            resolved = self._resolve_ast_type(decl_type)
            # Only use semantic type if it's more specific (pointer/struct),
            # not if it decayed an array to pointer
            if isinstance(field_type, ir.ArrayType) and isinstance(
                resolved, ir.PointerType
            ):
                pass  # keep original array type
            elif isinstance(resolved, (ir.LiteralStructType, ir.PointerType)):
                semantic_field_type = resolved
        except Exception:
            pass

    # View the field through the semantic type when it differs from the
    # stored one; a failed cast falls back to the raw field address.
    typed_field_addr = field_addr
    target_ptr_type = ir.PointerType(semantic_field_type)
    if field_addr.type != target_ptr_type:
        try:
            typed_field_addr = self.builder.bitcast(field_addr, target_ptr_type)
        except Exception:
            typed_field_addr = field_addr

    if isinstance(semantic_field_type, ir.ArrayType):
        # Array field: decay to pointer to first element
        elem_ptr = self.builder.gep(
            typed_field_addr,
            [ir.Constant(ir.IntType(32), 0), ir.Constant(ir.IntType(32), 0)],
            name="arraydecay",
        )
        if decl_type is not None:
            self._tag_value_from_decl_type(elem_ptr, decl_type)
        self._set_expr_ir_type(node, semantic_field_type)
        return elem_ptr, typed_field_addr

    field_value = self._safe_load(typed_field_addr)
    if decl_type is not None:
        self._tag_value_from_decl_type(field_value, decl_type)
    self._set_expr_ir_type(node, semantic_field_type)
    return field_value, typed_field_addr
3972
+
3973
def codegen_EmptyStatement(self, node):
    """Handle a lone ``;``: nothing is emitted and no value is produced."""
    no_result = (None, None)
    return no_result
3975
+
3976
def codegen_ExprList(self, node):
    """Generate a comma expression: evaluate every operand, yield the last.

    The type recorded for the final operand (falling back to the ``type``
    attribute of its value) is copied onto ``node`` when one is available.

    Returns:
        The ``(value, address)`` pair of the last operand, or
        ``(None, None)`` for an empty list.
    """
    value = address = final_expr = None
    for final_expr in node.exprs:
        value, address = self.codegen(final_expr)

    if final_expr is None:
        return value, address

    inferred = self._get_expr_ir_type(final_expr, getattr(value, "type", None))
    if inferred is not None:
        self._set_expr_ir_type(node, inferred)
    return value, address
3991
+
3992
def codegen_Label(self, node):
    """Start the basic block for a C label and generate its statement.

    The block is reused when an earlier ``goto`` already created it.  An
    unterminated current block branches into the label block before the
    builder is repositioned there.

    Returns:
        ``(None, None)``; a label yields no value.
    """
    key = f"label_{node.name}"
    if key not in self._labels:
        # First sighting of this label: no forward goto created it yet.
        self._labels[key] = self.builder.function.append_basic_block(key)
    target = self._labels[key]

    falls_through = not self.builder.block.is_terminated
    if falls_through:
        self.builder.branch(target)
    self.builder.position_at_end(target)

    if node.stmt:
        self.codegen(node.stmt)
    return None, None
4006
+
4007
def codegen_Goto(self, node):
    """Branch to the basic block of the label named by ``node.name``.

    A label that has not been seen yet (forward reference) gets its block
    created and recorded here, so codegen_Label can pick it up later.

    Returns:
        ``(None, None)``; a goto yields no value.
    """
    key = f"label_{node.name}"
    if key not in self._labels:
        # Forward reference: create the block now
        self._labels[key] = self.builder.function.append_basic_block(key)
    self.builder.branch(self._labels[key])
    return None, None
4017
+
4018
def codegen_Enum(self, node):
    """Register every enumerator of an ``enum`` as an i64 constant.

    Values count up from 0; an enumerator with an explicit initializer
    restarts the count at that compile-time value.  An enum without an
    enumerator list registers nothing.

    Returns:
        ``(None, None)``.
    """
    if not node.values:
        return None, None

    next_value = 0
    for item in node.values.enumerators:
        if item.value:
            next_value = self._eval_const_expr(item.value)
        constant = ir.Constant(int64_t, next_value)
        self.define(item.name, (int64_t, constant))
        next_value += 1
    return None, None
4030
+
4031
def _eval_const_expr(self, node):
    """Evaluate a constant expression at compile time (for enum values).

    Supported nodes: integer/char constants, unary ``sizeof - + ~ !`` and
    ``&`` of a member reference (offsetof idiom), binary arithmetic /
    bitwise / comparison / logical operators, ``?:``, identifiers bound to
    integer constants, and casts (the target type is ignored).

    Integer ``/`` and ``%`` follow C semantics: the quotient truncates
    toward zero and the remainder takes the sign of the dividend.

    Returns:
        The value as a Python ``int``.  String constants, non-integer
        literals, unknown identifiers and bare type names evaluate to 0;
        ``sizeof`` of an arbitrary expression evaluates to 8.

    Raises:
        CodegenError: the node (or a unary operator) is not supported.
        KeyError: an unsupported binary operator.
        ZeroDivisionError: division or remainder by a constant zero.
    """
    if isinstance(node, c_ast.Constant):
        if node.type == "string":
            return 0  # string constants can't be int-evaluated
        v = node.value.rstrip("uUlL")
        if v.startswith("'"):
            return self._char_constant_value(v)
        if v.startswith("0x") or v.startswith("0X"):
            return int(v, 16)
        elif v.startswith("0") and len(v) > 1 and v[1:].isdigit():
            return int(v, 8)
        try:
            return int(v)
        except ValueError:
            return 0
    elif isinstance(node, c_ast.UnaryOp):
        if node.op == "sizeof":
            if isinstance(node.expr, c_ast.Typename):
                ir_t = self._resolve_ast_type(node.expr.type)
                return self._ir_type_size(ir_t)
            if isinstance(node.expr, c_ast.Constant) and node.expr.type == "string":
                raw = node.expr.value[1:-1]
                processed = self._process_escapes(raw)
                return len(self._string_bytes(processed + "\00"))
            # sizeof does not evaluate its operand, so the operand is not
            # folded here (folding could reject a non-constant operand).
            return 8  # default sizeof for expressions
        if node.op == "&" and isinstance(node.expr, c_ast.StructRef):
            offset, _ = self._eval_offsetof_structref(node.expr)
            return offset
        val = self._eval_const_expr(node.expr)
        if node.op == "-":
            return -val
        elif node.op == "+":
            return val
        elif node.op == "~":
            return ~val
        elif node.op == "!":
            return 0 if val else 1
        # Any other unary operator falls through to the error below rather
        # than silently yielding None.
    elif isinstance(node, c_ast.BinaryOp):

        def _c_div(a, b):
            # C integer division truncates toward zero (Python's // floors).
            quotient = abs(a) // abs(b)
            return quotient if (a < 0) == (b < 0) else -quotient

        def _c_mod(a, b):
            # C remainder satisfies (a / b) * b + a % b == a.
            return a - b * _c_div(a, b)

        l = self._eval_const_expr(node.left)
        r = self._eval_const_expr(node.right)
        ops = {
            "+": lambda a, b: a + b,
            "-": lambda a, b: a - b,
            "*": lambda a, b: a * b,
            "/": _c_div,
            "%": _c_mod,
            "<<": lambda a, b: a << b,
            ">>": lambda a, b: a >> b,
            "&": lambda a, b: a & b,
            "|": lambda a, b: a | b,
            "^": lambda a, b: a ^ b,
            "==": lambda a, b: int(a == b),
            "!=": lambda a, b: int(a != b),
            "<": lambda a, b: int(a < b),
            "<=": lambda a, b: int(a <= b),
            ">": lambda a, b: int(a > b),
            ">=": lambda a, b: int(a >= b),
            "&&": lambda a, b: int(bool(a) and bool(b)),
            "||": lambda a, b: int(bool(a) or bool(b)),
        }
        return ops[node.op](l, r)
    elif isinstance(node, c_ast.TernaryOp):
        cond = self._eval_const_expr(node.cond)
        if cond:
            return self._eval_const_expr(node.iftrue)
        return self._eval_const_expr(node.iffalse)
    elif isinstance(node, c_ast.ID):
        # Try to look up as enum constant or defined value
        if node.name in self.env:
            _, val = self.env[node.name]
            if isinstance(val, ir.values.Constant) and isinstance(
                val.type, ir.IntType
            ):
                return int(val.constant)
        return 0  # unknown identifier defaults to 0
    elif isinstance(node, c_ast.Cast):
        return self._eval_const_expr(node.expr)
    elif isinstance(node, c_ast.Typename):
        return 0
    raise CodegenError(f"Not a constant expression: {type(node).__name__}")
4113
+
4114
def codegen_InitList(self, node):
    """Generate a brace initializer that appears in expression position.

    Only the first element is generated; an empty list yields an i64 zero.

    Returns:
        The ``(value, address)`` pair of the first element, or
        ``(i64 0, None)`` when there are no elements.
    """
    if not node.exprs:
        return ir.Constant(int64_t, 0), None
    first = node.exprs[0]
    return self.codegen(first)
4119
+
4120
def codegen_DeclList(self, node):
    """Generate each declaration in ``node.decls``, in order.

    Returns:
        ``(None, None)``; a declaration list yields no value.
    """
    pending = iter(node.decls)
    for declaration in pending:
        self.codegen(declaration)
    return None, None
4124
+
4125
def codegen_Typedef(self, node):
    """Register a ``typedef`` under the key ``__typedef_<name>``.

    What is stored depends on the aliased type:

    * basic type names    -> the list of type-name strings;
    * named struct/union  -> the string ``__struct_<tag>`` (resolved later),
      after making sure the tag itself is registered;
    * anonymous struct/union -> the built IR type;
    * enum                -> ``int64_t`` (enumerators are registered too);
    * array               -> the array IR type;
    * pointer             -> the pointer IR type (function pointers via
      ``_build_func_ptr_type``; ``void*`` as ``voidptr_t``; any other
      pointee, e.g. a nested pointer, via ``_resolve_ast_type``).

    Other declarator kinds are ignored.

    Returns:
        ``(None, None)``; a typedef emits no code.
    """
    # typedef int myint; / typedef int* intptr; / typedef struct{...} Name;
    alias_key = f"__typedef_{node.name}"
    declarator = node.type

    if isinstance(declarator, c_ast.TypeDecl):
        spec = declarator.type
        if isinstance(spec, c_ast.IdentifierType):
            self.define(alias_key, spec.names)
        elif isinstance(spec, c_ast.Struct):
            built = self.codegen_Struct(spec)  # ensure it's registered
            if spec.name:
                # Named struct: store reference to struct name for lazy resolution
                self.define(alias_key, f"__struct_{spec.name}")
            else:
                self.define(alias_key, built)
        elif isinstance(spec, c_ast.Union):
            built = self.codegen_Union(spec)
            if spec.name:
                self.define(alias_key, f"__struct_{spec.name}")
            else:
                self.define(alias_key, built)
        elif isinstance(spec, c_ast.Enum):
            # typedef enum { A, B, C } MyEnum;
            self.codegen_Enum(spec)
            self.define(alias_key, int64_t)
    elif isinstance(declarator, c_ast.ArrayDecl):
        self.define(alias_key, self._build_array_ir_type(declarator))
    elif isinstance(declarator, c_ast.PtrDecl):
        inner = declarator.type
        if isinstance(inner, c_ast.FuncDecl):
            self.define(alias_key, self._build_func_ptr_type(inner))
        elif isinstance(inner, c_ast.TypeDecl):
            if isinstance(inner.type, c_ast.IdentifierType):
                base_ir = self._get_ir_type(inner.type.names)
            elif isinstance(inner.type, c_ast.Struct):
                base_ir = self.codegen_Struct(inner.type)
            elif isinstance(inner.type, c_ast.Union):
                base_ir = self.codegen_Union(inner.type)
            else:
                # Same resolver as the IdentifierType branch above; nodes
                # without a ``names`` list are treated as ``int``.
                base_ir = self._get_ir_type(
                    inner.type.names if hasattr(inner.type, "names") else ["int"]
                )
            if isinstance(base_ir, ir.VoidType):
                ptr_type = voidptr_t
            else:
                ptr_type = ir.PointerType(base_ir)
            self.define(alias_key, ptr_type)
        else:
            # Pointer to something else (e.g. ``typedef char **strv;``):
            # resolve the whole declarator instead of dropping the alias.
            self.define(alias_key, self._resolve_ast_type(declarator))
    return None, None