romdevtools 0.40.1 → 0.41.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,110 @@ import { readFile } from "node:fs/promises";
14
14
  import path from "node:path";
15
15
  import { runRizin, runRizinJson, RIZIN_ARCH } from "./rizin.js";
16
16
  import { decompileFunction, SLEIGH_LANGID } from "./decompile.js";
17
+ import { registersForPlatform } from "../platforms/common/registers.js";
18
+
19
/**
 * B2: give hardware-register MMIO references readable names in decompiler output.
 *
 * The decompiler emits raw memory refs such as `xRAM2001` / `uRAM400e` for
 * $2001 / $400E. Every such identifier whose address has an entry in the
 * platform's register table is swapped for the register NAME (a valid C
 * identifier), so the C reads `PPUMASK = ...` rather than `xRAM2001 = ...`.
 * When at least one substitution happened, a one-line legend comment listing
 * the name/address pairs is prepended.
 *
 * @param {string} code decompiler C text
 * @param {string} platform platform id handed to `registersForPlatform`
 * @returns {string} `code` untouched when the platform has no register table or
 *   nothing matched; otherwise the legend line followed by the rewritten text
 */
export function nameHardwareRegisters(code, platform) {
  const regs = registersForPlatform(platform);
  if (!regs || Object.keys(regs).length === 0) return code;

  // address → register name, in first-seen order (drives the legend).
  const substitutions = new Map();

  // Mem-ref identifiers: one to three lowercase type-prefix letters, the
  // literal `RAM`, then the hex address. e.g. xRAM2001, uRAM400e, cRAM00ff.
  const memRef = /\b[a-z]{1,3}RAM([0-9a-fA-F]{2,6})\b/g;
  const renamed = code.replace(memRef, (token, digits) => {
    const address = parseInt(digits, 16);
    const label = regs[address];
    if (label) {
      substitutions.set(address, label);
      return label;
    }
    return token; // not a known register: leave the identifier alone
  });
  if (substitutions.size === 0) return code;

  const pairs = [];
  for (const [address, label] of substitutions) {
    pairs.push(`${label}=$${address.toString(16).toUpperCase()}`);
  }
  return "/* hw registers: " + pairs.join(", ") + " */\n" + renamed;
}
43
+
44
/** Platforms in the 6502 family: the only ones the B1 fold below rewrites. */
const SIXTY_FIVE_OH_TWO = new Set(["nes", "atari2600", "atari7800", "c64", "lynx", "pce"]);

/**
 * B1: 6502 idiom-folding post-pass (deterministic half).
 *
 * Cleans up the mechanical noise 8-bit code leaves in decompiler output, in
 * three steps applied in this order:
 *   1. SLEIGH width types → C99 stdint. The numeric suffix is a BYTE width, so
 *      uint1/xunknown1/undefined1 → uint8_t, int1 → int8_t, uint2 → uint16_t,
 *      uint4 → uint32_t (and the signed / unknown variants likewise).
 *   2. Redundant nested width casts: `(uint16_t)(uint8_t)expr` → `(uint8_t)expr`.
 *      At most three passes are run so short chains of casts collapse too.
 *   3. Zero-page byte refs: `cRAM00fd` / `uRAM0012` → `zp_FD` / `zp_12`. Only the
 *      `00xx` four-hex-digit form is renamed; other addresses are left alone.
 *
 * Carry-flag 16-bit arithmetic and BCD reconstruction are deliberately NOT
 * attempted here: a textual rewrite of those risks changing semantics.
 *
 * @param {string} code decompiler C text
 * @param {string} platform platform id
 * @returns {string} `code` unchanged when the platform is not in the 6502 set or
 *   no step applied; otherwise a leading `6502 fold:` legend comment followed by
 *   the rewritten text
 */
export function foldSixtyFiveOhTwoIdioms(code, platform) {
  if (!SIXTY_FIVE_OH_TWO.has(platform)) return code;

  const notes = [];
  let text = code;

  // Step 1: width types. Every key below is matched as a whole word only.
  const STDINT = {
    uint1: "uint8_t", int1: "int8_t",
    uint2: "uint16_t", int2: "int16_t",
    uint4: "uint32_t", int4: "int32_t",
    xunknown1: "uint8_t", undefined1: "uint8_t",
    xunknown2: "uint16_t", undefined2: "uint16_t",
    xunknown4: "uint32_t", undefined4: "uint32_t",
  };
  let typeFolds = 0;
  text = text.replace(/\b(?:u?int|xunknown|undefined)[124]\b/g, (token) => {
    typeFolds += 1;
    return STDINT[token];
  });
  if (typeFolds > 0) notes.push("SLEIGH width types → stdint");

  // Step 2: drop an outer unsigned cast wrapped directly around a (uint8_t)
  // cast. Repeat until nothing changes, capped at three passes.
  let castFolds = 0;
  let pass = 0;
  let previous;
  do {
    previous = text;
    text = previous.replace(/\((uint(?:8|16|32)_t)\)\((uint8_t)\)/g, (_all, _outer, inner) => {
      castFolds += 1;
      return `(${inner})`;
    });
    pass += 1;
  } while (text !== previous && pass < 3);
  if (castFolds > 0) notes.push("redundant width casts collapsed");

  // Step 3: zero-page slots. `RAM00` followed by exactly two hex digits; a
  // reference such as xRAM0312 does not match and is kept as-is.
  const slots = new Set();
  text = text.replace(/\b[a-z]{1,3}RAM00([0-9a-fA-F]{2})\b/g, (_all, digits) => {
    const label = "zp_" + digits.toUpperCase();
    slots.add(label);
    return label;
  });
  if (slots.size > 0) {
    notes.push(`${slots.size} zero-page slot${slots.size > 1 ? "s" : ""} named zp_XX`);
  }

  if (notes.length === 0) return code;
  return `/* 6502 fold: ${notes.join("; ")} */\n` + text;
}
113
+
114
/**
 * Readability post-passes applied to a decompiler C body, in a fixed order:
 * hardware-register naming (B2) runs first so MMIO references are already
 * named before the 6502 fold (B1) labels zero-page slots. Each pass returns its
 * input unchanged when it has nothing to do for the platform, so this can be
 * called for any platform.
 *
 * @param {string} code decompiler C text
 * @param {string} platform platform id
 * @returns {string} the post-processed C text
 */
export function prettyDecompile(code, platform) {
  const withRegisterNames = nameHardwareRegisters(code, platform);
  return foldSixtyFiveOhTwoIdioms(withRegisterNames, platform);
}
17
121
 
18
122
  /** Sniff platform from a ROM extension (mirrors disasm.js). */
19
123
  export function sniffPlatform(p) {
@@ -53,12 +157,66 @@ async function loadContext(romPath, platformOverride) {
53
157
  if (arch == null) {
54
158
  throw new Error(`analyze: no Rizin arch mapping for platform '${platform}'`);
55
159
  }
56
- const romBytes = new Uint8Array(await readFile(romPath));
160
+ let romBytes = new Uint8Array(await readFile(romPath));
57
161
  // PCE: rizin's 6502 plugin drives the loader + standard control flow for
58
162
  // function detection, but mis-decodes HuC6280 custom opcodes — CFG/xrefs are
59
163
  // approximate. Accurate HuC6280 decode is the decompiler's job (SLEIGH spec).
60
164
  const approx = platform === "pce";
61
- return { platform, romBytes, arch, bits: BITS[arch], approx };
165
+
166
+ // Address-space prep (A2): some formats carry a header and load at a CPU base
167
+ // that isn't 0. Strip the header and report `loadBase` so rizin's functions
168
+ // (and the decompiler image) speak CPU addresses, not raw file offsets.
169
+ // c64 .prg — 2-byte little-endian LOAD ADDRESS header, code at that address
170
+ // (typically $0801 = BASIC start). Without this, rizin analyzes the header
171
+ // bytes as code at offset 0 and every address is a file offset, not a CPU
172
+ // address — functions→decompile round-trip lands on garbage.
173
+ let loadBase = 0;
174
+ if (platform === "c64" && romBytes.length >= 2) {
175
+ loadBase = romBytes[0] | (romBytes[1] << 8);
176
+ romBytes = romBytes.subarray(2);
177
+ }
178
+
179
+ // A6: container/format sniff. Some dumps are interleaved/headered such that a
180
+ // FLAT read scrambles every byte → fake "bad instruction" noise everywhere.
181
+ // Detect + auto-correct, and warn so a flat disasm isn't silently wrong.
182
+ const warnings = [];
183
+ if (platform === "genesis") {
184
+ const smd = deinterleaveSmd(romBytes);
185
+ if (smd) {
186
+ romBytes = smd;
187
+ warnings.push("Genesis ROM was SMD-INTERLEAVED (512-byte header + byte-swapped 16KB blocks) — " +
188
+ "auto-deinterleaved before analysis. A flat read of the original would scramble every instruction.");
189
+ }
190
+ }
191
+ return { platform, romBytes, arch, bits: BITS[arch], approx, loadBase, warnings };
192
+ }
193
+
194
/**
 * Detect and undo Sega Mega Drive SMD interleaving.
 *
 * An .smd dump is a 512-byte header followed by 16KB blocks; within each block
 * the first 8KB holds the ODD bytes and the second 8KB the EVEN bytes of the
 * flat image. An input is treated as SMD only when all of these hold:
 *   - it contains the header plus at least one full 16KB block,
 *   - the body length (after the header) is a multiple of 16KB,
 *   - header byte 8 is 0xAA and byte 9 is 0xBB (the SMD magic). The magic is
 *     required so a flat ROM that merely has a matching size is not rewritten.
 *
 * @param {Uint8Array} bytes raw file contents
 * @returns {Uint8Array|null} the deinterleaved ROM (header removed), or null
 *   when the input is not SMD-interleaved
 */
export function deinterleaveSmd(bytes) {
  const HEADER_LEN = 512;
  const BLOCK_LEN = 0x4000;
  const HALF_LEN = 0x2000;

  if (bytes.length < HEADER_LEN + BLOCK_LEN) return null;
  const bodyLen = bytes.length - HEADER_LEN;
  if (bodyLen % BLOCK_LEN !== 0) return null;
  if (bytes[8] !== 0xaa || bytes[9] !== 0xbb) return null;

  const body = bytes.subarray(HEADER_LEN);
  const out = new Uint8Array(bodyLen);
  for (let blockStart = 0; blockStart < bodyLen; blockStart += BLOCK_LEN) {
    const oddHalf = body.subarray(blockStart, blockStart + HALF_LEN);
    const evenHalf = body.subarray(blockStart + HALF_LEN, blockStart + BLOCK_LEN);
    let dst = blockStart;
    for (let i = 0; i < HALF_LEN; i++) {
      out[dst++] = evenHalf[i]; // even byte (from the second 8KB)
      out[dst++] = oddHalf[i];  // odd byte (from the first 8KB)
    }
  }
  return out;
}
63
221
 
64
222
  /** Hex-format an address the way agents expect for the platform width. */
@@ -69,8 +227,8 @@ function hx(n) { return "0x" + (n >>> 0).toString(16); }
69
227
  * @returns {{platform, count, functions: Array<{address, name, size, nbbs, cc, callers, callees}>}}
70
228
  */
71
229
  export async function analyzeFunctions(romPath, platformOverride) {
72
- const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
73
- const fns = await runRizinJson({ romBytes, arch, bits, commands: "aaa; aflj" });
230
+ const { platform, romBytes, arch, bits, loadBase, warnings } = await loadContext(romPath, platformOverride);
231
+ const fns = await runRizinJson({ romBytes, arch, bits, baddr: loadBase || undefined, commands: "aaa; aflj" });
74
232
  const functions = fns.map((f) => ({
75
233
  address: f.offset,
76
234
  addressHex: hx(f.offset),
@@ -80,8 +238,29 @@ export async function analyzeFunctions(romPath, platformOverride) {
80
238
  cc: f.cc, // cyclomatic complexity
81
239
  callers: f.indegree ?? (f.codexrefs?.length ?? 0),
82
240
  callees: f.outdegree ?? 0,
241
+ // A3: rizin's flat sweep folds DATA regions into pseudo-functions with absurd
242
+ // `size` (megabyte "functions", phantoms exceeding the ROM). The honest
243
+ // signal is the BYTES-PER-BLOCK ratio, not raw size: real code averages tens
244
+ // of bytes per basic block; a "function" of thousands of bytes per block (or
245
+ // a single huge block with no control flow) is a data table / graphics blob
246
+ // mis-detected as a function. Flag it so agents don't waste a decompile on
247
+ // it. (A 16KB function with 35 blocks + cc 19 is a real big dispatcher — NOT
248
+ // flagged; size alone is the lie, the ratio isn't.)
249
+ looksLikeData:
250
+ (f.size ?? 0) > 0x400 &&
251
+ ((f.nbbs ?? 0) <= 1 || (f.size ?? 0) / Math.max(1, f.nbbs ?? 1) > 1024),
83
252
  }));
84
- return { platform, arch, count: functions.length, functions };
253
+ // Real code first: highest nbbs/cc, then smaller size — so the actual routines
254
+ // surface above the data-fold noise without the agent having to learn the rule.
255
+ functions.sort((a, b) =>
256
+ (a.looksLikeData ? 1 : 0) - (b.looksLikeData ? 1 : 0) ||
257
+ (b.nbbs ?? 0) - (a.nbbs ?? 0) ||
258
+ (b.cc ?? 0) - (a.cc ?? 0) ||
259
+ (a.size ?? 0) - (b.size ?? 0)
260
+ );
261
+ const dataCount = functions.filter((f) => f.looksLikeData).length;
262
+ return { platform, arch, count: functions.length, dataCount, functions,
263
+ ...(warnings?.length ? { warnings } : {}) };
85
264
  }
86
265
 
87
266
  /**
@@ -90,13 +269,13 @@ export async function analyzeFunctions(romPath, platformOverride) {
90
269
  */
91
270
  export async function analyzeCfg(romPath, address, platformOverride) {
92
271
  if (address == null) throw new Error("analyze cfg: address required");
93
- const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
272
+ const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
94
273
  // afbj = basic blocks of the function as JSON: each block has addr/size/jump/
95
274
  // fail/ninstr. `jump` is the taken edge; `fail` (present only on conditional
96
275
  // blocks) is the fall-through. This is the structured CFG source — `agf json`
97
276
  // only gives a text body blob with untyped out_nodes.
98
277
  const blocks = await runRizinJson({
99
- romBytes, arch, bits,
278
+ romBytes, arch, bits, baddr: loadBase || undefined,
100
279
  commands: `aaa; af @ ${hx(address)}; afbj @ ${hx(address)}`,
101
280
  });
102
281
  if (!Array.isArray(blocks) || blocks.length === 0) {
@@ -129,10 +308,10 @@ export async function analyzeCfg(romPath, address, platformOverride) {
129
308
  */
130
309
  export async function analyzeXrefs(romPath, address, platformOverride) {
131
310
  if (address == null) throw new Error("analyze xrefs: address required");
132
- const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
311
+ const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
133
312
  let refs;
134
313
  try {
135
- refs = await runRizinJson({ romBytes, arch, bits, commands: `aaa; axtj @ ${hx(address)}` });
314
+ refs = await runRizinJson({ romBytes, arch, bits, baddr: loadBase || undefined, commands: `aaa; axtj @ ${hx(address)}` });
136
315
  } catch (e) {
137
316
  // axtj prints nothing (not even `[]`) when there are zero refs → our JSON
138
317
  // guard throws. Treat "no JSON" as "no refs".
@@ -154,11 +333,12 @@ export async function analyzeXrefs(romPath, address, platformOverride) {
154
333
  * analysis pass. The "give me the shape of this ROM" call.
155
334
  */
156
335
  export async function analyzeStructure(romPath, platformOverride) {
157
- const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
336
+ const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
337
+ const baddr = loadBase || undefined;
158
338
  const [fns, strings, entries] = await Promise.all([
159
- runRizinJson({ romBytes, arch, bits, commands: "aaa; aflj" }).catch(() => []),
160
- runRizinJson({ romBytes, arch, bits, commands: "aaa; izj" }).catch(() => []),
161
- runRizinJson({ romBytes, arch, bits, commands: "aaa; iej" }).catch(() => []),
339
+ runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; aflj" }).catch(() => []),
340
+ runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; izj" }).catch(() => []),
341
+ runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; iej" }).catch(() => []),
162
342
  ]);
163
343
  return {
164
344
  platform, arch,
@@ -204,16 +384,170 @@ export async function vaMapping(romBytes, arch, bits, vaddr, platform) {
204
384
  return { paddr: vaddr, vbase: 0 };
205
385
  }
206
386
 
387
/**
 * Build a CPU-ADDRESSED sparse image of a SNES cart for the decompiler.
 *
 * The image is a zero-filled 16MB (24-bit) buffer with each ROM chunk copied to
 * the CPU address(es) it appears at, so the decompiler offset for a CPU address
 * is the address itself. That way both a function's own address and its
 * in-bank / long-call operands resolve against the same buffer.
 *
 * Layout:
 *   - A 512-byte copier header is skipped when `length % 0x8000 === 0x200`.
 *   - LoROM: 32KB file chunk N → bank N at $8000-$FFFF, mirrored at bank N|$80.
 *   - HiROM: 64KB file chunk N → bank $C0+N (full bank), mirrored at bank
 *     $40+N; the chunk's upper 32KB is also placed at $00+N:$8000 and
 *     $80+N:$8000.
 *
 * Mapper selection: `mapperHint` "lorom"/"hirom" wins. Otherwise the map-mode
 * byte at header offset +$15 is read from both candidate header locations
 * ($7FC0 and $FFC0, after any copier header); HiROM is chosen only when the
 * HiROM candidate matches and the LoROM candidate does not. Anything else,
 * including ambiguity, defaults to LoROM.
 *
 * Oversized ROMs: only as many chunks as the layout can address are placed
 * (128 × 32KB for LoROM, 64 × 64KB for HiROM). Extra trailing data is ignored
 * rather than wrapping over earlier banks or running past the image.
 *
 * @param {Uint8Array} romBytes raw cart file (optionally with copier header)
 * @param {"lorom"|"hirom"} [mapperHint] force the layout instead of sniffing it
 * @returns {{ image: Uint8Array, isLo: boolean }}
 */
export function buildSnesCpuImage(romBytes, mapperHint) {
  const CPU_SPACE = 0x1000000; // full 24-bit address space
  const LO_CHUNK = 0x8000;
  const HI_CHUNK = 0x10000;
  const MAX_LO_BANKS = 0x80; // home banks $00-$7F
  const MAX_HI_BANKS = 0x40; // banks $C0-$FF

  const copierOff = (romBytes.length % 0x8000 === 0x200) ? 0x200 : 0;

  let isLo;
  if (mapperHint === "lorom") {
    isLo = true;
  } else if (mapperHint === "hirom") {
    isLo = false;
  } else {
    // Reads past the end of a short file yield undefined, which matches
    // neither set of map-mode values, so small images fall through to LoROM.
    const loByte = romBytes[copierOff + 0x7FC0 + 0x15];
    const hiByte = romBytes[copierOff + 0xFFC0 + 0x15];
    const detLo = loByte === 0x20 || loByte === 0x30 || loByte === 0x32;
    const detHi = hiByte === 0x21 || hiByte === 0x31;
    isLo = !(detHi && !detLo); // default LoROM when ambiguous
  }

  const body = romBytes.subarray(copierOff);
  const image = new Uint8Array(CPU_SPACE);

  if (isLo) {
    // Clamp: beyond 128 chunks the home-bank index would wrap and overwrite
    // bank 0 onwards with trailing data.
    const fileBanks = Math.min(Math.ceil(body.length / LO_CHUNK), MAX_LO_BANKS);
    for (let b = 0; b < fileBanks; b++) {
      const src = body.subarray(b * LO_CHUNK, (b + 1) * LO_CHUNK);
      image.set(src, b * 0x10000 + 0x8000);          // $b:8000 (home)
      image.set(src, (b | 0x80) * 0x10000 + 0x8000); // $(b|80):8000 (mirror)
    }
    return { image, isLo: true };
  }

  // Clamp: chunk 0x40 would land at bank $100, past the end of the image
  // (TypedArray.set throws RangeError), and its mirrors would overwrite the
  // primary banks of earlier chunks.
  const fileBanks = Math.min(Math.ceil(body.length / HI_CHUNK), MAX_HI_BANKS);
  for (let b = 0; b < fileBanks; b++) {
    const src = body.subarray(b * HI_CHUNK, (b + 1) * HI_CHUNK);
    image.set(src, (0xC0 + b) * 0x10000);            // $C0+b: full bank (primary)
    image.set(src, (0x40 + b) * 0x10000);            // $40+b: mirror
    const upper = src.subarray(0x8000);              // $8000-$FFFF half (may be empty)
    image.set(upper, b * 0x10000 + 0x8000);          // $00+b:8000 mirror
    image.set(upper, (0x80 + b) * 0x10000 + 0x8000); // $80+b:8000 mirror
  }
  return { image, isLo: false };
}
449
+
450
/**
 * Bank-aware NES image for the decompiler (A1).
 *
 * `flatVa` is a FLAT-PRG virtual address: $8000 + offset into the PRG data, so
 * bank 0 sits at $8000-$BFFF, bank 1 at $C000-$FFFF, bank 2 at $10000+, and so
 * on. Decompiling such a flat image is bank-blind: an in-code absolute address
 * like `JSR $9123` would resolve against bank 0 regardless of which bank the
 * caller lives in.
 *
 * This function instead recovers which 16KB PRG bank holds the function and
 * builds a 64KB buffer with that bank at $8000-$BFFF and the LAST PRG bank at
 * $C000-$FFFF, and reports the function's address inside that window.
 *
 * NOTE(review): treating the last bank as fixed at $C000 is a mapper
 * assumption; the mapper number in the header is not consulted here.
 * NOTE(review): PRG data is assumed to start right after the 16-byte header;
 * the header's trainer flag is not examined. Confirm this matches how the
 * flat addresses passed in were produced.
 *
 * @param {Uint8Array} romBytes raw .nes file contents
 * @param {number} flatVa flat-PRG virtual address ($8000-based) of the function
 * @returns {{ image: Uint8Array, cpuAddr: number, bank: number } | null} null
 *   when the file is not iNES, has 32KB of PRG or less (flat is already
 *   correct), or `flatVa` falls outside the PRG data actually present in the
 *   file. The caller then falls back to the flat path.
 */
export function buildNesBankImage(romBytes, flatVa) {
  const BANK = 0x4000;

  // iNES magic: "NES" followed by 0x1A.
  const isInes =
    romBytes[0] === 0x4e && romBytes[1] === 0x45 &&
    romBytes[2] === 0x53 && romBytes[3] === 0x1a;
  if (!isInes) return null;

  const prgBanks16k = romBytes[4];
  const prgSize = prgBanks16k * BANK;
  if (prgSize <= 0x8000) return null; // NROM-128/256: flat is correct

  const prgStart = 16;
  // subarray clamps to the file length, so `prg` may be shorter than prgSize
  // when the file is truncated.
  const prg = romBytes.subarray(prgStart, prgStart + prgSize);

  // Flat VA → flat PRG offset (segment based at $8000).
  const flatOff = (flatVa >>> 0) - 0x8000;
  if (flatOff < 0 || flatOff >= prgSize) return null;
  // Truncated file: the function's bytes are not there. Returning a window of
  // zeros would decompile as garbage, so defer to the caller's fallback.
  if (flatOff >= prg.length) return null;

  const bank = Math.floor(flatOff / BANK); // which 16KB bank
  const inBank = flatOff % BANK;           // offset within it
  const topBank = prgBanks16k - 1;         // last bank

  // CPU window: chosen bank at $8000, last bank at $C000.
  const image = new Uint8Array(0x10000);
  image.set(prg.subarray(bank * BANK, (bank + 1) * BANK), 0x8000);
  image.set(prg.subarray(topBank * BANK, (topBank + 1) * BANK), 0xC000);

  // A function in the last bank is addressed through the $C000 slot; anything
  // else through the switchable $8000 slot.
  const cpuAddr = bank === topBank ? 0xC000 + inBank : 0x8000 + inBank;
  return { image, cpuAddr, bank };
}
494
+
207
495
  /**
208
496
  * Decompile the function containing `address` to C pseudocode (Ghidra).
209
- * @returns {{platform, langid, address, code, warnings, qualityNote}}
497
+ * @returns {{platform, langid, address, code, warnings, qualityNote, bank?}}
210
498
  */
211
499
  export async function analyzeDecompile(romPath, address, platformOverride) {
212
500
  if (address == null) throw new Error("analyze decompile: address required");
213
501
  const platform = platformOverride ?? sniffPlatform(romPath);
214
502
  if (!platform) throw new Error(`analyze decompile: unknown platform for '${path.basename(romPath)}'`);
215
503
  if (!SLEIGH_LANGID[platform]) throw new Error(`analyze decompile: unsupported platform '${platform}'`);
216
- const romBytes = new Uint8Array(await readFile(romPath));
504
+ let romBytes = new Uint8Array(await readFile(romPath));
505
+ // A6: deinterleave SMD Genesis dumps here too (analyzeDecompile reads the file
506
+ // directly, not via loadContext) — a flat read of an interleaved ROM decodes
507
+ // to pure garbage.
508
+ if (platform === "genesis") romBytes = deinterleaveSmd(romBytes) ?? romBytes;
509
+
510
+ // SNES: banked 24-bit space. `address` is a LoROM/HiROM CPU address (what
511
+ // target='functions'/'cfg' report). Lay the cart out by CPU address so BOTH
512
+ // the function address AND its in-bank/JSL operands resolve, then decompile at
513
+ // the CPU address directly. (Flat-at-0 would decompile file[address] — the
514
+ // wrong bank — and mis-label every operand.)
515
+ if (platform === "snes") {
516
+ const { image } = buildSnesCpuImage(romBytes);
517
+ // The image is laid out by CPU address, so the file offset IS the address.
518
+ const imgOff = address >>> 0;
519
+ if (imgOff < 0 || imgOff >= image.length) {
520
+ throw new Error(
521
+ `decompile: SNES address ${hx(address)} is outside the ${image.length}-byte CPU image ` +
522
+ `(is it a valid LoROM/HiROM code address?).`
523
+ );
524
+ }
525
+ const rs = await decompileFunction({ platform, romBytes: image, fileOffset: imgOff });
526
+ return {
527
+ platform, langid: rs.langid,
528
+ address, addressHex: hx(address),
529
+ code: prettyDecompile(rs.code, platform), warnings: rs.warnings,
530
+ qualityNote: "medium (65816 variable register width)",
531
+ };
532
+ }
533
+
534
+ // NES banked carts (A1): rizin reports flat-PRG VAs ($8000-based); decompiling
535
+ // that flat image is bank-blind (cross-bank JSR/JMP land on the wrong bank).
536
+ // Build a real 32KB CPU window (this bank @ $8000 + fixed top bank @ $C000) so
537
+ // in-bank AND fixed-bank calls resolve. NROM falls through to the flat path.
538
+ if (platform === "nes") {
539
+ const banked = buildNesBankImage(romBytes, address);
540
+ if (banked) {
541
+ const rn = await decompileFunction({ platform, romBytes: banked.image, fileOffset: banked.cpuAddr });
542
+ return {
543
+ platform, langid: rn.langid,
544
+ address, addressHex: hx(address),
545
+ bank: banked.bank,
546
+ code: prettyDecompile(rn.code, platform), warnings: rn.warnings,
547
+ qualityNote: "rough (6502 architecture limit)",
548
+ };
549
+ }
550
+ }
217
551
 
218
552
  // Use rizin's loader mapping to turn the VA (what the user sees from
219
553
  // target='functions') into the file offset the raw decompiler image needs.
@@ -221,24 +555,21 @@ export async function analyzeDecompile(romPath, address, platformOverride) {
221
555
  // decompiler's job via SLEIGH) — its flat image bases at 0 either way.
222
556
  const arch = RIZIN_ARCH[platform] ?? "6502";
223
557
  const bits = { arm: 32, m68k: 32, snes: 16 }[arch];
224
- const { paddr, vbase } = await vaMapping(romBytes, arch, bits, address, platform);
225
- if (paddr < 0 || paddr >= romBytes.length) {
226
- throw new Error(
227
- `decompile: address ${hx(address)} maps to file offset ${paddr}, outside the ` +
228
- `${romBytes.length}-byte image for ${platform}.`
229
- );
230
- }
231
- // The raw decompiler loads byte 0 at VMA 0. Code that references absolute CPU
232
- // addresses (typical 6502: JSR/JMP to $Fxxx) only resolves if the image sits
233
- // at the right CPU base. Rizin's map gives `vbase` when it knows the base;
234
- // some headerless carts (2600/7800) it loads at 0, so we supply the base from
235
- // a per-platform table. Left-pad the image by the base so file offset == CPU
236
- // address, then decompile at the function's CPU address. Capped at 64KB (the
237
- // 6502 family's whole address space) so a large base never over-allocates.
238
- // Atari 2600/7800 are headerless 6502 dumps rizin loads at 0; supply the real
239
- // CPU base so absolute references ($8000/$C000/$F000) resolve. 7800's base is
240
- // size-dependent (16KB→$C000, 32KB→$8000); 7800 carts may carry a 128-byte
241
- // header before the body.
558
+
559
+ const QUALITY = {
560
+ gba: "excellent (ARM)", genesis: "excellent (M68K)",
561
+ gb: "good (SM83)", gbc: "good (SM83)", sms: "good (Z80)", gg: "good (Z80)", msx: "good (Z80)",
562
+ snes: "medium (65816 variable register width)", pce: "medium (HuC6280)",
563
+ nes: "rough (6502 architecture limit)", atari2600: "rough (6502)", atari7800: "rough (6502)",
564
+ c64: "rough (6502)", lynx: "rough (65C02)",
565
+ };
566
+
567
+ // FORCED-BASE platforms (headerless/load-header 6502 carts): rizin/our
568
+ // analysis bases these at a known CPU address, so `address` IS a CPU address
569
+ // and `functions` already reported it as such. Strip any header, left-pad the
570
+ // body so file offset == CPU address, and decompile at `address` directly.
571
+ // 2600 $F000; 7800 size-dependent $8000-$C000 (+128B header if "AT…");
572
+ // c64 .prg the 2-byte load-address header's value (e.g. $0801).
242
573
  let forcedBase = 0, bodyStart = 0;
243
574
  if (platform === "atari2600") {
244
575
  forcedBase = 0xf000;
@@ -248,28 +579,56 @@ export async function analyzeDecompile(romPath, address, platformOverride) {
248
579
  bodyStart = hasHdr ? 128 : 0;
249
580
  const body = romBytes.length - bodyStart;
250
581
  forcedBase = body <= 0x4000 ? 0xc000 : body <= 0x8000 ? 0x8000 : 0x4000;
582
+ } else if (platform === "c64" && romBytes.length >= 2) {
583
+ bodyStart = 2;
584
+ forcedBase = romBytes[0] | (romBytes[1] << 8);
251
585
  }
252
- const base = vbase > 0 ? vbase : forcedBase;
586
+ if (forcedBase > 0 && forcedBase <= 0x10000) {
587
+ const body = romBytes.subarray(bodyStart);
588
+ // Accept `address` as EITHER a CPU address (≥ forcedBase, what functions
589
+ // reports once baddr is applied) OR a raw body file-offset (< forcedBase,
590
+ // legacy callers / direct offsets). Normalize to a CPU address.
591
+ const a = address >>> 0;
592
+ const cpuAddr = a >= forcedBase ? a : forcedBase + a;
593
+ if (cpuAddr < forcedBase || cpuAddr >= forcedBase + body.length) {
594
+ throw new Error(
595
+ `decompile: address ${hx(a)} is outside the ${platform} CPU image ` +
596
+ `($${forcedBase.toString(16)}-$${(forcedBase + body.length).toString(16)}).`
597
+ );
598
+ }
599
+ const padded = new Uint8Array(forcedBase + body.length);
600
+ padded.set(body, forcedBase);
601
+ const rf = await decompileFunction({ platform, romBytes: padded, fileOffset: cpuAddr });
602
+ return {
603
+ platform, langid: rf.langid, address, addressHex: hx(address),
604
+ code: prettyDecompile(rf.code, platform), warnings: rf.warnings, qualityNote: QUALITY[platform] ?? "unknown",
605
+ };
606
+ }
607
+
608
+ // Other platforms: use rizin's loader mapping to turn the CPU VA into the file
609
+ // offset the raw decompiler image needs. Rizin's map gives `vbase` when it
610
+ // knows the base; left-pad by it so file offset == CPU address for the cases
611
+ // where the code references absolute addresses.
612
+ const { paddr, vbase } = await vaMapping(romBytes, arch, bits, address, platform);
613
+ if (paddr < 0 || paddr >= romBytes.length) {
614
+ throw new Error(
615
+ `decompile: address ${hx(address)} maps to file offset ${paddr}, outside the ` +
616
+ `${romBytes.length}-byte image for ${platform}.`
617
+ );
618
+ }
619
+ const base = vbase;
253
620
  let image = romBytes, decompAddr = paddr;
254
621
  if (base > 0 && base <= 0x10000) {
255
- const body = romBytes.subarray(bodyStart);
256
- const padded = new Uint8Array(base + body.length);
257
- padded.set(body, base);
622
+ const padded = new Uint8Array(base + romBytes.length);
623
+ padded.set(romBytes, base);
258
624
  image = padded;
259
- decompAddr = base + (paddr - bodyStart); // CPU address of the function
625
+ decompAddr = base + paddr; // CPU address of the function
260
626
  }
261
627
  const r = await decompileFunction({ platform, romBytes: image, fileOffset: decompAddr });
262
- const QUALITY = {
263
- gba: "excellent (ARM)", genesis: "excellent (M68K)",
264
- gb: "good (SM83)", gbc: "good (SM83)", sms: "good (Z80)", gg: "good (Z80)", msx: "good (Z80)",
265
- snes: "medium (65816 variable register width)", pce: "medium (HuC6280)",
266
- nes: "rough (6502 architecture limit)", atari2600: "rough (6502)", atari7800: "rough (6502)",
267
- c64: "rough (6502)", lynx: "rough (65C02)",
268
- };
269
628
  return {
270
629
  platform, langid: r.langid,
271
630
  address, addressHex: hx(address),
272
- code: r.code,
631
+ code: prettyDecompile(r.code, platform),
273
632
  warnings: r.warnings,
274
633
  qualityNote: QUALITY[platform] ?? "unknown",
275
634
  };
@@ -77,7 +77,7 @@ export const RIZIN_ARCH = {
77
77
  * @returns {Promise<{exitCode:number, output:string, log:string, crash?:object}>}
78
78
  */
79
79
  export async function runRizin(opts) {
80
- const { romPath, romBytes, commands, arch, bits, baddr, writeable } = opts;
80
+ const { romPath, romBytes, commands, arch, bits, baddr, writeable, timeoutMs } = opts;
81
81
  if (!commands) throw new Error("runRizin: commands required");
82
82
  const bytes = romBytes ?? new Uint8Array(await readFile(romPath));
83
83
 
@@ -102,6 +102,11 @@ export async function runRizin(opts) {
102
102
  const res = await runIsolated({
103
103
  gluePath: rizinGluePath(),
104
104
  argv,
105
+ // A5: per-call timeout so a hung analysis (whole-ROM `aaa` on a multi-MB ROM)
106
+ // can't wedge the shared worker pool — on timeout the worker is killed +
107
+ // recycled and this call returns a clean { timedOut, log } result. Default
108
+ // 60s; callers can override (a scoped `af @ addr` pass is near-instant).
109
+ timeoutMs: timeoutMs ?? 60000,
105
110
  inputFiles: [{
106
111
  vfsPath: "/work/rom.bin",
107
112
  encoding: "base64",
@@ -109,6 +114,13 @@ export async function runRizin(opts) {
109
114
  }],
110
115
  outputFiles: [{ vfsPath: OUT, encoding: "utf8" }],
111
116
  });
117
+ // Surface a timeout as a thrown error so JSON callers get a clear signal
118
+ // (runRizinJson already wraps crashes; this makes the timeout explicit).
119
+ if (res.timedOut) {
120
+ const e = new Error(res.log?.trim() || "rizin analysis timed out");
121
+ /** @type {any} */ (e).timedOut = true;
122
+ throw e;
123
+ }
112
124
  return { ...res, output: res.outputs?.[OUT] ?? "" };
113
125
  }
114
126