romdevtools 0.40.1 → 0.41.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/CHANGELOG.md +97 -0
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/analysis/analyze.js +405 -46
- package/src/analysis/rizin.js +13 -1
- package/src/cores/capabilities.js +218 -0
- package/src/mcp/tools/disasm.js +23 -4
- package/src/mcp/tools/platform-tools.js +17 -5
- package/src/mcp/tools/platforms.js +18 -3
- package/src/mcp/tools/rendering-context.js +5 -4
- package/src/mcp/tools/watch-memory.js +144 -2
- package/src/mcp/util.js +37 -0
- package/src/platforms/_guides/ROMHACKING_PLAYBOOK.md +23 -8
- package/src/toolchains/_worker/pool.js +41 -3
package/src/analysis/analyze.js
CHANGED
|
@@ -14,6 +14,110 @@ import { readFile } from "node:fs/promises";
|
|
|
14
14
|
import path from "node:path";
|
|
15
15
|
import { runRizin, runRizinJson, RIZIN_ARCH } from "./rizin.js";
|
|
16
16
|
import { decompileFunction, SLEIGH_LANGID } from "./decompile.js";
|
|
17
|
+
import { registersForPlatform } from "../platforms/common/registers.js";
|
|
18
|
+
|
|
19
|
+
/**
 * B2: swap Ghidra's raw memory-reference identifiers for hardware-register names.
 *
 * The decompiler spells an MMIO access as a short lowercase type prefix, `RAM`,
 * and the hex address (`xRAM2001`, `uRAM400e`). Whenever that address is a key in
 * the platform's register table, the identifier is replaced by the register's
 * name so the C reads `PPUMASK = ...` rather than `xRAM2001 = ...`.
 *
 * @param {string} code      decompiler C text
 * @param {string} platform  platform id handed to `registersForPlatform`
 * @returns {string} `code` untouched when the platform has no register table or
 *   nothing matched; otherwise the rewritten text prefixed with a one-line
 *   `/* hw registers: NAME=$ADDR, ... *\/` legend of the substitutions made.
 */
export function nameHardwareRegisters(code, platform) {
  const registerTable = registersForPlatform(platform);
  if (!registerTable || Object.keys(registerTable).length === 0) return code;

  // address -> register name, in order of first substitution (drives the legend).
  const substituted = new Map();

  // A few lowercase type-prefix letters, then RAM, then the hex address:
  // xRAM2001, uRAM400e, cRAM00ff.
  const renamed = code.replace(/\b[a-z]{1,3}RAM([0-9a-fA-F]{2,6})\b/g, (identifier, hexDigits) => {
    const address = parseInt(hexDigits, 16);
    const label = registerTable[address];
    if (label) {
      substituted.set(address, label);
      return label;
    }
    // Not a known register: keep the identifier exactly as Ghidra wrote it.
    return identifier;
  });

  if (substituted.size === 0) return code;

  const legendItems = [];
  for (const [address, label] of substituted) {
    legendItems.push(`${label}=$${address.toString(16).toUpperCase()}`);
  }
  return `/* hw registers: ${legendItems.join(", ")} */\n${renamed}`;
}
|
|
43
|
+
|
|
44
|
+
/** Platform ids that receive the B1 fold: the 6502-family targets whose SLEIGH
 * (Ghidra) output carries the 8-bit clutter the fold cleans up. */
const SIXTY_FIVE_OH_TWO = new Set(["nes", "atari2600", "atari7800", "c64", "lynx", "pce"]);

/**
 * B1: deterministic 6502 idiom-folding post-pass over decompiler C text.
 *
 * Three purely textual rewrites are applied, in this order:
 *   1. SLEIGH width types become C99 stdint names. The numeric suffix is a BYTE
 *      count: uint1/int1 -> uint8_t/int8_t, uint2/int2 -> uint16_t/int16_t,
 *      uint4/int4 -> uint32_t/int32_t; xunknownN/undefinedN map to the unsigned
 *      type of the same width.
 *   2. A widening cast wrapped directly around a `(uint8_t)` cast is dropped:
 *      `(uint16_t)(uint8_t)x` -> `(uint8_t)x`. At most three sweeps are made so
 *      short chains collapse.
 *   3. Zero-page references (`cRAM00fd`, `uRAM0012`) become `zp_FD`, `zp_12`.
 *      Only the `00xx` form matches; other addresses are left alone. MMIO is
 *      expected to have been named already by nameHardwareRegisters.
 *
 * Carry-flag 16-bit arithmetic and BCD reconstruction are intentionally not
 * attempted here; a textual rewrite of those risks changing semantics.
 *
 * @param {string} code      decompiler C text
 * @param {string} platform  platform id; ids outside the 6502 family are a no-op
 * @returns {string} `code` unchanged when nothing applied, otherwise the folded
 *   text prefixed with a `/* 6502 fold: ... *\/` legend listing what was applied.
 */
export function foldSixtyFiveOhTwoIdioms(code, platform) {
  if (!SIXTY_FIVE_OH_TWO.has(platform)) return code;

  const notes = [];

  // 1) Width types. Every pattern is a whole-word match; the xunknown/undefined
  //    families have unknown signedness and are treated as unsigned.
  const widthTypes = [
    { pattern: /\buint1\b/g, stdint: "uint8_t" },
    { pattern: /\bint1\b/g, stdint: "int8_t" },
    { pattern: /\buint2\b/g, stdint: "uint16_t" },
    { pattern: /\bint2\b/g, stdint: "int16_t" },
    { pattern: /\buint4\b/g, stdint: "uint32_t" },
    { pattern: /\bint4\b/g, stdint: "int32_t" },
    { pattern: /\bxunknown1\b/g, stdint: "uint8_t" },
    { pattern: /\bundefined1\b/g, stdint: "uint8_t" },
    { pattern: /\bxunknown2\b/g, stdint: "uint16_t" },
    { pattern: /\bundefined2\b/g, stdint: "uint16_t" },
    { pattern: /\bxunknown4\b/g, stdint: "uint32_t" },
    { pattern: /\bundefined4\b/g, stdint: "uint32_t" },
  ];
  let typeHits = 0;
  let text = widthTypes.reduce(
    (acc, { pattern, stdint }) => acc.replace(pattern, () => {
      typeHits += 1;
      return stdint;
    }),
    code,
  );
  if (typeHits > 0) notes.push("SLEIGH width types → stdint");

  // 2) Nested width casts. Keep the inner (uint8_t) and drop the cast wrapped
  //    around it; stop early once a sweep changes nothing.
  let castHits = 0;
  let sweeps = 0;
  let settled = false;
  while (!settled && sweeps < 3) {
    const swept = text.replace(/\((uint(?:8|16|32)_t)\)\((uint8_t)\)/g, (_whole, _outer, inner) => {
      castHits += 1;
      return `(${inner})`;
    });
    settled = swept === text;
    text = swept;
    sweeps += 1;
  }
  if (castHits > 0) notes.push("redundant width casts collapsed");

  // 3) Zero-page slots. Requires "00" followed by exactly two hex digits, so a
  //    reference such as xRAM0312 is not renamed.
  const slots = new Set();
  text = text.replace(/\b[a-z]{1,3}RAM00([0-9a-fA-F]{2})\b/g, (_whole, lowByte) => {
    const slotName = "zp_" + lowByte.toUpperCase();
    slots.add(slotName);
    return slotName;
  });
  if (slots.size > 0) {
    const plural = slots.size > 1 ? "s" : "";
    notes.push(`${slots.size} zero-page slot${plural} named zp_XX`);
  }

  if (notes.length === 0) return code;
  return `/* 6502 fold: ${notes.join("; ")} */\n` + text;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Run the readability post-passes over a decompiler C body.
 *
 * Order matters: B2 hardware-register naming goes first so MMIO references are
 * already named (PPUMASK etc.) before the B1 6502 fold's zero-page labeler sees
 * the text. Each pass returns its input unchanged when it has nothing to do for
 * the platform, so this can be called for any platform.
 *
 * @param {string} code      decompiler C text
 * @param {string} platform  platform id
 * @returns {string} the post-processed C text
 */
export function prettyDecompile(code, platform) {
  const withRegisterNames = nameHardwareRegisters(code, platform);
  return foldSixtyFiveOhTwoIdioms(withRegisterNames, platform);
}
|
|
17
121
|
|
|
18
122
|
/** Sniff platform from a ROM extension (mirrors disasm.js). */
|
|
19
123
|
export function sniffPlatform(p) {
|
|
@@ -53,12 +157,66 @@ async function loadContext(romPath, platformOverride) {
|
|
|
53
157
|
if (arch == null) {
|
|
54
158
|
throw new Error(`analyze: no Rizin arch mapping for platform '${platform}'`);
|
|
55
159
|
}
|
|
56
|
-
|
|
160
|
+
let romBytes = new Uint8Array(await readFile(romPath));
|
|
57
161
|
// PCE: rizin's 6502 plugin drives the loader + standard control flow for
|
|
58
162
|
// function detection, but mis-decodes HuC6280 custom opcodes — CFG/xrefs are
|
|
59
163
|
// approximate. Accurate HuC6280 decode is the decompiler's job (SLEIGH spec).
|
|
60
164
|
const approx = platform === "pce";
|
|
61
|
-
|
|
165
|
+
|
|
166
|
+
// Address-space prep (A2): some formats carry a header and load at a CPU base
|
|
167
|
+
// that isn't 0. Strip the header and report `loadBase` so rizin's functions
|
|
168
|
+
// (and the decompiler image) speak CPU addresses, not raw file offsets.
|
|
169
|
+
// c64 .prg — 2-byte little-endian LOAD ADDRESS header, code at that address
|
|
170
|
+
// (typically $0801 = BASIC start). Without this, rizin analyzes the header
|
|
171
|
+
// bytes as code at offset 0 and every address is a file offset, not a CPU
|
|
172
|
+
// address — functions→decompile round-trip lands on garbage.
|
|
173
|
+
let loadBase = 0;
|
|
174
|
+
if (platform === "c64" && romBytes.length >= 2) {
|
|
175
|
+
loadBase = romBytes[0] | (romBytes[1] << 8);
|
|
176
|
+
romBytes = romBytes.subarray(2);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// A6: container/format sniff. Some dumps are interleaved/headered such that a
|
|
180
|
+
// FLAT read scrambles every byte → fake "bad instruction" noise everywhere.
|
|
181
|
+
// Detect + auto-correct, and warn so a flat disasm isn't silently wrong.
|
|
182
|
+
const warnings = [];
|
|
183
|
+
if (platform === "genesis") {
|
|
184
|
+
const smd = deinterleaveSmd(romBytes);
|
|
185
|
+
if (smd) {
|
|
186
|
+
romBytes = smd;
|
|
187
|
+
warnings.push("Genesis ROM was SMD-INTERLEAVED (512-byte header + byte-swapped 16KB blocks) — " +
|
|
188
|
+
"auto-deinterleaved before analysis. A flat read of the original would scramble every instruction.");
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
return { platform, romBytes, arch, bits: BITS[arch], approx, loadBase, warnings };
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/**
 * Detect and undo Sega Mega Drive SMD interleaving.
 *
 * An .smd dump is a 512-byte header followed by 16KB blocks. Within a block the
 * first 8KB carries the ODD-offset bytes of the flat ROM and the second 8KB the
 * EVEN-offset bytes. An image is accepted as SMD only when all of these hold:
 *   - it contains the header plus at least one whole block,
 *   - the body length is an exact multiple of 16KB,
 *   - header byte 8 is 0xAA and byte 9 is 0xBB (the SMD magic). The magic is
 *     required so a flat .bin that merely has a matching size is not mangled.
 *
 * @param {Uint8Array} bytes  raw file contents
 * @returns {Uint8Array|null} the deinterleaved ROM (header removed), or null
 *   when the image is not SMD-interleaved.
 */
export function deinterleaveSmd(bytes) {
  const HEADER_LEN = 512;
  const BLOCK_LEN = 0x4000;
  const HALF_LEN = 0x2000;

  const totalLen = bytes.length;
  if (totalLen < HEADER_LEN + BLOCK_LEN) return null;

  const payloadLen = totalLen - HEADER_LEN;
  if (payloadLen % BLOCK_LEN !== 0) return null;
  if (bytes[8] !== 0xaa || bytes[9] !== 0xbb) return null;

  const payload = bytes.subarray(HEADER_LEN);
  const flat = new Uint8Array(payloadLen);
  for (let blockStart = 0; blockStart < payloadLen; blockStart += BLOCK_LEN) {
    const oddHalf = payload.subarray(blockStart, blockStart + HALF_LEN);
    const evenHalf = payload.subarray(blockStart + HALF_LEN, blockStart + BLOCK_LEN);
    // Weave the two halves back together: even byte first, then odd byte.
    for (let k = 0, dst = blockStart; k < HALF_LEN; k += 1, dst += 2) {
      flat[dst] = evenHalf[k];
      flat[dst + 1] = oddHalf[k];
    }
  }
  return flat;
}
|
|
63
221
|
|
|
64
222
|
/** Hex-format an address the way agents expect for the platform width. */
|
|
@@ -69,8 +227,8 @@ function hx(n) { return "0x" + (n >>> 0).toString(16); }
|
|
|
69
227
|
* @returns {{platform, count, functions: Array<{address, name, size, nbbs, cc, callers, callees}>}}
|
|
70
228
|
*/
|
|
71
229
|
export async function analyzeFunctions(romPath, platformOverride) {
|
|
72
|
-
const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
|
|
73
|
-
const fns = await runRizinJson({ romBytes, arch, bits, commands: "aaa; aflj" });
|
|
230
|
+
const { platform, romBytes, arch, bits, loadBase, warnings } = await loadContext(romPath, platformOverride);
|
|
231
|
+
const fns = await runRizinJson({ romBytes, arch, bits, baddr: loadBase || undefined, commands: "aaa; aflj" });
|
|
74
232
|
const functions = fns.map((f) => ({
|
|
75
233
|
address: f.offset,
|
|
76
234
|
addressHex: hx(f.offset),
|
|
@@ -80,8 +238,29 @@ export async function analyzeFunctions(romPath, platformOverride) {
|
|
|
80
238
|
cc: f.cc, // cyclomatic complexity
|
|
81
239
|
callers: f.indegree ?? (f.codexrefs?.length ?? 0),
|
|
82
240
|
callees: f.outdegree ?? 0,
|
|
241
|
+
// A3: rizin's flat sweep folds DATA regions into pseudo-functions with absurd
|
|
242
|
+
// `size` (megabyte "functions", phantoms exceeding the ROM). The honest
|
|
243
|
+
// signal is the BYTES-PER-BLOCK ratio, not raw size: real code averages tens
|
|
244
|
+
// of bytes per basic block; a "function" of thousands of bytes per block (or
|
|
245
|
+
// a single huge block with no control flow) is a data table / graphics blob
|
|
246
|
+
// mis-detected as a function. Flag it so agents don't waste a decompile on
|
|
247
|
+
// it. (A 16KB function with 35 blocks + cc 19 is a real big dispatcher — NOT
|
|
248
|
+
// flagged; size alone is the lie, the ratio isn't.)
|
|
249
|
+
looksLikeData:
|
|
250
|
+
(f.size ?? 0) > 0x400 &&
|
|
251
|
+
((f.nbbs ?? 0) <= 1 || (f.size ?? 0) / Math.max(1, f.nbbs ?? 1) > 1024),
|
|
83
252
|
}));
|
|
84
|
-
|
|
253
|
+
// Real code first: highest nbbs/cc, then smaller size — so the actual routines
|
|
254
|
+
// surface above the data-fold noise without the agent having to learn the rule.
|
|
255
|
+
functions.sort((a, b) =>
|
|
256
|
+
(a.looksLikeData ? 1 : 0) - (b.looksLikeData ? 1 : 0) ||
|
|
257
|
+
(b.nbbs ?? 0) - (a.nbbs ?? 0) ||
|
|
258
|
+
(b.cc ?? 0) - (a.cc ?? 0) ||
|
|
259
|
+
(a.size ?? 0) - (b.size ?? 0)
|
|
260
|
+
);
|
|
261
|
+
const dataCount = functions.filter((f) => f.looksLikeData).length;
|
|
262
|
+
return { platform, arch, count: functions.length, dataCount, functions,
|
|
263
|
+
...(warnings?.length ? { warnings } : {}) };
|
|
85
264
|
}
|
|
86
265
|
|
|
87
266
|
/**
|
|
@@ -90,13 +269,13 @@ export async function analyzeFunctions(romPath, platformOverride) {
|
|
|
90
269
|
*/
|
|
91
270
|
export async function analyzeCfg(romPath, address, platformOverride) {
|
|
92
271
|
if (address == null) throw new Error("analyze cfg: address required");
|
|
93
|
-
const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
|
|
272
|
+
const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
|
|
94
273
|
// afbj = basic blocks of the function as JSON: each block has addr/size/jump/
|
|
95
274
|
// fail/ninstr. `jump` is the taken edge; `fail` (present only on conditional
|
|
96
275
|
// blocks) is the fall-through. This is the structured CFG source — `agf json`
|
|
97
276
|
// only gives a text body blob with untyped out_nodes.
|
|
98
277
|
const blocks = await runRizinJson({
|
|
99
|
-
romBytes, arch, bits,
|
|
278
|
+
romBytes, arch, bits, baddr: loadBase || undefined,
|
|
100
279
|
commands: `aaa; af @ ${hx(address)}; afbj @ ${hx(address)}`,
|
|
101
280
|
});
|
|
102
281
|
if (!Array.isArray(blocks) || blocks.length === 0) {
|
|
@@ -129,10 +308,10 @@ export async function analyzeCfg(romPath, address, platformOverride) {
|
|
|
129
308
|
*/
|
|
130
309
|
export async function analyzeXrefs(romPath, address, platformOverride) {
|
|
131
310
|
if (address == null) throw new Error("analyze xrefs: address required");
|
|
132
|
-
const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
|
|
311
|
+
const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
|
|
133
312
|
let refs;
|
|
134
313
|
try {
|
|
135
|
-
refs = await runRizinJson({ romBytes, arch, bits, commands: `aaa; axtj @ ${hx(address)}` });
|
|
314
|
+
refs = await runRizinJson({ romBytes, arch, bits, baddr: loadBase || undefined, commands: `aaa; axtj @ ${hx(address)}` });
|
|
136
315
|
} catch (e) {
|
|
137
316
|
// axtj prints nothing (not even `[]`) when there are zero refs → our JSON
|
|
138
317
|
// guard throws. Treat "no JSON" as "no refs".
|
|
@@ -154,11 +333,12 @@ export async function analyzeXrefs(romPath, address, platformOverride) {
|
|
|
154
333
|
* analysis pass. The "give me the shape of this ROM" call.
|
|
155
334
|
*/
|
|
156
335
|
export async function analyzeStructure(romPath, platformOverride) {
|
|
157
|
-
const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
|
|
336
|
+
const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
|
|
337
|
+
const baddr = loadBase || undefined;
|
|
158
338
|
const [fns, strings, entries] = await Promise.all([
|
|
159
|
-
runRizinJson({ romBytes, arch, bits, commands: "aaa; aflj" }).catch(() => []),
|
|
160
|
-
runRizinJson({ romBytes, arch, bits, commands: "aaa; izj" }).catch(() => []),
|
|
161
|
-
runRizinJson({ romBytes, arch, bits, commands: "aaa; iej" }).catch(() => []),
|
|
339
|
+
runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; aflj" }).catch(() => []),
|
|
340
|
+
runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; izj" }).catch(() => []),
|
|
341
|
+
runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; iej" }).catch(() => []),
|
|
162
342
|
]);
|
|
163
343
|
return {
|
|
164
344
|
platform, arch,
|
|
@@ -204,16 +384,170 @@ export async function vaMapping(romBytes, arch, bits, vaddr, platform) {
|
|
|
204
384
|
return { paddr: vaddr, vbase: 0 };
|
|
205
385
|
}
|
|
206
386
|
|
|
387
|
+
/**
 * Build a CPU-ADDRESSED image of a SNES cart for the decompiler.
 *
 * SNES is banked: the langid is `65816:LE:24:snes` (24-bit space). If the
 * decompiler is handed the flat file, a LoROM function at CPU $00:8000 lives at
 * file 0, but its in-bank `jsr $80xx` operands resolve to file 0x80xx — bank-1
 * code, a plausible-but-WRONG body. So each ROM chunk is laid at its CPU address
 * (zero-filled elsewhere), making BOTH the function address and every
 * in-bank/JSL operand resolve.
 *
 * The returned image is always the full 16MB (0x1000000-byte) CPU space, and it
 * is laid out BY CPU address, so the decompiler offset for a CPU address is the
 * 24-bit address itself.
 *
 * Mirrors the detection/fold in disasm.js's mapSnesAddress (kept local to avoid
 * a circular import: disasm.js imports analyze.js).
 *
 * @param {Uint8Array} romBytes  raw cart file; a 0x200-byte copier header is
 *   assumed (and skipped) when `length % 0x8000 === 0x200`
 * @param {"lorom"|"hirom"} [mapperHint]  force the mapping; when omitted the map
 *   mode byte at header offset 0x15 of the $7FC0 / $FFC0 header is sniffed,
 *   defaulting to LoROM when ambiguous
 * @returns {{ image: Uint8Array, isLo: boolean }}
 */
export function buildSnesCpuImage(romBytes, mapperHint) {
  const copierOff = (romBytes.length % 0x8000 === 0x200) ? 0x200 : 0;

  let isLo;
  if (mapperHint === "lorom") isLo = true;
  else if (mapperHint === "hirom") isLo = false;
  else {
    const loByte = romBytes[copierOff + 0x7FC0 + 0x15];
    const hiByte = romBytes[copierOff + 0xFFC0 + 0x15];
    const detLo = loByte === 0x20 || loByte === 0x30 || loByte === 0x32;
    const detHi = hiByte === 0x21 || hiByte === 0x31;
    isLo = detHi && !detLo ? false : true; // default LoROM when ambiguous
  }

  const body = romBytes.subarray(copierOff);
  const image = new Uint8Array(0x1000000); // full 16MB CPU space

  if (isLo) {
    // LoROM: 32KB file chunk N maps to CPU bank N, $8000-$FFFF. Banks $80-$FF
    // MIRROR $00-$7F (the FastROM image), and code commonly runs there (a JML to
    // $F9xxxx is bank 0x79's ROM via the $80+ mirror). Each chunk is therefore
    // written to BOTH its $00-$7F home and its $80-$FF twin — otherwise a
    // reference into the high half "can't load N bytes" and the decompiler bails.
    const fileBanks = Math.ceil(body.length / 0x8000); // ROM chunks (≤128)
    for (let b = 0; b < fileBanks; b++) {
      const src = body.subarray(b * 0x8000, (b + 1) * 0x8000);
      const lo = (b & 0x7F);                               // home bank $00-$7F
      image.set(src, lo * 0x10000 + 0x8000);               // $lo:8000
      image.set(src, (lo | 0x80) * 0x10000 + 0x8000);      // $(lo|80):8000 mirror
    }
    return { image, isLo: true };
  }

  // HiROM: file 64KB chunk N is CPU bank $C0+N ($0000-$FFFF, the primary image),
  // mirrored to bank $40+N. The upper half of each chunk also appears at
  // $00-$3F:$8000-$FFFF and $80-$BF:$8000-$FFFF.
  //
  // Only banks $C0-$FF exist for the primary image, i.e. 64 chunks (4MB). A
  // larger body would compute a destination at or beyond 0x1000000 and make
  // `image.set` throw a RangeError, so chunks past the 64th are ignored.
  // NOTE(review): extended (>4MB) HiROM layouts are not mapped here — confirm
  // whether they need support.
  const fileBanks = Math.min(Math.ceil(body.length / 0x10000), 0x40);
  for (let b = 0; b < fileBanks; b++) {
    const src = body.subarray(b * 0x10000, (b + 1) * 0x10000);
    image.set(src, (0xC0 + b) * 0x10000);                  // $C0+b: full bank (primary)
    image.set(src, (0x40 + b) * 0x10000);                  // $40+b: mirror
    const upper = src.subarray(0x8000);                    // $8000-$FFFF half
    image.set(upper, b * 0x10000 + 0x8000);                // $00+b:8000 mirror
    image.set(upper, (0x80 + b) * 0x10000 + 0x8000);       // $80+b:8000 mirror
  }
  return { image, isLo: false };
}
|
|
449
|
+
|
|
450
|
+
/**
 * Bank-aware NES image for the decompiler (A1).
 *
 * The incoming address is a FLAT-PRG VA: $8000 plus the byte offset into the
 * PRG data, so bank 0 spans $8000-$BFFF, bank 1 $C000-$FFFF, bank 2 starts at
 * $10000, and so on. Decompiling the flat image is bank-blind — an in-code
 * `JSR $9123` would resolve into bank 0 regardless of where the caller lives.
 *
 * This recovers the 16KB PRG bank holding the address and builds a 64KB image
 * with that bank at $8000-$BFFF and the last (fixed) bank at $C000-$FFFF, then
 * reports the address the function has inside that window.
 *
 * @param {Uint8Array} romBytes  iNES file; PRG data is read from byte 16, with
 *   the 16KB bank count taken from header byte 4
 * @param {number} flatVa  flat-PRG virtual address ($8000-based)
 * @returns {{ image: Uint8Array, cpuAddr: number, bank: number } | null} null
 *   when the file lacks the iNES magic, has at most 32KB of PRG (the flat path
 *   is already correct), or `flatVa` falls outside the PRG.
 */
export function buildNesBankImage(romBytes, flatVa) {
  // "NES" followed by 0x1A.
  const INES_MAGIC = [0x4e, 0x45, 0x53, 0x1a];
  if (!INES_MAGIC.every((byte, idx) => romBytes[idx] === byte)) return null;

  const BANK_LEN = 0x4000;
  const HEADER_LEN = 16;
  const bankCount = romBytes[4];
  const prgLen = bankCount * BANK_LEN;
  if (prgLen <= 0x8000) return null; // NROM-128/256: nothing to bank

  const prg = romBytes.subarray(HEADER_LEN, HEADER_LEN + prgLen);

  // Flat VA -> byte offset into PRG (the flat segment is based at $8000).
  const prgOffset = (flatVa >>> 0) - 0x8000;
  if (prgOffset < 0 || prgOffset >= prgLen) return null;

  const bank = Math.floor(prgOffset / BANK_LEN);
  const offsetInBank = prgOffset % BANK_LEN;
  const fixedBank = bankCount - 1;

  const bankBytes = (index) => prg.subarray(index * BANK_LEN, index * BANK_LEN + BANK_LEN);

  // Window: selected bank in the switchable slot, last bank in the fixed slot.
  const image = new Uint8Array(0x10000);
  image.set(bankBytes(bank), 0x8000);
  image.set(bankBytes(fixedBank), 0xC000);

  // A function living in the fixed bank is addressed through the $C000 slot;
  // anything else through the $8000 slot.
  const slotBase = bank === fixedBank ? 0xC000 : 0x8000;
  return { image, cpuAddr: slotBase + offsetInBank, bank };
}
|
|
494
|
+
|
|
207
495
|
/**
|
|
208
496
|
* Decompile the function containing `address` to C pseudocode (Ghidra).
|
|
209
|
-
* @returns {{platform, langid, address, code, warnings, qualityNote}}
|
|
497
|
+
* @returns {{platform, langid, address, code, warnings, qualityNote, bank?}}
|
|
210
498
|
*/
|
|
211
499
|
export async function analyzeDecompile(romPath, address, platformOverride) {
|
|
212
500
|
if (address == null) throw new Error("analyze decompile: address required");
|
|
213
501
|
const platform = platformOverride ?? sniffPlatform(romPath);
|
|
214
502
|
if (!platform) throw new Error(`analyze decompile: unknown platform for '${path.basename(romPath)}'`);
|
|
215
503
|
if (!SLEIGH_LANGID[platform]) throw new Error(`analyze decompile: unsupported platform '${platform}'`);
|
|
216
|
-
|
|
504
|
+
let romBytes = new Uint8Array(await readFile(romPath));
|
|
505
|
+
// A6: deinterleave SMD Genesis dumps here too (analyzeDecompile reads the file
|
|
506
|
+
// directly, not via loadContext) — a flat read of an interleaved ROM decodes
|
|
507
|
+
// to pure garbage.
|
|
508
|
+
if (platform === "genesis") romBytes = deinterleaveSmd(romBytes) ?? romBytes;
|
|
509
|
+
|
|
510
|
+
// SNES: banked 24-bit space. `address` is a LoROM/HiROM CPU address (what
|
|
511
|
+
// target='functions'/'cfg' report). Lay the cart out by CPU address so BOTH
|
|
512
|
+
// the function address AND its in-bank/JSL operands resolve, then decompile at
|
|
513
|
+
// the CPU address directly. (Flat-at-0 would decompile file[address] — the
|
|
514
|
+
// wrong bank — and mis-label every operand.)
|
|
515
|
+
if (platform === "snes") {
|
|
516
|
+
const { image } = buildSnesCpuImage(romBytes);
|
|
517
|
+
// The image is laid out by CPU address, so the file offset IS the address.
|
|
518
|
+
const imgOff = address >>> 0;
|
|
519
|
+
if (imgOff < 0 || imgOff >= image.length) {
|
|
520
|
+
throw new Error(
|
|
521
|
+
`decompile: SNES address ${hx(address)} is outside the ${image.length}-byte CPU image ` +
|
|
522
|
+
`(is it a valid LoROM/HiROM code address?).`
|
|
523
|
+
);
|
|
524
|
+
}
|
|
525
|
+
const rs = await decompileFunction({ platform, romBytes: image, fileOffset: imgOff });
|
|
526
|
+
return {
|
|
527
|
+
platform, langid: rs.langid,
|
|
528
|
+
address, addressHex: hx(address),
|
|
529
|
+
code: prettyDecompile(rs.code, platform), warnings: rs.warnings,
|
|
530
|
+
qualityNote: "medium (65816 variable register width)",
|
|
531
|
+
};
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
// NES banked carts (A1): rizin reports flat-PRG VAs ($8000-based); decompiling
|
|
535
|
+
// that flat image is bank-blind (cross-bank JSR/JMP land on the wrong bank).
|
|
536
|
+
// Build a real 32KB CPU window (this bank @ $8000 + fixed top bank @ $C000) so
|
|
537
|
+
// in-bank AND fixed-bank calls resolve. NROM falls through to the flat path.
|
|
538
|
+
if (platform === "nes") {
|
|
539
|
+
const banked = buildNesBankImage(romBytes, address);
|
|
540
|
+
if (banked) {
|
|
541
|
+
const rn = await decompileFunction({ platform, romBytes: banked.image, fileOffset: banked.cpuAddr });
|
|
542
|
+
return {
|
|
543
|
+
platform, langid: rn.langid,
|
|
544
|
+
address, addressHex: hx(address),
|
|
545
|
+
bank: banked.bank,
|
|
546
|
+
code: prettyDecompile(rn.code, platform), warnings: rn.warnings,
|
|
547
|
+
qualityNote: "rough (6502 architecture limit)",
|
|
548
|
+
};
|
|
549
|
+
}
|
|
550
|
+
}
|
|
217
551
|
|
|
218
552
|
// Use rizin's loader mapping to turn the VA (what the user sees from
|
|
219
553
|
// target='functions') into the file offset the raw decompiler image needs.
|
|
@@ -221,24 +555,21 @@ export async function analyzeDecompile(romPath, address, platformOverride) {
|
|
|
221
555
|
// decompiler's job via SLEIGH) — its flat image bases at 0 either way.
|
|
222
556
|
const arch = RIZIN_ARCH[platform] ?? "6502";
|
|
223
557
|
const bits = { arm: 32, m68k: 32, snes: 16 }[arch];
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
)
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
//
|
|
234
|
-
//
|
|
235
|
-
//
|
|
236
|
-
//
|
|
237
|
-
//
|
|
238
|
-
//
|
|
239
|
-
// CPU base so absolute references ($8000/$C000/$F000) resolve. 7800's base is
|
|
240
|
-
// size-dependent (16KB→$C000, 32KB→$8000); 7800 carts may carry a 128-byte
|
|
241
|
-
// header before the body.
|
|
558
|
+
|
|
559
|
+
const QUALITY = {
|
|
560
|
+
gba: "excellent (ARM)", genesis: "excellent (M68K)",
|
|
561
|
+
gb: "good (SM83)", gbc: "good (SM83)", sms: "good (Z80)", gg: "good (Z80)", msx: "good (Z80)",
|
|
562
|
+
snes: "medium (65816 variable register width)", pce: "medium (HuC6280)",
|
|
563
|
+
nes: "rough (6502 architecture limit)", atari2600: "rough (6502)", atari7800: "rough (6502)",
|
|
564
|
+
c64: "rough (6502)", lynx: "rough (65C02)",
|
|
565
|
+
};
|
|
566
|
+
|
|
567
|
+
// FORCED-BASE platforms (headerless/load-header 6502 carts): rizin/our
|
|
568
|
+
// analysis bases these at a known CPU address, so `address` IS a CPU address
|
|
569
|
+
// and `functions` already reported it as such. Strip any header, left-pad the
|
|
570
|
+
// body so file offset == CPU address, and decompile at `address` directly.
|
|
571
|
+
// 2600 → $F000; 7800 → size-dependent $8000-$C000 (+128B header if "AT…");
|
|
572
|
+
// c64 .prg → the 2-byte load-address header's value (e.g. $0801).
|
|
242
573
|
let forcedBase = 0, bodyStart = 0;
|
|
243
574
|
if (platform === "atari2600") {
|
|
244
575
|
forcedBase = 0xf000;
|
|
@@ -248,28 +579,56 @@ export async function analyzeDecompile(romPath, address, platformOverride) {
|
|
|
248
579
|
bodyStart = hasHdr ? 128 : 0;
|
|
249
580
|
const body = romBytes.length - bodyStart;
|
|
250
581
|
forcedBase = body <= 0x4000 ? 0xc000 : body <= 0x8000 ? 0x8000 : 0x4000;
|
|
582
|
+
} else if (platform === "c64" && romBytes.length >= 2) {
|
|
583
|
+
bodyStart = 2;
|
|
584
|
+
forcedBase = romBytes[0] | (romBytes[1] << 8);
|
|
251
585
|
}
|
|
252
|
-
|
|
586
|
+
if (forcedBase > 0 && forcedBase <= 0x10000) {
|
|
587
|
+
const body = romBytes.subarray(bodyStart);
|
|
588
|
+
// Accept `address` as EITHER a CPU address (≥ forcedBase, what functions
|
|
589
|
+
// reports once baddr is applied) OR a raw body file-offset (< forcedBase,
|
|
590
|
+
// legacy callers / direct offsets). Normalize to a CPU address.
|
|
591
|
+
const a = address >>> 0;
|
|
592
|
+
const cpuAddr = a >= forcedBase ? a : forcedBase + a;
|
|
593
|
+
if (cpuAddr < forcedBase || cpuAddr >= forcedBase + body.length) {
|
|
594
|
+
throw new Error(
|
|
595
|
+
`decompile: address ${hx(a)} is outside the ${platform} CPU image ` +
|
|
596
|
+
`($${forcedBase.toString(16)}-$${(forcedBase + body.length).toString(16)}).`
|
|
597
|
+
);
|
|
598
|
+
}
|
|
599
|
+
const padded = new Uint8Array(forcedBase + body.length);
|
|
600
|
+
padded.set(body, forcedBase);
|
|
601
|
+
const rf = await decompileFunction({ platform, romBytes: padded, fileOffset: cpuAddr });
|
|
602
|
+
return {
|
|
603
|
+
platform, langid: rf.langid, address, addressHex: hx(address),
|
|
604
|
+
code: prettyDecompile(rf.code, platform), warnings: rf.warnings, qualityNote: QUALITY[platform] ?? "unknown",
|
|
605
|
+
};
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
// Other platforms: use rizin's loader mapping to turn the CPU VA into the file
|
|
609
|
+
// offset the raw decompiler image needs. Rizin's map gives `vbase` when it
|
|
610
|
+
// knows the base; left-pad by it so file offset == CPU address for the cases
|
|
611
|
+
// where the code references absolute addresses.
|
|
612
|
+
const { paddr, vbase } = await vaMapping(romBytes, arch, bits, address, platform);
|
|
613
|
+
if (paddr < 0 || paddr >= romBytes.length) {
|
|
614
|
+
throw new Error(
|
|
615
|
+
`decompile: address ${hx(address)} maps to file offset ${paddr}, outside the ` +
|
|
616
|
+
`${romBytes.length}-byte image for ${platform}.`
|
|
617
|
+
);
|
|
618
|
+
}
|
|
619
|
+
const base = vbase;
|
|
253
620
|
let image = romBytes, decompAddr = paddr;
|
|
254
621
|
if (base > 0 && base <= 0x10000) {
|
|
255
|
-
const
|
|
256
|
-
|
|
257
|
-
padded.set(body, base);
|
|
622
|
+
const padded = new Uint8Array(base + romBytes.length);
|
|
623
|
+
padded.set(romBytes, base);
|
|
258
624
|
image = padded;
|
|
259
|
-
decompAddr = base +
|
|
625
|
+
decompAddr = base + paddr; // CPU address of the function
|
|
260
626
|
}
|
|
261
627
|
const r = await decompileFunction({ platform, romBytes: image, fileOffset: decompAddr });
|
|
262
|
-
const QUALITY = {
|
|
263
|
-
gba: "excellent (ARM)", genesis: "excellent (M68K)",
|
|
264
|
-
gb: "good (SM83)", gbc: "good (SM83)", sms: "good (Z80)", gg: "good (Z80)", msx: "good (Z80)",
|
|
265
|
-
snes: "medium (65816 variable register width)", pce: "medium (HuC6280)",
|
|
266
|
-
nes: "rough (6502 architecture limit)", atari2600: "rough (6502)", atari7800: "rough (6502)",
|
|
267
|
-
c64: "rough (6502)", lynx: "rough (65C02)",
|
|
268
|
-
};
|
|
269
628
|
return {
|
|
270
629
|
platform, langid: r.langid,
|
|
271
630
|
address, addressHex: hx(address),
|
|
272
|
-
code: r.code,
|
|
631
|
+
code: prettyDecompile(r.code, platform),
|
|
273
632
|
warnings: r.warnings,
|
|
274
633
|
qualityNote: QUALITY[platform] ?? "unknown",
|
|
275
634
|
};
|
package/src/analysis/rizin.js
CHANGED
|
@@ -77,7 +77,7 @@ export const RIZIN_ARCH = {
|
|
|
77
77
|
* @returns {Promise<{exitCode:number, output:string, log:string, crash?:object}>}
|
|
78
78
|
*/
|
|
79
79
|
export async function runRizin(opts) {
|
|
80
|
-
const { romPath, romBytes, commands, arch, bits, baddr, writeable } = opts;
|
|
80
|
+
const { romPath, romBytes, commands, arch, bits, baddr, writeable, timeoutMs } = opts;
|
|
81
81
|
if (!commands) throw new Error("runRizin: commands required");
|
|
82
82
|
const bytes = romBytes ?? new Uint8Array(await readFile(romPath));
|
|
83
83
|
|
|
@@ -102,6 +102,11 @@ export async function runRizin(opts) {
|
|
|
102
102
|
const res = await runIsolated({
|
|
103
103
|
gluePath: rizinGluePath(),
|
|
104
104
|
argv,
|
|
105
|
+
// A5: per-call timeout so a hung analysis (whole-ROM `aaa` on a multi-MB ROM)
|
|
106
|
+
// can't wedge the shared worker pool — on timeout the worker is killed +
|
|
107
|
+
// recycled and this call returns a clean { timedOut, log } result. Default
|
|
108
|
+
// 60s; callers can override (a scoped `af @ addr` pass is near-instant).
|
|
109
|
+
timeoutMs: timeoutMs ?? 60000,
|
|
105
110
|
inputFiles: [{
|
|
106
111
|
vfsPath: "/work/rom.bin",
|
|
107
112
|
encoding: "base64",
|
|
@@ -109,6 +114,13 @@ export async function runRizin(opts) {
|
|
|
109
114
|
}],
|
|
110
115
|
outputFiles: [{ vfsPath: OUT, encoding: "utf8" }],
|
|
111
116
|
});
|
|
117
|
+
// Surface a timeout as a thrown error so JSON callers get a clear signal
|
|
118
|
+
// (runRizinJson already wraps crashes; this makes the timeout explicit).
|
|
119
|
+
if (res.timedOut) {
|
|
120
|
+
const e = new Error(res.log?.trim() || "rizin analysis timed out");
|
|
121
|
+
/** @type {any} */ (e).timedOut = true;
|
|
122
|
+
throw e;
|
|
123
|
+
}
|
|
112
124
|
return { ...res, output: res.outputs?.[OUT] ?? "" };
|
|
113
125
|
}
|
|
114
126
|
|