romdevtools 0.40.2 → 0.41.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +2 -2
- package/CHANGELOG.md +74 -0
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/analysis/analyze.js +319 -47
- package/src/analysis/rizin.js +13 -1
- package/src/cores/capabilities.js +218 -0
- package/src/mcp/tools/disasm.js +23 -4
- package/src/mcp/tools/platform-tools.js +17 -5
- package/src/mcp/tools/platforms.js +18 -3
- package/src/mcp/tools/rendering-context.js +5 -4
- package/src/mcp/tools/watch-memory.js +144 -2
- package/src/mcp/util.js +37 -0
- package/src/platforms/_guides/ROMHACKING_PLAYBOOK.md +23 -8
- package/src/toolchains/_worker/pool.js +41 -3
package/AGENTS.md
CHANGED
|
@@ -66,11 +66,11 @@ Skip playtest only when there's clearly no human in the loop: CI runs, automated
|
|
|
66
66
|
- `input` — drive controllers, look up hardware bit layouts. `navigate` walks menus by advancing on SCREEN CHANGE (not fixed frames) and reports whether each press was consumed — the fast, reliable way to script a UI.
|
|
67
67
|
- `state` — savestates and forensic state inspection (`state({op:'save'})`, `state({op:'load'})`, `state({op:'export'})` a slot to disk without touching the live host, `state({op:'list'})`, `state({op:'dump'})`)
|
|
68
68
|
- `memory` — read/write VRAM/OAM/CGRAM/ARAM and other regions (all 14 platforms). `memory({op:'read'})` takes `offsets:[…]` to batch scattered reads in one call. **`memory({op:'search'})`/`memory({op:'searchNext'})`** = the Cheat-Engine value-search loop ("find the address of X, narrow as X changes") — relative compares (`inc`/`dec`/`changed`) work as the FIRST narrow (baselines recorded at seed), and `as:'bcd'`/`as:'digits'` search packed-BCD scores and digit-per-byte HUD buffers (any constant tile base) when stored ≠ displayed. **`memory({op:'searchUnknown'})`** is the unknown-initial-value hunt — seed the whole region with no value, then narrow by `dec`/`inc`/`changed` across events (the value you can't read off the HUD). **`memory({op:'readCart'})`** reads the loaded cart image to confirm a patch is live (pass `{cpuAddress, bank}` to read a banked CPU address on NES/SNES). **`memory({op:'classify'})`** says whether bytes look like ASCII/code/tile-data (kills the "found table that's really a string" trap). `memory({op:'snapshot'})` + `memory({op:'diff'})` answer "which bytes changed across this event?" (diff defaults to a clustered summary with stride detection; small clusters carry before/after hex, `minDelta` filters churn, and predicate filters — `changeDir:'inc'|'dec'`, `deltaEq`, `beforeMin/Max`, `afterMin/Max` — keep only the bytes that moved the way you expect; `outputPath`+`echo:false` route the full list to disk); **`memory({op:'diffRuns', portsA, portsB?})`** answers "which byte does this INPUT drive?" in one call (same start state run twice under two inputs, only the divergent bytes return); `state({op:'diff'})` is the coarse whole-machine version. Reads routed to disk take `echo:false` to skip the inline hex.
|
|
69
|
-
- `debug` — **`frame({op:'verify'})`** (NO-VISION render-health: one call answers "is the game actually rendering / alive?" on all 14 — fuses a framebuffer pixel scan with the per-platform render-enable/NMI decode; `{verified:true|false|null, issues[], pixels, render}`, frame-0-guarded so it never cries wolf on boot), `sprites({op:'inspect'})`, `palette({source:'live'})`, `cpu({op:'read'})` (all 14), `audioDebug({op:'inspect'})` (the 12 systems with a sound chip — all but Atari 2600/7800; pass `frames:N` to TRACE a per-channel note-timeline for headless melody asserts), `background({view:'renderState'})`, `breakpoint({on:'write'})` (write watchpoint, all 14; EVERY hit on EVERY platform carries `registersAtHit` — the register file frozen at the hit instant, the only honest read since live regs drift after a hit — and the CPU stays frozen until the hit is cleared), **`watch({on:'dma', precision:'sampled'})`** (Genesis: which ROM offset a VRAM graphic was DMA'd from), **`watch({on:'copy'})`** (ALL 14: every write landing in a VRAM window logged with the EXECUTING instruction's PC — the generic 'which routine uploads this graphic?'; port-based video memory hooked in-core incl. the SNES DMA path, CPU-mapped VRAM via the range log), **`disasm({target:'bytes'|'rom'|'references'|'project'})`** (ALL 14 — native binutils objdump per CPU, incl. 
GBA ARM7/Thumb; the byte-exact `disasm({target:'project'})` reassembles through native as/ld/objcopy; banked carts — NES mappers, SNES LoROM, GB MBC, Sega mapper, MSX megaROM, 2600 F8/F6/F4, 7800 SuperGame, >32KB HuCards — are split and reference-scanned PER BANK, refs tagged `prgBank`/`romBank`), plus the **Rizin/Ghidra RE engine** `disasm({target:'cfg'|'xrefs'|'functions'|'decompile'})` (ALL 14 — control-flow graphs, deep xrefs, auto-detected functions, and Ghidra C pseudocode; quality excellent on GBA/Genesis, rough on 6502) + `symbols({op:'analyze'})` (one-shot structural map), `symbols({op})` lookup, `background({view:'rendered'})`, plus **`cheats({op})`** (`cheats({op:'lookup'})` = a free labeled RAM/code map for known ROMs, `cheats({op:'search'})` to fuzzy-find a game by name, `cheats({op:'apply'})`/`cheats({op:'clear'})` non-destructively, `cheats({op:'make'})` to create codes)
|
|
69
|
+
- `debug` — **`frame({op:'verify'})`** (NO-VISION render-health: one call answers "is the game actually rendering / alive?" on all 14 — fuses a framebuffer pixel scan with the per-platform render-enable/NMI decode; `{verified:true|false|null, issues[], pixels, render}`, frame-0-guarded so it never cries wolf on boot), `sprites({op:'inspect'})`, `palette({source:'live'})`, `cpu({op:'read'})` (all 14), `audioDebug({op:'inspect'})` (the 12 systems with a sound chip — all but Atari 2600/7800; pass `frames:N` to TRACE a per-channel note-timeline for headless melody asserts), `background({view:'renderState'})`, `breakpoint({on:'write'})` (write watchpoint, all 14; EVERY hit on EVERY platform carries `registersAtHit` — the register file frozen at the hit instant, the only honest read since live regs drift after a hit — and the CPU stays frozen until the hit is cleared), **`watch({on:'dma', precision:'sampled'})`** (Genesis: which ROM offset a VRAM graphic was DMA'd from), **`watch({on:'copy'})`** (ALL 14: every write landing in a VRAM window logged with the EXECUTING instruction's PC — the generic 'which routine uploads this graphic?'; port-based video memory hooked in-core incl. the SNES DMA path, CPU-mapped VRAM via the range log), **`disasm({target:'bytes'|'rom'|'references'|'project'})`** (ALL 14 — native binutils objdump per CPU, incl. 
GBA ARM7/Thumb; the byte-exact `disasm({target:'project'})` reassembles through native as/ld/objcopy; banked carts — NES mappers, SNES LoROM, GB MBC, Sega mapper, MSX megaROM, 2600 F8/F6/F4, 7800 SuperGame, >32KB HuCards — are split and reference-scanned PER BANK, refs tagged `prgBank`/`romBank`), plus the **Rizin/Ghidra RE engine** `disasm({target:'cfg'|'xrefs'|'functions'|'decompile'|'resolveJumptable'})` (ALL 14 — control-flow graphs, deep xrefs, auto-detected functions [sorted real-code-first, `looksLikeData` flagged], and Ghidra C pseudocode; quality excellent on GBA/Genesis, rough on 6502; decompile output NAMES hardware registers [`PPUMASK` not `*0x2001`] and on the 6502 family folds SLEIGH clutter to readable C99 [`uint8_t`, `zp_FD`]; `resolveJumptable` resolves computed dispatchers live via `breakpoint({on:'jumptable'})`) + `symbols({op:'analyze'})` (one-shot structural map), `symbols({op})` lookup, `background({view:'rendered'})`, plus **`cheats({op})`** (`cheats({op:'lookup'})` = a free labeled RAM/code map for known ROMs, `cheats({op:'search'})` to fuzzy-find a game by name, `cheats({op:'apply'})`/`cheats({op:'clear'})` non-destructively, `cheats({op:'make'})` to create codes)
|
|
70
70
|
- `assets` — convert PNGs to tiles (`encodeArt`/`importArt`), WAVs to BRR, identify ROMs (`cart({op:'identify'})`), plus the hacking toolkit (`romPatch({op})` — write/writeMany/spliceCHR/relocate/makeStored/findFree/findPointer/diff, `assembleSnippet`, `cart({op:'extract'})`, `cart({op:'wrap'})`)
|
|
71
71
|
- `project` — the example-game library (`examples`: list / fork / show, plus the legacy snippet ops)
|
|
72
72
|
- `show` — `playtest({op})`: `op:'open'` opens the live SDL window for a human, `op:'stop'` closes it, `op:'status'` reports liveness, `op:'framebuffer'` captures exactly what the human's window shows
|
|
73
|
-
- `advanced` — `runUntil`, **`watch({on:'mem'|'range'|'pc'})`** (LOG-ALL tracing; `range`/`pc` take **`fromState`**/`fromStatePath` to trace from a restored savestate moment), **`breakpoint({on:'write'})`** (the EXACT instruction that wrote a byte, via a core watchpoint — fixes the frame-sampled-PC problem; runs to END OF FRAME and reports the LAST matching write with `hits`=count; `condition:'increase'|'decrease'|'equals'`+`conditionValue` filters to the MEANINGFUL write — the score going UP, not the per-frame restore churn (core-level on all 14, `oldValueByte` reported); `precision:'sampled'` is the cheap frame-PC version; on a `pressDuring` run pass **`abortIf:[{region,offset,label}]`** to stop early if the driven scenario derails — a guard byte changing returns `{aborted, abortedBy, before, after}` instead of burning all `maxFrames` on a meaningless `found:false`), **`breakpoint({on:'pc'})`** (execution breakpoint — freeze the CPU AT an instruction and read its registers), **`breakpoint({on:'read'})`** (the EXACT instruction that read a byte), **`frame({op:'stepInstruction'})`** (CPU single-step) — all 14 platforms; input recording
|
|
73
|
+
- `advanced` — `runUntil`, **`watch({on:'mem'|'range'|'pc'})`** (LOG-ALL tracing; `range`/`pc` take **`fromState`**/`fromStatePath` to trace from a restored savestate moment), **`breakpoint({on:'write'})`** (the EXACT instruction that wrote a byte, via a core watchpoint — fixes the frame-sampled-PC problem; runs to END OF FRAME and reports the LAST matching write with `hits`=count; `condition:'increase'|'decrease'|'equals'`+`conditionValue` filters to the MEANINGFUL write — the score going UP, not the per-frame restore churn (core-level on all 14, `oldValueByte` reported); `precision:'sampled'` is the cheap frame-PC version; on a `pressDuring` run pass **`abortIf:[{region,offset,label}]`** to stop early if the driven scenario derails — a guard byte changing returns `{aborted, abortedBy, before, after}` instead of burning all `maxFrames` on a meaningless `found:false`), **`breakpoint({on:'pc'})`** (execution breakpoint — freeze the CPU AT an instruction and read its registers), **`breakpoint({on:'read'})`** (the EXACT instruction that read a byte), **`breakpoint({on:'jumptable'})`** (RESOLVE a computed-jump dispatcher static analysis can't follow — `JMP (table,X)` / RTS-trick state machines, script/battle VMs: break at the dispatcher, single-step through the indirect transfer, record the COMPUTED targets live across frames/inputs; the varying arms are isolated from fixed trampolines; `disasm({target:'resolveJumptable'})` points here. No static-only tool can do this — it needs the live emulator), **`frame({op:'stepInstruction'})`** (CPU single-step) — all 14 platforms; input recording
|
|
74
74
|
|
|
75
75
|
**"Disassemble this NES ROM"** is now just: `disasm({target:'rom', path, startAddress, length})`. No discovery step.
|
|
76
76
|
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,80 @@ All notable changes to `romdevtools`. Dates are release dates.
|
|
|
4
4
|
(Published as `romdev-mcp` through 0.11.0; renamed to `romdevtools` in 0.13.0 —
|
|
5
5
|
the `romdev-mcp` bin is kept as an alias.)
|
|
6
6
|
|
|
7
|
+
## 0.41.0 — 2026-06-12
|
|
8
|
+
|
|
9
|
+
### RE engine round — bank-aware decompile, live jumptable recovery, readable 6502 output
|
|
10
|
+
|
|
11
|
+
A correctness + readability pass across the whole reverse-engineering engine,
|
|
12
|
+
plus the differentiator no static tool has: **resolving computed jumps with the
|
|
13
|
+
live emulator.** All 14 platforms; no `romdev-analysis*` package changes (the
|
|
14
|
+
work is in the address-mapping JS, the decompile post-passes, and the live-debug
|
|
15
|
+
tools).
|
|
16
|
+
|
|
17
|
+
#### New — `breakpoint({on:'jumptable', address})`: live computed-jumptable recovery
|
|
18
|
+
|
|
19
|
+
Static analysis follows direct addressing only, so a game's *hottest* routines —
|
|
20
|
+
state machines, script / event VMs, battle engines that dispatch through
|
|
21
|
+
`JMP (table,X)` or an RTS-trick — decompile to `(*_IRQ)()` + "Could not recover
|
|
22
|
+
jumptable." romdev has a **live emulator**, so it resolves them dynamically: break
|
|
23
|
+
at the dispatcher, single-step through the indirect transfer, and record the PC
|
|
24
|
+
it actually lands on — accumulated across frames/inputs. Fixed trampolines (the
|
|
25
|
+
compiler's pointer-call shim, return paths) are filtered out by what *doesn't*
|
|
26
|
+
vary; the destinations that vary hit-to-hit are the real switch arms, ranked by
|
|
27
|
+
hit count. Drive more game states (`pressDuring` / `fromState`) to surface rarer
|
|
28
|
+
arms. **No standalone tool (IDA / Ghidra / Binary Ninja) can do this** — it needs
|
|
29
|
+
an emulator in the loop. `disasm({target:'resolveJumptable', address})` is the
|
|
30
|
+
static-side alias that redirects to it.
|
|
31
|
+
|
|
32
|
+
#### New — `disasm({target:'decompile'})` reads cleaner
|
|
33
|
+
|
|
34
|
+
- **Hardware registers are named.** MMIO refs Ghidra emits as raw addresses
|
|
35
|
+
(`*0x2001`, `uRAM400e`) become the register name (`PPUMASK`, `NOISE_LO`), with a
|
|
36
|
+
`/* hw registers: … */` legend — on the 10 platforms with a register map
|
|
37
|
+
(NES/SNES/Genesis/GB/GBC/SMS/GG/2600/7800/C64).
|
|
38
|
+
- **6502 SLEIGH clutter folds to readable C** (NES/2600/7800/C64/Lynx/PCE). Width
|
|
39
|
+
types become C99 stdint (`uint1`→`uint8_t`, `uint2`→`uint16_t`), redundant
|
|
40
|
+
nested casts collapse (`(uint16_t)(uint8_t)x`→`(uint8_t)x`), and zero-page byte
|
|
41
|
+
refs are named (`cRAM00fd`→`zp_FD`), with a `/* 6502 fold: … */` legend. A real
|
|
42
|
+
banked NES function went from `*(xunknown1 *)(uint2)(uint1)(param_2 - 0xb)` to
|
|
43
|
+
`*(uint8_t *)(zp_FE - 0xb)` — same semantics, far more readable. (The
|
|
44
|
+
carry-flag-16-bit / BCD reconstruction is left to an LLM reading the output;
|
|
45
|
+
rewriting it textually would risk changing semantics.)
|
|
46
|
+
|
|
47
|
+
#### Improved — bank-aware decompile + honest function ranking
|
|
48
|
+
|
|
49
|
+
- **Banked NES `decompile` resolves the bank.** Rizin reports flat-PRG VAs, so a
|
|
50
|
+
flat decode was bank-blind (cross-bank `JSR`/`JMP` landed on the wrong bank).
|
|
51
|
+
`decompile` now lays a real 32 KB CPU window (selected bank @ `$8000` + fixed
|
|
52
|
+
top bank @ `$C000`) so in-bank *and* fixed-bank calls resolve; NROM falls
|
|
53
|
+
through to the flat path. On a real banked game this moved a top-12 function
|
|
54
|
+
list from ~1 readable / 11 garbage to ~10 readable / 2.
|
|
55
|
+
- **`disasm({target:'functions'})` is ranked real-code-first** with a
|
|
56
|
+
`looksLikeData` flag (+ `dataCount`), so giant single-block data folds stop
|
|
57
|
+
crowding out the actual control-flow routines you want.
|
|
58
|
+
|
|
59
|
+
#### Fixed / hardened
|
|
60
|
+
|
|
61
|
+
- **SMD-interleaved Genesis dumps auto-deinterleave.** A `.bin` in the SMD copier
|
|
62
|
+
format (size = N·16 KB + 512, `0xAA 0xBB` magic) read flat decodes to pure
|
|
63
|
+
"bad instruction" garbage; analysis now detects + reverses the interleave and
|
|
64
|
+
warns, so a flat disasm isn't silently wrong.
|
|
65
|
+
- **C64 `.prg` load-address header is stripped** before analysis (the 2-byte load
|
|
66
|
+
address was being analyzed as code), with the base applied so addresses line up.
|
|
67
|
+
- **Worker-pool timeout + recycle.** A whole-ROM `aaa` on a multi-MB ROM that
|
|
68
|
+
never returns no longer wedges the shared WASM analysis pool — the call times
|
|
69
|
+
out, the worker is killed + respawned, and a clean `{ timedOut }` result comes
|
|
70
|
+
back (with a "use a scoped pass" hint) instead of every later `disasm` hanging
|
|
71
|
+
until a manual server restart.
|
|
72
|
+
|
|
73
|
+
#### New op discovery
|
|
74
|
+
|
|
75
|
+
- **`platform({op:'capabilities', platform?})`** — the per-platform op-support
|
|
76
|
+
matrix (CPU family, rendering kind, which introspection/debug ops each core
|
|
77
|
+
actually wires), so an agent can check support before calling instead of
|
|
78
|
+
catching a failure. Unsupported ops now throw a typed, structured error
|
|
79
|
+
(`{ unsupported, platform, op, reason, alternative }`) rather than a bare string.
|
|
80
|
+
|
|
7
81
|
## 0.40.2 — 2026-06-11
|
|
8
82
|
|
|
9
83
|
### Fixed — SNES `disasm({target:'decompile'})` treated the address as a raw file offset
|
package/README.md
CHANGED
|
@@ -11,7 +11,7 @@ npx romdevtools
|
|
|
11
11
|
- **Build** — bundled per-platform toolchains (cc65, SDCC, RGBDS, asar, vasm, SGDK, PVSnesLib, libtonc, …) as WASM. Write source, compile, get a real ROM.
|
|
12
12
|
- **Run + see + drive** — load the ROM into an emulated console (libretro cores as WASM), step frames, screenshot, script controller input.
|
|
13
13
|
- **Inspect + romhack** — read CPU/video/save RAM, watch memory, write-breakpoints, the Cheat-Engine value-search loop, a bundled cheat database, mapper-aware disassembly, and a byte-exact rebuildable-project disassembler.
|
|
14
|
-
- **Reverse-engineering analysis engine (all 14 platforms)** — control-flow graphs, deep cross-references, auto-detected functions, a one-shot structural map, and a Ghidra **decompiler** (C-like pseudocode): `disasm({target:'cfg'|'xrefs'|'functions'|'decompile'})` and `symbols({op:'analyze'})`. Understand *how* a routine works before you touch it — no $3,000 IDA license, no install.
|
|
14
|
+
- **Reverse-engineering analysis engine (all 14 platforms)** — control-flow graphs, deep cross-references, auto-detected functions (ranked real-code-first), a one-shot structural map, and a Ghidra **decompiler** (C-like pseudocode, with hardware registers named and 6502 SLEIGH clutter folded to readable C): `disasm({target:'cfg'|'xrefs'|'functions'|'decompile'})` and `symbols({op:'analyze'})`. And the piece no static tool has: **live computed-jumptable recovery** — `breakpoint({on:'jumptable'})` runs the emulator to resolve the `JMP (table,X)` / RTS-trick dispatchers (state machines, script/battle VMs) that static analysis collapses to "could not recover." Understand *how* a routine works before you touch it — no $3,000 IDA license, no install.
|
|
15
15
|
- **Convert assets** — PNG → platform tiles/tilemaps, quantize-to-palette, audio importers (BRR for SNES, XGM2 PCM for Genesis).
|
|
16
16
|
|
|
17
17
|
Point any coding agent at it three ways:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "romdevtools",
|
|
3
|
-
"version": "0.40.2",
|
|
3
|
+
"version": "0.41.0",
|
|
4
4
|
"description": "Tool server giving coding agents full control of homebrew ROM development AND reverse-engineering/romhacking across 14 retro platforms (NES, SNES, GB, Genesis, Atari, C64, PC Engine, MSX, ...) via WASM toolchains + emulator cores. Use over plain HTTP, as an Agent Skill, or as an MCP server.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/mcp/server.js",
|
package/src/analysis/analyze.js
CHANGED
|
@@ -14,6 +14,110 @@ import { readFile } from "node:fs/promises";
|
|
|
14
14
|
import path from "node:path";
|
|
15
15
|
import { runRizin, runRizinJson, RIZIN_ARCH } from "./rizin.js";
|
|
16
16
|
import { decompileFunction, SLEIGH_LANGID } from "./decompile.js";
|
|
17
|
+
import { registersForPlatform } from "../platforms/common/registers.js";
|
|
18
|
+
|
|
19
|
+
/** B2: name hardware-register MMIO in decompiler output. Ghidra emits raw memory
|
|
20
|
+
* refs like `xRAM2001` / `uRAM400e` for $2001 / $400E; replace those whose
|
|
21
|
+
* address is a known platform register with the register NAME (a valid C
|
|
22
|
+
* identifier) so the C reads `PPUMASK = ...` instead of `xRAM2001 = ...`. Plus a
|
|
23
|
+
* one-line legend comment listing the substitutions made. */
|
|
24
|
+
export function nameHardwareRegisters(code, platform) {
|
|
25
|
+
const regs = registersForPlatform(platform);
|
|
26
|
+
if (!regs || !Object.keys(regs).length) return code;
|
|
27
|
+
const used = new Map();
|
|
28
|
+
// Match Ghidra's mem-ref identifiers: a few lowercase type-prefix letters,
|
|
29
|
+
// then RAM, then the hex address. e.g. xRAM2001, uRAM400e, cRAM00ff.
|
|
30
|
+
const out = code.replace(/\b[a-z]{1,3}RAM([0-9a-fA-F]{2,6})\b/g, (m, hex) => {
|
|
31
|
+
const addr = parseInt(hex, 16);
|
|
32
|
+
const name = regs[addr];
|
|
33
|
+
if (!name) return m;
|
|
34
|
+
used.set(addr, name);
|
|
35
|
+
return name;
|
|
36
|
+
});
|
|
37
|
+
if (!used.size) return code;
|
|
38
|
+
const legend = "/* hw registers: " +
|
|
39
|
+
[...used.entries()].map(([a, n]) => `${n}=$${a.toString(16).toUpperCase()}`).join(", ") +
|
|
40
|
+
" */\n";
|
|
41
|
+
return legend + out;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/** The 6502-family platforms whose SLEIGH (Ghidra) output carries the
|
|
45
|
+
* characteristic 8-bit clutter the B1 fold cleans up. */
|
|
46
|
+
const SIXTY_FIVE_OH_TWO = new Set(["nes", "atari2600", "atari7800", "c64", "lynx", "pce"]);
|
|
47
|
+
|
|
48
|
+
/** B1: 6502 idiom-folding post-pass (deterministic half). The 6502's 8-bit ALU
|
|
49
|
+
* lowers to literal noise in SLEIGH output — awkward width types (`uint1`,
|
|
50
|
+
* `xunknown1`), redundant nested width casts (`(uint2)(uint1)x`), and raw
|
|
51
|
+
* zero-page byte refs (`cRAM00fd`). This pass folds the SAFE, mechanical ones
|
|
52
|
+
* into readable C99 so the remaining logic is what an LLM (or human) reads:
|
|
53
|
+
* - SLEIGH width types → C99 stdint: uint1/int1/xunknown1 → uint8_t/int8_t,
|
|
54
|
+
* uint2 → uint16_t, uint4 → uint32_t (Ghidra's `uintN`/`undefinedN` are
|
|
55
|
+
* N-BYTE widths, not bit widths).
|
|
56
|
+
* - redundant nested width casts `(uint16_t)(uint8_t)expr` → `(uint8_t)expr`
|
|
57
|
+
* (the inner cast already narrows; the outer widen is noise).
|
|
58
|
+
* - zero-page byte refs `cRAM00fd` / `uRAM0012` → `zp_FD` / `zp_12` (a stable
|
|
59
|
+
* name for the ZP slot — the 6502's "fast RAM / pseudo-registers"). Only the
|
|
60
|
+
* $00xx page; MMIO was already named by nameHardwareRegisters (run first).
|
|
61
|
+
* It does NOT attempt the carry-flag 16-bit add/sub or BCD reconstruction the
|
|
62
|
+
* plan also lists — Ghidra usually already folds those into `+`/`uint2`, and a
|
|
63
|
+
* textual rewrite of what survives risks changing semantics. Those are left to
|
|
64
|
+
* the LLM cleanup half (the decompile output is read by an agent). Emits a
|
|
65
|
+
* leading "6502 fold:" legend comment noting what was applied. */
|
|
66
|
+
export function foldSixtyFiveOhTwoIdioms(code, platform) {
|
|
67
|
+
if (!SIXTY_FIVE_OH_TWO.has(platform)) return code;
|
|
68
|
+
const applied = [];
|
|
69
|
+
let out = code;
|
|
70
|
+
|
|
71
|
+
// 1) SLEIGH width types to C99 stdint. Each is a whole-word match. xunknown1
|
|
72
|
+
// and undefined1 are Ghidra's "1 byte, unknown signedness" - map to uint8_t.
|
|
73
|
+
const TYPES = [
|
|
74
|
+
[/\buint1\b/g, "uint8_t"], [/\bint1\b/g, "int8_t"],
|
|
75
|
+
[/\buint2\b/g, "uint16_t"], [/\bint2\b/g, "int16_t"],
|
|
76
|
+
[/\buint4\b/g, "uint32_t"], [/\bint4\b/g, "int32_t"],
|
|
77
|
+
[/\bxunknown1\b/g, "uint8_t"], [/\bundefined1\b/g, "uint8_t"],
|
|
78
|
+
[/\bxunknown2\b/g, "uint16_t"], [/\bundefined2\b/g, "uint16_t"],
|
|
79
|
+
[/\bxunknown4\b/g, "uint32_t"], [/\bundefined4\b/g, "uint32_t"],
|
|
80
|
+
];
|
|
81
|
+
let typeFolds = 0;
|
|
82
|
+
for (const [re, to] of TYPES) {
|
|
83
|
+
out = out.replace(re, () => { typeFolds++; return to; });
|
|
84
|
+
}
|
|
85
|
+
if (typeFolds) applied.push("SLEIGH width types → stdint");
|
|
86
|
+
|
|
87
|
+
// 2) Redundant nested width casts: `(uint16_t)(uint8_t)X` → `(uint8_t)X`. The
|
|
88
|
+
// inner narrowing cast governs; the outer widen back is pure noise SLEIGH emits
|
|
89
|
+
// around zero-page index math. Run a couple of passes to collapse triples.
|
|
90
|
+
let castFolds = 0;
|
|
91
|
+
for (let i = 0; i < 3; i++) {
|
|
92
|
+
const before = out;
|
|
93
|
+
out = out.replace(/\((uint(?:8|16|32)_t)\)\((uint8_t)\)/g, (_m, _wide, narrow) => {
|
|
94
|
+
castFolds++; return `(${narrow})`;
|
|
95
|
+
});
|
|
96
|
+
if (out === before) break;
|
|
97
|
+
}
|
|
98
|
+
if (castFolds) applied.push("redundant width casts collapsed");
|
|
99
|
+
|
|
100
|
+
// 3) Zero-page byte refs → zp_XX. Only the $00 page (cRAM00fd etc.); the
|
|
101
|
+
// 2-hex-after-00 form. A bare 4-hex like RAM0312 is not ZP — leave it.
|
|
102
|
+
const zp = new Set();
|
|
103
|
+
out = out.replace(/\b[a-z]{1,3}RAM00([0-9a-fA-F]{2})\b/g, (_m, hex) => {
|
|
104
|
+
const name = "zp_" + hex.toUpperCase();
|
|
105
|
+
zp.add(name);
|
|
106
|
+
return name;
|
|
107
|
+
});
|
|
108
|
+
if (zp.size) applied.push(`${zp.size} zero-page slot${zp.size > 1 ? "s" : ""} named zp_XX`);
|
|
109
|
+
|
|
110
|
+
if (!applied.length) return code;
|
|
111
|
+
return `/* 6502 fold: ${applied.join("; ")} */\n` + out;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/** Readability post-passes applied to every decompiler C body, in order:
|
|
115
|
+
* B2 hardware-register naming first (so MMIO becomes PPUMASK etc. before B1's
|
|
116
|
+
* zero-page labeler could touch it), then B1 6502 idiom folding. Both are no-ops
|
|
117
|
+
* off their target platforms, so this is safe to call unconditionally. */
|
|
118
|
+
export function prettyDecompile(code, platform) {
|
|
119
|
+
return foldSixtyFiveOhTwoIdioms(nameHardwareRegisters(code, platform), platform);
|
|
120
|
+
}
|
|
17
121
|
|
|
18
122
|
/** Sniff platform from a ROM extension (mirrors disasm.js). */
|
|
19
123
|
export function sniffPlatform(p) {
|
|
@@ -53,12 +157,66 @@ async function loadContext(romPath, platformOverride) {
|
|
|
53
157
|
if (arch == null) {
|
|
54
158
|
throw new Error(`analyze: no Rizin arch mapping for platform '${platform}'`);
|
|
55
159
|
}
|
|
56
|
-
|
|
160
|
+
let romBytes = new Uint8Array(await readFile(romPath));
|
|
57
161
|
// PCE: rizin's 6502 plugin drives the loader + standard control flow for
|
|
58
162
|
// function detection, but mis-decodes HuC6280 custom opcodes — CFG/xrefs are
|
|
59
163
|
// approximate. Accurate HuC6280 decode is the decompiler's job (SLEIGH spec).
|
|
60
164
|
const approx = platform === "pce";
|
|
61
|
-
|
|
165
|
+
|
|
166
|
+
// Address-space prep (A2): some formats carry a header and load at a CPU base
|
|
167
|
+
// that isn't 0. Strip the header and report `loadBase` so rizin's functions
|
|
168
|
+
// (and the decompiler image) speak CPU addresses, not raw file offsets.
|
|
169
|
+
// c64 .prg — 2-byte little-endian LOAD ADDRESS header, code at that address
|
|
170
|
+
// (typically $0801 = BASIC start). Without this, rizin analyzes the header
|
|
171
|
+
// bytes as code at offset 0 and every address is a file offset, not a CPU
|
|
172
|
+
// address — functions→decompile round-trip lands on garbage.
|
|
173
|
+
let loadBase = 0;
|
|
174
|
+
if (platform === "c64" && romBytes.length >= 2) {
|
|
175
|
+
loadBase = romBytes[0] | (romBytes[1] << 8);
|
|
176
|
+
romBytes = romBytes.subarray(2);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// A6: container/format sniff. Some dumps are interleaved/headered such that a
|
|
180
|
+
// FLAT read scrambles every byte → fake "bad instruction" noise everywhere.
|
|
181
|
+
// Detect + auto-correct, and warn so a flat disasm isn't silently wrong.
|
|
182
|
+
const warnings = [];
|
|
183
|
+
if (platform === "genesis") {
|
|
184
|
+
const smd = deinterleaveSmd(romBytes);
|
|
185
|
+
if (smd) {
|
|
186
|
+
romBytes = smd;
|
|
187
|
+
warnings.push("Genesis ROM was SMD-INTERLEAVED (512-byte header + byte-swapped 16KB blocks) — " +
|
|
188
|
+
"auto-deinterleaved before analysis. A flat read of the original would scramble every instruction.");
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
return { platform, romBytes, arch, bits: BITS[arch], approx, loadBase, warnings };
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/** Detect + reverse Sega Mega Drive SMD interleaving. An .smd dump is a 512-byte
|
|
195
|
+
* header followed by 16KB blocks where each block's first 8KB holds the ODD
|
|
196
|
+
* bytes and the second 8KB the EVEN bytes (interleaved). Returns the
|
|
197
|
+
* deinterleaved ROM, or null if the image isn't SMD-interleaved. */
|
|
198
|
+
export function deinterleaveSmd(bytes) {
|
|
199
|
+
// SMD: (N * 16KB) + 512-byte header. The header's byte 8 = 0xAA, byte 9 = 0xBB
|
|
200
|
+
// is the classic SMD magic; also the body length must be a multiple of 16KB.
|
|
201
|
+
if (bytes.length < 512 + 0x4000) return null;
|
|
202
|
+
const bodyLen = bytes.length - 512;
|
|
203
|
+
if (bodyLen % 0x4000 !== 0) return null;
|
|
204
|
+
const isSmdMagic = bytes[8] === 0xaa && bytes[9] === 0xbb;
|
|
205
|
+
// A plain .bin that happens to be (N*16KB)+512 is unusual; require the magic to
|
|
206
|
+
// avoid false positives on legitimately-sized flat ROMs.
|
|
207
|
+
if (!isSmdMagic) return null;
|
|
208
|
+
|
|
209
|
+
const body = bytes.subarray(512);
|
|
210
|
+
const out = new Uint8Array(bodyLen);
|
|
211
|
+
const blocks = bodyLen / 0x4000;
|
|
212
|
+
for (let b = 0; b < blocks; b++) {
|
|
213
|
+
const base = b * 0x4000;
|
|
214
|
+
for (let i = 0; i < 0x2000; i++) {
|
|
215
|
+
out[base + i * 2 + 1] = body[base + i]; // odd bytes (first 8KB)
|
|
216
|
+
out[base + i * 2] = body[base + 0x2000 + i]; // even bytes (second 8KB)
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
return out;
|
|
62
220
|
}
|
|
63
221
|
|
|
64
222
|
/** Hex-format an address the way agents expect for the platform width. */
|
|
@@ -69,8 +227,8 @@ function hx(n) { return "0x" + (n >>> 0).toString(16); }
|
|
|
69
227
|
* @returns {{platform, arch, count, dataCount, functions: Array<{address, addressHex, name, size, nbbs, cc, callers, callees, looksLikeData}>, warnings?: string[]}}
|
|
70
228
|
*/
|
|
71
229
|
export async function analyzeFunctions(romPath, platformOverride) {
|
|
72
|
-
const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
|
|
73
|
-
const fns = await runRizinJson({ romBytes, arch, bits, commands: "aaa; aflj" });
|
|
230
|
+
const { platform, romBytes, arch, bits, loadBase, warnings } = await loadContext(romPath, platformOverride);
|
|
231
|
+
const fns = await runRizinJson({ romBytes, arch, bits, baddr: loadBase || undefined, commands: "aaa; aflj" });
|
|
74
232
|
const functions = fns.map((f) => ({
|
|
75
233
|
address: f.offset,
|
|
76
234
|
addressHex: hx(f.offset),
|
|
@@ -80,8 +238,29 @@ export async function analyzeFunctions(romPath, platformOverride) {
|
|
|
80
238
|
cc: f.cc, // cyclomatic complexity
|
|
81
239
|
callers: f.indegree ?? (f.codexrefs?.length ?? 0),
|
|
82
240
|
callees: f.outdegree ?? 0,
|
|
241
|
+
// A3: rizin's flat sweep folds DATA regions into pseudo-functions with absurd
|
|
242
|
+
// `size` (megabyte "functions", phantoms exceeding the ROM). The honest
|
|
243
|
+
// signal is the BYTES-PER-BLOCK ratio, not raw size: real code averages tens
|
|
244
|
+
// of bytes per basic block; a "function" of thousands of bytes per block (or
|
|
245
|
+
// a single huge block with no control flow) is a data table / graphics blob
|
|
246
|
+
// mis-detected as a function. Flag it so agents don't waste a decompile on
|
|
247
|
+
// it. (A 16KB function with 35 blocks + cc 19 is a real big dispatcher — NOT
|
|
248
|
+
// flagged; size alone is the lie, the ratio isn't.)
|
|
249
|
+
looksLikeData:
|
|
250
|
+
(f.size ?? 0) > 0x400 &&
|
|
251
|
+
((f.nbbs ?? 0) <= 1 || (f.size ?? 0) / Math.max(1, f.nbbs ?? 1) > 1024),
|
|
83
252
|
}));
|
|
84
|
-
|
|
253
|
+
// Real code first: highest nbbs/cc, then smaller size — so the actual routines
|
|
254
|
+
// surface above the data-fold noise without the agent having to learn the rule.
|
|
255
|
+
functions.sort((a, b) =>
|
|
256
|
+
(a.looksLikeData ? 1 : 0) - (b.looksLikeData ? 1 : 0) ||
|
|
257
|
+
(b.nbbs ?? 0) - (a.nbbs ?? 0) ||
|
|
258
|
+
(b.cc ?? 0) - (a.cc ?? 0) ||
|
|
259
|
+
(a.size ?? 0) - (b.size ?? 0)
|
|
260
|
+
);
|
|
261
|
+
const dataCount = functions.filter((f) => f.looksLikeData).length;
|
|
262
|
+
return { platform, arch, count: functions.length, dataCount, functions,
|
|
263
|
+
...(warnings?.length ? { warnings } : {}) };
|
|
85
264
|
}
|
|
86
265
|
|
|
87
266
|
/**
|
|
@@ -90,13 +269,13 @@ export async function analyzeFunctions(romPath, platformOverride) {
|
|
|
90
269
|
*/
|
|
91
270
|
export async function analyzeCfg(romPath, address, platformOverride) {
|
|
92
271
|
if (address == null) throw new Error("analyze cfg: address required");
|
|
93
|
-
const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
|
|
272
|
+
const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
|
|
94
273
|
// afbj = basic blocks of the function as JSON: each block has addr/size/jump/
|
|
95
274
|
// fail/ninstr. `jump` is the taken edge; `fail` (present only on conditional
|
|
96
275
|
// blocks) is the fall-through. This is the structured CFG source — `agf json`
|
|
97
276
|
// only gives a text body blob with untyped out_nodes.
|
|
98
277
|
const blocks = await runRizinJson({
|
|
99
|
-
romBytes, arch, bits,
|
|
278
|
+
romBytes, arch, bits, baddr: loadBase || undefined,
|
|
100
279
|
commands: `aaa; af @ ${hx(address)}; afbj @ ${hx(address)}`,
|
|
101
280
|
});
|
|
102
281
|
if (!Array.isArray(blocks) || blocks.length === 0) {
|
|
@@ -129,10 +308,10 @@ export async function analyzeCfg(romPath, address, platformOverride) {
|
|
|
129
308
|
*/
|
|
130
309
|
export async function analyzeXrefs(romPath, address, platformOverride) {
|
|
131
310
|
if (address == null) throw new Error("analyze xrefs: address required");
|
|
132
|
-
const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
|
|
311
|
+
const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
|
|
133
312
|
let refs;
|
|
134
313
|
try {
|
|
135
|
-
refs = await runRizinJson({ romBytes, arch, bits, commands: `aaa; axtj @ ${hx(address)}` });
|
|
314
|
+
refs = await runRizinJson({ romBytes, arch, bits, baddr: loadBase || undefined, commands: `aaa; axtj @ ${hx(address)}` });
|
|
136
315
|
} catch (e) {
|
|
137
316
|
// axtj prints nothing (not even `[]`) when there are zero refs → our JSON
|
|
138
317
|
// guard throws. Treat "no JSON" as "no refs".
|
|
@@ -154,11 +333,12 @@ export async function analyzeXrefs(romPath, address, platformOverride) {
|
|
|
154
333
|
* analysis pass. The "give me the shape of this ROM" call.
|
|
155
334
|
*/
|
|
156
335
|
export async function analyzeStructure(romPath, platformOverride) {
|
|
157
|
-
const { platform, romBytes, arch, bits } = await loadContext(romPath, platformOverride);
|
|
336
|
+
const { platform, romBytes, arch, bits, loadBase } = await loadContext(romPath, platformOverride);
|
|
337
|
+
const baddr = loadBase || undefined;
|
|
158
338
|
const [fns, strings, entries] = await Promise.all([
|
|
159
|
-
runRizinJson({ romBytes, arch, bits, commands: "aaa; aflj" }).catch(() => []),
|
|
160
|
-
runRizinJson({ romBytes, arch, bits, commands: "aaa; izj" }).catch(() => []),
|
|
161
|
-
runRizinJson({ romBytes, arch, bits, commands: "aaa; iej" }).catch(() => []),
|
|
339
|
+
runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; aflj" }).catch(() => []),
|
|
340
|
+
runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; izj" }).catch(() => []),
|
|
341
|
+
runRizinJson({ romBytes, arch, bits, baddr, commands: "aaa; iej" }).catch(() => []),
|
|
162
342
|
]);
|
|
163
343
|
return {
|
|
164
344
|
platform, arch,
|
|
@@ -267,16 +447,65 @@ export function buildSnesCpuImage(romBytes, mapperHint) {
|
|
|
267
447
|
return { image, isLo: false };
|
|
268
448
|
}
|
|
269
449
|
|
|
450
|
+
/**
 * Bank-aware NES image for the decompiler (A1).
 *
 * Rizin maps an iNES PRG as ONE flat $8000-based segment, so a `functions`
 * address is a FLAT-PRG VA ($8000 + flat offset) — bank 0 at $8000-$BFFF, bank 1
 * at $C000-$FFFF, bank 2 at $10000+, etc. Decompiling that flat image is
 * bank-blind: an in-code `JSR $9123` (a real CPU address) resolves to flat
 * $9123 = bank 0, even when the calling code lives in bank 3 → halt_baddata /
 * garbage (11/12 top functions on a banked cart, empirically).
 *
 * Fix: from the flat VA, recover which 16KB PRG bank the function is in, then
 * build a 64KB buffer laid out by CPU address — that bank at $8000-$BFFF, the
 * FIXED top bank at $C000-$FFFF, everything below $8000 left zero — and
 * decompile at the function's REAL CPU address. Now in-bank calls AND
 * fixed-bank ($C000+) calls both resolve.
 *
 * NOTE(review): PRG is assumed to start right after the 16-byte header. A
 * 512-byte trainer (iNES flags-6 bit 2) would shift it; that is not handled
 * here — confirm it matches how the flat VAs passed in were produced.
 *
 * @param {Uint8Array} romBytes  Raw cart file bytes (header included).
 * @param {number} flatVa        Flat-PRG virtual address ($8000 + PRG offset).
 * @returns {{ image: Uint8Array, cpuAddr: number, bank: number } | null} null if
 *   this is not a banked iNES image the window can be built from: bad magic,
 *   truncated header, PRG ≤ 32KB (NROM is fine flat), file shorter than the PRG
 *   size the header declares, or `flatVa` outside the PRG. The caller falls
 *   back to the flat path.
 */
export function buildNesBankImage(romBytes, flatVa) {
  const HEADER_SIZE = 16;
  const BANK_SIZE = 0x4000;
  const SWITCHABLE_BASE = 0x8000;
  const FIXED_BASE = 0xc000;

  // Without a full header, byte 4 (the PRG bank count) is missing and every
  // size computed from it would be NaN — NaN comparisons are all false, so the
  // range checks below would silently pass. Bail out first.
  if (romBytes.length < HEADER_SIZE) return null;

  // "NES" followed by MS-DOS EOF.
  const isInes =
    romBytes[0] === 0x4e && romBytes[1] === 0x45 && romBytes[2] === 0x53 && romBytes[3] === 0x1a;
  if (!isInes) return null;

  const prgBanks16k = romBytes[4];
  const prgSize = prgBanks16k * BANK_SIZE;
  if (prgSize <= 0x8000) return null; // NROM-128/256 — flat is correct

  // subarray() clamps to the buffer, so a short file yields a short view. A
  // dump missing part of its declared PRG would otherwise be decompiled
  // against zero-filled banks (the fixed top bank is the last one in the file).
  const prg = romBytes.subarray(HEADER_SIZE, HEADER_SIZE + prgSize);
  if (prg.length < prgSize) return null;

  // rizin flat VA → flat PRG offset (segment based at $8000).
  const flatOff = (flatVa >>> 0) - SWITCHABLE_BASE;
  if (flatOff < 0 || flatOff >= prgSize) return null;

  const bank = Math.floor(flatOff / BANK_SIZE); // which 16KB bank
  const inBank = flatOff % BANK_SIZE;           // offset within it
  const topBank = prgBanks16k - 1;              // fixed top bank

  // CPU-addressed buffer: chosen bank at $8000, fixed top bank at $C000.
  const image = new Uint8Array(0x10000);
  image.set(prg.subarray(bank * BANK_SIZE, (bank + 1) * BANK_SIZE), SWITCHABLE_BASE);
  image.set(prg.subarray(topBank * BANK_SIZE, (topBank + 1) * BANK_SIZE), FIXED_BASE);

  // The function's real CPU address: code in the fixed top bank runs from the
  // $C000 slot; anything else runs from the switchable $8000 slot.
  const cpuAddr = (bank === topBank ? FIXED_BASE : SWITCHABLE_BASE) + inBank;
  return { image, cpuAddr, bank };
}
|
|
494
|
+
|
|
270
495
|
/**
|
|
271
496
|
* Decompile the function containing `address` to C pseudocode (Ghidra).
|
|
272
|
-
* @returns {{platform, langid, address, code, warnings, qualityNote}}
|
|
497
|
+
* @returns {{platform, langid, address, code, warnings, qualityNote, bank?}}
|
|
273
498
|
*/
|
|
274
499
|
export async function analyzeDecompile(romPath, address, platformOverride) {
|
|
275
500
|
if (address == null) throw new Error("analyze decompile: address required");
|
|
276
501
|
const platform = platformOverride ?? sniffPlatform(romPath);
|
|
277
502
|
if (!platform) throw new Error(`analyze decompile: unknown platform for '${path.basename(romPath)}'`);
|
|
278
503
|
if (!SLEIGH_LANGID[platform]) throw new Error(`analyze decompile: unsupported platform '${platform}'`);
|
|
279
|
-
|
|
504
|
+
let romBytes = new Uint8Array(await readFile(romPath));
|
|
505
|
+
// A6: deinterleave SMD Genesis dumps here too (analyzeDecompile reads the file
|
|
506
|
+
// directly, not via loadContext) — a flat read of an interleaved ROM decodes
|
|
507
|
+
// to pure garbage.
|
|
508
|
+
if (platform === "genesis") romBytes = deinterleaveSmd(romBytes) ?? romBytes;
|
|
280
509
|
|
|
281
510
|
// SNES: banked 24-bit space. `address` is a LoROM/HiROM CPU address (what
|
|
282
511
|
// target='functions'/'cfg' report). Lay the cart out by CPU address so BOTH
|
|
@@ -297,35 +526,50 @@ export async function analyzeDecompile(romPath, address, platformOverride) {
|
|
|
297
526
|
return {
|
|
298
527
|
platform, langid: rs.langid,
|
|
299
528
|
address, addressHex: hx(address),
|
|
300
|
-
code: rs.code, warnings: rs.warnings,
|
|
529
|
+
code: prettyDecompile(rs.code, platform), warnings: rs.warnings,
|
|
301
530
|
qualityNote: "medium (65816 variable register width)",
|
|
302
531
|
};
|
|
303
532
|
}
|
|
304
533
|
|
|
534
|
+
// NES banked carts (A1): rizin reports flat-PRG VAs ($8000-based); decompiling
|
|
535
|
+
// that flat image is bank-blind (cross-bank JSR/JMP land on the wrong bank).
|
|
536
|
+
// Build a real 32KB CPU window (this bank @ $8000 + fixed top bank @ $C000) so
|
|
537
|
+
// in-bank AND fixed-bank calls resolve. NROM falls through to the flat path.
|
|
538
|
+
if (platform === "nes") {
|
|
539
|
+
const banked = buildNesBankImage(romBytes, address);
|
|
540
|
+
if (banked) {
|
|
541
|
+
const rn = await decompileFunction({ platform, romBytes: banked.image, fileOffset: banked.cpuAddr });
|
|
542
|
+
return {
|
|
543
|
+
platform, langid: rn.langid,
|
|
544
|
+
address, addressHex: hx(address),
|
|
545
|
+
bank: banked.bank,
|
|
546
|
+
code: prettyDecompile(rn.code, platform), warnings: rn.warnings,
|
|
547
|
+
qualityNote: "rough (6502 architecture limit)",
|
|
548
|
+
};
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
|
|
305
552
|
// Use rizin's loader mapping to turn the VA (what the user sees from
|
|
306
553
|
// target='functions') into the file offset the raw decompiler image needs.
|
|
307
554
|
// PCE uses the 6502 plugin only for the map/loader (HuC6280 decode is the
|
|
308
555
|
// decompiler's job via SLEIGH) — its flat image bases at 0 either way.
|
|
309
556
|
const arch = RIZIN_ARCH[platform] ?? "6502";
|
|
310
557
|
const bits = { arm: 32, m68k: 32, snes: 16 }[arch];
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
)
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
//
|
|
321
|
-
//
|
|
322
|
-
//
|
|
323
|
-
//
|
|
324
|
-
//
|
|
325
|
-
//
|
|
326
|
-
// CPU base so absolute references ($8000/$C000/$F000) resolve. 7800's base is
|
|
327
|
-
// size-dependent (16KB→$C000, 32KB→$8000); 7800 carts may carry a 128-byte
|
|
328
|
-
// header before the body.
|
|
558
|
+
|
|
559
|
+
const QUALITY = {
|
|
560
|
+
gba: "excellent (ARM)", genesis: "excellent (M68K)",
|
|
561
|
+
gb: "good (SM83)", gbc: "good (SM83)", sms: "good (Z80)", gg: "good (Z80)", msx: "good (Z80)",
|
|
562
|
+
snes: "medium (65816 variable register width)", pce: "medium (HuC6280)",
|
|
563
|
+
nes: "rough (6502 architecture limit)", atari2600: "rough (6502)", atari7800: "rough (6502)",
|
|
564
|
+
c64: "rough (6502)", lynx: "rough (65C02)",
|
|
565
|
+
};
|
|
566
|
+
|
|
567
|
+
// FORCED-BASE platforms (headerless/load-header 6502 carts): rizin/our
|
|
568
|
+
// analysis bases these at a known CPU address, so `address` IS a CPU address
|
|
569
|
+
// and `functions` already reported it as such. Strip any header, left-pad the
|
|
570
|
+
// body so file offset == CPU address, and decompile at `address` directly.
|
|
571
|
+
// 2600 → $F000; 7800 → size-dependent $4000/$8000/$C000 (+128B header if "AT…");
|
|
572
|
+
// c64 .prg → the 2-byte load-address header's value (e.g. $0801).
|
|
329
573
|
let forcedBase = 0, bodyStart = 0;
|
|
330
574
|
if (platform === "atari2600") {
|
|
331
575
|
forcedBase = 0xf000;
|
|
@@ -335,28 +579,56 @@ export async function analyzeDecompile(romPath, address, platformOverride) {
|
|
|
335
579
|
bodyStart = hasHdr ? 128 : 0;
|
|
336
580
|
const body = romBytes.length - bodyStart;
|
|
337
581
|
forcedBase = body <= 0x4000 ? 0xc000 : body <= 0x8000 ? 0x8000 : 0x4000;
|
|
582
|
+
} else if (platform === "c64" && romBytes.length >= 2) {
|
|
583
|
+
bodyStart = 2;
|
|
584
|
+
forcedBase = romBytes[0] | (romBytes[1] << 8);
|
|
585
|
+
}
|
|
586
|
+
if (forcedBase > 0 && forcedBase <= 0x10000) {
|
|
587
|
+
const body = romBytes.subarray(bodyStart);
|
|
588
|
+
// Accept `address` as EITHER a CPU address (≥ forcedBase, what functions
|
|
589
|
+
// reports once baddr is applied) OR a raw body file-offset (< forcedBase,
|
|
590
|
+
// legacy callers / direct offsets). Normalize to a CPU address.
|
|
591
|
+
const a = address >>> 0;
|
|
592
|
+
const cpuAddr = a >= forcedBase ? a : forcedBase + a;
|
|
593
|
+
if (cpuAddr < forcedBase || cpuAddr >= forcedBase + body.length) {
|
|
594
|
+
throw new Error(
|
|
595
|
+
`decompile: address ${hx(a)} is outside the ${platform} CPU image ` +
|
|
596
|
+
`($${forcedBase.toString(16)}-$${(forcedBase + body.length).toString(16)}).`
|
|
597
|
+
);
|
|
598
|
+
}
|
|
599
|
+
const padded = new Uint8Array(forcedBase + body.length);
|
|
600
|
+
padded.set(body, forcedBase);
|
|
601
|
+
const rf = await decompileFunction({ platform, romBytes: padded, fileOffset: cpuAddr });
|
|
602
|
+
return {
|
|
603
|
+
platform, langid: rf.langid, address, addressHex: hx(address),
|
|
604
|
+
code: prettyDecompile(rf.code, platform), warnings: rf.warnings, qualityNote: QUALITY[platform] ?? "unknown",
|
|
605
|
+
};
|
|
338
606
|
}
|
|
339
|
-
|
|
607
|
+
|
|
608
|
+
// Other platforms: use rizin's loader mapping to turn the CPU VA into the file
|
|
609
|
+
// offset the raw decompiler image needs. Rizin's map gives `vbase` when it
|
|
610
|
+
// knows the base; left-pad by it so file offset == CPU address for the cases
|
|
611
|
+
// where the code references absolute addresses.
|
|
612
|
+
const { paddr, vbase } = await vaMapping(romBytes, arch, bits, address, platform);
|
|
613
|
+
if (paddr < 0 || paddr >= romBytes.length) {
|
|
614
|
+
throw new Error(
|
|
615
|
+
`decompile: address ${hx(address)} maps to file offset ${paddr}, outside the ` +
|
|
616
|
+
`${romBytes.length}-byte image for ${platform}.`
|
|
617
|
+
);
|
|
618
|
+
}
|
|
619
|
+
const base = vbase;
|
|
340
620
|
let image = romBytes, decompAddr = paddr;
|
|
341
621
|
if (base > 0 && base <= 0x10000) {
|
|
342
|
-
const
|
|
343
|
-
|
|
344
|
-
padded.set(body, base);
|
|
622
|
+
const padded = new Uint8Array(base + romBytes.length);
|
|
623
|
+
padded.set(romBytes, base);
|
|
345
624
|
image = padded;
|
|
346
|
-
decompAddr = base +
|
|
625
|
+
decompAddr = base + paddr; // CPU address of the function
|
|
347
626
|
}
|
|
348
627
|
const r = await decompileFunction({ platform, romBytes: image, fileOffset: decompAddr });
|
|
349
|
-
const QUALITY = {
|
|
350
|
-
gba: "excellent (ARM)", genesis: "excellent (M68K)",
|
|
351
|
-
gb: "good (SM83)", gbc: "good (SM83)", sms: "good (Z80)", gg: "good (Z80)", msx: "good (Z80)",
|
|
352
|
-
snes: "medium (65816 variable register width)", pce: "medium (HuC6280)",
|
|
353
|
-
nes: "rough (6502 architecture limit)", atari2600: "rough (6502)", atari7800: "rough (6502)",
|
|
354
|
-
c64: "rough (6502)", lynx: "rough (65C02)",
|
|
355
|
-
};
|
|
356
628
|
return {
|
|
357
629
|
platform, langid: r.langid,
|
|
358
630
|
address, addressHex: hx(address),
|
|
359
|
-
code: r.code,
|
|
631
|
+
code: prettyDecompile(r.code, platform),
|
|
360
632
|
warnings: r.warnings,
|
|
361
633
|
qualityNote: QUALITY[platform] ?? "unknown",
|
|
362
634
|
};
|
package/src/analysis/rizin.js
CHANGED
|
@@ -77,7 +77,7 @@ export const RIZIN_ARCH = {
|
|
|
77
77
|
* @returns {Promise<{exitCode:number, output:string, log:string, crash?:object}>}
|
|
78
78
|
*/
|
|
79
79
|
export async function runRizin(opts) {
|
|
80
|
-
const { romPath, romBytes, commands, arch, bits, baddr, writeable } = opts;
|
|
80
|
+
const { romPath, romBytes, commands, arch, bits, baddr, writeable, timeoutMs } = opts;
|
|
81
81
|
if (!commands) throw new Error("runRizin: commands required");
|
|
82
82
|
const bytes = romBytes ?? new Uint8Array(await readFile(romPath));
|
|
83
83
|
|
|
@@ -102,6 +102,11 @@ export async function runRizin(opts) {
|
|
|
102
102
|
const res = await runIsolated({
|
|
103
103
|
gluePath: rizinGluePath(),
|
|
104
104
|
argv,
|
|
105
|
+
// A5: per-call timeout so a hung analysis (whole-ROM `aaa` on a multi-MB ROM)
|
|
106
|
+
// can't wedge the shared worker pool — on timeout the worker is killed +
|
|
107
|
+
// recycled and this call returns a clean { timedOut, log } result. Default
|
|
108
|
+
// 60s; callers can override (a scoped `af @ addr` pass is near-instant).
|
|
109
|
+
timeoutMs: timeoutMs ?? 60000,
|
|
105
110
|
inputFiles: [{
|
|
106
111
|
vfsPath: "/work/rom.bin",
|
|
107
112
|
encoding: "base64",
|
|
@@ -109,6 +114,13 @@ export async function runRizin(opts) {
|
|
|
109
114
|
}],
|
|
110
115
|
outputFiles: [{ vfsPath: OUT, encoding: "utf8" }],
|
|
111
116
|
});
|
|
117
|
+
// Surface a timeout as a thrown error so JSON callers get a clear signal
|
|
118
|
+
// (runRizinJson already wraps crashes; this makes the timeout explicit).
|
|
119
|
+
if (res.timedOut) {
|
|
120
|
+
const e = new Error(res.log?.trim() || "rizin analysis timed out");
|
|
121
|
+
/** @type {any} */ (e).timedOut = true;
|
|
122
|
+
throw e;
|
|
123
|
+
}
|
|
112
124
|
return { ...res, output: res.outputs?.[OUT] ?? "" };
|
|
113
125
|
}
|
|
114
126
|
|