romdevtools 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@
4
4
  // The MCP path registers 34 tools via registerTools(server, z, sessionKey),
5
5
  // where `server` is an McpServer and each handler closes over `sessionKey` for
6
6
  // per-session host isolation. The HTTP surfaces (POST /tool/{name},
7
- // /romdev-skill.md, /openapi.json, /documentation) want the EXACT same handlers,
7
+ // /skills/romdev/SKILL.md, /openapi.json, /documentation) want the EXACT same handlers,
8
8
  // schemas, and clean-error behavior — just reached over plain HTTP.
9
9
  //
10
10
  // Rather than duplicate anything, we run the same registration against a minimal
@@ -17,6 +17,7 @@
17
17
  import { z } from "zod";
18
18
  import { registerTools } from "../mcp/tools/index.js";
19
19
  import { withClearToolErrors } from "../mcp/util.js";
20
+ import { observer, summarizeForLog, extractImages } from "../observer/bus.js";
20
21
 
21
22
  /**
22
23
  * Build a tool registry for a given session key. Each entry's handler closes
@@ -86,8 +87,27 @@ export function buildToolRegistry(sessionKey) {
86
87
  * @param {object} args the request body
87
88
  * @returns {Promise<{ok:true, result:any}|{ok:false, error:string}>}
88
89
  */
89
- export async function runTool(tool, args) {
90
+ export async function runTool(tool, args, sessionKey) {
90
91
  const a = args ?? {};
92
+ const startedAt = Date.now();
93
+ // Emit the SAME `call` event the MCP path's observer middleware emits, so the
94
+ // /livestream view updates for HTTP/skill tool calls too (the MCP path wraps
95
+ // server.tool with installObserverMiddleware; the HTTP path runs handlers
96
+ // directly, so we emit here — the single HTTP execution chokepoint).
97
+ const emit = (extra) => {
98
+ try {
99
+ observer.push({
100
+ type: "call",
101
+ sessionKey: sessionKey ?? "http",
102
+ ts: startedAt,
103
+ tool: tool.name,
104
+ args: summarizeForLog(a),
105
+ durationMs: Date.now() - startedAt,
106
+ ...extra,
107
+ });
108
+ } catch { /* never let the observer kill a tool call */ }
109
+ };
110
+
91
111
  // Parse against the strict schema if we have a built zod object.
92
112
  const schema = tool.inputSchema;
93
113
  if (schema && typeof schema === "object" && "_def" in schema && typeof schema.safeParse === "function") {
@@ -96,6 +116,7 @@ export async function runTool(tool, args) {
96
116
  // surface the friendly first-issue message (withClearToolErrors / global map)
97
117
  const issue = parsed.error?.issues?.[0];
98
118
  const msg = (issue && issue.message) || "invalid arguments";
119
+ emit({ ok: false, error: msg });
99
120
  return { ok: false, error: msg };
100
121
  }
101
122
  }
@@ -104,22 +125,68 @@ export async function runTool(tool, args) {
104
125
  // Unwrap the MCP content envelope to plain JSON for HTTP clients.
105
126
  if (r && r.isError) {
106
127
  const text = r.content?.[0]?.text ?? "tool error";
128
+ emit({ ok: false, error: text });
107
129
  return { ok: false, error: text };
108
130
  }
131
+ const images = extractImages(r);
109
132
  const text = r?.content?.[0]?.text;
110
133
  if (typeof text === "string") {
111
134
  // most tools return jsonContent(...) → text is JSON; parse it back so the
112
135
  // HTTP response is real JSON, not a JSON-string-in-a-field.
113
- try { return { ok: true, result: JSON.parse(text) }; }
114
- catch { return { ok: true, result: { text } }; }
136
+ let parsed;
137
+ try { parsed = JSON.parse(text); } catch { parsed = { text }; }
138
+ // TRANSPORT-UNIFORM FAILURE MAPPING: a tool can signal failure either by
139
+ // throwing (→ isError above) OR by RETURNING a failure-shaped result
140
+ // ({ok:false} / {error} / {opened:false} / {applied:false} ...). On REST,
141
+ // a 200 with a failure in the body is invisible — the caller sees success
142
+ // and never reads the body. So we detect a failure-shaped result here and
143
+ // map it to ok:false (→ HTTP 400) for EVERY tool, no per-tool special-
144
+ // casing. (`notSupported`/`matched:false` are NOT failures — see below.)
145
+ if (looksLikeFailure(parsed)) {
146
+ const err = parsed.error ?? parsed.message ?? "tool reported failure";
147
+ emit({ ok: false, error: err });
148
+ return { ok: false, error: err, result: parsed };
149
+ }
150
+ emit({ ok: true, result: summarizeForLog(parsed), ...(images.length ? { images } : {}) });
151
+ return { ok: true, result: parsed };
115
152
  }
116
153
  // image / multi-part content: hand back the raw content array.
117
- return { ok: true, result: r?.content ? { content: r.content } : (r ?? {}) };
154
+ const result = r?.content ? { content: r.content } : (r ?? {});
155
+ emit({ ok: true, result: summarizeForLog(result), ...(images.length ? { images } : {}) });
156
+ return { ok: true, result };
118
157
  } catch (e) {
158
+ emit({ ok: false, error: e?.message ?? String(e) });
119
159
  return { ok: false, error: e?.message ?? String(e) };
120
160
  }
121
161
  }
122
162
 
163
+ // A RETURNED result is a FAILURE (→ non-2xx) when it carries an explicit failure
164
+ // signal: a `false` on a verb-status flag, or a top-level `error` string. This is
165
+ // the single rule that makes every tool behave the same on the transport — a tool
166
+ // can fail by throwing or by returning one of these, and either way the caller
167
+ // gets a non-2xx it can't ignore.
168
+ //
169
+ // NOT failures (these are valid ANSWERS / STATE, stay 2xx):
170
+ // • notSupported:true — the feature genuinely isn't on this platform/core
171
+ // • matched:false / found:false / hit:false — a lookup whose answer is "no"
172
+ // • looksLikeGraphic:false — a classification result
173
+ // • loaded:false / paused:false — STATE fields (is a ROM loaded? is it paused?),
174
+ // not "the action failed". This is why the flag list is DELIBERATELY narrow:
175
+ // only generic verdict flags + a couple of unambiguous action verbs. Anything
176
+ // else that wants to signal failure must do it with a top-level `error` string
177
+ // (or throw) — both of which are unambiguous.
178
+ const FAILURE_FLAGS = ["ok", "success", "opened", "applied"];
179
+ function looksLikeFailure(parsed) {
180
+ if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return false;
181
+ // A top-level error string is unambiguous.
182
+ if (typeof parsed.error === "string" && parsed.error) return true;
183
+ // A generic verdict / unambiguous-action flag explicitly set to false.
184
+ for (const f of FAILURE_FLAGS) {
185
+ if (parsed[f] === false) return true;
186
+ }
187
+ return false;
188
+ }
189
+
123
190
  /**
124
191
  * Convert a tool's stored inputSchema (a strict zod object, or a raw shape if
125
192
  * the stamp didn't take) to a JSON Schema (zod v4 native). Used by the OpenAPI
package/src/mcp/server.js CHANGED
@@ -57,7 +57,7 @@ const PKG_VERSION = (() => {
57
57
  // AGENTS.md is the CHANNEL-NEUTRAL body (workflow knowledge, footguns, per-platform
58
58
  // docs) — it must not contain "how to connect / how to call" prose, because that
59
59
  // differs per delivery channel. The MCP channel prepends mcpPreamble ("call the
60
- // MCP tools…", never mentions HTTP routes); the skill channel (GET /romdev-skill.md)
60
+ // MCP tools…", never mentions HTTP routes); the skill channel (GET /skills/romdev/SKILL.md)
61
61
  // prepends skillPreamble ("POST /tool/{name}…", never mentions MCP). Both live in
62
62
  // src/http/skill-doc.js so neither leaks into the other surface.
63
63
  async function loadAgentsBody() {
@@ -351,7 +351,7 @@ async function main() {
351
351
  });
352
352
 
353
353
  // ── HTTP tool surface (the non-MCP way to drive romdev) ───────────────────
354
- // POST /tool/:name + /openapi.json + /documentation + /romdev-skill.md, all
354
+ // POST /tool/:name + /openapi.json + /documentation + /skills/romdev/SKILL.md, all
355
355
  // generated from the same tool registry the MCP path uses. Same Express app,
356
356
  // same localhost trust, per-agent dynamic sessions. Lets MCP-wary users (or
357
357
  // agents that prefer the Agent Skills standard) use romdev with near-zero
@@ -410,10 +410,11 @@ async function main() {
410
410
  process.exit(1);
411
411
  });
412
412
  httpServer.on("listening", () => {
413
- log.info(`romdev listening on http://${bannerHost}:${port}/mcp`);
414
413
  log.info("");
415
- log.info(`prefer a skill? save: http://${bannerHost}:${port}/romdev-skill.md`);
416
- log.info(`browse/try the tools: http://${bannerHost}:${port}/documentation`);
414
+ log.info(`romdev (v${PKG_VERSION}) listening on http://${bannerHost}:${port}/mcp`);
415
+ log.info("");
416
+ log.info(`prefer a skill? save: http://${bannerHost}:${port}/skills/romdev/SKILL.md`);
417
+ log.info("");
417
418
  log.info(`optional observer: http://${bannerHost}:${port}/livestream`);
418
419
  log.info("");
419
420
  log.info("connect your coding agent: https://github.com/monteslu/romdev#connect");
package/src/mcp/state.js CHANGED
@@ -23,12 +23,14 @@ export function getHost(sessionKey) {
23
23
  if (!host) {
24
24
  throw new Error(
25
25
  "No ROM loaded in this session — call loadMedia({path}) first. " +
26
- "If you WERE mid-session and just got reconnected (the server restarted, " +
27
- "or your session expired and your client re-initialized): the emulator " +
28
- "state is held in server memory only, so it did not survive just " +
29
- "re-run loadMedia({path}) with the ROM you were working on (it's still on " +
30
- "disk) to pick back up. Re-applying any in-progress changes means " +
31
- "rebuilding/reloading; a fresh boot is the recovery point.",
26
+ "If you DID loadMedia and still see this, your calls are landing in DIFFERENT " +
27
+ "sessions: over plain HTTP/skill you must send the SAME `x-romdev-session` " +
28
+ "header on every call (pick one stable id and reuse it) a new/missing id is " +
29
+ "a fresh empty session each time. " +
30
+ "If you WERE mid-session and just got reconnected (the server restarted or " +
31
+ "your session expired): emulator state is held in server memory only, so it " +
32
+ "did not survive — re-run loadMedia({path}) with your ROM (still on disk) to " +
33
+ "pick back up. A fresh boot is the recovery point.",
32
34
  );
33
35
  }
34
36
  return host;
@@ -37,7 +37,7 @@ export const MERGE_MAP = {
37
37
  host: { absorbs: ["unloadMedia", "shutdown", "reset", "pause", "resume"], axis: "op" },
38
38
  // ── frame (step/screenshot/stepAndShot/stepInstruction; stepInstruction folded from watch-memory.js) ──
39
39
  frame: { absorbs: ["stepFrames", "screenshot", "stepAndScreenshot", "stepInstruction"], axis: "op" },
40
- // ── scaffold (project/game + snippets; patchGbHeader stays standalone in project.js) ──
40
+ // ── scaffold (project/game + snippets; patchGbHeader folded into romPatch op:'gbHeader') ──
41
41
  scaffold: { absorbs: ["createProject", "createGame", "starterSnippets", "copyStarterSnippets"], axis: "op" },
42
42
  // ── cart (identify/extract/wrap; identifyRom from rom-id.js, rest from cart-parts.js) ──
43
43
  cart: { absorbs: ["identifyRom", "extractCart", "wrapRomFromParts"], axis: "op" },
@@ -61,14 +61,14 @@ export const MERGE_MAP = {
61
61
  cpu: { absorbs: ["getCPUState", "setRegister", "callSubroutine", "decompressWith"], axis: "op" },
62
62
  // ── breakpoint (STOP-on-first; all 4 from watch-memory.js) ──
63
63
  breakpoint: { absorbs: ["findWriter", "runUntilWrite", "runUntilPC", "runUntilRead"], axis: "on" },
64
- // ── watch (LOG-ALL; all 3 from watch-memory.js) ──
65
- watch: { absorbs: ["watchMemory", "watchRange", "logPCRange"], axis: "on" },
66
- // ── dmaTrace (Genesis VDP-DMA; watchDma from watch-memory.js, traceVramSource from trace-vram-source.js) ──
67
- dmaTrace: { absorbs: ["watchDma", "traceVramSource"], axis: "precision" },
64
+ // ── watch (LOG-ALL; watchMemory/watchRange/logPCRange + Genesis VDP-DMA trace
65
+ // on:'dma' from watchDma/traceVramSource — all from watch-memory.js +
66
+ // trace-vram-source.js. dmaTrace was folded in as watch({on:'dma'}).) ──
67
+ watch: { absorbs: ["watchMemory", "watchRange", "logPCRange", "watchDma", "traceVramSource"], axis: "on" },
68
68
  // ── build (compile/run; buildSource/buildProject/runSource from toolchain.js, buildSourceWithDebug from symbols.js). ENTRY-TIER. ──
69
69
  build: { absorbs: ["buildSource", "buildSourceWithDebug", "buildProject", "runSource"], axis: "output" },
70
- // ── romPatch (8-op ROM-hack toolkit; patchFile/patchRom from rom-id.js, spliceCHR from splice-chr.js, relocateBlock/makeStoredBlock/findPointerTo from reinject.js, findFreeSpace from free-space.js, diffRoms from diff-roms.js) ──
71
- romPatch: { absorbs: ["patchFile", "patchRom", "spliceCHR", "relocateBlock", "makeStoredBlock", "findFreeSpace", "findPointerTo", "diffRoms"], axis: "op" },
70
+ // ── romPatch (9-op ROM-hack toolkit; patchFile/patchRom from rom-id.js, spliceCHR from splice-chr.js, relocateBlock/makeStoredBlock/findPointerTo from reinject.js, findFreeSpace from free-space.js, diffRoms from diff-roms.js, patchGbHeader as op:'gbHeader') ──
71
+ romPatch: { absorbs: ["patchFile", "patchRom", "spliceCHR", "relocateBlock", "makeStoredBlock", "findFreeSpace", "findPointerTo", "diffRoms", "patchGbHeader"], axis: "op" },
72
72
  // ── catalog (orient; listCategories + getStatus, both entry-tier in index.js) ──
73
73
  catalog: { absorbs: ["listCategories", "getStatus"], axis: "op" },
74
74
  // ── playtest (show-a-human window FSM; all 4 from playtest.js). ENTRY-TIER. ──
@@ -294,8 +294,9 @@ export function registerCheatTools(server, z, sessionKey) {
294
294
  "Cheat lookup / search / apply / create for the loaded ROM. `op`: " +
295
295
  "'lookup' (THIS game's known cheats from the bundled DB — returns labeled RAM addresses + Game Genie/ROM code " +
296
296
  "sites, so it answers 'which byte holds X?' for free); " +
297
- "'search' (fuzzy-find a game by NAME when you don't have the exact No-Intro title — returns game names + cheat " +
298
- "counts, then lookup the chosen one); " +
297
+ "'search' (fuzzy-find a game by NAME when you don't have the exact No-Intro title — searches ALL platforms by " +
298
+ "default and each match reports its own `platform`, so you don't need to know the console; pass `platform` only " +
299
+ "to scope it. Returns game names + cheat counts; then lookup the chosen one with its platform); " +
299
300
  "'apply' (enable a cheat on the LOADED game — pass a raw `code` or a `desc` from lookup); " +
300
301
  "'clear' (remove all active cheats); 'make' (CREATE a shareable code from an address+value). " +
301
302
  "TRUST: lookup matches by NAME/fuzzy similarity, NOT a verified CRC — a PROBABLE match. Labels are usually " +
@@ -322,7 +323,7 @@ export function registerCheatTools(server, z, sessionKey) {
322
323
  index: z.number().int().min(0).optional().describe("op=apply: cheat slot (default: next free slot). Reuse a slot to replace it."),
323
324
  enabled: z.boolean().default(true).describe("op=apply: false disables the slot instead of enabling."),
324
325
  // make / search / lookup share `platform`
325
- platform: z.enum([...MAKE_CHEAT_PLATFORMS]).optional().describe("op=lookup: override platform detection. op=search/make: REQUIRED — the target platform (all 14 tier-1)."),
326
+ platform: z.enum([...MAKE_CHEAT_PLATFORMS]).optional().describe("op=lookup: override platform detection. op=search: OPTIONAL — omit to search ALL platforms (each match returns its own `platform`); pass one only to scope the search. op=make: REQUIRED — the target platform (all 14 tier-1)."),
326
327
  address: z.number().int().min(0).optional().describe("op=make: address to cheat (RAM addr, or the ROM addr to patch)."),
327
328
  value: z.number().int().min(0).max(255).optional().describe("op=make: replacement byte (0-255). Provide value OR values."),
328
329
  values: z.array(z.number().int().min(0).max(255)).min(1).max(64).optional().describe("op=make: batch — a code per value at the same address. Returns variants[]."),
@@ -60,9 +60,22 @@ import { jsonContent, safeTool, withClearToolErrors } from "../util.js";
60
60
  import { getHostOrNull, setDisclosure } from "../state.js";
61
61
  import { MERGE_MAP } from "../tool-manifest.js";
62
62
  import { readFile } from "node:fs/promises";
63
+ import { readFileSync } from "node:fs";
63
64
  import { fileURLToPath } from "node:url";
64
65
  import { dirname, join } from "node:path";
65
66
 
67
+ // Package version — surfaced by catalog({op:'status'|'whatsNew'}) so an agent can
68
+ // check the running romdev version with a plain TOOL CALL (works over MCP AND the
69
+ // HTTP/skill surface), e.g. to detect a saved skill is stale. (GET /healthz also
70
+ // reports it for non-tool HTTP clients.)
71
+ const PKG_VERSION = (() => {
72
+ try {
73
+ return JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "package.json"), "utf8")).version;
74
+ } catch {
75
+ return "0.0.0";
76
+ }
77
+ })();
78
+
66
79
  // catalog({op:'whatsNew'}): the recent CHANGELOG + an old→new RENAME TABLE
67
80
  // derived from MERGE_MAP (the single source of truth for the consolidation), so
68
81
  // an agent resuming a handoff written against an older server can re-map every
@@ -91,6 +104,7 @@ async function buildWhatsNew() {
91
104
  changelog = sections.slice(0, 3).join("## ").trim();
92
105
  } catch { /* changelog not present in this install */ }
93
106
  return {
107
+ romdevVersion: PKG_VERSION,
94
108
  note: "Pre-1.0 the tool surface is consolidated freely with NO deprecated aliases. If a tool name from an older handoff is missing, it's almost certainly now an `op` (or other axis) on a domain tool — find it below, then read that tool's description for the exact op enum and params.",
95
109
  renameTable: renames,
96
110
  axisLegend: "Every domain tool is keyed by ONE axis: op (most), output (build), on (breakpoint), target (disasm), view (background), source (palette), stage (encodeArt), from (importArt). The value names the operation, e.g. romPatch({op:'findPointer'}).",
@@ -215,7 +229,7 @@ export function registerTools(server, z, sessionKey) {
215
229
  "• op:'whatsNew' — the recent CHANGELOG + an OLD→NEW tool RENAME TABLE. Call this FIRST if you're resuming work from a handoff written against an older server: pre-1.0 the surface is consolidated freely (no deprecated aliases), so a name you remember may now be an `op` on a domain tool. This maps them in one read instead of probing each tool.",
216
230
  {
217
231
  op: z.enum(["categories", "status", "whatsNew"]).default("categories")
218
- .describe("categories=tool-category catalog; status=live session snapshot (host/platform/frameCount/media); whatsNew=recent CHANGELOG + old→new tool rename table."),
232
+ .describe("categories=tool-category catalog; status=live session snapshot (romdevVersion + host/platform/frameCount/media — call this to check the running version, e.g. is a saved skill stale); whatsNew=recent CHANGELOG + old→new tool rename table."),
219
233
  },
220
234
  safeTool(async ({ op = "categories" }) => {
221
235
  if (op === "whatsNew") {
@@ -228,6 +242,7 @@ export function registerTools(server, z, sessionKey) {
228
242
  ? { ...host.getStatus() }
229
243
  : { loaded: false, hint: "no host yet; call loadMedia (in category 'run') to load a ROM" };
230
244
  return jsonContent({
245
+ romdevVersion: PKG_VERSION,
231
246
  ...base,
232
247
  loadedCategories: cats.filter((c) => c.loaded).map((c) => c.name),
233
248
  unloadedCategories: cats.filter((c) => !c.loaded).map((c) => c.name),
@@ -235,6 +250,7 @@ export function registerTools(server, z, sessionKey) {
235
250
  }
236
251
  const categories = disclosure.listCategories();
237
252
  return jsonContent({
253
+ romdevVersion: PKG_VERSION,
238
254
  categories,
239
255
  note: "Every tool registers at session init — this catalog is just a map grouped by purpose, NOT a gate. Call any tool by name directly.",
240
256
  humanInTheLoopHint: "Iterate INTERNALLY on screenshots first (build({output:'run'}) returns one inline; frame({op:'screenshot'/'stepAndShot'}) re-shoots the live host) — don't open a window to debug. Once the game actually boots and shows the feature you're working on, call playtest({}) so your human can watch and play it live. Opening a window on a black screen or a crash just wastes the human's attention — show them something that works.",
@@ -327,7 +343,7 @@ const TOOL_OWNER = {
327
343
  playtest: "show",
328
344
  // advanced category
329
345
  runUntil: "advanced",
330
- watch: "advanced", breakpoint: "advanced", dmaTrace: "advanced",
346
+ watch: "advanced", breakpoint: "advanced",
331
347
  recordSession: "advanced",
332
348
  // entry tier itself
333
349
  catalog: "entry",
@@ -7,6 +7,7 @@ import { writeFile } from "node:fs/promises";
7
7
 
8
8
  import { getHost, getHostOrNull } from "../state.js";
9
9
  import { imageContent, jsonContent, safeTool, textContent } from "../util.js";
10
+ import { log } from "../log.js";
10
11
 
11
12
  // Playtest windows are PER SESSION: the MCP server is multi-session (one server
12
13
  // serves several agents at once), and the same user can have 2-3 different games
@@ -104,14 +105,13 @@ export function isPlaytestRunning(sessionKey) {
104
105
  export function registerPlaytestTools(server, z, sessionKey) {
105
106
  // op:'open' — open (or reuse) the SDL window for this session.
106
107
  async function ptOpen({ scale = 3, title, aspect = "tv" }) {
107
- // No preflight display checks. We just attempt to open the SDL window and
108
- // report whatever SDL says — env-var guessing (DISPLAY/WAYLAND_DISPLAY)
109
- // is Linux-only and wrong on macOS/Windows, where those vars are never
110
- // set even with a full GUI session. SDL's createWindow already knows
111
- // whether it can draw on any platform; the try/catch below surfaces the
112
- // real error.
113
108
  const host = getHost(sessionKey);
114
109
  const loadedMediaPath = host.status?.mediaPath ?? null;
110
+ // No env-var preflight here — the GROUND-TRUTH "is there a real display?"
111
+ // check lives in loadSdl() (it asks SDL which video driver it selected and
112
+ // throws sdlKind:"no-display" if it's offscreen/dummy). That's cross-
113
+ // platform and doesn't false-bark on valid offscreen setups like Xvfb.
114
+ // The try/catch below surfaces it (and the binary errors) uniformly.
115
115
  if (reconcileSession(sessionKey)) {
116
116
  // THIS session already has a window open. We don't open a second one for
117
117
  // the same session — it shares this session's live host — so report the
@@ -153,44 +153,55 @@ export function registerPlaytestTools(server, z, sessionKey) {
153
153
  "stepFrames / pressButton) still works against the live ROM — only " +
154
154
  "the interactive window is affected.";
155
155
 
156
- if (kind === "missing-binary" || kind === "install-failed") {
156
+ // A failed window-open is a REAL FAILURE — THROW it, don't return a soft
157
+ // {opened:false} object. Returning success-shaped JSON made the failure
158
+ // invisible on the REST/skill surface (HTTP 200 = "it worked"), so an
159
+ // agent driving the routes would report "window's up!" while no window
160
+ // exists. Thrown → safeTool tags isError → runTool maps it to HTTP 400
161
+ // (REST) and a tool error (MCP). We also log to the server console so a
162
+ // human watching the terminal sees it even if the agent buries the error.
163
+ let reason, message;
164
+ if (kind === "no-display") {
165
+ // GROUND TRUTH: SDL came up on the offscreen/dummy driver — there is no
166
+ // physical screen to show the window on (it would render + play audio
167
+ // but be invisible). loadSdl()'s message already says exactly this + the
168
+ // fix; pass it straight through.
169
+ reason = "no-display";
170
+ message = (e?.message ?? String(e)) + headlessNote;
171
+ } else if (kind === "missing-binary" || kind === "install-failed") {
157
172
  // Native-addon problem, NOT a display problem.
158
173
  const fix = e?.fixCmd
159
174
  ? `Run: ${e.fixCmd} (then restart the server). `
160
175
  : "Reinstall @kmamal/sdl so its prebuilt binary is fetched. ";
161
- return jsonContent({
162
- opened: false,
163
- reason: "sdl-binary-missing",
164
- platform: process.platform,
165
- message:
166
- "The playtest window couldn't open because the @kmamal/sdl native " +
167
- "binary isn't installed: " + (e?.message ?? String(e)) + ". " +
168
- (kind === "install-failed"
169
- ? "An automatic install was attempted but failed (often a network/proxy block on the GitHub release download). "
170
- : "(This is common under `npx romdevtools` — npm skips @kmamal/sdl's install script that fetches the binary; the server tried to self-heal but the binary is still absent.) ") +
171
- fix + "This is a one-time native-addon fix, NOT a display/desktop " +
172
- "issue." + headlessNote,
173
- fixCommand: e?.fixCmd ?? null,
174
- loadedMediaPath,
175
- });
176
- }
177
-
178
- // A genuine SDL init / display failure (e.g. no video device, no
179
- // desktop session). NOW the desktop-session advice is the right call.
180
- return jsonContent({
181
- opened: false,
182
- reason: "sdl-error",
183
- platform: process.platform,
184
- message:
176
+ reason = "sdl-binary-missing";
177
+ message =
178
+ "The playtest window couldn't open because the @kmamal/sdl native " +
179
+ "binary isn't installed: " + (e?.message ?? String(e)) + ". " +
180
+ (kind === "install-failed"
181
+ ? "An automatic install was attempted but failed (often a network/proxy block on the GitHub release download). "
182
+ : "(This is common under `npx romdevtools` — npm skips @kmamal/sdl's install script that fetches the binary; the server tried to self-heal but the binary is still absent.) ") +
183
+ fix + "This is a one-time native-addon fix, NOT a display/desktop " +
184
+ "issue." + headlessNote;
185
+ } else {
186
+ // A genuine SDL init / display failure (no video device / no desktop
187
+ // session). The desktop-session advice is the right call here.
188
+ reason = "sdl-error";
189
+ message =
185
190
  "Couldn't open the SDL playtest window: " + (e?.message ?? String(e)) +
186
191
  ". SDL initialized but couldn't get a display. This usually means the " +
187
192
  "server has no access to a logged-in desktop session — e.g. it was " +
188
193
  "spawned as an MCP subprocess by your agent host, or runs over plain " +
189
194
  "SSH/headless. The reliable fix: run the server yourself in a terminal " +
190
195
  "inside your desktop session, then connect your agent to it." +
191
- headlessNote + " You can also open the built ROM in any standalone emulator.",
192
- loadedMediaPath,
193
- });
196
+ headlessNote + " You can also open the built ROM in any standalone emulator.";
197
+ }
198
+ // Server-console breadcrumb (stderr) so a human at the terminal sees the
199
+ // failure regardless of whether the agent relays the tool error.
200
+ log.error(`playtest: window failed to open (${reason}) — ${e?.fixCmd ? "fix: " + e.fixCmd : message.slice(0, 120)}`);
201
+ const err = new Error(message);
202
+ err.reason = reason;
203
+ if (e?.fixCmd) err.fixCommand = e.fixCmd;
204
+ throw err;
194
205
  }
195
206
  // Detach so process doesn't hang on the closed promise. Only clear THIS
196
207
  // session's slot, and only if it still points at this same session (a
@@ -284,7 +295,9 @@ export function registerPlaytestTools(server, z, sessionKey) {
284
295
  });
285
296
  }
286
297
  if (!inline && !outPath) {
287
- return jsonContent({ ok: false, error: "pass `path` (where to write the PNG) or `inline:true`." });
298
+ // Usage error — throw so REST returns 400 (not a 200 with ok:false the
299
+ // caller might ignore).
300
+ throw new Error("playtest framebuffer: pass `path` (where to write the PNG) or `inline:true`.");
288
301
  }
289
302
  const frame = sessions.get(sessionKey).captureFrame();
290
303
  if (!frame) {
@@ -9,7 +9,6 @@
9
9
  // that compiles is in the directory.
10
10
 
11
11
  import { readFile, writeFile } from "node:fs/promises";
12
- import { patchGbHeader } from "../../platforms/gb/lib/c/patch-header.js";
13
12
  import { jsonContent, safeTool } from "../util.js";
14
13
  import { starterSnippetsCore, copyStarterSnippetsCore } from "./snippets.js";
15
14
 
@@ -320,7 +319,7 @@ TEMPLATES.gbc = {
320
319
  default: {
321
320
  main: "templates/default.c", runtime: GBC_RUNTIME,
322
321
  lang: GBC_LANG, ext: ".gbc",
323
- describe: "Minimal GBC starter. Same shape as the GB default but ROM extension .gbc — patchGbHeader sets $0143=$80 so gambatte boots in CGB mode.",
322
+ describe: "Minimal GBC starter. Same shape as the GB default but ROM extension .gbc — the GB-header patch sets $0143=$80 so gambatte boots in CGB mode.",
324
323
  },
325
324
  hello_sprite: {
326
325
  main: "templates/hello_sprite.c", runtime: GBC_RUNTIME,
@@ -1583,9 +1582,9 @@ Compiles **C89**, not C99/C11. Stick to:
1583
1582
  " frames: 60,\n" +
1584
1583
  "})\n```\n\n" +
1585
1584
  "`runSource` auto-fixes the GB/GBC cartridge header (logo, checksums, " +
1586
- "CGB flag) — you do **not** call `patchGbHeader` for a freshly built " +
1587
- "ROM. Use `patchGbHeader` only to fix up an existing/external ROM on " +
1588
- "disk or to override header fields (title, cart type, ROM/RAM size).";
1585
+ "CGB flag) — you do **not** call a header patch for a freshly built " +
1586
+ "ROM. Use `romPatch({op:'gbHeader'})` only to fix up an existing/external " +
1587
+ "ROM on disk or to override header fields (title, cart type, ROM/RAM size).";
1589
1588
  } else if (isSdccZ80) {
1590
1589
  const inc = runtimeHeaders.length > 0
1591
1590
  ? `\n includePaths: { ${runtimeHeaders.map((h) => `"${h.dst}": "${h.dst}"`).join(", ")} },`
@@ -1890,50 +1889,6 @@ export function registerProjectTools(server, z) {
1890
1889
  }),
1891
1890
  );
1892
1891
 
1893
- server.tool(
1894
- "patchGbHeader",
1895
- "Use this to write a complete, valid GB/GBC cartridge header into a ROM: Nintendo boot logo, EVERY " +
1896
- "header byte ($0134-$014C — title, CGB flag, cart type, ROM/RAM size, etc.) with ROM-only defaults, " +
1897
- "plus the header + global checksums. SDCC-path equivalent of `rgbfix -v -p 0`. Fills ALL bytes " +
1898
- "deliberately: leaving the CGB flag as the linker's $FF pad makes gambatte enter CGB mode and ignore " +
1899
- "DMG palette writes → white screen. Also shipped as `patch-header.js` in every GB/GBC project for use " +
1900
- "outside MCP.",
1901
- {
1902
- path: z.string().describe("Absolute path to the .gb / .gbc ROM file. Patched in place unless outputPath is given."),
1903
- outputPath: z.string().optional().describe("If given, write the patched ROM here instead of overwriting."),
1904
- cgb: z.boolean().optional().describe("If true, sets the CGB flag at $0143 to $80 (CGB-aware + DMG-compatible). If omitted, auto-detects from .gbc extension; default for plain .gb is false (DMG-only)."),
1905
- title: z.string().optional().describe("Cartridge title, up to 11 chars at $0134..$013E. Uppercased + zero-padded. Default = zero-fill."),
1906
- cartType: z.number().int().min(0).max(0xFF).optional().describe("Cart-type byte at $0147. Default $00 (ROM-only). Common alternatives: $01=MBC1, $03=MBC1+RAM+BAT, $11=MBC3, $13=MBC3+RAM+BAT, $19=MBC5."),
1907
- romSize: z.number().int().min(0).max(0xFF).optional().describe("ROM-size byte at $0148. Default $00 (32 KB / 2 banks). 1=64KB, 2=128KB, 3=256KB, 4=512KB, 5=1MB, 6=2MB, 7=4MB."),
1908
- ramSize: z.number().int().min(0).max(0xFF).optional().describe("RAM-size byte at $0149. Default $00 (none). $02=8KB, $03=32KB. Only meaningful with battery-backed MBC."),
1909
- destination: z.number().int().min(0).max(0xFF).optional().describe("Destination at $014A. Default $01 (non-Japan). $00 = Japan."),
1910
- },
1911
- safeTool(async ({ path: inPath, outputPath, cgb, title, cartType, romSize, ramSize, destination }) => {
1912
- const rom = new Uint8Array(await readFile(inPath));
1913
- const cgbFlag = cgb ?? (/\.gbc$/i.test(inPath) || (outputPath && /\.gbc$/i.test(outputPath)));
1914
- patchGbHeader(rom, { cgb: cgbFlag, title, cartType, romSize, ramSize, destination });
1915
- const outPath = outputPath ?? inPath;
1916
- await writeFile(outPath, rom);
1917
- return jsonContent({
1918
- path: outPath,
1919
- bytes: rom.length,
1920
- cgb: !!cgbFlag,
1921
- patched: [
1922
- "nintendo_logo@$0104..$0133",
1923
- "title@$0134..$013E",
1924
- `cgb_flag@$0143=${cgbFlag ? "$80" : "$00"}`,
1925
- "licensee@$0144..$0145=$00$00",
1926
- "sgb_flag@$0146=$00",
1927
- `cart_type@$0147=$${(cartType ?? 0).toString(16).padStart(2, "0").toUpperCase()}`,
1928
- `rom_size@$0148=$${(romSize ?? 0).toString(16).padStart(2, "0").toUpperCase()}`,
1929
- `ram_size@$0149=$${(ramSize ?? 0).toString(16).padStart(2, "0").toUpperCase()}`,
1930
- `destination@$014A=$${(destination ?? 1).toString(16).padStart(2, "0").toUpperCase()}`,
1931
- "old_licensee@$014B=$33",
1932
- "rom_version@$014C=$00",
1933
- "header_checksum@$014D",
1934
- "global_checksum@$014E..$014F",
1935
- ],
1936
- });
1937
- }),
1938
- );
1892
+ // patchGbHeader was folded into romPatch({op:'gbHeader'}) (rom-id.js) — it's a
1893
+ // ROM-file patch op, same family as romPatch's other ops, not a scaffold tool.
1939
1894
  }
@@ -30,6 +30,41 @@ export async function patchRomCore({ input, output, writes, allowExpand }) {
30
30
  return await patchRomFile({ input, output, writes, allowExpand });
31
31
  }
32
32
 
33
+ // gbHeaderCore — backs romPatch({op:'gbHeader'}): writes a complete valid GB/GBC cartridge header (logo + every header byte + header/global checksums) into a ROM file.
34
+ // Folded in from the old standalone patchGbHeader tool; the same patcher is also shipped as patch-header.js in every GB/GBC project for use outside romdev.
35
+ // Params: path (required input ROM), outputPath (optional; when nullish the input file is overwritten), cgb (optional; when nullish it is inferred from a .gbc extension on path or outputPath), title/cartType/romSize/ramSize/destination (forwarded unchanged to patchGbHeader).
36
+ // Returns {path, bytes, cgb, patched[]} summarizing the write. Throws an Error when path is missing; readFile/writeFile failures are not caught here and propagate to the caller.
37
+ export async function gbHeaderCore({ path: inPath, outputPath, cgb, title, cartType, romSize, ramSize, destination }) {
38
+ if (!inPath) throw new Error("romPatch({op:'gbHeader'}): `path` (the .gb/.gbc ROM) is required.");
39
+ const { readFile, writeFile } = await import("node:fs/promises"); // dynamic import: resolved only when this op actually runs
40
+ const { patchGbHeader } = await import("../../platforms/gb/lib/c/patch-header.js");
41
+ const rom = new Uint8Array(await readFile(inPath)); // whole ROM is read into memory
42
+ const cgbFlag = cgb ?? (/\.gbc$/i.test(inPath) || (outputPath && /\.gbc$/i.test(outputPath))); // explicit cgb (true or false) wins; otherwise a .gbc extension on either path opts in
43
+ patchGbHeader(rom, { cgb: cgbFlag, title, cartType, romSize, ramSize, destination }); // return value unused — the helper is expected to patch `rom` in place, since `rom` is what gets written below
44
+ const outPath = outputPath ?? inPath; // no outputPath → overwrite the input ROM
45
+ await writeFile(outPath, rom);
46
+ return {
47
+ path: outPath,
48
+ bytes: rom.length,
49
+ cgb: !!cgbFlag, // cgbFlag can be undefined (cgb omitted, no .gbc extension, no outputPath); normalize to a boolean for the report
50
+ patched: [
51
+ "nintendo_logo@$0104..$0133",
52
+ "title@$0134..$013E",
53
+ `cgb_flag@$0143=${cgbFlag ? "$80" : "$00"}`,
54
+ "licensee@$0144..$0145=$00$00",
55
+ "sgb_flag@$0146=$00",
56
+ `cart_type@$0147=$${(cartType ?? 0).toString(16).padStart(2, "0").toUpperCase()}`, // the `?? 0` / `?? 1` fallbacks here and below only affect this report; NOTE(review): confirm they match patchGbHeader's own defaults
57
+ `rom_size@$0148=$${(romSize ?? 0).toString(16).padStart(2, "0").toUpperCase()}`,
58
+ `ram_size@$0149=$${(ramSize ?? 0).toString(16).padStart(2, "0").toUpperCase()}`,
59
+ `destination@$014A=$${(destination ?? 1).toString(16).padStart(2, "0").toUpperCase()}`,
60
+ "old_licensee@$014B=$33",
61
+ "rom_version@$014C=$00",
62
+ "header_checksum@$014D",
63
+ "global_checksum@$014E..$014F",
64
+ ],
65
+ };
66
+ }
67
+
33
68
  export function registerRomIdTools(server, z, sessionKey) {
34
69
  // identifyRom folded into `cart`; patchFile/patchRom/spliceCHR/relocate/etc.
35
70
  // folded into the `romPatch` tool (router below).
@@ -48,7 +83,8 @@ export function registerRomIdTools(server, z, sessionKey) {
48
83
  "makeStored → {rawHex|rawBytes, format, interleave?}; " +
49
84
  "findFree → {minLength, fillBytes?, start?, end?, maxRunsReturned?}; " +
50
85
  "findPointer → {romOffset, mapper?, widths?, suppressShadows?, maxHitsReturned?}; " +
51
- "diff → {a, b, maxChangesReturned?}.\n" +
86
+ "diff → {a, b, maxChangesReturned?}; " +
87
+ "gbHeader → {path, outputPath?, cgb?, title?, cartType?, romSize?, ramSize?, destination?}.\n" +
52
88
  "• op:'write' — write N bytes into any binary file at `offset` (the generic splicer: PRG patches, CHR splices, SNES tile/sample injection). `allowExpand` grows the file — default OFF; most hacks must NOT change size or headers/mapper break. `outputPath` else writes in place.\n" +
53
89
  "• op:'writeMany' — apply a LIST of {offset, hex|base64} `writes` from `input` ROM to `output`.\n" +
54
90
  "• op:'spliceCHR' — inject a PNG's tiles into a CHR region.\n" +
@@ -56,10 +92,11 @@ export function registerRomIdTools(server, z, sessionKey) {
56
92
  "• op:'makeStored' — wrap raw bytes so the game's OWN decompressor expands them VERBATIM (edit tiles → makeStored → write, no compressor needed). `format` (raw/lz77-literal/lz2-direct/sega-rle/konami-rle/packbits/kosinski-literal; invalid → returns the platform's list). ALWAYS verify via cpu({op:'call'}) on the game's decompressor.\n" +
57
93
  "• op:'findFree' — find a run of free space to relocate into (`fillBytes` defaults to [0xFF, 0x00]).\n" +
58
94
  "• op:'findPointer' — find every pointer in the ROM that references `romOffset` (platform-correct encoding), the missing piece for redirecting a loader. `mapper` overrides SNES detection. On wide systems (Genesis/GBA) a 32-bit hit's low bytes also match the narrower form one byte over — those tail SHADOWS are suppressed by default (count in `shadowsSuppressed`); pass `suppressShadows:false` for raw, or `widths:[4]` to search only 32-bit forms. On banked 8-bit systems a 16-bit pointer is page-ambiguous — correlate with the bank-set instruction.\n" +
59
- "• op:'diff' — diff two ROMs (`a`, `b`) → the changed byte ranges.",
95
+ "• op:'diff' — diff two ROMs (`a`, `b`) → the changed byte ranges.\n" +
96
+ "• op:'gbHeader' — GAME BOY / GBC ONLY: write a complete, valid GB/GBC cartridge header into a ROM at `path` — Nintendo boot logo, every header byte ($0134-$014C: title, CGB flag, cart type, ROM/RAM size, …) with ROM-only defaults, plus the header + global checksums. The SDCC-path equivalent of `rgbfix -v -p 0`, for fixing up an externally built / hand-assembled GB ROM. (A normal build({output:'rom'/'run'}) already does this — you do NOT call gbHeader on a freshly built ROM.) Leaving the CGB flag as the linker's $FF pad makes gambatte enter CGB mode and white-screen, so this fills it deliberately.",
60
97
  {
61
- op: z.enum(["write", "writeMany", "spliceCHR", "relocate", "makeStored", "findFree", "findPointer", "diff"])
62
- .describe("write=N bytes at an offset; writeMany=a list of writes; spliceCHR=PNG tiles into CHR; relocate=write a block to free space + repoint; makeStored=wrap bytes for the game's decompressor; findFree=find free space; findPointer=find pointers to an offset; diff=diff two ROMs."),
98
+ op: z.enum(["write", "writeMany", "spliceCHR", "relocate", "makeStored", "findFree", "findPointer", "diff", "gbHeader"])
99
+ .describe("write=N bytes at an offset; writeMany=a list of writes; spliceCHR=PNG tiles into CHR; relocate=write a block to free space + repoint; makeStored=wrap bytes for the game's decompressor; findFree=find free space; findPointer=find pointers to an offset; diff=diff two ROMs; gbHeader=write a valid GB/GBC cartridge header + checksums."),
63
100
  path: z.string().optional().describe("op:write/spliceCHR/relocate/findFree/findPointer — absolute path to the ROM/file."),
64
101
  platform: z.enum(PLATFORMS).optional().describe("op:findPointer/relocate/makeStored/spliceCHR/diff — platform (inferred from extension except makeStored, which requires it)."),
65
102
  offset: z.number().int().min(0).optional().describe("op:write — file offset to write at (NOT a CPU address)."),
@@ -112,6 +149,13 @@ export function registerRomIdTools(server, z, sessionKey) {
112
149
  a: z.string().optional().describe("op:diff — path to ROM A."),
113
150
  b: z.string().optional().describe("op:diff — path to ROM B."),
114
151
  maxChangesReturned: z.number().int().min(1).max(2048).default(256).describe("op:diff — cap the change ranges returned."),
152
+ // gbHeader (path + outputPath reuse the spine fields above)
153
+ cgb: z.boolean().optional().describe("op:gbHeader — if true, sets the CGB flag at $0143 to $80 (CGB-aware + DMG-compatible). If omitted, auto-detects from a .gbc extension; default for plain .gb is false (DMG-only)."),
154
+ title: z.string().optional().describe("op:gbHeader — cartridge title, up to 11 chars at $0134..$013E. Uppercased + zero-padded. Default = zero-fill."),
155
+ cartType: z.number().int().min(0).max(0xFF).optional().describe("op:gbHeader — cart-type byte at $0147. Default $00 (ROM-only). Common: $01=MBC1, $03=MBC1+RAM+BAT, $11=MBC3, $13=MBC3+RAM+BAT, $19=MBC5."),
156
+ romSize: z.number().int().min(0).max(0xFF).optional().describe("op:gbHeader — ROM-size byte at $0148. Default $00 (32 KB / 2 banks). 1=64KB, 2=128KB, 3=256KB, 4=512KB, 5=1MB, 6=2MB, 7=4MB."),
157
+ ramSize: z.number().int().min(0).max(0xFF).optional().describe("op:gbHeader — RAM-size byte at $0149. Default $00 (none). $02=8KB, $03=32KB. Only meaningful with battery-backed MBC."),
158
+ destination: z.number().int().min(0).max(0xFF).optional().describe("op:gbHeader — destination at $014A. Default $01 (non-Japan). $00 = Japan."),
115
159
  },
116
160
  safeTool(async (args) => {
117
161
  switch (args.op) {
@@ -135,6 +179,7 @@ export function registerRomIdTools(server, z, sessionKey) {
135
179
  if (!args.a || !args.b) throw new Error("romPatch({op:'diff'}): `a` and `b` (the two ROM paths) are required.");
136
180
  return jsonContent(await diffRomsCore({ ...args, aPath: args.a, bPath: args.b }));
137
181
  }
182
+ case "gbHeader": return jsonContent(await gbHeaderCore(args));
138
183
  default: throw new Error(`romPatch: unknown op '${args.op}'`);
139
184
  }
140
185
  }),
@@ -254,7 +254,7 @@ export function registerTileInspectTools(server, z, sessionKey) {
254
254
  tilePath: z.string().optional().describe("op:preview — path to a tile dump (raw) or iNES ROM (NES auto-locates CHR)."),
255
255
  fromEmulator: z.boolean().optional().describe("op:preview — read tiles from the running emulator's live VRAM (tileStart/tileCount pick the range). Genesis byte-swap handled. Mutually exclusive with tileBytes/tilePath."),
256
256
  tileStart: z.number().int().min(0).optional().describe("op:preview — starting tile index in the source."),
257
- byteOffset: z.number().int().min(0).optional().describe("op:preview — start at a raw BYTE offset instead of a tile index (pass a dmaTrace / disasm-references source directly). WARNS on misalignment. Takes precedence over tileStart."),
257
+ byteOffset: z.number().int().min(0).optional().describe("op:preview — start at a raw BYTE offset instead of a tile index (pass a watch({on:'dma'}) / disasm-references source directly). WARNS on misalignment. Takes precedence over tileStart."),
258
258
  palette: z.array(z.any()).optional().describe("op:preview — explicit palette (NES: 4 master indices; others: RGB triples or indices)."),
259
259
  palettePath: z.string().optional().describe("op:preview — raw palette dump from disk."),
260
260
  // shared output