npm - romdevtools - Versions diffs - 0.13.0 → 0.15.0 - Mend

romdevtools 0.13.0 → 0.15.0

This diff shows the differences between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (124)

package/AGENTS.md +21 -14
package/CHANGELOG.md +125 -1
package/README.md +13 -8
package/examples/atari2600/main.asm +1 -1
package/examples/atari2600/templates/default.asm +1 -1
package/examples/atari2600/templates/paddle.asm +59 -47
package/examples/atari7800/main.c +1 -1
package/examples/atari7800/templates/default.c +1 -1
package/examples/atari7800/templates/music_demo.c +1 -1
package/examples/c64/main.c +1 -1
package/examples/c64/templates/platformer.c +2 -2
package/examples/c64/templates/puzzle.c +1 -1
package/examples/c64/templates/racing.c +3 -3
package/examples/c64/templates/shmup.c +6 -5
package/examples/c64/templates/sports.c +4 -4
package/examples/gb/main.asm +1 -1
package/examples/gb/main.c +1 -1
package/examples/gb/templates/puzzle.c +1 -1
package/examples/gb/templates/racing.c +1 -1
package/examples/gb/templates/shmup.c +1 -1
package/examples/gba/templates/gba_hello.c +1 -1
package/examples/gba/templates/maxmod_demo.c +1 -1
package/examples/gba/templates/puzzle.c +17 -3
package/examples/gba/templates/racing.c +16 -2
package/examples/gba/templates/shmup.c +23 -4
package/examples/gba/templates/tonc_hello.c +6 -4
package/examples/gbc/main.asm +1 -1
package/examples/gbc/templates/puzzle.c +1 -1
package/examples/gbc/templates/racing.c +1 -1
package/examples/gbc/templates/shmup.c +1 -1
package/examples/genesis/main.s +1 -1
package/examples/genesis/templates/puzzle.c +1 -1
package/examples/genesis/templates/racing.c +45 -1
package/examples/genesis/templates/shmup.c +12 -3
package/examples/genesis/templates/shmup_2p.c +2 -2
package/examples/genesis/templates/sports.c +39 -0
package/examples/gg/templates/hello_sprite.c +38 -23
package/examples/gg/templates/music_demo.c +11 -8
package/examples/gg/templates/platformer.c +37 -15
package/examples/gg/templates/racing.c +25 -12
package/examples/gg/templates/shmup.c +12 -6
package/examples/gg/templates/sports.c +30 -16
package/examples/gg/templates/tile_engine.c +24 -10
package/examples/lynx/templates/platformer.c +7 -1
package/examples/lynx/templates/puzzle.c +8 -2
package/examples/lynx/templates/racing.c +7 -1
package/examples/lynx/templates/sports.c +7 -1
package/examples/nes/main.c +2 -2
package/examples/nes/space-shooter/nes_runtime.h +1 -1
package/examples/nes/templates/default.c +4 -1
package/examples/nes/templates/racing.c +50 -1
package/examples/pce/main.c +1 -1
package/examples/sms/templates/hello_sprite.c +1 -1
package/examples/sms/templates/music_demo.c +1 -1
package/examples/sms/templates/puzzle.c +1 -1
package/examples/sms/templates/racing.c +1 -1
package/examples/sms/templates/shmup.c +1 -1
package/examples/sms/templates/shmup_2p.c +2 -2
package/examples/snes/main.asm +1 -1
package/examples/snes/templates/c-hello-data.asm +309 -14
package/examples/snes/templates/c-hello.c +13 -2
package/examples/snes/templates/default.c +1 -1
package/examples/snes/templates/hello_sprite-data.asm +300 -2
package/examples/snes/templates/hello_sprite.c +10 -1
package/examples/snes/templates/music_demo-data.asm +300 -2
package/examples/snes/templates/music_demo.c +10 -1
package/examples/snes/templates/platformer-data.asm +300 -2
package/examples/snes/templates/platformer.c +10 -1
package/examples/snes/templates/puzzle-data.asm +300 -2
package/examples/snes/templates/puzzle.c +11 -1
package/examples/snes/templates/racing-data.asm +300 -2
package/examples/snes/templates/racing.c +40 -4
package/examples/snes/templates/shmup-data.asm +299 -6
package/examples/snes/templates/shmup.c +11 -7
package/examples/snes/templates/sports-data.asm +300 -2
package/examples/snes/templates/sports.c +40 -5
package/package.json +1 -1
package/src/cheats/lookup.js +39 -18
package/src/http/routes.js +58 -33
package/src/http/skill-doc.js +10 -9
package/src/http/swagger.js +1 -1
package/src/http/tool-registry.js +72 -5
package/src/mcp/server.js +6 -5
package/src/mcp/state.js +8 -6
package/src/mcp/tool-manifest.js +7 -7
package/src/mcp/tools/cheats.js +4 -3
package/src/mcp/tools/index.js +18 -2
package/src/mcp/tools/playtest.js +48 -35
package/src/mcp/tools/project.js +39 -73
package/src/mcp/tools/rom-id.js +49 -4
package/src/mcp/tools/tile-inspect.js +1 -1
package/src/mcp/tools/toolchain.js +183 -19
package/src/mcp/tools/trace-vram-source.js +3 -3
package/src/mcp/tools/watch-memory.js +27 -46
package/src/observer/livestream.html +41 -5
package/src/platforms/_guides/ROMHACKING_PLAYBOOK.md +5 -5
package/src/platforms/gb/MENTAL_MODEL.md +3 -3
package/src/platforms/gb/TROUBLESHOOTING.md +1 -1
package/src/platforms/gb/UPSTREAM_SOURCES.md +1 -1
package/src/platforms/gb/lib/c/README.md +2 -2
package/src/platforms/gb/lib/c/SDCC_GOTCHAS.md +1 -1
package/src/platforms/gbc/MENTAL_MODEL.md +3 -3
package/src/platforms/gbc/TROUBLESHOOTING.md +5 -5
package/src/platforms/gbc/UPSTREAM_SOURCES.md +2 -2
package/src/platforms/gbc/lib/c/README.md +2 -2
package/src/platforms/gbc/lib/c/SDCC_GOTCHAS.md +1 -1
package/src/platforms/gg/MENTAL_MODEL.md +14 -13
package/src/platforms/gg/lib/c/vdp_init.c +10 -8
package/src/platforms/msx/MENTAL_MODEL.md +1 -1
package/src/platforms/nes/TROUBLESHOOTING.md +1 -1
package/src/platforms/nes/lib/c/nes_runtime.c +28 -6
package/src/platforms/pce/MENTAL_MODEL.md +1 -1
package/src/platforms/pce/lib/c/pce_hw.h +1 -0
package/src/platforms/pce/lib/c/pce_video.c +26 -0
package/src/platforms/sms/MENTAL_MODEL.md +12 -12
package/src/platforms/sms/lib/c/vdp_init.c +10 -8
package/src/platforms/sms/lib/vdp_init.s +1 -1
package/src/playtest/playtest.js +25 -0
package/src/toolchains/cc65/presets/nes/chr-ram-runtime.cfg +1 -1
package/src/toolchains/cc65/presets/nes/chr-ram.cfg +1 -1
package/src/toolchains/cc65/presets/nes/chr-ram.crt0.s +1 -1
package/src/toolchains/genesis-c/README.md +1 -1
package/src/toolchains/sdcc/preflight-lint.js +47 -7
package/src/toolchains/snes-c/snes-c.js +3 -7

package/src/http/routes.js CHANGED

@@ -5,20 +5,22 @@
 //   GET  /tool/:name/schema   that tool's JSON Schema (a validator on demand)
 //   GET  /openapi.json        OpenAPI 3.1 spec for every /tool/:name route
 //   GET  /documentation       Swagger UI over /openapi.json (live "try it" console)
-//   GET  /romdev-skill.md     the SKILL.md (Agent Skills open standard) — channel
-//                             doc that drives the routes, never mentions MCP
+//   GET  /skills/romdev/SKILL.md  the SKILL.md (Agent Skills open standard) — the
+//                             channel doc that drives the routes, never mentions
+//                             MCP. Also at /romdev/SKILL.md and /romdev-skill.md.
 //
-// Sessions: each agent gets its own session dynamically, same isolation as MCP.
-// First call with no x-romdev-session → mint one, return it in the response
-// header; the agent echoes it on later calls (sticky host across load→step→read).
-// A call with no header gets an ephemeral per-request session (fine for pure-file
-// tools; stateful host work should keep the header). No auth — localhost trust,
-// same as /mcp (the app already mounts localhostHostValidation()).
+// Sessions: each agent picks its own stable id and sends it as x-romdev-session
+// on EVERY call (same per-agent host isolation as MCP). The header is REQUIRED —
+// no header → 401 (we don't auto-mint a throwaway session; that silently dropped
+// the loaded ROM and surfaced as "No ROM loaded" later). First use of an id
+// creates the session, reuse keeps the host across load→step→read, different ids
+// isolate different agents. No auth beyond that — localhost trust, same as /mcp
+// (the app already mounts localhostHostValidation()).
-import { randomUUID } from "node:crypto";
 import { buildToolRegistry, runTool, toolJsonSchema } from "./tool-registry.js";
 import { skillPreamble, skillToolReference, buildSkillDoc } from "./skill-doc.js";
 import { swaggerHtml, swaggerAsset } from "./swagger.js";
+import { observer } from "../observer/bus.js";
 import { log } from "../mcp/log.js";
 const SESSION_HEADER = "x-romdev-session";
@@ -40,12 +42,16 @@ export function mountHttpToolRoutes(app, opts = {}) {
   /** @type {Map<string, {registry: Map<string,any>, lastSeen: number}>} */
   const sessions = new Map();
-  function getSession(sessionKey) {
+  function getSession(sessionKey, { sticky = false } = {}) {
     let s = sessions.get(sessionKey);
     if (!s) {
-      s = { registry: buildToolRegistry(sessionKey), lastSeen: Date.now() };
+      s = { registry: buildToolRegistry(sessionKey), lastSeen: Date.now(), sticky };
       sessions.set(sessionKey, s);
       log.debug(`[http] session ${sessionKey.slice(0, 8)} created (${sessions.size} active)`);
+      // Surface sticky sessions in /livestream (like the MCP path does on init).
+      // Ephemeral one-shot sessions are NOT registered (they'd spam connect/
+      // disconnect); their individual `call` events still show in the stream.
+      if (sticky) { try { observer.sessionConnected(sessionKey); } catch {} }
     } else {
       s.lastSeen = Date.now();
     }
@@ -58,6 +64,7 @@ export function mountHttpToolRoutes(app, opts = {}) {
     for (const [key, s] of sessions) {
       if (now - s.lastSeen > idleMs) {
         sessions.delete(key);
+        if (s.sticky) { try { observer.sessionDisconnected(key); } catch {} }
         log.debug(`[http] session ${key.slice(0, 8)} reaped (idle)`);
       }
     }
@@ -71,30 +78,38 @@ export function mountHttpToolRoutes(app, opts = {}) {
   // ── POST /tool/:name ──────────────────────────────────────────────────────
   app.post("/tool/:name", async (req, res) => {
     const name = req.params.name;
-    // session: sticky if header present, ephemeral otherwise.
-    let sessionKey = req.headers[SESSION_HEADER];
-    let ephemeral = false;
+    // Session model: the AGENT picks its own stable, task-descriptive id and
+    // sends it as x-romdev-session on EVERY call — first use creates the session,
+    // reuse keeps the same host/state (load→step→read), and different ids isolate
+    // different agents. NO HEADER → 401: we don't auto-mint a throwaway session
+    // (that silently dropped the loaded ROM and surfaced as "No ROM loaded" two
+    // calls later). Requiring the header up front turns that silent footgun into
+    // a loud, fixable 401.
+    const sessionKey = req.headers[SESSION_HEADER];
     if (typeof sessionKey !== "string" || !sessionKey) {
-      sessionKey = randomUUID();
-      ephemeral = true;
+      res.status(401).json({
+        error: "Missing required `x-romdev-session` header. Pick ONE stable, " +
+          "task-descriptive id for yourself (e.g. 'nes-platformer-build') and send " +
+          "it on EVERY call — it's your per-session emulator key (the ROM you load " +
+          "lives under it; the next call only sees it with the SAME id) and the " +
+          "label shown in the /livestream observer. Several agents share one server " +
+          "by each using a different id.",
+      });
+      return;
     }
-    const { registry } = getSession(sessionKey);
+    const { registry } = getSession(sessionKey, { sticky: true });
     const tool = registry.get(name);
     if (!tool) {
       res.status(404).json({
-        error: `Unknown tool '${name}'. GET /openapi.json or /romdev-skill.md for the list.`,
+        error: `Unknown tool '${name}'. GET /openapi.json or /skills/romdev/SKILL.md for the list.`,
       });
       return;
     }
-    // echo the session id so the agent can reuse it (esp. when we minted one)
+    // echo the session id back (convenience for clients that log it)
     res.setHeader(SESSION_HEADER, sessionKey);
-    const out = await runTool(tool, req.body);
-    if (ephemeral) {
-      // drop the ephemeral session immediately (no sticky host wanted)
-      sessions.delete(sessionKey);
-    }
+    const out = await runTool(tool, req.body, sessionKey);
     if (out.ok) res.json(out.result);
-    else res.status(400).json({ error: out.error });
+    else res.status(400).json(out.result ?? { error: out.error });
   });
   // ── GET /tool/:name/schema ────────────────────────────────────────────────
@@ -122,17 +137,26 @@ export function mountHttpToolRoutes(app, opts = {}) {
     res.send(buf);
   });
-  // ── GET /romdev-skill.md ──────────────────────────────────────────────────
-  app.get("/romdev-skill.md", (req, res) => {
+  // ── GET /skills/romdev/SKILL.md (primary) + aliases ───────────────────────
+  // Agents store skills on disk as skills/<name>/SKILL.md (a dir named after the
+  // skill, canonical file SKILL.md). We serve the same doc at several paths so
+  // the URL matches wherever the agent saved it:
+  //   /skills/romdev/SKILL.md  — primary: full disk mirror (~/.claude/skills/romdev/SKILL.md)
+  //   /romdev/SKILL.md         — alias: the <name>/SKILL.md tail
+  //   /romdev-skill.md         — alias: flat form (older refs)
+  const serveSkill = (req, res) => {
     const md = buildSkillDoc({
       registry: metaRegistry,
       agentsBody: opts.agentsBody ?? "",
       version,
     });
     res.type("text/markdown").send(md);
-  });
+  };
+  app.get("/skills/romdev/SKILL.md", serveSkill);
+  app.get("/romdev/SKILL.md", serveSkill);
+  app.get("/romdev-skill.md", serveSkill); // alias
-  log.debug("[http] tool surface mounted: POST /tool/:name, /openapi.json, /documentation, /romdev-skill.md");
+  log.debug("[http] tool surface mounted: POST /tool/:name, /openapi.json, /documentation, /skills/romdev/SKILL.md");
   return { sessions, stop: () => clearInterval(reaper) };
 }
@@ -158,13 +182,14 @@ export function buildOpenApi(registry, version) {
         },
         responses: {
           200: { description: "Tool result (JSON).", content: { "application/json": { schema: { type: "object" } } } },
-          400: { description: "Validation or tool error.", content: { "application/json": { schema: { type: "object", properties: { error: { type: "string" } } } } } },
+          400: { description: "Validation or tool error (the action did not succeed).", content: { "application/json": { schema: { type: "object", properties: { error: { type: "string" } } } } } },
+          401: { description: "Missing required x-romdev-session header.", content: { "application/json": { schema: { type: "object", properties: { error: { type: "string" } } } } } },
           404: { description: "Unknown tool." },
         },
         parameters: [{
-          name: SESSION_HEADER, in: "header", required: false,
+          name: SESSION_HEADER, in: "header", required: true,
           schema: { type: "string" },
-          description: "Per-agent session id. Omit on the first call to get one back in the response header; echo it on later calls to keep a sticky emulator session (load→step→read). Omit entirely for one-shot pure-file tools.",
+          description: "REQUIRED. Per-agent session id — pick one stable, UNIQUE, task-DESCRIPTIVE string (e.g. 'nes-platformer-build', 'zelda-romhack-text') and send it on EVERY call. It's the per-session emulator key (load→step→read state lives under it) AND the label shown in the /livestream observer, so a descriptive id tells a watching human which task each call belongs to. Several agents share one server safely by each using a different id. Missing → 401.",
         }],
       },
     };
@@ -174,7 +199,7 @@ export function buildOpenApi(registry, version) {
     info: {
       title: "romdev HTTP tool API",
       version,
-      description: "Plain-HTTP surface for romdev's retro-game-dev tools — the non-MCP way to drive the same tools. Generated from the tool registry. See /romdev-skill.md for the workflow guide.",
+      description: "Plain-HTTP surface for romdev's retro-game-dev tools — the non-MCP way to drive the same tools. Generated from the tool registry. See /skills/romdev/SKILL.md for the workflow guide.",
     },
     servers: [{ url: "/" }],
     paths,

package/src/http/skill-doc.js CHANGED

@@ -3,7 +3,7 @@
 // One shared body (AGENTS.md, channel-neutral) is wrapped per delivery channel:
 //   - MCP connection instructions = mcpPreamble + body   (says "call the MCP
 //     tools"; never mentions HTTP routes / skills)
-//   - GET /romdev-skill.md        = skill frontmatter + skillPreamble + body +
+//   - GET /skills/romdev/SKILL.md = skill frontmatter + skillPreamble + body +
 //     generated tool reference     (says "POST /tool/{name}"; never mentions MCP)
 //
 // So neither surface mentions the other: the delivery instructions live in the
@@ -38,8 +38,8 @@ export const skillPreamble = [
   "  • GET  /tool/{name}/schema — that tool's JSON Schema (the exact parameters + types).",
   "  • GET  /openapi.json — the full machine-readable API; GET /documentation — a browsable console.",
   "",
-  "Sessions (for stateful work like load→step→read): your first POST returns an `x-romdev-session` header.",
-  "Echo that header on subsequent calls to keep the SAME emulator session. Omit it for one-shot file tools.",
+  "## Sessions — IMPORTANT for stateful work (load → step → read)",
+  "**Pick ONE session id for yourself and send it as the `x-romdev-session` header on EVERY call.** Make it UNIQUE and DESCRIPTIVE of what you're doing — e.g. `nes-platformer-build`, `zelda-romhack-text`, `gba-sprite-debug` (a slug, optionally with a short random suffix to stay unique). A human may be watching the live observer at /livestream, where your session id is the label for all your activity — a descriptive id tells them at a glance which agent/task each call belongs to; a bare uuid or `default` is opaque. The emulator/host is per-session: the ROM you `loadMedia` lives in YOUR session, and the next `frame`/`memory`/`cpu` call only sees it if it carries the SAME id. Do NOT send a new id each call — that's a fresh empty session every time (your loaded ROM vanishes; \"No ROM loaded\"). Several agents can share one server safely: each just sends a DIFFERENT id, so nobody clobbers another's ROM (another reason to make yours distinctive). The header is REQUIRED on every `/tool/{name}` call — omit it and you get a **401** (the server will NOT silently run you in a throwaway session). Pure file tools (romPatch/cart/encodeAudio) still need the header; just reuse your one id everywhere.",
   "",
   "Each tool is a domain VERB keyed by an operation axis — e.g. POST /tool/memory {\"op\":\"read\",…},",
   "POST /tool/build {\"output\":\"rom\",…}, POST /tool/romPatch {\"op\":\"findPointer\",…}. The full per-tool",
@@ -47,7 +47,7 @@ export const skillPreamble = [
 ].join("\n");
 /**
- * Build GET /romdev-skill.md: frontmatter + skill preamble + shared body +
+ * Build GET /skills/romdev/SKILL.md: frontmatter + skill preamble + shared body +
  * generated tool reference.
  * @param {{registry: Map<string,any>, agentsBody: string, version?: string}} args
  * @returns {string}
@@ -68,16 +68,17 @@ export function buildSkillDoc({ registry, agentsBody, version }) {
   // Update note — stamped with the running server's version. A saved skill is a
   // static snapshot (it doesn't auto-update), but this doc is GENERATED live from
-  // the running server, so re-fetching always gives you the current version. The
-  // server reports its version at GET /healthz, so an agent can detect staleness.
+  // the running server, so re-fetching always gives the current version. An agent
+  // can check the running version two ways: the tool call POST /tool/catalog
+  // {"op":"status"} → `romdevVersion`, or GET /healthz → `version`.
   const v = version ?? "0.0.0";
   const updateNote = [
     "## Keeping this skill current",
     `This skill was generated by romdev **v${v}** (it's a snapshot — it does not auto-update). ` +
     "romdev generates it live from the running server, so to update: run the latest `npx romdevtools`, " +
-    `then re-fetch \`GET http://localhost:7331/romdev-skill.md\` and overwrite your saved copy. ` +
-    "The running server reports its version at `GET /healthz` (`{\"version\":\"…\"}`) — if it's newer than the " +
-    "`metadata.version` above, your saved skill is stale; re-fetch it.",
+    `then re-fetch \`GET http://localhost:7331/skills/romdev/SKILL.md\` and overwrite your saved copy. ` +
+    "To check whether you're stale, ask the running server its version — `POST /tool/catalog {\"op\":\"status\"}` " +
+    "returns `romdevVersion` (or `GET /healthz` → `version`); if it's newer than the `metadata.version` above, re-fetch.",
   ].join("\n");
   return [

package/src/http/swagger.js CHANGED

@@ -56,7 +56,7 @@ export function swaggerHtml(opts = {}) {
     <p>Loading interactive docs… If this doesn't render, the raw OpenAPI spec is at
        <a href="${specUrl}"><code>${specUrl}</code></a>, every tool is callable via
        <code>POST /tool/{name}</code>, and the workflow guide is at
-       <a href="/romdev-skill.md"><code>/romdev-skill.md</code></a>.</p>
+       <a href="/skills/romdev/SKILL.md"><code>/skills/romdev/SKILL.md</code></a>.</p>
   </div>
   <div id="swagger-ui"></div>
   <script src="${base}/swagger-ui-bundle.js"></script>

package/src/http/tool-registry.js CHANGED

@@ -4,7 +4,7 @@
 // The MCP path registers 34 tools via registerTools(server, z, sessionKey),
 // where `server` is an McpServer and each handler closes over `sessionKey` for
 // per-session host isolation. The HTTP surfaces (POST /tool/{name},
-// /romdev-skill.md, /openapi.json, /documentation) want the EXACT same handlers,
+// /skills/romdev/SKILL.md, /openapi.json, /documentation) want the EXACT same handlers,
 // schemas, and clean-error behavior — just reached over plain HTTP.
 //
 // Rather than duplicate anything, we run the same registration against a minimal
@@ -17,6 +17,7 @@
 import { z } from "zod";
 import { registerTools } from "../mcp/tools/index.js";
 import { withClearToolErrors } from "../mcp/util.js";
+import { observer, summarizeForLog, extractImages } from "../observer/bus.js";
 /**
  * Build a tool registry for a given session key. Each entry's handler closes
@@ -86,8 +87,27 @@ export function buildToolRegistry(sessionKey) {
  * @param {object} args  the request body
  * @returns {Promise<{ok:true, result:any}|{ok:false, error:string}>}
  */
-export async function runTool(tool, args) {
+export async function runTool(tool, args, sessionKey) {
   const a = args ?? {};
+  const startedAt = Date.now();
+  // Emit the SAME `call` event the MCP path's observer middleware emits, so the
+  // /livestream view updates for HTTP/skill tool calls too (the MCP path wraps
+  // server.tool with installObserverMiddleware; the HTTP path runs handlers
+  // directly, so we emit here — the single HTTP execution chokepoint).
+  const emit = (extra) => {
+    try {
+      observer.push({
+        type: "call",
+        sessionKey: sessionKey ?? "http",
+        ts: startedAt,
+        tool: tool.name,
+        args: summarizeForLog(a),
+        durationMs: Date.now() - startedAt,
+        ...extra,
+      });
+    } catch { /* never let the observer kill a tool call */ }
+  };
   // Parse against the strict schema if we have a built zod object.
   const schema = tool.inputSchema;
   if (schema && typeof schema === "object" && "_def" in schema && typeof schema.safeParse === "function") {
@@ -96,6 +116,7 @@ export async function runTool(tool, args) {
       // surface the friendly first-issue message (withClearToolErrors / global map)
       const issue = parsed.error?.issues?.[0];
       const msg = (issue && issue.message) || "invalid arguments";
+      emit({ ok: false, error: msg });
       return { ok: false, error: msg };
     }
   }
@@ -104,22 +125,68 @@ export async function runTool(tool, args) {
     // Unwrap the MCP content envelope to plain JSON for HTTP clients.
     if (r && r.isError) {
       const text = r.content?.[0]?.text ?? "tool error";
+      emit({ ok: false, error: text });
       return { ok: false, error: text };
     }
+    const images = extractImages(r);
     const text = r?.content?.[0]?.text;
     if (typeof text === "string") {
       // most tools return jsonContent(...) → text is JSON; parse it back so the
       // HTTP response is real JSON, not a JSON-string-in-a-field.
-      try { return { ok: true, result: JSON.parse(text) }; }
-      catch { return { ok: true, result: { text } }; }
+      let parsed;
+      try { parsed = JSON.parse(text); } catch { parsed = { text }; }
+      // TRANSPORT-UNIFORM FAILURE MAPPING: a tool can signal failure either by
+      // throwing (→ isError above) OR by RETURNING a failure-shaped result
+      // ({ok:false} / {error} / {opened:false} / {applied:false} ...). On REST,
+      // a 200 with a failure in the body is invisible — the caller sees success
+      // and never reads the body. So we detect a failure-shaped result here and
+      // map it to ok:false (→ HTTP 400) for EVERY tool, no per-tool special-
+      // casing. (`notSupported`/`matched:false` are NOT failures — see below.)
+      if (looksLikeFailure(parsed)) {
+        const err = parsed.error ?? parsed.message ?? "tool reported failure";
+        emit({ ok: false, error: err });
+        return { ok: false, error: err, result: parsed };
+      }
+      emit({ ok: true, result: summarizeForLog(parsed), ...(images.length ? { images } : {}) });
+      return { ok: true, result: parsed };
     }
     // image / multi-part content: hand back the raw content array.
-    return { ok: true, result: r?.content ? { content: r.content } : (r ?? {}) };
+    const result = r?.content ? { content: r.content } : (r ?? {});
+    emit({ ok: true, result: summarizeForLog(result), ...(images.length ? { images } : {}) });
+    return { ok: true, result };
   } catch (e) {
+    emit({ ok: false, error: e?.message ?? String(e) });
     return { ok: false, error: e?.message ?? String(e) };
   }
 }
+// A RETURNED result is a FAILURE (→ non-2xx) when it carries an explicit failure
+// signal: a `false` on a verb-status flag, or a top-level `error` string. This is
+// the single rule that makes every tool behave the same on the transport — a tool
+// can fail by throwing or by returning one of these, and either way the caller
+// gets a non-2xx it can't ignore.
+//
+// NOT failures (these are valid ANSWERS / STATE, stay 2xx):
+//   • notSupported:true — the feature genuinely isn't on this platform/core
+//   • matched:false / found:false / hit:false — a lookup whose answer is "no"
+//   • looksLikeGraphic:false — a classification result
+//   • loaded:false / paused:false — STATE fields (is a ROM loaded? is it paused?),
+//     not "the action failed". This is why the flag list is DELIBERATELY narrow:
+//     only generic verdict flags + a couple of unambiguous action verbs. Anything
+//     else that wants to signal failure must do it with a top-level `error` string
+//     (or throw) — both of which are unambiguous.
+const FAILURE_FLAGS = ["ok", "success", "opened", "applied"];
+function looksLikeFailure(parsed) {
+  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return false;
+  // A top-level error string is unambiguous.
+  if (typeof parsed.error === "string" && parsed.error) return true;
+  // A generic verdict / unambiguous-action flag explicitly set to false.
+  for (const f of FAILURE_FLAGS) {
+    if (parsed[f] === false) return true;
+  }
+  return false;
+}
 /**
  * Convert a tool's stored inputSchema (a strict zod object, or a raw shape if
  * the stamp didn't take) to a JSON Schema (zod v4 native). Used by the OpenAPI

package/src/mcp/server.js CHANGED

@@ -57,7 +57,7 @@ const PKG_VERSION = (() => {
 // AGENTS.md is the CHANNEL-NEUTRAL body (workflow knowledge, footguns, per-platform
 // docs) — it must not contain "how to connect / how to call" prose, because that
 // differs per delivery channel. The MCP channel prepends mcpPreamble ("call the
-// MCP tools…", never mentions HTTP routes); the skill channel (GET /romdev-skill.md)
+// MCP tools…", never mentions HTTP routes); the skill channel (GET /skills/romdev/SKILL.md)
 // prepends skillPreamble ("POST /tool/{name}…", never mentions MCP). Both live in
 // src/http/skill-doc.js so neither leaks into the other surface.
 async function loadAgentsBody() {
@@ -351,7 +351,7 @@ async function main() {
   });
   // ── HTTP tool surface (the non-MCP way to drive romdev) ───────────────────
-  // POST /tool/:name + /openapi.json + /documentation + /romdev-skill.md, all
+  // POST /tool/:name + /openapi.json + /documentation + /skills/romdev/SKILL.md, all
   // generated from the same tool registry the MCP path uses. Same Express app,
   // same localhost trust, per-agent dynamic sessions. Lets MCP-wary users (or
   // agents that prefer the Agent Skills standard) use romdev with near-zero
@@ -410,10 +410,11 @@ async function main() {
     process.exit(1);
   });
   httpServer.on("listening", () => {
-    log.info(`romdev listening on http://${bannerHost}:${port}/mcp`);
     log.info("");
-    log.info(`prefer a skill?  save:  http://${bannerHost}:${port}/romdev-skill.md`);
-    log.info(`browse/try the tools:   http://${bannerHost}:${port}/documentation`);
+    log.info(`romdev (v${PKG_VERSION}) listening on http://${bannerHost}:${port}/mcp`);
+    log.info("");
+    log.info(`prefer a skill?  save:  http://${bannerHost}:${port}/skills/romdev/SKILL.md`);
+    log.info("");
     log.info(`optional observer:      http://${bannerHost}:${port}/livestream`);
     log.info("");
     log.info("connect your coding agent: https://github.com/monteslu/romdev#connect");

package/src/mcp/state.js CHANGED

@@ -23,12 +23,14 @@ export function getHost(sessionKey) {
   if (!host) {
     throw new Error(
       "No ROM loaded in this session — call loadMedia({path}) first. " +
-      "If you WERE mid-session and just got reconnected (the server restarted, " +
-      "or your session expired and your client re-initialized): the emulator " +
-      "state is held in server memory only, so it did not survive — just " +
-      "re-run loadMedia({path}) with the ROM you were working on (it's still on " +
-      "disk) to pick back up. Re-applying any in-progress changes means " +
-      "rebuilding/reloading; a fresh boot is the recovery point.",
+      "If you DID loadMedia and still see this, your calls are landing in DIFFERENT " +
+      "sessions: over plain HTTP/skill you must send the SAME `x-romdev-session` " +
+      "header on every call (pick one stable id and reuse it) — a new/missing id is " +
+      "a fresh empty session each time. " +
+      "If you WERE mid-session and just got reconnected (the server restarted or " +
+      "your session expired): emulator state is held in server memory only, so it " +
+      "did not survive — re-run loadMedia({path}) with your ROM (still on disk) to " +
+      "pick back up. A fresh boot is the recovery point.",
     );
   }
   return host;

package/src/mcp/tool-manifest.js CHANGED

@@ -37,7 +37,7 @@ export const MERGE_MAP = {
   host: { absorbs: ["unloadMedia", "shutdown", "reset", "pause", "resume"], axis: "op" },
   // ── frame (step/screenshot/stepAndShot/stepInstruction; stepInstruction folded from watch-memory.js) ──
   frame: { absorbs: ["stepFrames", "screenshot", "stepAndScreenshot", "stepInstruction"], axis: "op" },
-  // ── scaffold (project/game + snippets; patchGbHeader stays standalone in project.js) ──
+  // ── scaffold (project/game + snippets; patchGbHeader folded into romPatch op:'gbHeader') ──
   scaffold: { absorbs: ["createProject", "createGame", "starterSnippets", "copyStarterSnippets"], axis: "op" },
   // ── cart (identify/extract/wrap; identifyRom from rom-id.js, rest from cart-parts.js) ──
   cart: { absorbs: ["identifyRom", "extractCart", "wrapRomFromParts"], axis: "op" },
@@ -61,14 +61,14 @@ export const MERGE_MAP = {
   cpu: { absorbs: ["getCPUState", "setRegister", "callSubroutine", "decompressWith"], axis: "op" },
   // ── breakpoint (STOP-on-first; all 4 from watch-memory.js) ──
   breakpoint: { absorbs: ["findWriter", "runUntilWrite", "runUntilPC", "runUntilRead"], axis: "on" },
-  // ── watch (LOG-ALL; all 3 from watch-memory.js) ──
-  watch: { absorbs: ["watchMemory", "watchRange", "logPCRange"], axis: "on" },
-  // ── dmaTrace (Genesis VDP-DMA; watchDma from watch-memory.js, traceVramSource from trace-vram-source.js) ──
-  dmaTrace: { absorbs: ["watchDma", "traceVramSource"], axis: "precision" },
+  // ── watch (LOG-ALL; watchMemory/watchRange/logPCRange + Genesis VDP-DMA trace
+  //    on:'dma' from watchDma/traceVramSource — all from watch-memory.js +
+  //    trace-vram-source.js. dmaTrace was folded in as watch({on:'dma'}).) ──
+  watch: { absorbs: ["watchMemory", "watchRange", "logPCRange", "watchDma", "traceVramSource"], axis: "on" },
   // ── build (compile/run; buildSource/buildProject/runSource from toolchain.js, buildSourceWithDebug from symbols.js). ENTRY-TIER. ──
   build: { absorbs: ["buildSource", "buildSourceWithDebug", "buildProject", "runSource"], axis: "output" },
-  // ── romPatch (8-op ROM-hack toolkit; patchFile/patchRom from rom-id.js, spliceCHR from splice-chr.js, relocateBlock/makeStoredBlock/findPointerTo from reinject.js, findFreeSpace from free-space.js, diffRoms from diff-roms.js) ──
-  romPatch: { absorbs: ["patchFile", "patchRom", "spliceCHR", "relocateBlock", "makeStoredBlock", "findFreeSpace", "findPointerTo", "diffRoms"], axis: "op" },
+  // ── romPatch (9-op ROM-hack toolkit; patchFile/patchRom from rom-id.js, spliceCHR from splice-chr.js, relocateBlock/makeStoredBlock/findPointerTo from reinject.js, findFreeSpace from free-space.js, diffRoms from diff-roms.js, patchGbHeader as op:'gbHeader') ──
+  romPatch: { absorbs: ["patchFile", "patchRom", "spliceCHR", "relocateBlock", "makeStoredBlock", "findFreeSpace", "findPointerTo", "diffRoms", "patchGbHeader"], axis: "op" },
   // ── catalog (orient; listCategories + getStatus, both entry-tier in index.js) ──
   catalog: { absorbs: ["listCategories", "getStatus"], axis: "op" },
   // ── playtest (show-a-human window FSM; all 4 from playtest.js). ENTRY-TIER. ──

package/src/mcp/tools/cheats.js CHANGED Viewed

@@ -294,8 +294,9 @@ export function registerCheatTools(server, z, sessionKey) {
     "Cheat lookup / search / apply / create for the loaded ROM. `op`: " +
     "'lookup' (THIS game's known cheats from the bundled DB — returns labeled RAM addresses + Game Genie/ROM code " +
     "sites, so it answers 'which byte holds X?' for free); " +
-    "'search' (fuzzy-find a game by NAME when you don't have the exact No-Intro title — returns game names + cheat " +
-    "counts, then lookup the chosen one); " +
+    "'search' (fuzzy-find a game by NAME when you don't have the exact No-Intro title — searches ALL platforms by " +
+    "default and each match reports its own `platform`, so you don't need to know the console; pass `platform` only " +
+    "to scope it. Returns game names + cheat counts; then lookup the chosen one with its platform); " +
     "'apply' (enable a cheat on the LOADED game — pass a raw `code` or a `desc` from lookup); " +
     "'clear' (remove all active cheats); 'make' (CREATE a shareable code from an address+value). " +
     "TRUST: lookup matches by NAME/fuzzy similarity, NOT a verified CRC — a PROBABLE match. Labels are usually " +
@@ -322,7 +323,7 @@ export function registerCheatTools(server, z, sessionKey) {
       index: z.number().int().min(0).optional().describe("op=apply: cheat slot (default: next free slot). Reuse a slot to replace it."),
       enabled: z.boolean().default(true).describe("op=apply: false disables the slot instead of enabling."),
       // make / search / lookup share `platform`
-      platform: z.enum([...MAKE_CHEAT_PLATFORMS]).optional().describe("op=lookup: override platform detection. op=search/make: REQUIRED — the target platform (all 14 tier-1)."),
+      platform: z.enum([...MAKE_CHEAT_PLATFORMS]).optional().describe("op=lookup: override platform detection. op=search: OPTIONAL — omit to search ALL platforms (each match returns its own `platform`); pass one only to scope the search. op=make: REQUIRED — the target platform (all 14 tier-1)."),
       address: z.number().int().min(0).optional().describe("op=make: address to cheat (RAM addr, or the ROM addr to patch)."),
       value: z.number().int().min(0).max(255).optional().describe("op=make: replacement byte (0-255). Provide value OR values."),
       values: z.array(z.number().int().min(0).max(255)).min(1).max(64).optional().describe("op=make: batch — a code per value at the same address. Returns variants[]."),

package/src/mcp/tools/index.js CHANGED Viewed

@@ -60,9 +60,22 @@ import { jsonContent, safeTool, withClearToolErrors } from "../util.js";
 import { getHostOrNull, setDisclosure } from "../state.js";
 import { MERGE_MAP } from "../tool-manifest.js";
 import { readFile } from "node:fs/promises";
+import { readFileSync } from "node:fs";
 import { fileURLToPath } from "node:url";
 import { dirname, join } from "node:path";
+// Package version — surfaced by catalog({op:'status'|'whatsNew'}) so an agent can
+// check the running romdev version with a plain TOOL CALL (works over MCP AND the
+// HTTP/skill surface), e.g. to detect a saved skill is stale. (GET /healthz also
+// reports it for non-tool HTTP clients.)
+const PKG_VERSION = (() => { // IIFE: the version is resolved once, when this module is first evaluated
+  try {
+    return JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "package.json"), "utf8")).version; // package.json is looked up three directories above this module's own directory
+  } catch { // a read or JSON-parse failure yields a placeholder version instead of failing module load
+    return "0.0.0";
+  }
+})();
 // catalog({op:'whatsNew'}): the recent CHANGELOG + an old→new RENAME TABLE
 // derived from MERGE_MAP (the single source of truth for the consolidation), so
 // an agent resuming a handoff written against an older server can re-map every
@@ -91,6 +104,7 @@ async function buildWhatsNew() {
     changelog = sections.slice(0, 3).join("## ").trim();
   } catch { /* changelog not present in this install */ }
   return {
+    romdevVersion: PKG_VERSION,
     note: "Pre-1.0 the tool surface is consolidated freely with NO deprecated aliases. If a tool name from an older handoff is missing, it's almost certainly now an `op` (or other axis) on a domain tool — find it below, then read that tool's description for the exact op enum and params.",
     renameTable: renames,
     axisLegend: "Every domain tool is keyed by ONE axis: op (most), output (build), on (breakpoint), target (disasm), view (background), source (palette), stage (encodeArt), from (importArt). The value names the operation, e.g. romPatch({op:'findPointer'}).",
@@ -215,7 +229,7 @@ export function registerTools(server, z, sessionKey) {
     "• op:'whatsNew' — the recent CHANGELOG + an OLD→NEW tool RENAME TABLE. Call this FIRST if you're resuming work from a handoff written against an older server: pre-1.0 the surface is consolidated freely (no deprecated aliases), so a name you remember may now be an `op` on a domain tool. This maps them in one read instead of probing each tool.",
     {
       op: z.enum(["categories", "status", "whatsNew"]).default("categories")
-        .describe("categories=tool-category catalog; status=live session snapshot (host/platform/frameCount/media); whatsNew=recent CHANGELOG + old→new tool rename table."),
+        .describe("categories=tool-category catalog; status=live session snapshot (romdevVersion + host/platform/frameCount/media — call this to check the running version, e.g. is a saved skill stale); whatsNew=recent CHANGELOG + old→new tool rename table."),
     },
     safeTool(async ({ op = "categories" }) => {
       if (op === "whatsNew") {
@@ -228,6 +242,7 @@ export function registerTools(server, z, sessionKey) {
           ? { ...host.getStatus() }
           : { loaded: false, hint: "no host yet; call loadMedia (in category 'run') to load a ROM" };
         return jsonContent({
+          romdevVersion: PKG_VERSION,
           ...base,
           loadedCategories: cats.filter((c) => c.loaded).map((c) => c.name),
           unloadedCategories: cats.filter((c) => !c.loaded).map((c) => c.name),
@@ -235,6 +250,7 @@ export function registerTools(server, z, sessionKey) {
       }
       const categories = disclosure.listCategories();
       return jsonContent({
+        romdevVersion: PKG_VERSION,
         categories,
         note: "Every tool registers at session init — this catalog is just a map grouped by purpose, NOT a gate. Call any tool by name directly.",
         humanInTheLoopHint: "Iterate INTERNALLY on screenshots first (build({output:'run'}) returns one inline; frame({op:'screenshot'/'stepAndShot'}) re-shoots the live host) — don't open a window to debug. Once the game actually boots and shows the feature you're working on, call playtest({}) so your human can watch and play it live. Opening a window on a black screen or a crash just wastes the human's attention — show them something that works.",
@@ -327,7 +343,7 @@ const TOOL_OWNER = {
   playtest: "show",
   // advanced category
   runUntil: "advanced",
-  watch: "advanced", breakpoint: "advanced", dmaTrace: "advanced",
+  watch: "advanced", breakpoint: "advanced",
   recordSession: "advanced",
   // entry tier itself
   catalog: "entry",