romdevtools 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/AGENTS.md +21 -14
  2. package/CHANGELOG.md +125 -1
  3. package/README.md +13 -8
  4. package/examples/atari2600/main.asm +1 -1
  5. package/examples/atari2600/templates/default.asm +1 -1
  6. package/examples/atari2600/templates/paddle.asm +59 -47
  7. package/examples/atari7800/main.c +1 -1
  8. package/examples/atari7800/templates/default.c +1 -1
  9. package/examples/atari7800/templates/music_demo.c +1 -1
  10. package/examples/c64/main.c +1 -1
  11. package/examples/c64/templates/platformer.c +2 -2
  12. package/examples/c64/templates/puzzle.c +1 -1
  13. package/examples/c64/templates/racing.c +3 -3
  14. package/examples/c64/templates/shmup.c +6 -5
  15. package/examples/c64/templates/sports.c +4 -4
  16. package/examples/gb/main.asm +1 -1
  17. package/examples/gb/main.c +1 -1
  18. package/examples/gb/templates/puzzle.c +1 -1
  19. package/examples/gb/templates/racing.c +1 -1
  20. package/examples/gb/templates/shmup.c +1 -1
  21. package/examples/gba/templates/gba_hello.c +1 -1
  22. package/examples/gba/templates/maxmod_demo.c +1 -1
  23. package/examples/gba/templates/puzzle.c +17 -3
  24. package/examples/gba/templates/racing.c +16 -2
  25. package/examples/gba/templates/shmup.c +23 -4
  26. package/examples/gba/templates/tonc_hello.c +6 -4
  27. package/examples/gbc/main.asm +1 -1
  28. package/examples/gbc/templates/puzzle.c +1 -1
  29. package/examples/gbc/templates/racing.c +1 -1
  30. package/examples/gbc/templates/shmup.c +1 -1
  31. package/examples/genesis/main.s +1 -1
  32. package/examples/genesis/templates/puzzle.c +1 -1
  33. package/examples/genesis/templates/racing.c +45 -1
  34. package/examples/genesis/templates/shmup.c +12 -3
  35. package/examples/genesis/templates/shmup_2p.c +2 -2
  36. package/examples/genesis/templates/sports.c +39 -0
  37. package/examples/gg/templates/hello_sprite.c +38 -23
  38. package/examples/gg/templates/music_demo.c +11 -8
  39. package/examples/gg/templates/platformer.c +37 -15
  40. package/examples/gg/templates/racing.c +25 -12
  41. package/examples/gg/templates/shmup.c +12 -6
  42. package/examples/gg/templates/sports.c +30 -16
  43. package/examples/gg/templates/tile_engine.c +24 -10
  44. package/examples/lynx/templates/platformer.c +7 -1
  45. package/examples/lynx/templates/puzzle.c +8 -2
  46. package/examples/lynx/templates/racing.c +7 -1
  47. package/examples/lynx/templates/sports.c +7 -1
  48. package/examples/nes/main.c +2 -2
  49. package/examples/nes/space-shooter/nes_runtime.h +1 -1
  50. package/examples/nes/templates/default.c +4 -1
  51. package/examples/nes/templates/racing.c +50 -1
  52. package/examples/pce/main.c +1 -1
  53. package/examples/sms/templates/hello_sprite.c +1 -1
  54. package/examples/sms/templates/music_demo.c +1 -1
  55. package/examples/sms/templates/puzzle.c +1 -1
  56. package/examples/sms/templates/racing.c +1 -1
  57. package/examples/sms/templates/shmup.c +1 -1
  58. package/examples/sms/templates/shmup_2p.c +2 -2
  59. package/examples/snes/main.asm +1 -1
  60. package/examples/snes/templates/c-hello-data.asm +309 -14
  61. package/examples/snes/templates/c-hello.c +13 -2
  62. package/examples/snes/templates/default.c +1 -1
  63. package/examples/snes/templates/hello_sprite-data.asm +300 -2
  64. package/examples/snes/templates/hello_sprite.c +10 -1
  65. package/examples/snes/templates/music_demo-data.asm +300 -2
  66. package/examples/snes/templates/music_demo.c +10 -1
  67. package/examples/snes/templates/platformer-data.asm +300 -2
  68. package/examples/snes/templates/platformer.c +10 -1
  69. package/examples/snes/templates/puzzle-data.asm +300 -2
  70. package/examples/snes/templates/puzzle.c +11 -1
  71. package/examples/snes/templates/racing-data.asm +300 -2
  72. package/examples/snes/templates/racing.c +40 -4
  73. package/examples/snes/templates/shmup-data.asm +299 -6
  74. package/examples/snes/templates/shmup.c +11 -7
  75. package/examples/snes/templates/sports-data.asm +300 -2
  76. package/examples/snes/templates/sports.c +40 -5
  77. package/package.json +1 -1
  78. package/src/cheats/lookup.js +39 -18
  79. package/src/http/routes.js +58 -33
  80. package/src/http/skill-doc.js +10 -9
  81. package/src/http/swagger.js +1 -1
  82. package/src/http/tool-registry.js +72 -5
  83. package/src/mcp/server.js +6 -5
  84. package/src/mcp/state.js +8 -6
  85. package/src/mcp/tool-manifest.js +7 -7
  86. package/src/mcp/tools/cheats.js +4 -3
  87. package/src/mcp/tools/index.js +18 -2
  88. package/src/mcp/tools/playtest.js +48 -35
  89. package/src/mcp/tools/project.js +39 -73
  90. package/src/mcp/tools/rom-id.js +49 -4
  91. package/src/mcp/tools/tile-inspect.js +1 -1
  92. package/src/mcp/tools/toolchain.js +183 -19
  93. package/src/mcp/tools/trace-vram-source.js +3 -3
  94. package/src/mcp/tools/watch-memory.js +27 -46
  95. package/src/observer/livestream.html +41 -5
  96. package/src/platforms/_guides/ROMHACKING_PLAYBOOK.md +5 -5
  97. package/src/platforms/gb/MENTAL_MODEL.md +3 -3
  98. package/src/platforms/gb/TROUBLESHOOTING.md +1 -1
  99. package/src/platforms/gb/UPSTREAM_SOURCES.md +1 -1
  100. package/src/platforms/gb/lib/c/README.md +2 -2
  101. package/src/platforms/gb/lib/c/SDCC_GOTCHAS.md +1 -1
  102. package/src/platforms/gbc/MENTAL_MODEL.md +3 -3
  103. package/src/platforms/gbc/TROUBLESHOOTING.md +5 -5
  104. package/src/platforms/gbc/UPSTREAM_SOURCES.md +2 -2
  105. package/src/platforms/gbc/lib/c/README.md +2 -2
  106. package/src/platforms/gbc/lib/c/SDCC_GOTCHAS.md +1 -1
  107. package/src/platforms/gg/MENTAL_MODEL.md +14 -13
  108. package/src/platforms/gg/lib/c/vdp_init.c +10 -8
  109. package/src/platforms/msx/MENTAL_MODEL.md +1 -1
  110. package/src/platforms/nes/TROUBLESHOOTING.md +1 -1
  111. package/src/platforms/nes/lib/c/nes_runtime.c +28 -6
  112. package/src/platforms/pce/MENTAL_MODEL.md +1 -1
  113. package/src/platforms/pce/lib/c/pce_hw.h +1 -0
  114. package/src/platforms/pce/lib/c/pce_video.c +26 -0
  115. package/src/platforms/sms/MENTAL_MODEL.md +12 -12
  116. package/src/platforms/sms/lib/c/vdp_init.c +10 -8
  117. package/src/platforms/sms/lib/vdp_init.s +1 -1
  118. package/src/playtest/playtest.js +25 -0
  119. package/src/toolchains/cc65/presets/nes/chr-ram-runtime.cfg +1 -1
  120. package/src/toolchains/cc65/presets/nes/chr-ram.cfg +1 -1
  121. package/src/toolchains/cc65/presets/nes/chr-ram.crt0.s +1 -1
  122. package/src/toolchains/genesis-c/README.md +1 -1
  123. package/src/toolchains/sdcc/preflight-lint.js +47 -7
  124. package/src/toolchains/snes-c/snes-c.js +3 -7
@@ -5,20 +5,22 @@
5
5
  // GET /tool/:name/schema that tool's JSON Schema (a validator on demand)
6
6
  // GET /openapi.json OpenAPI 3.1 spec for every /tool/:name route
7
7
  // GET /documentation Swagger UI over /openapi.json (live "try it" console)
8
- // GET /romdev-skill.md the SKILL.md (Agent Skills open standard) — channel
9
- // doc that drives the routes, never mentions MCP
8
+ // GET /skills/romdev/SKILL.md the SKILL.md (Agent Skills open standard) — the
9
+ // channel doc that drives the routes, never mentions
10
+ // MCP. Also at /romdev/SKILL.md and /romdev-skill.md.
10
11
  //
11
- // Sessions: each agent gets its own session dynamically, same isolation as MCP.
12
- // First call with no x-romdev-session → mint one, return it in the response
13
- // header; the agent echoes it on later calls (sticky host across load→step→read).
14
- // A call with no header gets an ephemeral per-request session (fine for pure-file
15
- // tools; stateful host work should keep the header). No auth — localhost trust,
16
- // same as /mcp (the app already mounts localhostHostValidation()).
12
+ // Sessions: each agent picks its own stable id and sends it as x-romdev-session
13
+ // on EVERY call (same per-agent host isolation as MCP). The header is REQUIRED —
14
+ // no header → 401 (we don't auto-mint a throwaway session; that silently dropped
15
+ // the loaded ROM and surfaced as "No ROM loaded" later). First use of an id
16
+ // creates the session, reuse keeps the host across load→step→read, different ids
17
+ // isolate different agents. No auth beyond that — localhost trust, same as /mcp
18
+ // (the app already mounts localhostHostValidation()).
17
19
 
18
- import { randomUUID } from "node:crypto";
19
20
  import { buildToolRegistry, runTool, toolJsonSchema } from "./tool-registry.js";
20
21
  import { skillPreamble, skillToolReference, buildSkillDoc } from "./skill-doc.js";
21
22
  import { swaggerHtml, swaggerAsset } from "./swagger.js";
23
+ import { observer } from "../observer/bus.js";
22
24
  import { log } from "../mcp/log.js";
23
25
 
24
26
  const SESSION_HEADER = "x-romdev-session";
@@ -40,12 +42,16 @@ export function mountHttpToolRoutes(app, opts = {}) {
40
42
  /** @type {Map<string, {registry: Map<string,any>, lastSeen: number}>} */
41
43
  const sessions = new Map();
42
44
 
43
- function getSession(sessionKey) {
45
+ function getSession(sessionKey, { sticky = false } = {}) {
44
46
  let s = sessions.get(sessionKey);
45
47
  if (!s) {
46
- s = { registry: buildToolRegistry(sessionKey), lastSeen: Date.now() };
48
+ s = { registry: buildToolRegistry(sessionKey), lastSeen: Date.now(), sticky };
47
49
  sessions.set(sessionKey, s);
48
50
  log.debug(`[http] session ${sessionKey.slice(0, 8)} created (${sessions.size} active)`);
51
+ // Surface sticky sessions in /livestream (like the MCP path does on init).
52
+ // Ephemeral one-shot sessions are NOT registered (they'd spam connect/
53
+ // disconnect); their individual `call` events still show in the stream.
54
+ if (sticky) { try { observer.sessionConnected(sessionKey); } catch {} }
49
55
  } else {
50
56
  s.lastSeen = Date.now();
51
57
  }
@@ -58,6 +64,7 @@ export function mountHttpToolRoutes(app, opts = {}) {
58
64
  for (const [key, s] of sessions) {
59
65
  if (now - s.lastSeen > idleMs) {
60
66
  sessions.delete(key);
67
+ if (s.sticky) { try { observer.sessionDisconnected(key); } catch {} }
61
68
  log.debug(`[http] session ${key.slice(0, 8)} reaped (idle)`);
62
69
  }
63
70
  }
@@ -71,30 +78,38 @@ export function mountHttpToolRoutes(app, opts = {}) {
71
78
  // ── POST /tool/:name ──────────────────────────────────────────────────────
72
79
  app.post("/tool/:name", async (req, res) => {
73
80
  const name = req.params.name;
74
- // session: sticky if header present, ephemeral otherwise.
75
- let sessionKey = req.headers[SESSION_HEADER];
76
- let ephemeral = false;
81
+ // Session model: the AGENT picks its own stable, task-descriptive id and
82
+ // sends it as x-romdev-session on EVERY call — first use creates the session,
83
+ // reuse keeps the same host/state (load→step→read), and different ids isolate
84
+ // different agents. NO HEADER → 401: we don't auto-mint a throwaway session
85
+ // (that silently dropped the loaded ROM and surfaced as "No ROM loaded" two
86
+ // calls later). Requiring the header up front turns that silent footgun into
87
+ // a loud, fixable 401.
88
+ const sessionKey = req.headers[SESSION_HEADER];
77
89
  if (typeof sessionKey !== "string" || !sessionKey) {
78
- sessionKey = randomUUID();
79
- ephemeral = true;
90
+ res.status(401).json({
91
+ error: "Missing required `x-romdev-session` header. Pick ONE stable, " +
92
+ "task-descriptive id for yourself (e.g. 'nes-platformer-build') and send " +
93
+ "it on EVERY call — it's your per-session emulator key (the ROM you load " +
94
+ "lives under it; the next call only sees it with the SAME id) and the " +
95
+ "label shown in the /livestream observer. Several agents share one server " +
96
+ "by each using a different id.",
97
+ });
98
+ return;
80
99
  }
81
- const { registry } = getSession(sessionKey);
100
+ const { registry } = getSession(sessionKey, { sticky: true });
82
101
  const tool = registry.get(name);
83
102
  if (!tool) {
84
103
  res.status(404).json({
85
- error: `Unknown tool '${name}'. GET /openapi.json or /romdev-skill.md for the list.`,
104
+ error: `Unknown tool '${name}'. GET /openapi.json or /skills/romdev/SKILL.md for the list.`,
86
105
  });
87
106
  return;
88
107
  }
89
- // echo the session id so the agent can reuse it (esp. when we minted one)
108
+ // echo the session id back (convenience for clients that log it)
90
109
  res.setHeader(SESSION_HEADER, sessionKey);
91
- const out = await runTool(tool, req.body);
92
- if (ephemeral) {
93
- // drop the ephemeral session immediately (no sticky host wanted)
94
- sessions.delete(sessionKey);
95
- }
110
+ const out = await runTool(tool, req.body, sessionKey);
96
111
  if (out.ok) res.json(out.result);
97
- else res.status(400).json({ error: out.error });
112
+ else res.status(400).json(out.result ?? { error: out.error });
98
113
  });
99
114
 
100
115
  // ── GET /tool/:name/schema ────────────────────────────────────────────────
@@ -122,17 +137,26 @@ export function mountHttpToolRoutes(app, opts = {}) {
122
137
  res.send(buf);
123
138
  });
124
139
 
125
- // ── GET /romdev-skill.md ──────────────────────────────────────────────────
126
- app.get("/romdev-skill.md", (req, res) => {
140
+ // ── GET /skills/romdev/SKILL.md (primary) + aliases ───────────────────────
141
+ // Agents store skills on disk as skills/<name>/SKILL.md (a dir named after the
142
+ // skill, canonical file SKILL.md). We serve the same doc at several paths so
143
+ // the URL matches wherever the agent saved it:
144
+ // /skills/romdev/SKILL.md — primary: full disk mirror (~/.claude/skills/romdev/SKILL.md)
145
+ // /romdev/SKILL.md — alias: the <name>/SKILL.md tail
146
+ // /romdev-skill.md — alias: flat form (older refs)
147
+ const serveSkill = (req, res) => {
127
148
  const md = buildSkillDoc({
128
149
  registry: metaRegistry,
129
150
  agentsBody: opts.agentsBody ?? "",
130
151
  version,
131
152
  });
132
153
  res.type("text/markdown").send(md);
133
- });
154
+ };
155
+ app.get("/skills/romdev/SKILL.md", serveSkill);
156
+ app.get("/romdev/SKILL.md", serveSkill);
157
+ app.get("/romdev-skill.md", serveSkill); // alias
134
158
 
135
- log.debug("[http] tool surface mounted: POST /tool/:name, /openapi.json, /documentation, /romdev-skill.md");
159
+ log.debug("[http] tool surface mounted: POST /tool/:name, /openapi.json, /documentation, /skills/romdev/SKILL.md");
136
160
  return { sessions, stop: () => clearInterval(reaper) };
137
161
  }
138
162
 
@@ -158,13 +182,14 @@ export function buildOpenApi(registry, version) {
158
182
  },
159
183
  responses: {
160
184
  200: { description: "Tool result (JSON).", content: { "application/json": { schema: { type: "object" } } } },
161
- 400: { description: "Validation or tool error.", content: { "application/json": { schema: { type: "object", properties: { error: { type: "string" } } } } } },
185
+ 400: { description: "Validation or tool error (the action did not succeed).", content: { "application/json": { schema: { type: "object", properties: { error: { type: "string" } } } } } },
186
+ 401: { description: "Missing required x-romdev-session header.", content: { "application/json": { schema: { type: "object", properties: { error: { type: "string" } } } } } },
162
187
  404: { description: "Unknown tool." },
163
188
  },
164
189
  parameters: [{
165
- name: SESSION_HEADER, in: "header", required: false,
190
+ name: SESSION_HEADER, in: "header", required: true,
166
191
  schema: { type: "string" },
167
- description: "Per-agent session id. Omit on the first call to get one back in the response header; echo it on later calls to keep a sticky emulator session (load→step→read). Omit entirely for one-shot pure-file tools.",
192
+ description: "REQUIRED. Per-agent session id pick one stable, UNIQUE, task-DESCRIPTIVE string (e.g. 'nes-platformer-build', 'zelda-romhack-text') and send it on EVERY call. It's the per-session emulator key (load→step→read state lives under it) AND the label shown in the /livestream observer, so a descriptive id tells a watching human which task each call belongs to. Several agents share one server safely by each using a different id. Missing → 401.",
168
193
  }],
169
194
  },
170
195
  };
@@ -174,7 +199,7 @@ export function buildOpenApi(registry, version) {
174
199
  info: {
175
200
  title: "romdev HTTP tool API",
176
201
  version,
177
- description: "Plain-HTTP surface for romdev's retro-game-dev tools — the non-MCP way to drive the same tools. Generated from the tool registry. See /romdev-skill.md for the workflow guide.",
202
+ description: "Plain-HTTP surface for romdev's retro-game-dev tools — the non-MCP way to drive the same tools. Generated from the tool registry. See /skills/romdev/SKILL.md for the workflow guide.",
178
203
  },
179
204
  servers: [{ url: "/" }],
180
205
  paths,
@@ -3,7 +3,7 @@
3
3
  // One shared body (AGENTS.md, channel-neutral) is wrapped per delivery channel:
4
4
  // - MCP connection instructions = mcpPreamble + body (says "call the MCP
5
5
  // tools"; never mentions HTTP routes / skills)
6
- // - GET /romdev-skill.md = skill frontmatter + skillPreamble + body +
6
+ // - GET /skills/romdev/SKILL.md = skill frontmatter + skillPreamble + body +
7
7
  // generated tool reference (says "POST /tool/{name}"; never mentions MCP)
8
8
  //
9
9
  // So neither surface mentions the other: the delivery instructions live in the
@@ -38,8 +38,8 @@ export const skillPreamble = [
38
38
  " • GET /tool/{name}/schema — that tool's JSON Schema (the exact parameters + types).",
39
39
  " • GET /openapi.json — the full machine-readable API; GET /documentation — a browsable console.",
40
40
  "",
41
- "Sessions (for stateful work like load→step→read): your first POST returns an `x-romdev-session` header.",
42
- "Echo that header on subsequent calls to keep the SAME emulator session. Omit it for one-shot file tools.",
41
+ "## Sessions — IMPORTANT for stateful work (load → step → read)",
42
+ "**Pick ONE session id for yourself and send it as the `x-romdev-session` header on EVERY call.** Make it UNIQUE and DESCRIPTIVE of what you're doing — e.g. `nes-platformer-build`, `zelda-romhack-text`, `gba-sprite-debug` (a slug, optionally with a short random suffix to stay unique). A human may be watching the live observer at /livestream, where your session id is the label for all your activity — a descriptive id tells them at a glance which agent/task each call belongs to; a bare uuid or `default` is opaque. The emulator/host is per-session: the ROM you `loadMedia` lives in YOUR session, and the next `frame`/`memory`/`cpu` call only sees it if it carries the SAME id. Do NOT send a new id each call — that's a fresh empty session every time (your loaded ROM vanishes; \"No ROM loaded\"). Several agents can share one server safely: each just sends a DIFFERENT id, so nobody clobbers another's ROM (another reason to make yours distinctive). The header is REQUIRED on every `/tool/{name}` call — omit it and you get a **401** (the server will NOT silently run you in a throwaway session). Pure file tools (romPatch/cart/encodeAudio) still need the header; just reuse your one id everywhere.",
43
43
  "",
44
44
  "Each tool is a domain VERB keyed by an operation axis — e.g. POST /tool/memory {\"op\":\"read\",…},",
45
45
  "POST /tool/build {\"output\":\"rom\",…}, POST /tool/romPatch {\"op\":\"findPointer\",…}. The full per-tool",
@@ -47,7 +47,7 @@ export const skillPreamble = [
47
47
  ].join("\n");
48
48
 
49
49
  /**
50
- * Build GET /romdev-skill.md: frontmatter + skill preamble + shared body +
50
+ * Build GET /skills/romdev/SKILL.md: frontmatter + skill preamble + shared body +
51
51
  * generated tool reference.
52
52
  * @param {{registry: Map<string,any>, agentsBody: string, version?: string}} args
53
53
  * @returns {string}
@@ -68,16 +68,17 @@ export function buildSkillDoc({ registry, agentsBody, version }) {
68
68
 
69
69
  // Update note — stamped with the running server's version. A saved skill is a
70
70
  // static snapshot (it doesn't auto-update), but this doc is GENERATED live from
71
- // the running server, so re-fetching always gives you the current version. The
72
- // server reports its version at GET /healthz, so an agent can detect staleness.
71
+ // the running server, so re-fetching always gives the current version. An agent
72
+ // can check the running version two ways: the tool call POST /tool/catalog
73
+ // {"op":"status"} → `romdevVersion`, or GET /healthz → `version`.
73
74
  const v = version ?? "0.0.0";
74
75
  const updateNote = [
75
76
  "## Keeping this skill current",
76
77
  `This skill was generated by romdev **v${v}** (it's a snapshot — it does not auto-update). ` +
77
78
  "romdev generates it live from the running server, so to update: run the latest `npx romdevtools`, " +
78
- `then re-fetch \`GET http://localhost:7331/romdev-skill.md\` and overwrite your saved copy. ` +
79
- "The running server reports its version at `GET /healthz` (`{\"version\":\"…\"}`) — if it's newer than the " +
80
- "`metadata.version` above, your saved skill is stale; re-fetch it.",
79
+ `then re-fetch \`GET http://localhost:7331/skills/romdev/SKILL.md\` and overwrite your saved copy. ` +
80
+ "To check whether you're stale, ask the running server its version `POST /tool/catalog {\"op\":\"status\"}` " +
81
+ "returns `romdevVersion` (or `GET /healthz` → `version`); if it's newer than the `metadata.version` above, re-fetch.",
81
82
  ].join("\n");
82
83
 
83
84
  return [
@@ -56,7 +56,7 @@ export function swaggerHtml(opts = {}) {
56
56
  <p>Loading interactive docs… If this doesn't render, the raw OpenAPI spec is at
57
57
  <a href="${specUrl}"><code>${specUrl}</code></a>, every tool is callable via
58
58
  <code>POST /tool/{name}</code>, and the workflow guide is at
59
- <a href="/romdev-skill.md"><code>/romdev-skill.md</code></a>.</p>
59
+ <a href="/skills/romdev/SKILL.md"><code>/skills/romdev/SKILL.md</code></a>.</p>
60
60
  </div>
61
61
  <div id="swagger-ui"></div>
62
62
  <script src="${base}/swagger-ui-bundle.js"></script>
@@ -4,7 +4,7 @@
4
4
  // The MCP path registers 34 tools via registerTools(server, z, sessionKey),
5
5
  // where `server` is an McpServer and each handler closes over `sessionKey` for
6
6
  // per-session host isolation. The HTTP surfaces (POST /tool/{name},
7
- // /romdev-skill.md, /openapi.json, /documentation) want the EXACT same handlers,
7
+ // /skills/romdev/SKILL.md, /openapi.json, /documentation) want the EXACT same handlers,
8
8
  // schemas, and clean-error behavior — just reached over plain HTTP.
9
9
  //
10
10
  // Rather than duplicate anything, we run the same registration against a minimal
@@ -17,6 +17,7 @@
17
17
  import { z } from "zod";
18
18
  import { registerTools } from "../mcp/tools/index.js";
19
19
  import { withClearToolErrors } from "../mcp/util.js";
20
+ import { observer, summarizeForLog, extractImages } from "../observer/bus.js";
20
21
 
21
22
  /**
22
23
  * Build a tool registry for a given session key. Each entry's handler closes
@@ -86,8 +87,27 @@ export function buildToolRegistry(sessionKey) {
86
87
  * @param {object} args the request body
87
88
  * @returns {Promise<{ok:true, result:any}|{ok:false, error:string}>}
88
89
  */
89
- export async function runTool(tool, args) {
90
+ export async function runTool(tool, args, sessionKey) {
90
91
  const a = args ?? {};
92
+ const startedAt = Date.now();
93
+ // Emit the SAME `call` event the MCP path's observer middleware emits, so the
94
+ // /livestream view updates for HTTP/skill tool calls too (the MCP path wraps
95
+ // server.tool with installObserverMiddleware; the HTTP path runs handlers
96
+ // directly, so we emit here — the single HTTP execution chokepoint).
97
+ const emit = (extra) => {
98
+ try {
99
+ observer.push({
100
+ type: "call",
101
+ sessionKey: sessionKey ?? "http",
102
+ ts: startedAt,
103
+ tool: tool.name,
104
+ args: summarizeForLog(a),
105
+ durationMs: Date.now() - startedAt,
106
+ ...extra,
107
+ });
108
+ } catch { /* never let the observer kill a tool call */ }
109
+ };
110
+
91
111
  // Parse against the strict schema if we have a built zod object.
92
112
  const schema = tool.inputSchema;
93
113
  if (schema && typeof schema === "object" && "_def" in schema && typeof schema.safeParse === "function") {
@@ -96,6 +116,7 @@ export async function runTool(tool, args) {
96
116
  // surface the friendly first-issue message (withClearToolErrors / global map)
97
117
  const issue = parsed.error?.issues?.[0];
98
118
  const msg = (issue && issue.message) || "invalid arguments";
119
+ emit({ ok: false, error: msg });
99
120
  return { ok: false, error: msg };
100
121
  }
101
122
  }
@@ -104,22 +125,68 @@ export async function runTool(tool, args) {
104
125
  // Unwrap the MCP content envelope to plain JSON for HTTP clients.
105
126
  if (r && r.isError) {
106
127
  const text = r.content?.[0]?.text ?? "tool error";
128
+ emit({ ok: false, error: text });
107
129
  return { ok: false, error: text };
108
130
  }
131
+ const images = extractImages(r);
109
132
  const text = r?.content?.[0]?.text;
110
133
  if (typeof text === "string") {
111
134
  // most tools return jsonContent(...) → text is JSON; parse it back so the
112
135
  // HTTP response is real JSON, not a JSON-string-in-a-field.
113
- try { return { ok: true, result: JSON.parse(text) }; }
114
- catch { return { ok: true, result: { text } }; }
136
+ let parsed;
137
+ try { parsed = JSON.parse(text); } catch { parsed = { text }; }
138
+ // TRANSPORT-UNIFORM FAILURE MAPPING: a tool can signal failure either by
139
+ // throwing (→ isError above) OR by RETURNING a failure-shaped result
140
+ // ({ok:false} / {error} / {opened:false} / {applied:false} ...). On REST,
141
+ // a 200 with a failure in the body is invisible — the caller sees success
142
+ // and never reads the body. So we detect a failure-shaped result here and
143
+ // map it to ok:false (→ HTTP 400) for EVERY tool, no per-tool special-
144
+ // casing. (`notSupported`/`matched:false` are NOT failures — see below.)
145
+ if (looksLikeFailure(parsed)) {
146
+ const err = parsed.error ?? parsed.message ?? "tool reported failure";
147
+ emit({ ok: false, error: err });
148
+ return { ok: false, error: err, result: parsed };
149
+ }
150
+ emit({ ok: true, result: summarizeForLog(parsed), ...(images.length ? { images } : {}) });
151
+ return { ok: true, result: parsed };
115
152
  }
116
153
  // image / multi-part content: hand back the raw content array.
117
- return { ok: true, result: r?.content ? { content: r.content } : (r ?? {}) };
154
+ const result = r?.content ? { content: r.content } : (r ?? {});
155
+ emit({ ok: true, result: summarizeForLog(result), ...(images.length ? { images } : {}) });
156
+ return { ok: true, result };
118
157
  } catch (e) {
158
+ emit({ ok: false, error: e?.message ?? String(e) });
119
159
  return { ok: false, error: e?.message ?? String(e) };
120
160
  }
121
161
  }
122
162
 
163
+ // A RETURNED result is a FAILURE (→ non-2xx) when it carries an explicit failure
164
+ // signal: a `false` on a verb-status flag, or a top-level `error` string. This is
165
+ // the single rule that makes every tool behave the same on the transport — a tool
166
+ // can fail by throwing or by returning one of these, and either way the caller
167
+ // gets a non-2xx it can't ignore.
168
+ //
169
+ // NOT failures (these are valid ANSWERS / STATE, stay 2xx):
170
+ // • notSupported:true — the feature genuinely isn't on this platform/core
171
+ // • matched:false / found:false / hit:false — a lookup whose answer is "no"
172
+ // • looksLikeGraphic:false — a classification result
173
+ // • loaded:false / paused:false — STATE fields (is a ROM loaded? is it paused?),
174
+ // not "the action failed". This is why the flag list is DELIBERATELY narrow:
175
+ // only generic verdict flags + a couple of unambiguous action verbs. Anything
176
+ // else that wants to signal failure must do it with a top-level `error` string
177
+ // (or throw) — both of which are unambiguous.
178
+ const FAILURE_FLAGS = ["ok", "success", "opened", "applied"];
179
+ function looksLikeFailure(parsed) {
180
+ if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return false;
181
+ // A top-level error string is unambiguous.
182
+ if (typeof parsed.error === "string" && parsed.error) return true;
183
+ // A generic verdict / unambiguous-action flag explicitly set to false.
184
+ for (const f of FAILURE_FLAGS) {
185
+ if (parsed[f] === false) return true;
186
+ }
187
+ return false;
188
+ }
189
+
123
190
  /**
124
191
  * Convert a tool's stored inputSchema (a strict zod object, or a raw shape if
125
192
  * the stamp didn't take) to a JSON Schema (zod v4 native). Used by the OpenAPI
package/src/mcp/server.js CHANGED
@@ -57,7 +57,7 @@ const PKG_VERSION = (() => {
57
57
  // AGENTS.md is the CHANNEL-NEUTRAL body (workflow knowledge, footguns, per-platform
58
58
  // docs) — it must not contain "how to connect / how to call" prose, because that
59
59
  // differs per delivery channel. The MCP channel prepends mcpPreamble ("call the
60
- // MCP tools…", never mentions HTTP routes); the skill channel (GET /romdev-skill.md)
60
+ // MCP tools…", never mentions HTTP routes); the skill channel (GET /skills/romdev/SKILL.md)
61
61
  // prepends skillPreamble ("POST /tool/{name}…", never mentions MCP). Both live in
62
62
  // src/http/skill-doc.js so neither leaks into the other surface.
63
63
  async function loadAgentsBody() {
@@ -351,7 +351,7 @@ async function main() {
351
351
  });
352
352
 
353
353
  // ── HTTP tool surface (the non-MCP way to drive romdev) ───────────────────
354
- // POST /tool/:name + /openapi.json + /documentation + /romdev-skill.md, all
354
+ // POST /tool/:name + /openapi.json + /documentation + /skills/romdev/SKILL.md, all
355
355
  // generated from the same tool registry the MCP path uses. Same Express app,
356
356
  // same localhost trust, per-agent dynamic sessions. Lets MCP-wary users (or
357
357
  // agents that prefer the Agent Skills standard) use romdev with near-zero
@@ -410,10 +410,11 @@ async function main() {
410
410
  process.exit(1);
411
411
  });
412
412
  httpServer.on("listening", () => {
413
- log.info(`romdev listening on http://${bannerHost}:${port}/mcp`);
414
413
  log.info("");
415
- log.info(`prefer a skill? save: http://${bannerHost}:${port}/romdev-skill.md`);
416
- log.info(`browse/try the tools: http://${bannerHost}:${port}/documentation`);
414
+ log.info(`romdev (v${PKG_VERSION}) listening on http://${bannerHost}:${port}/mcp`);
415
+ log.info("");
416
+ log.info(`prefer a skill? save: http://${bannerHost}:${port}/skills/romdev/SKILL.md`);
417
+ log.info("");
417
418
  log.info(`optional observer: http://${bannerHost}:${port}/livestream`);
418
419
  log.info("");
419
420
  log.info("connect your coding agent: https://github.com/monteslu/romdev#connect");
package/src/mcp/state.js CHANGED
@@ -23,12 +23,14 @@ export function getHost(sessionKey) {
23
23
  if (!host) {
24
24
  throw new Error(
25
25
  "No ROM loaded in this session — call loadMedia({path}) first. " +
26
- "If you WERE mid-session and just got reconnected (the server restarted, " +
27
- "or your session expired and your client re-initialized): the emulator " +
28
- "state is held in server memory only, so it did not survive just " +
29
- "re-run loadMedia({path}) with the ROM you were working on (it's still on " +
30
- "disk) to pick back up. Re-applying any in-progress changes means " +
31
- "rebuilding/reloading; a fresh boot is the recovery point.",
26
+ "If you DID loadMedia and still see this, your calls are landing in DIFFERENT " +
27
+ "sessions: over plain HTTP/skill you must send the SAME `x-romdev-session` " +
28
+ "header on every call (pick one stable id and reuse it) — a new/missing id is " +
29
+ "a fresh empty session each time. " +
30
+ "If you WERE mid-session and just got reconnected (the server restarted or " +
31
+ "your session expired): emulator state is held in server memory only, so it " +
32
+ "did not survive — re-run loadMedia({path}) with your ROM (still on disk) to " +
33
+ "pick back up. A fresh boot is the recovery point.",
32
34
  );
33
35
  }
34
36
  return host;
@@ -37,7 +37,7 @@ export const MERGE_MAP = {
37
37
  host: { absorbs: ["unloadMedia", "shutdown", "reset", "pause", "resume"], axis: "op" },
38
38
  // ── frame (step/screenshot/stepAndShot/stepInstruction; stepInstruction folded from watch-memory.js) ──
39
39
  frame: { absorbs: ["stepFrames", "screenshot", "stepAndScreenshot", "stepInstruction"], axis: "op" },
40
- // ── scaffold (project/game + snippets; patchGbHeader stays standalone in project.js) ──
40
+ // ── scaffold (project/game + snippets; patchGbHeader folded into romPatch op:'gbHeader') ──
41
41
  scaffold: { absorbs: ["createProject", "createGame", "starterSnippets", "copyStarterSnippets"], axis: "op" },
42
42
  // ── cart (identify/extract/wrap; identifyRom from rom-id.js, rest from cart-parts.js) ──
43
43
  cart: { absorbs: ["identifyRom", "extractCart", "wrapRomFromParts"], axis: "op" },
@@ -61,14 +61,14 @@ export const MERGE_MAP = {
61
61
  cpu: { absorbs: ["getCPUState", "setRegister", "callSubroutine", "decompressWith"], axis: "op" },
62
62
  // ── breakpoint (STOP-on-first; all 4 from watch-memory.js) ──
63
63
  breakpoint: { absorbs: ["findWriter", "runUntilWrite", "runUntilPC", "runUntilRead"], axis: "on" },
64
- // ── watch (LOG-ALL; all 3 from watch-memory.js) ──
65
- watch: { absorbs: ["watchMemory", "watchRange", "logPCRange"], axis: "on" },
66
- // ── dmaTrace (Genesis VDP-DMA; watchDma from watch-memory.js, traceVramSource from trace-vram-source.js) ──
67
- dmaTrace: { absorbs: ["watchDma", "traceVramSource"], axis: "precision" },
64
+ // ── watch (LOG-ALL; watchMemory/watchRange/logPCRange + Genesis VDP-DMA trace
65
+ // on:'dma' from watchDma/traceVramSource — all from watch-memory.js +
66
+ // trace-vram-source.js. dmaTrace was folded in as watch({on:'dma'}).) ──
67
+ watch: { absorbs: ["watchMemory", "watchRange", "logPCRange", "watchDma", "traceVramSource"], axis: "on" },
68
68
  // ── build (compile/run; buildSource/buildProject/runSource from toolchain.js, buildSourceWithDebug from symbols.js). ENTRY-TIER. ──
69
69
  build: { absorbs: ["buildSource", "buildSourceWithDebug", "buildProject", "runSource"], axis: "output" },
70
- // ── romPatch (8-op ROM-hack toolkit; patchFile/patchRom from rom-id.js, spliceCHR from splice-chr.js, relocateBlock/makeStoredBlock/findPointerTo from reinject.js, findFreeSpace from free-space.js, diffRoms from diff-roms.js) ──
71
- romPatch: { absorbs: ["patchFile", "patchRom", "spliceCHR", "relocateBlock", "makeStoredBlock", "findFreeSpace", "findPointerTo", "diffRoms"], axis: "op" },
70
+ // ── romPatch (9-op ROM-hack toolkit; patchFile/patchRom from rom-id.js, spliceCHR from splice-chr.js, relocateBlock/makeStoredBlock/findPointerTo from reinject.js, findFreeSpace from free-space.js, diffRoms from diff-roms.js, patchGbHeader as op:'gbHeader') ──
71
+ romPatch: { absorbs: ["patchFile", "patchRom", "spliceCHR", "relocateBlock", "makeStoredBlock", "findFreeSpace", "findPointerTo", "diffRoms", "patchGbHeader"], axis: "op" },
72
72
  // ── catalog (orient; listCategories + getStatus, both entry-tier in index.js) ──
73
73
  catalog: { absorbs: ["listCategories", "getStatus"], axis: "op" },
74
74
  // ── playtest (show-a-human window FSM; all 4 from playtest.js). ENTRY-TIER. ──
@@ -294,8 +294,9 @@ export function registerCheatTools(server, z, sessionKey) {
294
294
  "Cheat lookup / search / apply / create for the loaded ROM. `op`: " +
295
295
  "'lookup' (THIS game's known cheats from the bundled DB — returns labeled RAM addresses + Game Genie/ROM code " +
296
296
  "sites, so it answers 'which byte holds X?' for free); " +
297
- "'search' (fuzzy-find a game by NAME when you don't have the exact No-Intro title — returns game names + cheat " +
298
- "counts, then lookup the chosen one); " +
297
+ "'search' (fuzzy-find a game by NAME when you don't have the exact No-Intro title — searches ALL platforms by " +
298
+ "default and each match reports its own `platform`, so you don't need to know the console; pass `platform` only " +
299
+ "to scope it. Returns game names + cheat counts; then lookup the chosen one with its platform); " +
299
300
  "'apply' (enable a cheat on the LOADED game — pass a raw `code` or a `desc` from lookup); " +
300
301
  "'clear' (remove all active cheats); 'make' (CREATE a shareable code from an address+value). " +
301
302
  "TRUST: lookup matches by NAME/fuzzy similarity, NOT a verified CRC — a PROBABLE match. Labels are usually " +
@@ -322,7 +323,7 @@ export function registerCheatTools(server, z, sessionKey) {
322
323
  index: z.number().int().min(0).optional().describe("op=apply: cheat slot (default: next free slot). Reuse a slot to replace it."),
323
324
  enabled: z.boolean().default(true).describe("op=apply: false disables the slot instead of enabling."),
324
325
  // make / search / lookup share `platform`
325
- platform: z.enum([...MAKE_CHEAT_PLATFORMS]).optional().describe("op=lookup: override platform detection. op=search/make: REQUIRED — the target platform (all 14 tier-1)."),
326
+ platform: z.enum([...MAKE_CHEAT_PLATFORMS]).optional().describe("op=lookup: override platform detection. op=search: OPTIONAL — omit to search ALL platforms (each match returns its own `platform`); pass one only to scope the search. op=make: REQUIRED — the target platform (all 14 tier-1)."),
326
327
  address: z.number().int().min(0).optional().describe("op=make: address to cheat (RAM addr, or the ROM addr to patch)."),
327
328
  value: z.number().int().min(0).max(255).optional().describe("op=make: replacement byte (0-255). Provide value OR values."),
328
329
  values: z.array(z.number().int().min(0).max(255)).min(1).max(64).optional().describe("op=make: batch — a code per value at the same address. Returns variants[]."),
@@ -60,9 +60,22 @@ import { jsonContent, safeTool, withClearToolErrors } from "../util.js";
60
60
  import { getHostOrNull, setDisclosure } from "../state.js";
61
61
  import { MERGE_MAP } from "../tool-manifest.js";
62
62
  import { readFile } from "node:fs/promises";
63
+ import { readFileSync } from "node:fs";
63
64
  import { fileURLToPath } from "node:url";
64
65
  import { dirname, join } from "node:path";
65
66
 
67
+ // Package version — surfaced by catalog({op:'status'|'whatsNew'}) so an agent can
68
+ // check the running romdev version with a plain TOOL CALL (works over MCP AND the
69
+ // HTTP/skill surface), e.g. to detect a saved skill is stale. (GET /healthz also
70
+ // reports it for non-tool HTTP clients.)
71
+ const PKG_VERSION = (() => {
72
+ try {
73
+ return JSON.parse(readFileSync(join(dirname(fileURLToPath(import.meta.url)), "..", "..", "..", "package.json"), "utf8")).version;
74
+ } catch {
75
+ return "0.0.0";
76
+ }
77
+ })();
78
+
66
79
  // catalog({op:'whatsNew'}): the recent CHANGELOG + an old→new RENAME TABLE
67
80
  // derived from MERGE_MAP (the single source of truth for the consolidation), so
68
81
  // an agent resuming a handoff written against an older server can re-map every
@@ -91,6 +104,7 @@ async function buildWhatsNew() {
91
104
  changelog = sections.slice(0, 3).join("## ").trim();
92
105
  } catch { /* changelog not present in this install */ }
93
106
  return {
107
+ romdevVersion: PKG_VERSION,
94
108
  note: "Pre-1.0 the tool surface is consolidated freely with NO deprecated aliases. If a tool name from an older handoff is missing, it's almost certainly now an `op` (or other axis) on a domain tool — find it below, then read that tool's description for the exact op enum and params.",
95
109
  renameTable: renames,
96
110
  axisLegend: "Every domain tool is keyed by ONE axis: op (most), output (build), on (breakpoint), target (disasm), view (background), source (palette), stage (encodeArt), from (importArt). The value names the operation, e.g. romPatch({op:'findPointer'}).",
@@ -215,7 +229,7 @@ export function registerTools(server, z, sessionKey) {
215
229
  "• op:'whatsNew' — the recent CHANGELOG + an OLD→NEW tool RENAME TABLE. Call this FIRST if you're resuming work from a handoff written against an older server: pre-1.0 the surface is consolidated freely (no deprecated aliases), so a name you remember may now be an `op` on a domain tool. This maps them in one read instead of probing each tool.",
216
230
  {
217
231
  op: z.enum(["categories", "status", "whatsNew"]).default("categories")
218
- .describe("categories=tool-category catalog; status=live session snapshot (host/platform/frameCount/media); whatsNew=recent CHANGELOG + old→new tool rename table."),
232
+ .describe("categories=tool-category catalog; status=live session snapshot (romdevVersion + host/platform/frameCount/media — call this to check the running version, e.g. is a saved skill stale); whatsNew=recent CHANGELOG + old→new tool rename table."),
219
233
  },
220
234
  safeTool(async ({ op = "categories" }) => {
221
235
  if (op === "whatsNew") {
@@ -228,6 +242,7 @@ export function registerTools(server, z, sessionKey) {
228
242
  ? { ...host.getStatus() }
229
243
  : { loaded: false, hint: "no host yet; call loadMedia (in category 'run') to load a ROM" };
230
244
  return jsonContent({
245
+ romdevVersion: PKG_VERSION,
231
246
  ...base,
232
247
  loadedCategories: cats.filter((c) => c.loaded).map((c) => c.name),
233
248
  unloadedCategories: cats.filter((c) => !c.loaded).map((c) => c.name),
@@ -235,6 +250,7 @@ export function registerTools(server, z, sessionKey) {
235
250
  }
236
251
  const categories = disclosure.listCategories();
237
252
  return jsonContent({
253
+ romdevVersion: PKG_VERSION,
238
254
  categories,
239
255
  note: "Every tool registers at session init — this catalog is just a map grouped by purpose, NOT a gate. Call any tool by name directly.",
240
256
  humanInTheLoopHint: "Iterate INTERNALLY on screenshots first (build({output:'run'}) returns one inline; frame({op:'screenshot'/'stepAndShot'}) re-shoots the live host) — don't open a window to debug. Once the game actually boots and shows the feature you're working on, call playtest({}) so your human can watch and play it live. Opening a window on a black screen or a crash just wastes the human's attention — show them something that works.",
@@ -327,7 +343,7 @@ const TOOL_OWNER = {
327
343
  playtest: "show",
328
344
  // advanced category
329
345
  runUntil: "advanced",
330
- watch: "advanced", breakpoint: "advanced", dmaTrace: "advanced",
346
+ watch: "advanced", breakpoint: "advanced",
331
347
  recordSession: "advanced",
332
348
  // entry tier itself
333
349
  catalog: "entry",