pmx-canvas 0.1.36 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/CHANGELOG.md +409 -0
  2. package/Readme.md +2 -2
  3. package/dist/json-render/index.js +89 -334
  4. package/dist/types/mcp/canvas-access.d.ts +5 -171
  5. package/dist/types/server/ax-state-manager.d.ts +256 -0
  6. package/dist/types/server/ax-state.d.ts +1 -1
  7. package/dist/types/server/canvas-operations.d.ts +1 -12
  8. package/dist/types/server/canvas-state.d.ts +3 -23
  9. package/dist/types/server/index.d.ts +6 -24
  10. package/dist/types/server/operations/composites.d.ts +121 -0
  11. package/dist/types/server/operations/http.d.ts +7 -0
  12. package/dist/types/server/operations/index.d.ts +8 -0
  13. package/dist/types/server/operations/invoker.d.ts +13 -0
  14. package/dist/types/server/operations/mcp.d.ts +15 -0
  15. package/dist/types/server/operations/ops/annotation.d.ts +2 -0
  16. package/dist/types/server/operations/ops/app.d.ts +33 -0
  17. package/dist/types/server/operations/ops/ax-await.d.ts +2 -0
  18. package/dist/types/server/operations/ops/ax-shared.d.ts +31 -0
  19. package/dist/types/server/operations/ops/ax-state.d.ts +2 -0
  20. package/dist/types/server/operations/ops/ax-timeline.d.ts +2 -0
  21. package/dist/types/server/operations/ops/ax-work.d.ts +2 -0
  22. package/dist/types/server/operations/ops/batch.d.ts +19 -0
  23. package/dist/types/server/operations/ops/edges.d.ts +2 -0
  24. package/dist/types/server/operations/ops/groups.d.ts +2 -0
  25. package/dist/types/server/operations/ops/json-render.d.ts +31 -0
  26. package/dist/types/server/operations/ops/nodes.d.ts +62 -0
  27. package/dist/types/server/operations/ops/query.d.ts +2 -0
  28. package/dist/types/server/operations/ops/snapshots.d.ts +2 -0
  29. package/dist/types/server/operations/ops/validate.d.ts +2 -0
  30. package/dist/types/server/operations/ops/viewport.d.ts +2 -0
  31. package/dist/types/server/operations/ops/webview.d.ts +2 -0
  32. package/dist/types/server/operations/registry.d.ts +15 -0
  33. package/dist/types/server/operations/types.d.ts +116 -0
  34. package/dist/types/server/operations/webview-runner.d.ts +69 -0
  35. package/docs/RELEASE.md +5 -0
  36. package/docs/adr-001-bun-only-runtime.md +46 -0
  37. package/docs/api-stability.md +57 -0
  38. package/docs/ax-state-contract.md +72 -0
  39. package/docs/mcp.md +60 -11
  40. package/docs/plans/plan-005-operation-registry.md +84 -0
  41. package/docs/plans/plan-006-mcp-tool-consolidation.md +109 -0
  42. package/docs/plans/plan-007-ax-domain.md +99 -0
  43. package/docs/plans/plan-008-registry-finish.md +91 -0
  44. package/docs/tech-debt-assessment-2026-06.md +90 -0
  45. package/package.json +3 -3
  46. package/skills/pmx-canvas/SKILL.md +192 -186
  47. package/skills/pmx-canvas/evals/evals.json +3 -3
  48. package/skills/pmx-canvas/references/codex-app-adapter.md +13 -14
  49. package/skills/pmx-canvas/references/github-copilot-app-adapter.md +4 -5
  50. package/src/cli/agent.ts +52 -31
  51. package/src/mcp/canvas-access.ts +30 -830
  52. package/src/mcp/server.ts +162 -2014
  53. package/src/server/ax-state-manager.ts +808 -0
  54. package/src/server/ax-state.ts +2 -2
  55. package/src/server/canvas-operations.ts +2 -328
  56. package/src/server/canvas-schema.ts +2 -2
  57. package/src/server/canvas-state.ts +95 -465
  58. package/src/server/index.ts +54 -190
  59. package/src/server/operations/composites.ts +355 -0
  60. package/src/server/operations/http.ts +103 -0
  61. package/src/server/operations/index.ts +65 -0
  62. package/src/server/operations/invoker.ts +87 -0
  63. package/src/server/operations/mcp.ts +221 -0
  64. package/src/server/operations/ops/annotation.ts +60 -0
  65. package/src/server/operations/ops/app.ts +447 -0
  66. package/src/server/operations/ops/ax-await.ts +216 -0
  67. package/src/server/operations/ops/ax-shared.ts +38 -0
  68. package/src/server/operations/ops/ax-state.ts +249 -0
  69. package/src/server/operations/ops/ax-timeline.ts +381 -0
  70. package/src/server/operations/ops/ax-work.ts +635 -0
  71. package/src/server/operations/ops/batch.ts +365 -0
  72. package/src/server/operations/ops/edges.ts +166 -0
  73. package/src/server/operations/ops/groups.ts +176 -0
  74. package/src/server/operations/ops/json-render.ts +691 -0
  75. package/src/server/operations/ops/nodes.ts +1047 -0
  76. package/src/server/operations/ops/query.ts +281 -0
  77. package/src/server/operations/ops/snapshots.ts +366 -0
  78. package/src/server/operations/ops/validate.ts +37 -0
  79. package/src/server/operations/ops/viewport.ts +219 -0
  80. package/src/server/operations/ops/webview.ts +339 -0
  81. package/src/server/operations/registry.ts +79 -0
  82. package/src/server/operations/types.ts +150 -0
  83. package/src/server/operations/webview-runner.ts +77 -0
  84. package/src/server/server.ts +158 -2255
  85. package/src/server/web-artifacts.ts +6 -2
@@ -0,0 +1,91 @@
1
+ # Plan 008 — Finish the operation-registry refactor (plan-005 items 8–9 + plan-006 completion)
2
+
3
+ **Status:** Complete
4
+ **Date:** 2026-06-15
5
+ **Depends on:** plan-005 (registry — slices 1–7 merged), plan-006 (consolidation — waves 1–2 merged), plan-007 (AX domain — merged).
6
+ **Motivation:** Close out the registry refactor so v0.2 ships a coherent, complete surface. After this, the registry covers every n-way-duplicated operation; the only legacy left is the deliberate single-transport / poor-fit set.
7
+
8
+ ## Verdicts (from the remaining-surface investigation)
9
+
10
+ | Operation | Verdict | Why |
11
+ |---|---|---|
12
+ | `canvas_validate` (board validation) | **Migrate** — `validate.get` op (read) | Pure read; clean fit; unblocks `canvas_query` validate action |
13
+ | `canvas_remove_annotation` | **Migrate** — `annotation.remove` op | Trivial DELETE-by-id mutation; unblocks `canvas_view` remove-annotation action |
14
+ | webview `status`/`start`/`stop`/`resize`/`evaluate` | **DONE (Wave 3)** — migrated via runner injection; `canvas_webview` composite shipped | The webview machinery (`startCanvasAutomationWebView` …) lives in `server.ts`, which `operations/` must NOT import (the isolation rule). Resolved with a **runner-injection** pattern (`src/server/operations/webview-runner.ts` + `setWebviewRunner` in `server.ts`), exactly mirroring `setOperationEventEmitter`. Ops in `src/server/operations/ops/webview.ts`; composite `canvas_webview` folds the 5 actions |
15
+ | `canvas_screenshot` | **Stay legacy** (intentional) | Returns a binary image payload the registry JSON wire shape does not model. Stays a standalone hand-written tool (and route) |
16
+ | `canvas_refresh_webpage_node` | **DONE (Wave 5)** — deprecate-only fold into `canvas_node` | The original "needs `refresh` action + per-action input injection" rationale was wrong: `refresh:true` is a plain param reachable via `canvas_node {action:"update", refresh:true}`. The audit surfaced a real failure-path GAP (`node.update`'s `formatResult` masked a FAILED refresh as `{ ok:true }` with no `isError` over the local invoker), which was then **closed** — `formatResult` now passes a refresh result through verbatim and surfaces `ok:false` as `isError`, matching the standalone tool and the HTTP 400. With the failure path equivalent, the standalone is deprecated → `canvas_node {action:"update", refresh:true}` (see Wave 5) |
17
+ | `canvas_add_html_node` / `canvas_add_html_primitive` | **DONE (Wave 5)** — deprecate-only fold into `canvas_node` | Verdict REVERSED: no mechanism needed. `node.add` already routes `type:"html"` + `primitive\|kind` → `createHtmlPrimitiveNode` and merges the top-level html fields into node data, so `canvas_node {action:"add", type:"html", …}` is already equivalent. Deprecate-only: a `Deprecated: use canvas_node …` prefix on each standalone description; no new action / op / SDK / freeze change |
18
+ | `canvas_open_mcp_app` / `canvas_add_diagram` / `canvas_build_web_artifact` | **DONE (Wave 4)** — migrated as `mcpapp.open` / `diagram.open` / `webartifact.build`; `canvas_app` composite shipped | The earlier "poor fit" verdict was wrong on reflection: `executeOperation` is async (the long-running build fits — the caveat is MCP-client timeouts, not registry fit), and the runtimes are server-independent **domain modules** (`mcp-app-runtime` / `diagram-presets` / `web-artifacts`), not `server.ts` — so the op handlers call them directly, NO runner injection. `web-artifacts.ts` was made server-independent by switching its one `emitPrimaryWorkbenchEvent` to the already-injected `emitCanvasLayoutUpdate`. The three ops are `mutates:false` (they emit `ext-app-open`/`ext-app-result` via `ctx.emit`, or web-artifacts emits its own layout frame). `canvas_app` folds the 3 |
19
+ | `canvas_batch` | **Migrate last** — `canvas.batch` meta-op | The remaining registry slice; deletes the 290-line switch |
20
+ | `canvas_ax_interaction` / `canvas_ingest_activity` | **Stay legacy** (already decided — trust boundary / firehose) | plan-007 |
21
+
22
+ ## Wave 1 — clean migrations + the two free composite actions
23
+
24
+ Two new ops (follow the established pattern; delete legacy handler + route + MCP tool + orphaned CanvasAccess per op). Both are server-independent (no `server.ts`/`index.ts` import):
25
+ - **`validate.get`** — `GET /api/canvas/validate`, mutates:false, no emit; serialize = `validateCanvasLayout(canvasState.getLayout())`; MCP `canvas_validate` (no args).
26
+ - **`annotation.remove`** — `DELETE /api/canvas/annotation/:id`, mutates:true (auto layout emit); 404 on missing; returns `{ ok:true, removed:id }`; MCP `canvas_remove_annotation { id }`.
27
+
28
+ Consolidation (additive; these are clean `action→op`, NO mechanism extension — that's why they're in scope and refresh/add-primitive are not):
29
+ - **`canvas_query`** + `validate` action → `validate.get`. Deprecate `canvas_validate`.
30
+ - **`canvas_view`** + `remove-annotation` action → `annotation.remove`. Deprecate `canvas_remove_annotation`.
31
+
32
+ Tool names: the 2 migrated tools keep their names (hand-written → registry-served); no freeze-count change. Deprecation prefixes auto-derive from the composite definitions.
33
+
34
+ **Deferred (documented, not in this campaign):** `canvas_node` refresh + `canvas_render` add-primitive + `canvas_add_html_node` folding (need a per-action input-injection mechanism — over-engineering for niche actions), `canvas_snapshot` (v0.3 name collision). These legacy tools keep working, unchanged. (`canvas_webview` is **done** — Wave 3 below; its server.ts coupling was resolved with runner injection. `canvas_app` is **done** — Wave 4 below; the "poor fit" verdict was reversed. `canvas_screenshot` intentionally stays standalone — binary payload.) **Update (Wave 5):** the html deferral was wrong — `node.add` already exposes the params, so the html fold is deprecate-only (no mechanism). Only the refresh fold turned out to be a genuine result-equivalence gap. See Wave 5.
35
+
36
+ ## Wave 2 — batch (plan-005 item 9, last, highest risk)
37
+
38
+ Convert `executeCanvasBatch` (the ~290-line switch in canvas-operations.ts) into a `canvas.batch` registry meta-op:
39
+ - **`runWithSuppressedEmits(fn)` wraps the batch loop** — the registry runs each op through the normal `executeOperation` path while a depth counter suppresses both auto `canvas-layout-update` frames and explicit `ctx.emit` events. The batch meta-op then emits one final layout frame.
40
+ - The `canvas.batch` handler: read `{ operations:[...] }` or a bare `[...]` (shared array-preserving reader); for each entry resolve `$ref`/`assign` against prior results, then dispatch the legacy batch allowlist through `executeOperation` inside `runWithSuppressedEmits`; collect `results`/`refs`; on failure record `failedIndex`/`error` and stop (preserve current semantics). `mutates:false` + ONE manual `ctx.emit('canvas-layout-update')` at the end. Result shape `{ ok, results, refs, failedIndex?, error? }` byte-identical.
41
+ - All 11 batch op names (`node.add/update/remove`, `graph.add`, `edge.add`, `group.create/add/remove`, `pin.set` [+ add/remove modes], `snapshot.save`, `arrange`) are already registered — names match.
42
+ - Delete the switch. Per-entry mutation history still records individually (undo per step preserved).
43
+ - **Risk: highest** — last, separately committed, one-commit revert. Verify: every op name in batch + standalone, `$ref` chaining, bare-array + `{operations}` shapes, SSE single-final-emit (operation-parity counts frames), failure at each index, local + remote.
44
+
45
+ ## Wave 3 — webview (DONE) — runner injection
46
+
47
+ Migrate the 5 browser-automation tools (`status`/`start`/`stop`/`resize`/`evaluate`) to the registry. The blocker was that the webview machinery lives in `server.ts`, which `operations/` must NOT import. Resolved with **runner injection**, mirroring `setOperationEventEmitter`:
48
+
49
+ - **`src/server/operations/webview-runner.ts`** declares a `WebviewRunner` interface (`status` / `start` / `stop` / `resize` / `evaluate`) + a module-level injected instance with `setWebviewRunner(runner)` and `getWebviewRunner()` (throws a clear error if not injected). `screenshot` is intentionally NOT in the runner (binary).
50
+ - **`server.ts`** calls `setWebviewRunner({ … })` at module load (same point as `setOperationEventEmitter`), wiring the real `getCanvasAutomationWebViewStatus` / `startCanvasAutomationWebView` / `stopCanvasAutomationWebView` / `resizeCanvasAutomationWebView` / `evaluateCanvasAutomationWebView` functions. The `start` closure carries the success/error asymmetry the legacy route preserved (200 ok; 503 server-not-running; 501 unsupported runtime; 500 supported-failure — with the webview status in the error body so callers can read `lastError`).
51
+ - **`src/server/operations/ops/webview.ts`** — 5 `mutates:false` ops (webview is a side surface — no `canvas-layout-update` frame). Each handler calls `getWebviewRunner()`, never `server.ts`. Routes match the legacy paths exactly (`GET /api/workbench/webview`, `POST …/start`, `DELETE …/webview`, `POST …/resize`, `POST …/evaluate`). The `evaluate` op preserves the exact arg validation (exactly one of `expression`/`script`, MCP message in `buildInput`, HTTP message in the handler), the async-IIFE script wrap, and the arbitrary-eval trust posture (relocated, unchanged). `start`'s MCP `buildInput` sandboxes `dataStoreDir` to the workspace (MCP-only, as the legacy tool did). The fetch handler dispatches `/api/workbench/webview*` through `dispatchOperationRoute` (a null return falls through to the still-hand-written screenshot route).
52
+ - **Composite** `canvas_webview` (additive): `status` → `webview.status`, `start` → `webview.start`, `stop` → `webview.stop`, `resize` → `webview.resize`, `evaluate` → `webview.evaluate`. Deprecation prefixes auto-derive via `buildCompositeDeprecationNotes`.
53
+ - **Deleted legacy:** the 5 MCP tool blocks (`mcp/server.ts`), the 5 HTTP handlers + routes + `parseCanvasAutomationWebViewRequestBody` (`server.ts`), and the orphaned `CanvasAccess` methods (`startAutomationWebView` / `stopAutomationWebView` / `resizeAutomationWebView` / `evaluateAutomationWebView` + the `WebViewEnvelope` / `WebViewStopEnvelope` / `WebViewEvaluateEnvelope` interfaces + the `AutomationWebViewOptions` / `AutomationEvaluateResult` type aliases). **KEPT:** `CanvasAccess.getAutomationWebViewStatus` + `screenshotAutomationWebView` (the standalone `canvas_screenshot` tool needs both) and the public SDK `PmxCanvas` webview methods.
54
+ - **Freeze:** `canvas_webview` is a new tool name → freeze list 81 → 82 (the only deliberate freeze change in this wave). The 5 legacy webview tool names stay in the list (still registered, now registry-served).
55
+ - **Divergence (documented):** the only allowed unification is the local-vs-remote error asymmetry. For `evaluate`/`resize` a runtime error throws an `OperationError` (HTTP 400 `{ ok:false, error }` — no `webview` field, vs the legacy handler's `{ ok:false, error, webview }`); the MCP result is byte-identical (isError + bare message) because the HTTP invoker reads only `error`. No test asserts that error body shape.
56
+
57
+ ## Wave 4 — external / built-content apps (DONE) — the reversed "poor fit"
58
+
59
+ Migrate the 3 deferred external/built-content tools to the registry. The deferral was wrong on reflection: `executeOperation` is async (the long-running web-artifact build fits — its "long-running" caveat is about MCP-client timeouts, not registry fit), and the runtimes are server-independent **domain modules**, not `server.ts`. So the op handlers call them DIRECTLY — no runner injection.
60
+
61
+ - **Server-independence (verified):** `mcp-app-runtime.ts` (`openMcpApp` as `openExternalMcpApp` / `closeMcpAppSession`), `diagram-presets.ts` (`buildExcalidrawOpenMcpAppInput` / `ensureExcalidrawCheckpointId` / `isExcalidrawCreateView`), and `ext-app-lookup.ts` (`findCanvasExtAppNodeId`) import no `server.ts`/`index.ts`. `web-artifacts.ts` had ONE coupling — `import { emitPrimaryWorkbenchEvent } from './server.js'` used in `openWebArtifactInCanvas`. Switched it to the already-injected `emitCanvasLayoutUpdate` (exported from `canvas-operations.ts`, wired by `server.ts` via `setCanvasLayoutUpdateEmitter` to the same `emitPrimaryWorkbenchEvent('canvas-layout-update', { layout })`). `web-artifacts.ts` is now server-independent → no runner injection, no import cycle (`operations/ops/app.ts → web-artifacts.ts → canvas-operations.ts`, never `server.ts`).
62
+ - **`src/server/operations/ops/app.ts`** — 3 `mutates:false` ops + a shared `openMcpAppCore(input, ctx)`:
63
+ - **`mcpapp.open`** → `canvas_open_mcp_app` → `POST /api/canvas/mcp-app/open`. The relocated legacy SDK `openMcpApp` body: `openExternalMcpApp`, the `Date.now()-Math.random()` `toolCallId`, prior-session `closeMcpAppSession`, the Excalidraw checkpoint tagging, `ext-app-open` + `ext-app-result` via `ctx.emit` (→ the registry emitter → `emitPrimaryWorkbenchEvent`), node-id resolution via `findCanvasExtAppNodeId`. Returns `{ ok, id?, nodeId, toolCallId, sessionId, resourceUri }` byte-identical.
64
+ - **`diagram.open`** → `canvas_add_diagram` → `POST /api/canvas/diagram`. Thin preset: `buildExcalidrawOpenMcpAppInput` then delegate to `openMcpAppCore` (the SSE pair fires ONCE — diagram.open does not re-emit). Same return shape.
65
+ - **`webartifact.build`** → `canvas_build_web_artifact` → `POST /api/canvas/web-artifact`. Async handler awaits `buildWebArtifactOnCanvas` and returns the byte-identical metadata envelope `{ ok, path, bytes, projectPath, openedInCanvas, startedAt, completedAt, durationMs, timeoutMs, id?, nodeId, url, metadata, logs, stdout?, stderr? }`. Long-running (minutes) is fine for an async op — no timeouts added. `projectPath`/`outputPath` are sandboxed via `web-artifacts.ts` `resolveWorkspacePath` (the legacy HTTP `resolveWorkspacePath` + MCP `safeWorkspacePath` unified to one server-side check).
66
+ - **SDK stays public:** `PmxCanvas.openMcpApp` / `addDiagram` delegate to `executeOperation('mcpapp.open' | 'diagram.open', input)` (cast to `OpenMcpAppCoreResult`) — the same single execution path, so the ext-app-* frames fire once via `ctx.emit`. `PmxCanvas.buildWebArtifact` calls `buildWebArtifactOnCanvas` directly (its documented return is the full `WebArtifactCanvasBuildResult`, not the wire envelope; the op core IS the same runtime, so no divergence).
67
+ - **Composite** `canvas_app` (additive): `open-mcp-app` → `mcpapp.open`, `diagram` → `diagram.open`, `build-artifact` → `webartifact.build`. Deprecation prefixes auto-derive via `buildCompositeDeprecationNotes`.
68
+ - **Deleted legacy:** the 3 MCP tool blocks (`mcp/server.ts`) + the now-orphaned `safeWorkspacePath`/`workspaceRoot`/`isPathInside` helpers + the `node:path` import; the 3 HTTP handlers (`handleCanvasOpenMcpApp` / `handleCanvasAddDiagram` / `handleCanvasBuildWebArtifact`) + `runAndEmitOpenMcpApp` + `RunAndEmitOpenMcpAppParams` + `randomExtAppToolCallId` + `parseExternalMcpTransportConfig` + `normalizeStringRecord` + their routes + the orphaned imports (`openMcpApp`, `ExternalMcpTransportConfig`, `buildExcalidrawOpenMcpAppInput`, `buildWebArtifactOnCanvas`, `resolveWorkspacePath`) (`server.ts`); the orphaned `CanvasAccess` methods (`openMcpApp` / `addDiagram` / `buildWebArtifact` on both local + remote impls + the interface) and their type aliases (`OpenMcpAppInput` / `OpenMcpAppResult` / `AddDiagramInput` / `WebArtifactInput` / `WebArtifactResult`); the SDK's now-orphaned private `findCanvasExtAppNodeId` method + its `ext-app-lookup` import.
69
+ - **Freeze:** `canvas_app` is a new tool name → freeze list 82 → 83. The 3 legacy tool names stay in the list (still registered, now registry-served).
70
+ - **Divergence (documented):** (1) the local-vs-remote error asymmetry — `mcpapp.open`'s node-precondition failures throw `OperationError` (404 missing node, 400 non-ext-app node), which the legacy HTTP handler returned as explicit 404/400 and the legacy SDK threw as a plain `Error`; over MCP both become a bare-message isError. (2) The canonical core is the SDK shape: the legacy HTTP `runAndEmitOpenMcpApp` returned two extra fields (`serverName`, `toolName`) and used the existing-node title as a fallback for in-place updates; the unified op returns the SDK shape (no `serverName`/`toolName`) and uses `opened.tool.title ?? opened.tool.name` for the title. No test asserts those dropped fields or the in-place title fallback.
71
+
72
+ ## Wave 5 — the final fold (DONE) — the reversed "needs input injection" verdict
73
+
74
+ Fold the 3 deferred html/webpage tools. The key discovery: the assumption that a fold needs a NEW composite action plus a per-action INPUT-INJECTION mechanism the composite layer lacks was **wrong**. The registry `node.add` / `node.update` ops already absorb the behaviors via plain params, and the `canvas_node` composite already exposes those params (its add/update action schemas derive from `node.add`/`node.update`'s `inputShape` + `extraShape`).
75
+
76
+ - **Verified equivalence (file:line):**
77
+ - `node.add` shape (`src/server/operations/ops/nodes.ts` `nodeAddShape`, lines ~677–688) already carries `html` / `primitive` / `kind` / `presentation` / `slideTitles` / `embeddedNodeIds` / `embeddedUrls` / `summary` / `agentSummary` / `description`; it is a `z.looseObject` so `strictSize` / `axCapabilities` pass through.
78
+ - The `node.add` handler routes `type:"html"` + `primitive|kind` → `createHtmlPrimitiveNode` (line ~768) and bare `type:"html"` → `createBasicCanvasNode`, which MERGES the top-level html fields into node data (lines ~387–399) — exactly mirroring the legacy SDK `addHtmlNode` / `addHtmlPrimitive`.
79
+ - `node.update` shape already has `refresh` (line ~860); the handler routes `webpage` + `refresh === true` → `refreshCanvasWebpageNode` (lines ~923–934).
80
+ - **Deprecate-only (html, 2 of 3):** prefix each standalone tool description in `src/mcp/server.ts` with `Deprecated: use canvas_node …` (matching the auto-derived composite-note wording). `canvas_add_html_node` → `action:"add", type:"html"`; `canvas_add_html_primitive` → `action:"add", type:"html", primitive:"<kind>"`. **Direct prefix, not `buildCompositeDeprecationNotes`:** that helper is keyed by registry-OPERATION name and only reaches registry-served tools; these 2 are hand-written tools, so a direct description prefix is the correct one-place edit. **No new action, no mechanism, no op/SDK change, no freeze-count change** (the 2 tools stay registered, just annotated; `canvas_node` is already frozen).
81
+ - **GAP found AND closed (refresh):** the audit surfaced that `refresh:true` IS reachable via `canvas_node {action:"update", refresh:true}` but the RESULT diverged on the failure path. `refreshCanvasWebpageNode` returns `{ ok, id, error? }` (no `node` field); `node.update`'s `formatResult` read `body.node`, found none, and returned a hardcoded `{ ok:true, id }`. Over the MCP-default **`LocalOperationInvoker`** the handler result is returned WITHOUT throwing (the HTTP status-400 mapping never runs locally), so a FAILED refresh surfaced as `{ ok:true, id }` with **no `isError`** — a false success (a live bug reachable today, independent of any deprecation). **Fix:** `node.update`'s `formatResult` now, when there is no `node`, passes the body through verbatim and sets `isError` when `body.ok === false` — matching the HTTP 400 and the legacy `canvas_refresh_webpage_node` (`{ ok:false, id, error }` + `isError`). With the failure path equivalent, the standalone is deprecated → `canvas_node {action:"update", refresh:true}`. `axCapabilities` was also added to `nodeAddShape` + `node.add`'s `extraShape` so the html AX-bridge config is **advertised** (it previously only passed through `z.looseObject`, invisible to schema-guided agents migrating off `canvas_add_html_node`).
82
+ - **Parity tests** (`tests/unit/mcp-composites.test.ts`, head-to-head pattern): (1) `canvas_node add type:"html"` vs `canvas_add_html_node` — same node type/title + `data.presentation`/`slideTitles`/`html`/`embeddedNodeIds`/`embeddedUrls`/`axCapabilities` read back via `canvas_node get full:true`; (2) `canvas_node add type:"html" primitive:"choice-grid" strictSize` vs `canvas_add_html_primitive` — same `type:"html"` + `data.htmlPrimitive === kind` + `data.strictSize`; (3) `canvas_node update refresh:true` vs `canvas_refresh_webpage_node` on the FAILURE path (a connection-refused `http://127.0.0.1:1` — deterministic, no network egress) — both `isError` + `ok:false`.
83
+ - **Remaining legacy after Wave 5:** only `canvas_snapshot` (v0.3 name collision — the save-snapshot tool still holds the name), `canvas_screenshot` (binary image payload, intentionally standalone), and the plan-007 stay-legacy pair `canvas_ax_interaction` / `canvas_ingest_activity` (trust boundary / firehose). Every other n-way-duplicated operation is now registry-backed and folded into a composite.
84
+
85
+ ## Verification (every wave)
86
+
87
+ 1. `bun run typecheck`
88
+ 2. Targeted: `operation-parity`, `mcp-tool-freeze`, `mcp-server`, `mcp-composites`, `server-api`, `cli-node`, `canvas-operations`, `pmx-canvas-sdk` (+ the batch/webview/validate suites)
89
+ 3. Full `bun test tests/unit`
90
+ 4. Guard tests (operation-parity / mcp-tool-freeze / mcp-server) edited only deliberately; wire shapes + tool names byte-compatible; `operations/` never imports server.ts/index.ts.
91
+ 5. `dist/types` regenerated before the PR.
@@ -0,0 +1,90 @@
1
+ # Tech Debt Assessment & Direction Proposal — June 2026
2
+
3
+ **Status:** Proposed
4
+ **Date:** 2026-06-10
5
+ **Scope:** Full-repo audit at v0.1.36 (~70k lines TS; src/server 23.2k, src/client 13.9k, src/mcp 4.2k, src/cli 4.0k)
6
+
7
+ ## Verdict
8
+
9
+ Code quality is better than the release velocity would suggest: zero `as any` across the codebase, real test suites, disciplined changelogs. Architecture quality is the problem. The debt is not scattered; it is one systemic disease: **n-way duplication with manual sync**. Almost every entry in `.learnings/` traces back to it.
10
+
11
+ ## Tech debt, ranked
12
+
13
+ ### 1. The 4-layer copy machine (critical)
14
+
15
+ Every operation is hand-written four times — `CanvasStateManager` → `PmxCanvas` → HTTP handler in `server.ts` → MCP tool in `src/mcp/server.ts` — each with its own validation and error shapes. The CLI (`src/cli/agent.ts`, 3,300 lines) duplicates it a fifth time with raw fetch calls instead of using the SDK.
16
+
17
+ Evidence from `.learnings/` that this is actively producing bugs:
18
+
19
+ - [LRN-20260606-006] Fix #32 applied to only one of two duplicated mutation paths (`updateNode()` vs `applyUpdates()`).
20
+ - [LRN-20260607-005] New `json-render` sourceSurface enum member silently took the permissive default because the `scoped` guard in `applyAxInteraction` was not updated. Security near-miss.
21
+ - [LRN-20260608-002] `readJson` hardening silently killed the documented bare-array shape of `POST /api/canvas/batch` (#49).
22
+
23
+ These are not three bugs. They are one architecture failing three times.
24
+
25
+ **Fix:** a single operation registry. One zod schema per canvas operation; derive the MCP tool, HTTP handler, CLI command, and SDK method from it. Collapses `server.ts` (5,934 lines, raw if-else routing, 14 mutable module-level globals) and `src/mcp/server.ts` (2,861 lines, ~70% schema boilerplate) as a side effect.
26
+
27
+ ### 2. 69 MCP tools is bad AX (high)
28
+
29
+ A project whose thesis is agent experience ships a tool surface that consumes a large slice of every connected agent's context window. Many tools are near-identical passthroughs (`full`/`verbose` flag pairs copied across 12+ tools). Consolidate to roughly 20 composable tools. This is product debt, not just code debt. Depends on item 1.
30
+
31
+ ### 3. CanvasStateManager mixes too many concerns (high)
32
+
33
+ `canvas-state.ts` (2,498 lines) handles node/edge CRUD, AX state, undo/redo, viewport, pins, SQLite persistence, and snapshots in one class. AX state is re-normalized against node IDs on every mutation, so deleting a node silently orphans work items with no event. AX data is split between snapshotted in-memory state and audit-only DB tables with no documented contract.
34
+
35
+ **Fix:** split canvas layout state from AX state; document the snapshotted-vs-audit-only partition explicitly (the CLAUDE.md section is a start, the code does not enforce it).
36
+
37
+ ### 4. E2E is not a CI gate (high)
38
+
39
+ The Playwright/Bun ESM loader blocker ([ERR-20260508-001]) has been open for weeks. E2E was removed from the publish workflow after the apt-mirror hang ([LRN-20260603-002]) and does not gate PRs. The bugs that matter (iframe blank flicker, literal `\n` in ledger, SVG `calc()`) were all caught only by browser tests. Green CI can currently ship a broken canvas.
40
+
41
+ **Fix:** run Playwright via Node's runner in CI permanently, and make headless e2e a hard PR gate.
42
+
43
+ ### 5. Triple-mirrored skill trees + duplicate agent docs (medium)
44
+
45
+ `.agents/skills/`, `.claude/skills/`, `.opencode/skills/` must be byte-identical, enforced by `validate-agent-skill-mirrors.sh`. CLAUDE.md and AGENTS.md are near-identical (4 diff lines as of today) and already drifting.
46
+
47
+ **Fix:** one canonical source, generate the mirrors at build time. ~20 lines of script replacing a permanent tax.
48
+
49
+ ### 6. Dual rendering stack (medium)
50
+
51
+ Preact + signals for the canvas, plus React 19 + recharts + a separate Tailwind build for the json-render viewer: ~2.1 MB of bundle, zero shared code, duplicate theming. Defensible as a deliberate choice, but it is weight carried for one node type. Decide whether json-render earns its stack or should slim down.
52
+
53
+ ### 7. No API versioning despite breaking patch releases (medium)
54
+
55
+ 0.1.35 and 0.1.36 both changed HTTP contract behavior in patch releases. No version negotiation, no deprecation path. Consumers cannot pin safely.
56
+
57
+ ### Smaller items
58
+
59
+ - `readJson` silently returns `{}` on malformed input; handlers cannot distinguish bad requests from empty ones. Prefer loud validation errors (this pattern already caused #49).
60
+ - `listSnapshotsFromDB` interpolates `LIMIT ${limit}` instead of parameterizing. Safe only because of upstream normalization. Fragile.
61
+ - `server-api.test.ts` (4,950 lines) and `cli-node.test.ts` (2,877 lines) are integration tests against a live server posing as unit tests: slow, order-dependent, hard to debug.
62
+ - Client renderers and state bridges (sse-bridge, canvas-store) are e2e-tested only; no coverage visibility.
63
+ - Stale `dist/` bundle trap is documented but not guarded; the dev server should warn when the bundle is older than `src/client/`.
64
+
65
+ ## What is actually fine
66
+
67
+ The TypeScript guardrails are working (zero `as any`). The SQLite persistence layer is mostly clean and parameterized. The AX primitive design (capability ceilings, surface scoping, single trust boundary in `applyAxInteraction`) is sound; the bugs were in the duplication around it, not the design. The `.learnings/` loop is genuinely catching and recording real failures.
68
+
69
+ ## Direction proposal
70
+
71
+ ### Phase 1 (now, 2–3 weeks): stop shipping features, fix the structure
72
+
73
+ 1. Build the operation registry (item 1). This is the single highest-leverage change and the prerequisite for everything below.
74
+ 2. Fix the Playwright gate (item 4) in the same window.
75
+ 3. Kill the skill-mirror triplication (item 5).
76
+
77
+ ### Phase 2: v0.2 as the stability release
78
+
79
+ - Versioned HTTP API with a published breaking-change policy.
80
+ - MCP tool surface consolidated to ~20 composable tools.
81
+ - CLI rebuilt on the SDK.
82
+ - Deliberate Bun-only decision: stay Bun-only for the SDK and treat MCP + HTTP as the universal surface. MCP is the real distribution channel; a Node dual-build is effort on the least differentiated path.
83
+
84
+ ### Phase 3: double down on AX
85
+
86
+ The moat is not the canvas. Infinite canvases are a commodity. Pinned context, spatial semantics, approval gates, steering, and the human-curates-agent-reads loop are not — nobody else is building "the agent's extended working memory" as a primitive layer. Once the foundation is stable: document and version the AX contract, and consider speccing it so other canvas hosts could implement it. That is the difference between another agent dashboard and owning a category.
87
+
88
+ ### The uncomfortable truth
89
+
90
+ 16 releases in the last 8 days is agent-velocity outrunning architecture. The agents building this faithfully replicate the duplication because the structure rewards it. Fix the structure and the same velocity becomes safe instead of compounding.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pmx-canvas",
3
- "version": "0.1.36",
3
+ "version": "0.2.0",
4
4
  "description": "Spatial canvas workbench for coding agents — infinite 2D canvas with agent-native CLI, MCP integration, nodes, edges, file watching, and snapshots",
5
5
  "type": "module",
6
6
  "main": "./src/server/index.ts",
@@ -44,10 +44,10 @@
44
44
  "test": "PMX_CANVAS_DISABLE_BROWSER_OPEN=1 bun test tests/unit",
45
45
  "test:unit": "PMX_CANVAS_DISABLE_BROWSER_OPEN=1 bun test tests/unit",
46
46
  "test:coverage": "bun test tests/unit --coverage --coverage-reporter=text --coverage-reporter=lcov --coverage-dir coverage",
47
- "test:web-canvas": "PMX_CANVAS_DISABLE_BROWSER_OPEN=1 bun run build && PMX_CANVAS_DISABLE_BROWSER_OPEN=1 bun x playwright test",
47
+ "test:web-canvas": "PMX_CANVAS_DISABLE_BROWSER_OPEN=1 bun run build && PMX_CANVAS_DISABLE_BROWSER_OPEN=1 bash scripts/run-playwright.sh",
48
48
  "test:e2e": "bun run test:web-canvas",
49
49
  "test:e2e-cli": "bash scripts/e2e-cli-coverage.sh",
50
- "test:web-canvas:headed": "bun run build && bun x playwright test --headed",
50
+ "test:web-canvas:headed": "bun run build && bash scripts/run-playwright.sh --headed",
51
51
  "test:e2e:headed": "bun run test:web-canvas:headed",
52
52
  "test:all": "bun run test && bun run test:web-canvas",
53
53
  "test:install-browsers": "bun x playwright install chromium",