pmx-canvas 0.1.35 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +461 -0
- package/Readme.md +14 -2
- package/dist/canvas/index.js +82 -41
- package/dist/json-render/index.js +89 -334
- package/dist/types/client/nodes/ExtAppFrame.d.ts +2 -0
- package/dist/types/mcp/canvas-access.d.ts +12 -159
- package/dist/types/server/ax-context.d.ts +1 -1
- package/dist/types/server/ax-state-manager.d.ts +256 -0
- package/dist/types/server/ax-state.d.ts +29 -1
- package/dist/types/server/ax-wait.d.ts +23 -0
- package/dist/types/server/canvas-operations.d.ts +1 -12
- package/dist/types/server/canvas-state.d.ts +46 -14
- package/dist/types/server/html-surface.d.ts +7 -0
- package/dist/types/server/index.d.ts +66 -26
- package/dist/types/server/operations/composites.d.ts +121 -0
- package/dist/types/server/operations/http.d.ts +7 -0
- package/dist/types/server/operations/index.d.ts +8 -0
- package/dist/types/server/operations/invoker.d.ts +13 -0
- package/dist/types/server/operations/mcp.d.ts +15 -0
- package/dist/types/server/operations/ops/annotation.d.ts +2 -0
- package/dist/types/server/operations/ops/app.d.ts +33 -0
- package/dist/types/server/operations/ops/ax-await.d.ts +2 -0
- package/dist/types/server/operations/ops/ax-shared.d.ts +31 -0
- package/dist/types/server/operations/ops/ax-state.d.ts +2 -0
- package/dist/types/server/operations/ops/ax-timeline.d.ts +2 -0
- package/dist/types/server/operations/ops/ax-work.d.ts +2 -0
- package/dist/types/server/operations/ops/batch.d.ts +19 -0
- package/dist/types/server/operations/ops/edges.d.ts +2 -0
- package/dist/types/server/operations/ops/groups.d.ts +2 -0
- package/dist/types/server/operations/ops/json-render.d.ts +31 -0
- package/dist/types/server/operations/ops/nodes.d.ts +62 -0
- package/dist/types/server/operations/ops/query.d.ts +2 -0
- package/dist/types/server/operations/ops/snapshots.d.ts +2 -0
- package/dist/types/server/operations/ops/validate.d.ts +2 -0
- package/dist/types/server/operations/ops/viewport.d.ts +2 -0
- package/dist/types/server/operations/ops/webview.d.ts +2 -0
- package/dist/types/server/operations/registry.d.ts +15 -0
- package/dist/types/server/operations/types.d.ts +116 -0
- package/dist/types/server/operations/webview-runner.d.ts +69 -0
- package/docs/RELEASE.md +5 -0
- package/docs/adr-001-bun-only-runtime.md +46 -0
- package/docs/api-stability.md +57 -0
- package/docs/ax-host-adapter-contract.md +65 -0
- package/docs/ax-state-contract.md +72 -0
- package/docs/http-api.md +34 -2
- package/docs/mcp.md +64 -11
- package/docs/plans/plan-005-operation-registry.md +84 -0
- package/docs/plans/plan-006-mcp-tool-consolidation.md +109 -0
- package/docs/plans/plan-007-ax-domain.md +99 -0
- package/docs/plans/plan-008-registry-finish.md +91 -0
- package/docs/screenshot.png +0 -0
- package/docs/tech-debt-assessment-2026-06.md +90 -0
- package/package.json +3 -3
- package/skills/pmx-canvas/SKILL.md +233 -185
- package/skills/pmx-canvas/evals/evals.json +3 -3
- package/skills/pmx-canvas/references/codex-app-adapter.md +24 -11
- package/skills/pmx-canvas/references/github-copilot-app-adapter.md +31 -1
- package/src/cli/agent.ts +52 -31
- package/src/client/nodes/ExtAppFrame.tsx +73 -5
- package/src/client/nodes/HtmlNode.tsx +12 -3
- package/src/client/nodes/McpAppNode.tsx +12 -3
- package/src/json-render/renderer/index.tsx +3 -0
- package/src/mcp/canvas-access.ts +43 -774
- package/src/mcp/server.ts +190 -2001
- package/src/server/ax-context.ts +7 -1
- package/src/server/ax-state-manager.ts +808 -0
- package/src/server/ax-state.ts +89 -2
- package/src/server/ax-wait.ts +56 -0
- package/src/server/canvas-operations.ts +2 -328
- package/src/server/canvas-schema.ts +2 -2
- package/src/server/canvas-state.ts +140 -382
- package/src/server/html-surface.ts +49 -11
- package/src/server/index.ts +136 -192
- package/src/server/operations/composites.ts +355 -0
- package/src/server/operations/http.ts +103 -0
- package/src/server/operations/index.ts +65 -0
- package/src/server/operations/invoker.ts +87 -0
- package/src/server/operations/mcp.ts +221 -0
- package/src/server/operations/ops/annotation.ts +60 -0
- package/src/server/operations/ops/app.ts +447 -0
- package/src/server/operations/ops/ax-await.ts +216 -0
- package/src/server/operations/ops/ax-shared.ts +38 -0
- package/src/server/operations/ops/ax-state.ts +249 -0
- package/src/server/operations/ops/ax-timeline.ts +381 -0
- package/src/server/operations/ops/ax-work.ts +635 -0
- package/src/server/operations/ops/batch.ts +365 -0
- package/src/server/operations/ops/edges.ts +166 -0
- package/src/server/operations/ops/groups.ts +176 -0
- package/src/server/operations/ops/json-render.ts +691 -0
- package/src/server/operations/ops/nodes.ts +1047 -0
- package/src/server/operations/ops/query.ts +281 -0
- package/src/server/operations/ops/snapshots.ts +366 -0
- package/src/server/operations/ops/validate.ts +37 -0
- package/src/server/operations/ops/viewport.ts +219 -0
- package/src/server/operations/ops/webview.ts +339 -0
- package/src/server/operations/registry.ts +79 -0
- package/src/server/operations/types.ts +150 -0
- package/src/server/operations/webview-runner.ts +77 -0
- package/src/server/server.ts +253 -2170
- package/src/server/web-artifacts.ts +6 -2
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { type Operation } from './types.js';
|
|
2
|
+
export declare function registerOperation(op: Operation): void;
|
|
3
|
+
export declare function getOperation(name: string): Operation;
|
|
4
|
+
export declare function listOperations(): Operation[];
|
|
5
|
+
type OperationEventEmitter = (event: string, payload: Record<string, unknown>) => void;
|
|
6
|
+
export declare function setOperationEventEmitter(emitter: OperationEventEmitter | null): void;
|
|
7
|
+
/** True while operation SSE emits are being suppressed (inside a meta-op such as
|
|
8
|
+
* canvas.batch). Ops whose effect depends on a live SSE emit firing — e.g.
|
|
9
|
+
* mcpapp.open, whose canvas node is created as a side-effect of `ext-app-open` —
|
|
10
|
+
* use this to reject loudly instead of silently no-op'ing in a suppressed run. */
|
|
11
|
+
export declare function isEmitSuppressed(): boolean;
|
|
12
|
+
/** Run `fn` with all operation SSE emits suppressed; restores depth on finally. */
|
|
13
|
+
export declare function runWithSuppressedEmits<T>(fn: () => Promise<T>): Promise<T>;
|
|
14
|
+
export declare function executeOperation(name: string, rawInput: unknown): Promise<unknown>;
|
|
15
|
+
export {};
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Operation registry core types (plan-005).
|
|
3
|
+
*
|
|
4
|
+
* One `Operation` describes a canvas operation once: input schema, the single
|
|
5
|
+
* handler implementation, and how the operation surfaces over HTTP and MCP.
|
|
6
|
+
* `defineOperation` wraps the typed pieces into a transport-agnostic record.
|
|
7
|
+
*
|
|
8
|
+
* Modules in `operations/` must never import `../server.ts` or `../index.ts`
|
|
9
|
+
* (the SSE emitter is injected via `setOperationEventEmitter`; the SDK imports
|
|
10
|
+
* the operation cores directly).
|
|
11
|
+
*/
|
|
12
|
+
import type { ZodRawShape } from 'zod';
|
|
13
|
+
import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
|
|
14
|
+
export type OperationErrorStatus = 400 | 404 | 409;
|
|
15
|
+
/** Operation failure that maps to an HTTP status + `{ ok:false, error }` body and MCP `isError`. */
|
|
16
|
+
export declare class OperationError extends Error {
|
|
17
|
+
readonly status: OperationErrorStatus;
|
|
18
|
+
/** Extra fields merged into the HTTP `{ ok:false, error }` body (e.g. the legacy
|
|
19
|
+
* webview-failure `webview` status snapshot). Omit for the plain envelope. */
|
|
20
|
+
readonly details?: Record<string, unknown>;
|
|
21
|
+
constructor(message: string, status?: OperationErrorStatus, details?: Record<string, unknown>);
|
|
22
|
+
}
|
|
23
|
+
export interface OperationContext {
|
|
24
|
+
/** Emit a workbench SSE event (e.g. extra `canvas-layout-update` frames, focus, viewport). */
|
|
25
|
+
emit(event: string, payload?: Record<string, unknown>): void;
|
|
26
|
+
}
|
|
27
|
+
export interface OperationHttpRoute {
|
|
28
|
+
method: 'GET' | 'POST' | 'PATCH' | 'PUT' | 'DELETE';
|
|
29
|
+
/** EXACT legacy path; `:param` segments capture path parameters. */
|
|
30
|
+
path: string;
|
|
31
|
+
/**
|
|
32
|
+
* Per-op input reader. The default merges query params, a JSON object body
|
|
33
|
+
* (arrays/primitives are preserved by the shared reader — a per-op reader
|
|
34
|
+
* decides how to use them), and path params (path params win on key conflicts).
|
|
35
|
+
*/
|
|
36
|
+
readInput?: (req: Request, params: Record<string, string>, url: URL) => Promise<Record<string, unknown>> | Record<string, unknown>;
|
|
37
|
+
/** HTTP status for a successful result. Defaults to 200. */
|
|
38
|
+
status?: (result: unknown) => number;
|
|
39
|
+
/**
|
|
40
|
+
* Return parsed non-2xx JSON bodies to operation callers instead of throwing.
|
|
41
|
+
* Use only for operations whose MCP contract formats structured failure bodies
|
|
42
|
+
* itself (for example canvas.batch partial failures).
|
|
43
|
+
*/
|
|
44
|
+
errorBodyAsResult?: boolean;
|
|
45
|
+
}
|
|
46
|
+
/** Host capabilities available to MCP result formatters. */
|
|
47
|
+
export interface OperationMcpToolHost {
|
|
48
|
+
getPinnedNodeIds(): Promise<string[]>;
|
|
49
|
+
/**
|
|
50
|
+
* Invoke another registered operation over the host's transport (local or
|
|
51
|
+
* HTTP) — structural subset of OperationInvoker to avoid an import cycle.
|
|
52
|
+
* Used by formatters that need a follow-up read (undo/redo history flags,
|
|
53
|
+
* restore summary).
|
|
54
|
+
*/
|
|
55
|
+
invoker(): {
|
|
56
|
+
invoke(name: string, input: Record<string, unknown>): Promise<unknown>;
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
export interface OperationMcpTool {
|
|
60
|
+
/** Frozen legacy tool name (see tests/unit/mcp-tool-freeze.test.ts). */
|
|
61
|
+
toolName: string;
|
|
62
|
+
description: string;
|
|
63
|
+
/**
|
|
64
|
+
* MCP-only presentation flags and typed overrides merged over the operation
|
|
65
|
+
* input shape when advertising the tool schema (e.g. `full` / `verbose`).
|
|
66
|
+
*/
|
|
67
|
+
extraShape?: ZodRawShape;
|
|
68
|
+
/** Map raw MCP args onto operation input. May throw OperationError. */
|
|
69
|
+
buildInput?: (input: Record<string, unknown>) => Record<string, unknown>;
|
|
70
|
+
/** Format the wire-shaped operation result into a tool result. */
|
|
71
|
+
formatResult?: (result: unknown, input: Record<string, unknown>, host: OperationMcpToolHost) => Promise<CallToolResult> | CallToolResult;
|
|
72
|
+
}
|
|
73
|
+
/** Registered, transport-agnostic operation record. */
|
|
74
|
+
export interface Operation {
|
|
75
|
+
name: string;
|
|
76
|
+
/** true → the registry emits one `canvas-layout-update` after success. */
|
|
77
|
+
mutates: boolean;
|
|
78
|
+
/** Raw zod shape (for MCP tool schemas). */
|
|
79
|
+
inputShape: ZodRawShape;
|
|
80
|
+
http: OperationHttpRoute | null;
|
|
81
|
+
mcp: OperationMcpTool | null;
|
|
82
|
+
/** Validate raw input, run the handler, serialize to the canonical wire shape. */
|
|
83
|
+
execute(rawInput: unknown, ctx: OperationContext): Promise<unknown>;
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Structural view of a zod schema (avoids fighting zod's generic variance).
|
|
87
|
+
* Any `z.looseObject(...)` satisfies this.
|
|
88
|
+
*/
|
|
89
|
+
export interface OperationInputSchema<I> {
|
|
90
|
+
safeParse(value: unknown): {
|
|
91
|
+
success: true;
|
|
92
|
+
data: I;
|
|
93
|
+
} | {
|
|
94
|
+
success: false;
|
|
95
|
+
error: {
|
|
96
|
+
issues: Array<{
|
|
97
|
+
path: PropertyKey[];
|
|
98
|
+
message: string;
|
|
99
|
+
}>;
|
|
100
|
+
};
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
export interface OperationDefinition<I extends Record<string, unknown>, O> {
|
|
104
|
+
name: string;
|
|
105
|
+
mutates: boolean;
|
|
106
|
+
/** MUST be loose (z.looseObject / .passthrough()) — legacy ignores unknown keys. */
|
|
107
|
+
input: OperationInputSchema<I>;
|
|
108
|
+
inputShape: ZodRawShape;
|
|
109
|
+
http?: OperationHttpRoute;
|
|
110
|
+
mcp?: OperationMcpTool;
|
|
111
|
+
/** The single implementation. Mutate via canvasState/canvas-operations so history records. */
|
|
112
|
+
handler: (input: I, ctx: OperationContext) => O | Promise<O>;
|
|
113
|
+
/** Map handler output to the HTTP wire body. Defaults to identity. */
|
|
114
|
+
serialize?: (output: O) => unknown;
|
|
115
|
+
}
|
|
116
|
+
export declare function defineOperation<I extends Record<string, unknown>, O>(def: OperationDefinition<I, O>): Operation;
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Webview runner injection (plan-008 Wave 3).
|
|
3
|
+
*
|
|
4
|
+
* The Bun.WebView automation machinery (startCanvasAutomationWebView / stop /
|
|
5
|
+
* evaluate / resize / status) lives in `../server.ts`, which `operations/` must
|
|
6
|
+
* NEVER import (the isolation rule). The webview ops call into the runner
|
|
7
|
+
* declared here; `server.ts` injects the real implementation at module load via
|
|
8
|
+
* `setWebviewRunner`, exactly mirroring how `setOperationEventEmitter` injects
|
|
9
|
+
* the SSE emitter.
|
|
10
|
+
*
|
|
11
|
+
* `screenshot` is intentionally NOT part of this runner: it returns a binary
|
|
12
|
+
* payload and stays a standalone hand-written tool (`canvas_screenshot`).
|
|
13
|
+
*/
|
|
14
|
+
/** Webview status shape (structurally the server.ts CanvasAutomationWebViewStatus). */
|
|
15
|
+
export interface WebviewStatus {
|
|
16
|
+
supported: boolean;
|
|
17
|
+
active: boolean;
|
|
18
|
+
headlessOnly: true;
|
|
19
|
+
url: string | null;
|
|
20
|
+
backend: 'webkit' | 'chrome' | null;
|
|
21
|
+
width: number | null;
|
|
22
|
+
height: number | null;
|
|
23
|
+
dataStoreDir: string | null;
|
|
24
|
+
startedAt: string | null;
|
|
25
|
+
lastError: string | null;
|
|
26
|
+
}
|
|
27
|
+
/** Start options (structurally the server.ts CanvasAutomationWebViewOptions). */
|
|
28
|
+
export interface WebviewStartOptions {
|
|
29
|
+
backend?: 'webkit' | 'chrome';
|
|
30
|
+
width?: number;
|
|
31
|
+
height?: number;
|
|
32
|
+
chromePath?: string;
|
|
33
|
+
chromeArgv?: string[];
|
|
34
|
+
dataStoreDir?: string;
|
|
35
|
+
}
|
|
36
|
+
/** Outcome of a start attempt, carrying the success/error asymmetry the legacy
|
|
37
|
+
* route preserved:
|
|
38
|
+
* - success → 200 { ok:true, webview }
|
|
39
|
+
* - the canvas server is not running → 503 { ok:false, error } (no webview)
|
|
40
|
+
* - a supported start failed → 500, an unsupported runtime → 501; both return
|
|
41
|
+
* { ok:false, error, webview } and the 500-vs-501 split is read off
|
|
42
|
+
* `webview.supported` (the status), so no separate field is needed. */
|
|
43
|
+
export type WebviewStartResult = {
|
|
44
|
+
ok: true;
|
|
45
|
+
webview: WebviewStatus;
|
|
46
|
+
} | {
|
|
47
|
+
ok: false;
|
|
48
|
+
serverNotRunning: true;
|
|
49
|
+
error: string;
|
|
50
|
+
} | {
|
|
51
|
+
ok: false;
|
|
52
|
+
serverNotRunning?: false;
|
|
53
|
+
error: string;
|
|
54
|
+
webview: WebviewStatus;
|
|
55
|
+
};
|
|
56
|
+
export interface WebviewRunner {
|
|
57
|
+
/** Current automation status (never throws). */
|
|
58
|
+
status(): WebviewStatus;
|
|
59
|
+
/** Start or replace the headless automation session for the workbench page. */
|
|
60
|
+
start(options: WebviewStartOptions): Promise<WebviewStartResult>;
|
|
61
|
+
/** Stop the active session (resolves false when none was active). May throw. */
|
|
62
|
+
stop(): Promise<boolean>;
|
|
63
|
+
/** Resize the active viewport. Throws when no session is active. */
|
|
64
|
+
resize(width: number, height: number): Promise<WebviewStatus>;
|
|
65
|
+
/** Evaluate JavaScript in the active page. Throws when no session is active. */
|
|
66
|
+
evaluate(expression: string): Promise<unknown>;
|
|
67
|
+
}
|
|
68
|
+
export declare function setWebviewRunner(runner: WebviewRunner | null): void;
|
|
69
|
+
export declare function getWebviewRunner(): WebviewRunner;
|
package/docs/RELEASE.md
CHANGED
|
@@ -35,6 +35,11 @@ bun run release:smoke # packs + boots from a clean dir
|
|
|
35
35
|
bun run pack:dry-run # confirms the tarball shape
|
|
36
36
|
```
|
|
37
37
|
|
|
38
|
+
`bun run test:web-canvas` invokes Playwright through
|
|
39
|
+
`scripts/run-playwright.sh`, which runs the Playwright CLI under Node —
|
|
40
|
+
do not call `bun x playwright test` directly; it fails before test
|
|
41
|
+
discovery with a `.esm.preflight` loader error (ERR-20260508-001).
|
|
42
|
+
|
|
38
43
|
`bun run test:e2e-cli` starts a local server in a fresh temp workspace
|
|
39
44
|
and exercises the CLI flows from
|
|
40
45
|
[`docs/evals/e2e-cli-coverage.md`](evals/e2e-cli-coverage.md).
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# ADR-001: Bun-only runtime, MCP + HTTP as the universal surfaces
|
|
2
|
+
|
|
3
|
+
**Status:** Accepted
|
|
4
|
+
**Date:** 2026-06-12
|
|
5
|
+
**Context for:** v0.2 stability release (docs/tech-debt-assessment-2026-06.md, Phase 2)
|
|
6
|
+
|
|
7
|
+
## Context
|
|
8
|
+
|
|
9
|
+
pmx-canvas ships as TypeScript source executed directly by Bun. `package.json` points `main`/`exports` at `src/server/index.ts`, the `bin` entry is `src/cli/index.ts` with a `#!/usr/bin/env bun` shebang, and `engines` requires `bun >= 1.3.14`. There is no compiled JS distribution; `dist/` carries only the client bundle, the json-render viewer, and type declarations.
|
|
10
|
+
|
|
11
|
+
The runtime dependence on Bun is not incidental. `Bun.serve` is the HTTP + SSE server, `bun:sqlite` is the persistence layer, `Bun.WebView` backs the screenshot/evaluate automation tools, and `bun test` is the test runner. These are load-bearing APIs across `src/server/`, not shims that a bundler could paper over.
|
|
12
|
+
|
|
13
|
+
The recurring question is whether v0.2 should add a Node-compatible dual build (ESM + CJS dist, ported server internals) so Node projects can `import 'pmx-canvas'`. The tech debt assessment already leaned no; this ADR makes the decision explicit and binding.
|
|
14
|
+
|
|
15
|
+
The decisive observation: nobody integrates with pmx-canvas by importing it. Agents connect over MCP (stdio). Everything else (scripts, harnesses, CI, other languages) talks HTTP + SSE on localhost. The in-process SDK is a convenience for Bun-native tooling and our own CLI, not the distribution channel. A Node build would be sustained effort (porting `Bun.serve`, replacing `bun:sqlite`, a second test matrix, a real build step where today there is none) spent on the least differentiated integration path, while the operation registry refactor is actively consolidating the surfaces that do matter.
|
|
16
|
+
|
|
17
|
+
## Decision
|
|
18
|
+
|
|
19
|
+
pmx-canvas stays Bun-only for the SDK and runtime.
|
|
20
|
+
|
|
21
|
+
1. MCP (stdio) and the HTTP API are the universal integration surfaces. They are runtime-agnostic by construction: any client that can spawn a process or open a socket can use them.
|
|
22
|
+
2. No Node dual-build (ESM + CJS dist of the server/SDK) will be produced. The package continues to ship TypeScript source executed by Bun.
|
|
23
|
+
3. The programmatic SDK (`import { createCanvas } from 'pmx-canvas'`) is documented as Bun-runtime-only.
|
|
24
|
+
4. Bun-specific APIs (`Bun.serve`, `bun:sqlite`, `Bun.WebView`, `bun test`) remain first-class; no compatibility shims are added for hypothetical Node hosting.
|
|
25
|
+
|
|
26
|
+
## Consequences
|
|
27
|
+
|
|
28
|
+
The honest costs:
|
|
29
|
+
|
|
30
|
+
- **Node-based programmatic consumers cannot import the package.** A Node project that wants canvas access must run the server (any of: `bunx pmx-canvas`, a daemon, MCP auto-start) and integrate over HTTP or MCP. This is a real limitation for anyone wanting in-process embedding from Node, and we are accepting it deliberately.
|
|
31
|
+
- **`bin` requires bun on PATH.** The CLI shebang is `#!/usr/bin/env bun`. `npm install -g pmx-canvas` succeeds but the binary fails at invocation on a machine without bun.
|
|
32
|
+
- **npx-style installation has a sharp edge.** `npx pmx-canvas` fetches the package fine but execution still resolves the bun shebang; without bun installed it fails with a confusing error rather than a clear "install bun" message. `bunx pmx-canvas` is the canonical one-shot command and docs must lead with it. The README and CLI install docs should state the bun prerequisite up front, and a fast preflight check in `src/cli/index.ts` that prints an actionable message when bun is missing is cheap insurance (worth doing, not required by this ADR).
|
|
33
|
+
- **MCP client configs must spawn bun.** MCP server entries point at `bunx pmx-canvas --mcp` (or a bun invocation), not `node`/`npx`. Example configs in docs must be consistent about this.
|
|
34
|
+
- **We forgo the npm-ecosystem long tail.** Some integrations will never happen because `import` was the only path their authors would take. We judge that tail small relative to the agents-over-MCP center of mass.
|
|
35
|
+
|
|
36
|
+
What we gain: zero build step for the server, one runtime to test against, continued free use of `bun:sqlite` and `Bun.serve` without abstraction layers, and engineering time pointed at the operation registry and AX surface instead of distribution plumbing.
|
|
37
|
+
|
|
38
|
+
## Alternatives considered
|
|
39
|
+
|
|
40
|
+
- **Full Node dual-build (ESM + CJS dist).** Requires replacing `Bun.serve` (Hono/Express adapter), `bun:sqlite` (better-sqlite3, a native dependency with its own install pain), and dropping or forking `Bun.WebView`. Doubles the test matrix permanently. Rejected: high sustained cost on the path with the least demand.
|
|
41
|
+
- **Node-compatible SDK client only (thin HTTP wrapper published for Node).** Cheaper, but it is just a typed fetch client; any consumer can write one in an afternoon, and an official one creates a second public surface to version and freeze. Rejected for v0.2; can be revisited if real demand appears, without violating this ADR (the server stays Bun-only either way).
|
|
42
|
+
- **Compile-to-single-binary (`bun build --compile`).** Solves "bun on PATH" for the CLI but not programmatic import, adds per-platform release artifacts, and complicates the MCP spawn story. Out of scope for v0.2; does not change this decision.
|
|
43
|
+
|
|
44
|
+
## Revisit triggers
|
|
45
|
+
|
|
46
|
+
Reopen this ADR if (a) a major MCP host platform cannot spawn bun, or (b) repeated, concrete integration requests arrive that HTTP/MCP genuinely cannot serve (in-process embedding with shared memory, for example). Absent those, Bun-only stands.
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# API Stability Contract (v0.2)
|
|
2
|
+
|
|
3
|
+
**Status:** Accepted, effective from v0.2.0
|
|
4
|
+
**Date:** 2026-06-12
|
|
5
|
+
**Context:** docs/tech-debt-assessment-2026-06.md item 7 (breaking patch releases, no deprecation path) and Phase 2 of the direction proposal. See also docs/adr-001-bun-only-runtime.md for which surfaces are universal.
|
|
6
|
+
|
|
7
|
+
The problem this fixes: 0.1.35 and 0.1.36 both changed HTTP contract behavior in patch releases. Consumers could not pin safely. From v0.2.0, they can.
|
|
8
|
+
|
|
9
|
+
## Public surfaces
|
|
10
|
+
|
|
11
|
+
These four surfaces are the contract. Anything not listed here is internal.
|
|
12
|
+
|
|
13
|
+
1. **HTTP API:** all `/api/canvas/*` routes documented in `docs/http-api.md`: method, path, request shape, response shape, and status codes.
|
|
14
|
+
2. **MCP surface:** tool names, tool input schemas (field names, types, required/optional status), and the fixed resource URIs (`canvas://layout`, `canvas://pinned-context`, and the rest of the 14 frozen URIs). Per-skill resources (`canvas://skills/<name>`) track the `skills/` directory and are explicitly not frozen by name.
|
|
15
|
+
3. **CLI:** the `pmx-canvas` subcommands and flags documented in `docs/cli.md` (including `serve`, `--mcp`, `--port`, `--theme`), their argument shapes, and their output formats where documented as machine-readable.
|
|
16
|
+
4. **SDK:** the exports of the package entry (`src/server/index.ts` via the `exports` map): the `PmxCanvas` class surface, `createCanvas`, and the exported types and helpers. Bun runtime only, per ADR-001.
|
|
17
|
+
|
|
18
|
+
## Policy
|
|
19
|
+
|
|
20
|
+
We are in 0.x semver, and we use it honestly:
|
|
21
|
+
|
|
22
|
+
- **Minor versions (0.2 → 0.3) may break public surfaces.** Breaking changes are allowed only at minor boundaries.
|
|
23
|
+
- **Patch versions never break public surfaces.** A patch may fix bugs, tighten validation of inputs that were never accepted as documented, and add purely additive fields. If a documented request that worked stops working, or a documented response shape changes, that is not a patch.
|
|
24
|
+
- **Every breaking change gets a CHANGELOG entry under a `### Breaking` heading before release.** Not after, not in a follow-up. The release checklist in docs/RELEASE.md treats a breaking change without that heading as a release blocker.
|
|
25
|
+
- **MCP tool names are frozen by `tests/unit/mcp-tool-freeze.test.ts`.** The test pins the literal tool-name list and the fixed resource URIs. Renaming or removing a tool requires editing that test in the same commit, which makes the break deliberate and reviewable rather than accidental. If you find yourself updating the freeze test, you owe a `### Breaking` entry and a minor version.
|
|
26
|
+
- **Additive changes are always allowed:** new tools, new routes, new optional input fields, new response fields. Consumers must tolerate unknown fields in responses.
|
|
27
|
+
|
|
28
|
+
## Deprecation
|
|
29
|
+
|
|
30
|
+
A public surface is marked deprecated at least one minor version before removal. Concretely:
|
|
31
|
+
|
|
32
|
+
1. Mark it in the docs (`docs/http-api.md`, `docs/mcp.md`, `docs/cli.md`, or `docs/sdk.md`) and in the MCP tool description where applicable.
|
|
33
|
+
2. Record it in the CHANGELOG under `### Deprecated` in the minor that deprecates it.
|
|
34
|
+
3. Remove it no earlier than the next minor, with a `### Breaking` entry naming the replacement.
|
|
35
|
+
|
|
36
|
+
So a tool deprecated in 0.2.x survives all of 0.2.x and may be removed in 0.3.0. Plan-006 (MCP tool consolidation) is the first consumer of this mechanism.
|
|
37
|
+
|
|
38
|
+
## Explicitly out of contract
|
|
39
|
+
|
|
40
|
+
These can change in any release without notice:
|
|
41
|
+
|
|
42
|
+
- **SSE event payload internals.** The existence of the `/api/workbench/events` stream is public; the field-level shape of individual event frames is not. Build on the HTTP read endpoints, not on event internals.
|
|
43
|
+
- **Undocumented endpoints.** Anything reachable but not in `docs/http-api.md` (internal prompt/trace/theme plumbing, browser-only routes) is internal.
|
|
44
|
+
- **The `.pmx-canvas/` on-disk layout.** `canvas.db` schema, artifact directory structure, daemon pid/log files. Migrations keep old data readable; the format itself is ours to change.
|
|
45
|
+
- **Anything under `src/` not exported from the package entry.** Deep imports (`pmx-canvas/src/server/whatever`) get no stability promise even where the file layout makes them possible.
|
|
46
|
+
- **Browser UI:** DOM structure, CSS custom properties, client bundle internals.
|
|
47
|
+
|
|
48
|
+
## Enforcement: the operation registry
|
|
49
|
+
|
|
50
|
+
The contract is only as real as its single source of truth. The operation registry (`src/server/operations/`, docs/plans/plan-005-operation-registry.md) gives each canvas operation exactly one zod input schema and one handler, from which the HTTP route, MCP tool, CLI command, and SDK method derive. One schema per operation means one place where the contract lives, one diff to review when it changes, and no cross-surface drift of the kind that produced the 0.1.x breakages (the same operation behaving differently over HTTP vs local MCP access).
|
|
51
|
+
|
|
52
|
+
Two mechanical guards back the policy:
|
|
53
|
+
|
|
54
|
+
- `tests/unit/mcp-tool-freeze.test.ts`: tool names and fixed resource URIs cannot change silently.
|
|
55
|
+
- `tests/unit/operation-parity.test.ts`: migrated operations behave identically across surfaces, including tolerance of unknown input keys (schemas stay loose; strict parsing would be an invisible break).
|
|
56
|
+
|
|
57
|
+
Operations not yet migrated to the registry are covered by the same policy; the registry just makes compliance cheap rather than a matter of discipline.
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# AX host-adapter contract
|
|
2
|
+
|
|
3
|
+
PMX Canvas owns the **AX data layer** — work items, approval gates, steering,
|
|
4
|
+
evidence, review annotations, elicitations, mode requests, the timeline, host
|
|
5
|
+
capabilities, and the tool/prompt policy — over HTTP and MCP. What makes AX
|
|
6
|
+
*interactive* on a given coding harness (GitHub Copilot, Codex, Claude Code, …) is
|
|
7
|
+
a thin **adapter** that wires PMX's neutral surfaces to that harness's lifecycle.
|
|
8
|
+
|
|
9
|
+
"Agnostic" means a documented interface plus PMX-side behavior plus one small
|
|
10
|
+
reference adapter per harness — not zero-adapter magic. The genuinely harness-owned
|
|
11
|
+
acts (waking a turn, per-turn context injection, forwarding native tool hooks,
|
|
12
|
+
native modals) still need a per-harness adapter; PMX owns everything on its side of
|
|
13
|
+
the line (queues, endpoints, schemas, the canvas-surface fallback).
|
|
14
|
+
|
|
15
|
+
## The interface
|
|
16
|
+
|
|
17
|
+
Every adapter implements as much of this as its host allows; PMX provides the
|
|
18
|
+
surface each one binds to.
|
|
19
|
+
|
|
20
|
+
| Adapter method | PMX surface (owned) | Harness-owned part |
|
|
21
|
+
| --- | --- | --- |
|
|
22
|
+
| `pullContext()` | `GET /api/canvas/ax/context?consumer=<id>` · `canvas://ax-context` — full board **plus** a compact `delivery` lead block | When/where to inject it into the model's turn |
|
|
23
|
+
| `deliverSteer()` | `GET /api/canvas/ax/delivery/pending?consumer=<id>` · `canvas_claim_ax_delivery` → act → `POST …/delivery/<id>/mark` · `canvas_mark_ax_delivery` | Calling the host's native send/wake |
|
|
24
|
+
| `ingestActivity(event)` | `POST /api/canvas/ax/activity` · `canvas_ingest_activity` — board auto-reacts | Forwarding the host's tool/session hooks |
|
|
25
|
+
| `awaitGate(id)` | `GET /api/canvas/ax/{approval\|elicitation\|mode}/<id>?waitMs=` · `canvas_await_*` | Optionally surfacing a native modal; the agent must await PMX |
|
|
26
|
+
| `mirrorLog(event)` *(optional)* | `GET /api/canvas/ax/timeline` · `canvas://ax-timeline` | Writing AX events into the host's own chat/session log |
|
|
27
|
+
|
|
28
|
+
## Steering is gated, not pushed (#54)
|
|
29
|
+
|
|
30
|
+
A board action (e.g. an `ax.steer` emit from a surface button) enqueues a steering
|
|
31
|
+
message; it does **not** wake the agent. The steering message reaches the next turn only when:
|
|
32
|
+
|
|
33
|
+
1. **The pin/focus gate is open.** A typical adapter injects `/api/canvas/ax/context`
|
|
34
|
+
only when something is pinned or focused (`pinned.count > 0 || focus.nodeIds.length > 0`).
|
|
35
|
+
A steering board must therefore stay pinned, or its button should also emit
|
|
36
|
+
`ax.focus.set` on the board node, to hold the gate open.
|
|
37
|
+
2. **A human message fires the turn.** A sandbox button click cannot itself create a
|
|
38
|
+
new agent turn (an app-platform constraint). Any human prompt triggers the injection.
|
|
39
|
+
3. **The agent acts, then acks.** Injected `pendingSteering` / `pendingActivity` is
|
|
40
|
+
*to-do*, not narration: act on it, then `canvas_mark_ax_delivery` the steering
|
|
41
|
+
(or resolve the work item / gate). Until acked, steering re-injects every gated turn.
|
|
42
|
+
|
|
43
|
+
The `delivery` lead block (`GET /api/canvas/ax/context?consumer=<id>`) is the
|
|
44
|
+
robustness hedge: it's compact and sits above the full dump, so an adapter can inject
|
|
45
|
+
it un-truncated even on a busy board where the full context is clipped.
|
|
46
|
+
|
|
47
|
+
## The two primitives that close the loop
|
|
48
|
+
|
|
49
|
+
- **Activity ingestion (bidirectional board).** Before, AX was one-directional
|
|
50
|
+
(agent → board). With `ingestActivity`, the agent's *real work* flows back: a failed
|
|
51
|
+
tool becomes a blocked work item + a review finding + evidence without the agent
|
|
52
|
+
remembering to push it. Reactions are kind-driven and overridable per call.
|
|
53
|
+
- **Blocking gates (gates that actually gate).** Before, an approval gate was inert
|
|
54
|
+
data the agent had to poll. With `canvas_await_approval` (and the `?waitMs` HTTP
|
|
55
|
+
long-poll), the agent requests a gate then *blocks* until the human resolves it in
|
|
56
|
+
the browser — real human-in-the-loop control on any harness.
|
|
57
|
+
|
|
58
|
+
## What stays harness-owned
|
|
59
|
+
|
|
60
|
+
Waking a turn, the exact per-turn injection timing, forwarding native tool hooks, and
|
|
61
|
+
native blocking modals are the host's job — PMX defines the neutral interface and owns
|
|
62
|
+
its side. Model/abort control (`setModel`, `abort`) is intentionally out of scope.
|
|
63
|
+
|
|
64
|
+
See [`docs/http-api.md`](http-api.md) and [`docs/mcp.md`](mcp.md) for the full surface,
|
|
65
|
+
and the per-harness notes under `skills/pmx-canvas/references/`.
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# AX state contract (authoritative)
|
|
2
|
+
|
|
3
|
+
The agent-experience (AX) state is split into **three partitions** with distinct
|
|
4
|
+
storage and lifecycle rules. This document is the authoritative spec for the
|
|
5
|
+
snapshot-vs-audit boundary; it is the documented module boundary for
|
|
6
|
+
`AxStateManager` (`src/server/ax-state-manager.ts`), which `CanvasStateManager`
|
|
7
|
+
holds and delegates to.
|
|
8
|
+
|
|
9
|
+
| Partition | Members | Storage | Snapshotted | Cleared by `canvas_clear` | Cleared by `restore` |
|
|
10
|
+
|-----------|---------|---------|:-----------:|:-------------------------:|:--------------------:|
|
|
11
|
+
| **Canvas-bound** | `focus`, `workItems`, `approvalGates`, `reviewAnnotations`, `elicitations`, `modeRequests`, `policy` | in-memory `_axState` + one JSON blob in the `ax_state` table | ✅ | ✅ | ✅ (replaced by the snapshot's AX) |
|
|
12
|
+
| **Timeline (audit-only)** | `agent-event`, `evidence-item`, `steering-message` | `ax_events` / `ax_evidence` / `ax_steering` tables, 500-row retention, sequential ids | ❌ | ❌ | ❌ |
|
|
13
|
+
| **Host/session** | `host-capability` | `ax_host_capabilities` table | ❌ | ❌ | ❌ |
|
|
14
|
+
|
|
15
|
+
**Rules.** Canvas-bound state travels with the canvas (snapshot / restore / clear);
|
|
16
|
+
timeline and host data are diagnostic and survive all three. Timeline rows are
|
|
17
|
+
append-only, retention-bounded (`AX_TIMELINE_RETENTION = 500` per table), and
|
|
18
|
+
read via `canvas_get_ax_timeline` / `canvas://ax-timeline`. The host-capability
|
|
19
|
+
row is reported by adapters and read via `canvas_get_ax`.
|
|
20
|
+
|
|
21
|
+
## Read surfaces
|
|
22
|
+
|
|
23
|
+
- **Canvas-bound:** `canvas_get_ax`, `canvas://ax`, `canvas://ax-context`, `canvas://ax-work`
|
|
24
|
+
- **Timeline:** `canvas_get_ax_timeline`, `canvas://ax-timeline`, `canvas://ax-pending-steering`, `canvas://ax-delivery`
|
|
25
|
+
- **Host:** `canvas_get_ax`
|
|
26
|
+
|
|
27
|
+
## Node-deletion semantics (soft-orphan + audit)
|
|
28
|
+
|
|
29
|
+
When a node is removed, the canvas-bound partition is re-normalized against the
|
|
30
|
+
surviving node set (`AxStateManager.revalidateAfterNodeRemoval`):
|
|
31
|
+
|
|
32
|
+
- **Work items / approval gates / elicitations / mode requests** that referenced
|
|
33
|
+
the deleted node keep the item but **strip the dangling node id** ("re-anchored").
|
|
34
|
+
The data semantics are soft-orphan: the work is not destroyed.
|
|
35
|
+
- **Node-anchored review annotations** (`anchorType: 'node'`) for the deleted node
|
|
36
|
+
are **dropped entirely** ("removed") — they are meaningless without their node.
|
|
37
|
+
|
|
38
|
+
This re-normalization was previously **silent**. It now records exactly one
|
|
39
|
+
auditable **timeline** event when (and only when) something was actually affected:
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
kind: 'note'
|
|
43
|
+
source: 'system'
|
|
44
|
+
summary: 'Node "<title>" deleted — re-anchored N AX item(s),
|
|
45
|
+
removed M node-anchored review annotation(s). [(focus anchor cleared)]'
|
|
46
|
+
data: {
|
|
47
|
+
systemEvent: 'ax-node-orphan',
|
|
48
|
+
removedNodeId: '<node id>',
|
|
49
|
+
reanchoredIds: [ ...work/gate/elicitation/mode ids... ],
|
|
50
|
+
removedReviewIds: [ ...review annotation ids... ],
|
|
51
|
+
reanchoredFocus: <boolean>, // true if focus.nodeIds referenced the deleted node
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The audit lives in the **timeline** (audit partition) — correct per the contract:
|
|
56
|
+
it is diagnostic continuity, not canvas-bound state, so it survives clear/restore
|
|
57
|
+
and is not part of any snapshot. `recordAxEvent` is timeline-only and does not
|
|
58
|
+
re-enter the canvas-bound normalization path, so there is no recursion.
|
|
59
|
+
|
|
60
|
+
The audit is scoped to `removeNode` (the live, observable change). `restore`
|
|
61
|
+
replaces the canvas wholesale, and its snapshot AX was already consistent
|
|
62
|
+
when it was saved, so it is not audited.
|
|
63
|
+
|
|
64
|
+
**Append-only / undo semantics.** The note records a historical fact (at time T,
|
|
65
|
+
deleting node X re-anchored these items), not current state. It is **not rolled
|
|
66
|
+
back on undo** and **not duplicated on redo**: undo restores the canvas-bound AX
|
|
67
|
+
state (the re-anchoring is reversed in the live state) but leaves the note as a
|
|
68
|
+
record; redo replays `removeNode` inside suppressed recording
|
|
69
|
+
(`_suppressRecordingDepth > 0`), which re-runs the re-normalization but does
|
|
70
|
+
**not** append a second note. Consumers should read `reanchoredIds` /
|
|
71
|
+
`removedReviewIds` against the *current* canvas-bound state, not assume the
|
|
72
|
+
referenced items are still re-anchored.
|
package/docs/http-api.md
CHANGED
|
@@ -46,8 +46,18 @@ curl -X POST http://localhost:4313/api/canvas/node \
|
|
|
46
46
|
curl -X POST http://localhost:4313/api/canvas/node \
|
|
47
47
|
-H "Content-Type: application/json" \
|
|
48
48
|
-d '{"type":"html-primitive","kind":"choice-grid","title":"Options","data":{"items":[{"title":"Small patch","summary":"Least disruption."}]}}'
|
|
49
|
+
|
|
50
|
+
# Opt an html node into AX. Top-level `html` AND `axCapabilities` are accepted on
|
|
51
|
+
# POST add and PATCH update (and may also be nested under `data`).
|
|
52
|
+
curl -X POST http://localhost:4313/api/canvas/node \
|
|
53
|
+
-H "Content-Type: application/json" \
|
|
54
|
+
-d '{"type":"html","title":"AX board","html":"<p>steering board</p>","axCapabilities":{"enabled":true,"allowed":["ax.steer"]}}'
|
|
49
55
|
```
|
|
50
56
|
|
|
57
|
+
A node creation request must resolve a `type` — pass it in the body (`{ "type":
|
|
58
|
+
... }`) or as a `?type=` query param. An empty / type-less body returns `400`
|
|
59
|
+
rather than silently creating a markdown node.
|
|
60
|
+
|
|
51
61
|
## Edges
|
|
52
62
|
|
|
53
63
|
```bash
|
|
@@ -177,7 +187,9 @@ curl http://localhost:4313/api/canvas/ax/host-capability
|
|
|
177
187
|
|
|
178
188
|
Validation: `/ax/event` requires a valid `kind` + `summary` (400 otherwise);
|
|
179
189
|
`/ax/evidence` requires `kind` + `title`; `/ax/steer`, `/ax/work`,
|
|
180
|
-
`/ax/approval`, `/ax/review` require their primary field; `PATCH /ax/work
|
|
190
|
+
`/ax/approval`, `/ax/review` require their primary field; `POST /ax/work` and `PATCH /ax/work/:id`
|
|
191
|
+
reject an unknown `status` with 400 (the tokens are `todo`, `in-progress`,
|
|
192
|
+
`blocked`, `done`, `cancelled` — hyphens, not underscores); `PATCH /ax/work/:id`
|
|
181
193
|
and `PATCH /ax/review/:id` return 404 for unknown IDs; approval resolve returns
|
|
182
194
|
404 if the gate is missing or already resolved.
|
|
183
195
|
|
|
@@ -218,6 +230,24 @@ curl -X POST http://localhost:4313/api/canvas/ax/mode/<id>/resolve \
|
|
|
218
230
|
-d '{"decision":"approved"}'
|
|
219
231
|
curl http://localhost:4313/api/canvas/ax/mode
|
|
220
232
|
|
|
233
|
+
# Activity ingestion — forward an agent tool/session event; the board auto-reacts
|
|
234
|
+
# (kind-driven, overridable: failure → work item + review + evidence; tool-result
|
|
235
|
+
# + outcome:"success" → evidence). Set a reaction to false to suppress it.
|
|
236
|
+
curl -X POST http://localhost:4313/api/canvas/ax/activity \
|
|
237
|
+
-H "Content-Type: application/json" \
|
|
238
|
+
-d '{"kind":"failure","title":"tsc failed","summary":"type error in x.ts","nodeIds":["node-1"],"source":"api"}'
|
|
239
|
+
|
|
240
|
+
# Blocking gate read — read one gate, or long-poll with ?waitMs until the human
|
|
241
|
+
# resolves it in the browser (gates that actually gate). Returns { <primitive>, pending }.
|
|
242
|
+
curl "http://localhost:4313/api/canvas/ax/approval/<id>" # immediate read
|
|
243
|
+
curl "http://localhost:4313/api/canvas/ax/approval/<id>?waitMs=30000" # blocks ≤30s / until resolved
|
|
244
|
+
curl "http://localhost:4313/api/canvas/ax/elicitation/<id>?waitMs=30000"
|
|
245
|
+
curl "http://localhost:4313/api/canvas/ax/mode/<id>?waitMs=30000"
|
|
246
|
+
|
|
247
|
+
# Context — optional ?consumer= filters the compact, loop-safe `delivery` lead block
|
|
248
|
+
# (undelivered steering + open work/approvals it can act on) for per-turn injection.
|
|
249
|
+
curl "http://localhost:4313/api/canvas/ax/context?consumer=copilot"
|
|
250
|
+
|
|
221
251
|
# Commands — list the registry, invoke a command (records a `command` agent-event)
|
|
222
252
|
curl http://localhost:4313/api/canvas/ax/command
|
|
223
253
|
curl -X POST http://localhost:4313/api/canvas/ax/command \
|
|
@@ -234,7 +264,9 @@ curl -X POST http://localhost:4313/api/canvas/ax/policy \
|
|
|
234
264
|
Validation: `/ax/interaction` returns `{ ok: false, code }` (403 `ax-disabled` /
|
|
235
265
|
`not-allowed`, 400 `invalid-payload` / `unknown-command`, 404 `unknown-node`);
|
|
236
266
|
`/ax/command` rejects an unknown command name with 400; `/ax/elicitation/:id/respond`
|
|
237
|
-
and `/ax/mode/:id/resolve` return 404 for unknown IDs
|
|
267
|
+
and `/ax/mode/:id/resolve` return 404 for unknown IDs; `/ax/activity` requires a
|
|
268
|
+
valid `kind` + `title` (400 otherwise); the single-item gate GETs return 404 for
|
|
269
|
+
unknown IDs and clamp `?waitMs` to at most 120000 ms.
|
|
238
270
|
|
|
239
271
|
## Diagrams (Excalidraw preset)
|
|
240
272
|
|