@fusionkit/session-harness 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,135 @@
1
+ import assert from "node:assert/strict";
2
+ import { mkdtempSync, rmSync } from "node:fs";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { after, before, test } from "node:test";
6
+ import { verifyReceiptBundle } from "@fusionkit/protocol";
7
+ import { CapabilityMismatchError } from "@fusionkit/runner";
8
+ import { makeRepo, startStack } from "@fusionkit/testkit";
9
+ import { captureWorkspace } from "@fusionkit/workspace";
10
+ import { isPiAgentRun, piAuthFromEnv, piHarnessBackend } from "../index.js";
11
+ import { emptyHarnessLog, fakeHarness, fakeLocalSandboxProvider } from "./fakes.js";
12
+ // ---------------------------------------------------------------------------
13
+ // auth: a local endpoint maps to explicit customEnv; everything else fails closed
14
+ // ---------------------------------------------------------------------------
15
+ test("pi auth: a local OpenAI-compatible endpoint maps to explicit customEnv", () => {
16
+ const auth = piAuthFromEnv({
17
+ OPENAI_BASE_URL: "http://127.0.0.1:11434/v1",
18
+ OPENAI_API_KEY: "local-dummy"
19
+ });
20
+ assert.deepEqual(auth, {
21
+ customEnv: {
22
+ OPENAI_API_KEY: "local-dummy",
23
+ OPENAI_BASE_URL: "http://127.0.0.1:11434/v1"
24
+ }
25
+ });
26
+ });
27
+ test("pi auth: a gateway key is forwarded as customEnv too", () => {
28
+ const auth = piAuthFromEnv({ AI_GATEWAY_API_KEY: "gw" });
29
+ assert.deepEqual(auth, { customEnv: { AI_GATEWAY_API_KEY: "gw" } });
30
+ });
31
+ test("pi auth: no provider credential fails closed", () => {
32
+ assert.throws(() => piAuthFromEnv({ OPENAI_BASE_URL: "http://127.0.0.1:11434/v1" }), (error) => error instanceof CapabilityMismatchError && /refusing to fall back/.test(error.message));
33
+ });
34
+ test("pi auth: env vars the pi path cannot deliver fail closed", () => {
35
+ assert.throws(() => piAuthFromEnv({ OPENAI_API_KEY: "k", CUSTOM_FLAG: "1" }), (error) => error instanceof CapabilityMismatchError && /CUSTOM_FLAG/.test(error.message));
36
+ });
37
+ // ---------------------------------------------------------------------------
38
+ // delegation: non-pi executions go to the fallback (hermetic) backend
39
+ // ---------------------------------------------------------------------------
40
+ test("pi backend reports the hermetic tier and recognizes pi agent runs", () => {
41
+ const backend = piHarnessBackend(); // built with no options
42
+ assert.equal(backend.isolation, "hermetic"); // NOTE(review): the title also promises "recognizes pi agent runs", but nothing in this test calls isPiAgentRun — confirm whether an assertion is missing
43
+ });
44
+ // ---------------------------------------------------------------------------
45
+ // end to end: a governed pi run through the real HarnessAgent
46
+ //
47
+ // As in the claude-code e2e, the fakes replace only what needs a live local
48
+ // model (the pi adapter) and a real sandbox (a local directory in place of
49
+ // just-bash). The binding wiring, generic backend, staging, mirror-back,
50
+ // event chain, and offline verification are all real — and the run is
51
+ // labeled with the hermetic tier the pi binding declares.
52
+ // ---------------------------------------------------------------------------
53
+ const POOL = "swarm-pool";
54
+ let stack;
55
+ let repoDir;
56
+ let sandboxRoot;
57
+ const harnessLog = emptyHarnessLog();
58
+ before(async () => {
59
+ sandboxRoot = mkdtempSync(join(tmpdir(), "warrant-fake-pi-sandbox-"));
60
+ stack = await startStack({
61
+ pool: POOL,
62
+ startRunner: true,
63
+ backends: [
64
+ piHarnessBackend({
65
+ createHarness: ({ env }) => {
66
+ harnessLog.envSeen.push(env);
67
+ return fakeHarness(harnessLog, "fake-pi");
68
+ },
69
+ createSandboxProvider: () => fakeLocalSandboxProvider(sandboxRoot)
70
+ })
71
+ ],
72
+ policy: (policy) => {
73
+ policy.agents.allow = ["pi"];
74
+ }
75
+ });
76
+ repoDir = makeRepo({
77
+ files: { "README.md": "# pi fixture\n", "data.txt": "alpha\nbeta\n" }
78
+ });
79
+ });
80
// Teardown must tolerate a partially failed `before`: if `startStack` or
// `makeRepo` threw, `stack` and/or `repoDir` are still undefined, and an
// unguarded `stack.stop()` / `rmSync(undefined)` would raise a TypeError that
// masks the real setup failure.
after(async () => {
    try {
        await stack?.stop();
    }
    finally {
        // Always remove the temp directories, even when stopping the stack throws.
        for (const dir of [repoDir, sandboxRoot]) {
            if (dir !== undefined) {
                rmSync(dir, { recursive: true, force: true });
            }
        }
    }
});
85
+ test("e2e: a pi contract runs through the real HarnessAgent on the hermetic tier", async () => {
86
+ const captured = captureWorkspace(repoDir);
87
+ await stack.client.putBlob(captured.bundle);
88
+ if (captured.dirtyDiff)
89
+ await stack.client.putBlob(captured.dirtyDiff);
90
+ const created = await stack.client.requestRun({
91
+ requestedBy: { kind: "human", id: "swarm-worker" },
92
+ agentKind: "pi",
93
+ prompt: "count the lines in data.txt",
94
+ pool: POOL,
95
+ secretNames: [],
96
+ workspace: captured.manifest,
97
+ network: { defaultDeny: true, allowHosts: [] },
98
+ budget: {},
99
+ disclosure: "minimal-context",
100
+ isolation: "hermetic",
101
+ execution: {
102
+ kind: "agent",
103
+ agent: { kind: "pi" },
104
+ prompt: "count the lines in data.txt",
105
+ env: {
106
+ vars: {
107
+ OPENAI_BASE_URL: "http://127.0.0.1:11434/v1",
108
+ OPENAI_API_KEY: "local-dummy"
109
+ }
110
+ }
111
+ }
112
+ });
113
+ const contract = created.runId; // the run id is the handle used for runOnce/getBundle below
114
+ assert.ok(isPiAgentRun); // NOTE(review): this only checks that the imported binding is truthy; isPiAgentRun is never called here — confirm whether it was meant to be applied to the run/contract
115
+ assert.equal(await stack.runOnce(), contract);
116
+ const bundle = await stack.client.getBundle(contract);
117
+ assert.equal(bundle.receipt.status, "completed");
118
+ assert.equal(bundle.receipt.runner.isolation, "hermetic");
119
+ assert.deepEqual(verifyReceiptBundle(bundle).problems, []);
120
+ // The pi adapter saw the prompt and the broker-resolved local endpoint env,
121
+ // never the host environment.
122
+ assert.deepEqual(harnessLog.prompts, ["count the lines in data.txt"]);
123
+ assert.deepEqual(harnessLog.envSeen, [
124
+ {
125
+ OPENAI_BASE_URL: "http://127.0.0.1:11434/v1",
126
+ OPENAI_API_KEY: "local-dummy"
127
+ }
128
+ ]);
129
+ // The worker's edit was mirrored back into the runner's checkout.
130
+ assert.ok(bundle.receipt.workspaceOut.diffHash, "expected a workspace diff");
131
+ const diff = await stack.client.getBlob(bundle.receipt.workspaceOut.diffHash);
132
+ assert.ok(diff.toString("utf8").includes("result.txt"));
133
+ const fileEvents = bundle.events.filter((e) => e.event.type === "file.changed");
134
+ assert.ok(fileEvents.some((e) => e.event.type === "file.changed" && e.event.path === "result.txt"), "expected the mirrored result.txt in the boundary file events");
135
+ });
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Structured session transcript: the harness's typed event stream rendered
3
+ * as one JSON line per part. This becomes the run's log artifact, replacing
4
+ * the merged stdout/stderr a CLI invocation would have produced — every
5
+ * tool call, file-change notice, and finish reason the harness reported is
6
+ * preserved verbatim and hash-addressed in the receipt.
7
+ *
8
+ * The recorder is deliberately liberal in what it accepts: harness stream
9
+ * parts are an experimental, evolving union (`@ai-sdk/harness` canary), so
10
+ * known part types are mapped to stable transcript shapes and unknown types
11
+ * are recorded by name only. Nothing here throws on a novel part.
12
+ */
13
+ /** One JSON line of the transcript: a `part` name plus any further part-specific fields. */
14
+ export type TranscriptLine = {
15
+ part: string;
16
+ } & Record<string, unknown>;
17
+ export declare class TranscriptRecorder {
18
+ private readonly lines;
19
+ private readonly textById;
20
+ private failed;
21
+ /** Ingest one stream part from the harness agent's full stream; values that are not objects with a string `type` are ignored. */
22
+ ingest(value: unknown): void;
23
+ /** Record a turn-level failure (a thrown stream/iteration error) as a `turn-failed` line and mark the turn failed. */
24
+ fail(error: unknown): void;
25
+ /** 0 when the turn finished cleanly; 1 once an `error`/`abort` part, a `finish` part with reason "error", or a `fail()` call was seen. */
26
+ exitCode(): number;
27
+ /** Render the transcript as a JSONL buffer (one JSON object per line), truncated when it exceeds `maxBytes` bytes. */
28
+ toBuffer(maxBytes?: number): Buffer;
29
+ private push;
30
+ private appendText;
31
+ private flushText;
32
+ private flushAllText;
33
+ }
@@ -0,0 +1,214 @@
1
+ /**
2
+ * Structured session transcript: the harness's typed event stream rendered
3
+ * as one JSON line per part. This becomes the run's log artifact, replacing
4
+ * the merged stdout/stderr a CLI invocation would have produced — every
5
+ * tool call, file-change notice, and finish reason the harness reported is
6
+ * preserved verbatim and hash-addressed in the receipt.
7
+ *
8
+ * The recorder is deliberately liberal in what it accepts: harness stream
9
+ * parts are an experimental, evolving union (`@ai-sdk/harness` canary), so
10
+ * known part types are mapped to stable transcript shapes and unknown types
11
+ * are recorded by name only. Nothing here throws on a novel part.
12
+ */
13
/**
 * Reduce an arbitrary value to plain JSON-compatible data for the transcript.
 *
 * The value is round-tripped through JSON, with bigints rendered as decimal
 * strings. When that fails (cyclic structure, throwing `toJSON`, or a value
 * JSON cannot represent such as a function), the value's string form is
 * returned instead. This function never throws: if even the string
 * conversion fails (for example a cyclic null-prototype object, which has no
 * `toString`), a fixed placeholder is returned.
 *
 * @param {unknown} value - Any value taken from a harness stream part.
 * @returns {unknown} `undefined` for `undefined`; otherwise JSON-compatible
 *   data, or a string fallback.
 */
function jsonSafe(value) {
    if (value === undefined) {
        return undefined;
    }
    try {
        const encoded = JSON.stringify(value, (_key, v) => (typeof v === "bigint" ? String(v) : v));
        return JSON.parse(encoded);
    }
    catch {
        try {
            return String(value);
        }
        catch {
            // String() itself throws for objects with no usable primitive
            // conversion; the recorder must not throw on a novel payload.
            return "[unserializable]";
        }
    }
}
23
/**
 * Collects harness stream parts into an ordered list of transcript lines and
 * renders them as JSONL.
 *
 * Text and reasoning deltas are buffered per part id and emitted as a single
 * line when the matching `*-end` part arrives (or when the transcript is
 * rendered). Unknown part types are recorded by name only.
 */
export class TranscriptRecorder {
    /** Transcript lines in the order they were recorded. */
    lines = [];
    /** Buffered text/reasoning deltas keyed by part id, awaiting a flush. */
    textById = new Map();
    /** True once an error/abort part, an "error" finish reason, or `fail()` was seen. */
    failed = false;
    /**
     * Ingest one stream part from the harness agent's full stream.
     *
     * @param {unknown} value - A stream part; anything that is not an object
     *   with a string `type` is ignored.
     * @returns {void}
     */
    ingest(value) {
        if (typeof value !== "object" || value === null)
            return;
        const part = value;
        if (typeof part.type !== "string")
            return;
        switch (part.type) {
            case "start":
            case "start-step":
            case "finish-step":
            case "text-start":
            case "reasoning-start":
            case "raw":
                return; // structural framing; no evidence content of its own
            case "stream-start": {
                this.push({
                    part: "stream-start",
                    ...(part.modelId !== undefined ? { modelId: part.modelId } : {}),
                    ...(Array.isArray(part.warnings) && part.warnings.length > 0
                        ? { warnings: jsonSafe(part.warnings) }
                        : {})
                });
                return;
            }
            case "text-delta": {
                this.appendText("text", part);
                return;
            }
            case "reasoning-delta": {
                this.appendText("reasoning", part);
                return;
            }
            case "text-end": {
                this.flushText("text", part);
                return;
            }
            case "reasoning-end": {
                this.flushText("reasoning", part);
                return;
            }
            case "tool-call": {
                this.push({
                    part: "tool-call",
                    toolCallId: part.toolCallId,
                    toolName: part.toolName,
                    input: jsonSafe(part.input)
                });
                return;
            }
            case "tool-result": {
                // HarnessV1 emits `result`; the AI SDK stream surface emits `output`.
                const output = "output" in part ? part.output : part.result;
                this.push({
                    part: "tool-result",
                    toolCallId: part.toolCallId,
                    toolName: part.toolName,
                    output: jsonSafe(output),
                    ...(part.isError === true ? { isError: true } : {})
                });
                return;
            }
            case "tool-error": {
                this.push({
                    part: "tool-error",
                    toolCallId: part.toolCallId,
                    toolName: part.toolName,
                    error: errorMessage(part.error)
                });
                return;
            }
            case "tool-approval-request": {
                this.push({
                    part: "tool-approval-request",
                    approvalId: part.approvalId,
                    toolCallId: part.toolCallId
                });
                return;
            }
            case "file-change": {
                this.push({ part: "file-change", event: part.event, path: part.path });
                return;
            }
            case "compaction": {
                this.push({ part: "compaction", trigger: part.trigger, summary: part.summary });
                return;
            }
            case "finish": {
                const finishReason = finishReasonOf(part.finishReason);
                if (finishReason === "error")
                    this.failed = true;
                this.push({
                    part: "finish",
                    finishReason,
                    ...(part.totalUsage !== undefined ? { totalUsage: jsonSafe(part.totalUsage) } : {})
                });
                return;
            }
            case "error":
            case "abort": {
                this.failed = true;
                this.push({ part: part.type, error: errorMessage(part.error) });
                return;
            }
            default: {
                // Unknown/novel part types: record the occurrence without an
                // unbounded payload, so the transcript stays evidence-shaped even
                // as the experimental harness union evolves.
                this.push({ part: part.type });
                return;
            }
        }
    }
    /**
     * Record a turn-level failure (a thrown stream/iteration error).
     *
     * @param {unknown} error - The thrown value.
     * @returns {void}
     */
    fail(error) {
        this.failed = true;
        this.push({ part: "turn-failed", error: errorMessage(error) });
    }
    /**
     * @returns {number} 0 when the turn finished cleanly, 1 once any failure
     *   (error/abort part, "error" finish reason, or `fail()`) was recorded.
     */
    exitCode() {
        return this.failed ? 1 : 0;
    }
    /**
     * Render the transcript as a JSONL buffer, optionally truncated.
     *
     * Any text still buffered is flushed first. When the rendered buffer is
     * longer than `maxBytes`, it is cut at the largest UTF-8 character
     * boundary that does not exceed `maxBytes`, so the result never ends in a
     * partial multi-byte sequence. The final line may still be incomplete.
     *
     * @param {number} [maxBytes] - Upper bound on the returned size in bytes.
     * @returns {Buffer} The UTF-8 encoded transcript.
     */
    toBuffer(maxBytes) {
        this.flushAllText();
        const body = this.lines.map((line) => JSON.stringify(line)).join("\n");
        const buffer = Buffer.from(body.length > 0 ? `${body}\n` : "", "utf8");
        if (maxBytes !== undefined && buffer.byteLength > maxBytes) {
            let end = Math.trunc(maxBytes);
            // `buffer[end]` is the first byte that would be dropped; while it is
            // a UTF-8 continuation byte (0b10xxxxxx) the cut would split a
            // character, so move the cut back to that character's lead byte.
            while (end > 0 && (buffer[end] & 0xc0) === 0x80) {
                end -= 1;
            }
            return buffer.subarray(0, end);
        }
        return buffer;
    }
    /** Append one line to the transcript. */
    push(line) {
        this.lines.push(line);
    }
    /** Buffer a text/reasoning delta under its part id (or a per-kind anonymous id). */
    appendText(kind, part) {
        const id = typeof part.id === "string" ? part.id : `${kind}:anonymous`;
        // HarnessV1 emits `delta`; the AI SDK stream surface emits `text`.
        const deltaValue = "delta" in part ? part.delta : part.text;
        const delta = typeof deltaValue === "string" ? deltaValue : "";
        const entry = this.textById.get(id) ?? { kind, text: "" };
        entry.text += delta;
        this.textById.set(id, entry);
    }
    /** Emit the buffered text for one part id as a single line; empty text is dropped. */
    flushText(kind, part) {
        const id = typeof part.id === "string" ? part.id : `${kind}:anonymous`;
        const entry = this.textById.get(id);
        if (!entry)
            return;
        this.textById.delete(id);
        if (entry.text.length === 0)
            return;
        this.push({ part: entry.kind, text: entry.text });
    }
    /** Emit every still-buffered, non-empty text entry and clear the buffer. */
    flushAllText() {
        for (const entry of this.textById.values()) {
            if (entry.text.length > 0)
                this.push({ part: entry.kind, text: entry.text });
        }
        this.textById.clear();
    }
}
189
/**
 * Normalize a finish reason to a plain string.
 *
 * The AI SDK stream surface reports it as a string; the harness/provider
 * level reports a `{ unified, raw? }` object. Anything else, including an
 * object whose `unified` is not a string, yields "unknown".
 *
 * @param {unknown} value - The `finishReason` field of a `finish` part.
 * @returns {string} The finish reason, or "unknown".
 */
function finishReasonOf(value) {
    if (typeof value === "string") {
        return value;
    }
    const isObject = typeof value === "object" && value !== null;
    const candidate = isObject && "unified" in value ? value.unified : undefined;
    return typeof candidate === "string" ? candidate : "unknown";
}
203
/**
 * Render an error-like value as text for a transcript line.
 *
 * Strings are returned unchanged and `Error` instances contribute their
 * `message`. Any other value is JSON-encoded after being passed through
 * `jsonSafe`, falling back to `String(error)` if that encoding throws.
 *
 * @param {unknown} error - The thrown or reported error value.
 * @returns {string | undefined} The message text; `undefined` when the value
 *   has no JSON representation (for example `undefined` itself).
 */
function errorMessage(error) {
    if (typeof error === "string") {
        return error;
    }
    if (error instanceof Error) {
        return error.message;
    }
    let rendered;
    try {
        rendered = JSON.stringify(jsonSafe(error));
    }
    catch {
        rendered = String(error);
    }
    return rendered;
}
package/package.json ADDED
@@ -0,0 +1,45 @@
1
+ {
2
+ "name": "@fusionkit/session-harness",
3
+ "private": false,
4
+ "version": "0.1.0",
5
+ "repository": {
6
+ "type": "git",
7
+ "url": "git+https://github.com/velum-labs/handoffkit.git",
8
+ "directory": "packages/session-harness"
9
+ },
10
+ "description": "AI SDK harness session backend for Warrant runners: drives vendor agent harnesses (Claude Code) through @ai-sdk/harness inside a Vercel Sandbox microVM, capturing the structured harness event stream as governed-session evidence.",
11
+ "license": "UNLICENSED",
12
+ "type": "module",
13
+ "exports": {
14
+ ".": {
15
+ "types": "./dist/index.d.ts",
16
+ "default": "./dist/index.js"
17
+ }
18
+ },
19
+ "files": [
20
+ "dist"
21
+ ],
22
+ "publishConfig": {
23
+ "registry": "https://registry.npmjs.org",
24
+ "access": "public",
25
+ "provenance": true
26
+ },
27
+ "dependencies": {
28
+ "@ai-sdk/harness": "1.0.0-canary.6",
29
+ "@ai-sdk/harness-claude-code": "1.0.0-canary.2",
30
+ "@ai-sdk/harness-pi": "1.0.0-canary.2",
31
+ "@ai-sdk/sandbox-just-bash": "1.0.0-canary.6",
32
+ "@ai-sdk/sandbox-vercel": "1.0.0-canary.6",
33
+ "ws": "8.21.0",
34
+ "@fusionkit/runner": "0.1.0",
35
+ "@fusionkit/session-hermetic": "0.1.0",
36
+ "@fusionkit/protocol": "0.1.0",
37
+ "@fusionkit/session-vercel-sandbox": "0.1.0"
38
+ },
39
+ "devDependencies": {
40
+ "@fusionkit/plane": "0.1.0",
41
+ "@fusionkit/sdk": "0.1.0",
42
+ "@fusionkit/testkit": "0.1.0",
43
+ "@fusionkit/workspace": "0.1.0"
44
+ }
45
+ }