@fusionkit/session-harness 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth.d.ts +34 -0
- package/dist/auth.js +119 -0
- package/dist/backend.d.ts +63 -0
- package/dist/backend.js +154 -0
- package/dist/claude-code.d.ts +57 -0
- package/dist/claude-code.js +84 -0
- package/dist/index.d.ts +22 -0
- package/dist/index.js +18 -0
- package/dist/pi.d.ts +66 -0
- package/dist/pi.js +72 -0
- package/dist/test/fakes.d.ts +24 -0
- package/dist/test/fakes.js +187 -0
- package/dist/test/harness.test.d.ts +1 -0
- package/dist/test/harness.test.js +275 -0
- package/dist/test/pi.test.d.ts +1 -0
- package/dist/test/pi.test.js +135 -0
- package/dist/transcript.d.ts +33 -0
- package/dist/transcript.js +214 -0
- package/package.json +45 -0
package/dist/pi.js
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { hermeticBackend } from "@fusionkit/session-hermetic";
|
|
2
|
+
import { piAuthFromEnv } from "./auth.js";
|
|
3
|
+
import { AiSdkHarnessBackend, isAgentRunFor } from "./backend.js";
|
|
4
|
+
// Pi keeps its own per-session state under these directories; they are
|
|
5
|
+
// runtime plumbing, not workspace output, so they are excluded from staging
|
|
6
|
+
// and mirror-back on top of the shared sandbox ignore set.
|
|
7
|
+
// Shared by reference: piBinding() hands this same array to every binding as `extraIgnores`.
const PI_EXTRA_IGNORES = [".pi", ".agent-runs"];
|
|
8
|
+
// The default factories load the AI SDK Pi wrappers lazily, on first use,
|
|
9
|
+
// rather than with a top-of-module import. This is deliberate and is the one
|
|
10
|
+
// sanctioned exception to the imports-at-top rule in this package:
|
|
11
|
+
//
|
|
12
|
+
// - `@ai-sdk/harness-pi` statically imports `@earendil-works/pi-coding-agent`
|
|
13
|
+
// (a ~12 MB host coding-agent runtime) at its own module top level, and
|
|
14
|
+
// `@ai-sdk/sandbox-just-bash` statically imports `just-bash`. Both are
|
|
15
|
+
// genuine *host* runtimes for the real Pi path only.
|
|
16
|
+
// - The governed-plane code, the unit tests (piAuthFromEnv), and the
|
|
17
|
+
// fake-harness e2e path must not require those runtimes to be installed —
|
|
18
|
+
// exactly as the claude-code path never requires `@anthropic-ai/claude-agent-sdk`
|
|
19
|
+
// on the host (it bootstraps inside the sandbox). Both missing peers are
|
|
20
|
+
// declared ignorable in pnpm-workspace.yaml.
|
|
21
|
+
// - A top-level import here would force-load pi-coding-agent the moment
|
|
22
|
+
// anything imports this module, defeating that. So the runtime values are
|
|
23
|
+
// loaded only when a default Pi binding actually executes a run; the types
|
|
24
|
+
// come from `import type` (erased, no runtime cost) at call sites.
|
|
25
|
+
/**
 * Build the default Pi harness factory.
 *
 * The returned factory resolves credentials from the session env first and
 * only then lazily loads `@ai-sdk/harness-pi`, so a run whose env is rejected
 * by `piAuthFromEnv` never loads (or needs) the host Pi runtime.
 *
 * @param options Binding options. `model` and `thinking` are forwarded to
 *   `createPi` (as `model` / `thinkingLevel`) only when they are defined.
 * @returns An async factory that takes an input carrying `env` and resolves
 *   to the adapter created by `createPi`.
 */
function defaultPiHarness(options) {
    return async (input) => {
        // Resolve auth before the dynamic import: if the env is unusable there
        // is no reason to pull in the heavy host runtime, and the caller sees
        // the auth error rather than a module-resolution error.
        const auth = piAuthFromEnv(input.env);
        const { createPi } = await import("@ai-sdk/harness-pi");
        const adapter = createPi({
            auth,
            ...(options.model !== undefined ? { model: options.model } : {}),
            ...(options.thinking !== undefined ? { thinkingLevel: options.thinking } : {})
        });
        // Same instance-split bridge as the claude-code binding: harness-pi
        // resolves its own @ai-sdk/harness peer, nominally distinct from the
        // agent's despite the exact-version alignment.
        return adapter;
    };
}
|
|
40
|
+
/**
 * Build the default sandbox-provider factory for the Pi binding.
 *
 * Neither the options nor the per-run input are consulted; every call simply
 * asks `@ai-sdk/sandbox-just-bash` for a new sandbox.
 *
 * @param _options Unused.
 * @returns An async factory resolving to the value of `createJustBashSandbox()`.
 */
function defaultPiSandbox(_options) {
    return async (_input) => {
        // Loaded on first use so importing this module never requires just-bash.
        const { createJustBashSandbox: makeSandbox } = await import("@ai-sdk/sandbox-just-bash");
        // just-bash has no ports and no real network, so nothing from the
        // contract needs configuring: a fresh virtual filesystem per session
        // is all there is. The generic backend's onSandboxSession hook stages
        // the workspace into it.
        return makeSandbox();
    };
}
|
|
50
|
+
/**
 * Reports whether the contract asks for the pi agent harness.
 *
 * @param contract The run contract to inspect.
 * @returns Whatever `isAgentRunFor(contract, "pi")` returns.
 */
export function isPiAgentRun(contract) {
    const requestsPi = isAgentRunFor(contract, "pi");
    return requestsPi;
}
|
|
54
|
+
/**
 * The Pi harness binding (hermetic isolation tier).
 *
 * @param options Optional overrides. `createHarness` and
 *   `createSandboxProvider` replace the lazy defaults when provided; the same
 *   options object is passed to the default factories otherwise.
 * @returns The binding descriptor for agent kind "pi".
 */
export function piBinding(options = {}) {
    // Caller-supplied factories win; a default is only built when one is absent.
    let harnessFactory = options.createHarness;
    if (harnessFactory === undefined || harnessFactory === null) {
        harnessFactory = defaultPiHarness(options);
    }
    let sandboxFactory = options.createSandboxProvider;
    if (sandboxFactory === undefined || sandboxFactory === null) {
        sandboxFactory = defaultPiSandbox(options);
    }
    const binding = {
        agentKind: "pi",
        isolation: "hermetic",
        extraIgnores: PI_EXTRA_IGNORES,
        createHarness: harnessFactory,
        createSandboxProvider: sandboxFactory
    };
    return binding;
}
|
|
64
|
+
/**
 * Create an AI SDK harness session backend for the Pi runtime. The backend
 * hosts `piBinding(options)` and delegates to a fallback backend, which is
 * `options.fallback` when given and `hermeticBackend()` otherwise. This is
 * the runner's single "hermetic" tier when local-model swarm workers are in
 * play.
 *
 * @param options Binding options plus an optional `fallback` backend.
 * @returns A new `AiSdkHarnessBackend`.
 */
export function piHarnessBackend(options = {}) {
    let fallback = options.fallback;
    if (fallback === undefined || fallback === null) {
        fallback = hermeticBackend();
    }
    const binding = piBinding(options);
    return new AiSdkHarnessBackend({ binding, fallback });
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { HarnessV1SandboxProvider } from "@ai-sdk/harness";
|
|
2
|
+
import type { HarnessAdapter } from "../index.js";
|
|
3
|
+
/**
 * Mutable record that tests hand to the fakes to observe what happened.
 *
 * - `prompts`: one entry per prompt turn; non-string prompts are stored as
 *   their JSON serialization.
 * - `envSeen`: not written by `fakeHarness` itself; the harness test pushes
 *   the env it receives in its `createHarness` callback.
 * - `workDirs`: the session work directory of every started session.
 * - `destroyed`: number of sessions destroyed so far.
 */
export type FakeHarnessLog = {
|
|
4
|
+
prompts: string[];
|
|
5
|
+
envSeen: Record<string, string>[];
|
|
6
|
+
workDirs: string[];
|
|
7
|
+
destroyed: number;
|
|
8
|
+
};
|
|
9
|
+
/** Create a fresh log: three empty arrays and a `destroyed` count of 0. */
export declare function emptyHarnessLog(): FakeHarnessLog;
|
|
10
|
+
/**
|
|
11
|
+
* A fake harness adapter that reads a staged workspace file and writes a
|
|
12
|
+
* result file through the sandbox surface, then emits a clean structured
|
|
13
|
+
* stream. `harnessId` lets a test label it as claude-code, pi, etc.; the
|
|
14
|
+
* behavior is identical because the generic backend treats every binding the
|
|
15
|
+
* same way.
|
|
16
|
+
*/
|
|
17
|
+
export declare function fakeHarness(log: FakeHarnessLog, harnessId?: string): HarnessAdapter; // harnessId defaults to "fake-harness" in the implementation
|
|
18
|
+
/**
|
|
19
|
+
* A sandbox provider over a local directory: `run`/`spawn` execute through
|
|
20
|
+
* /bin/sh and the file surface is node:fs. Implements the same
|
|
21
|
+
* `HarnessV1SandboxProvider` contract as the real providers, so the generic
|
|
22
|
+
* backend's staging and mirror-back run unchanged.
|
|
23
|
+
*/
|
|
24
|
+
export declare function fakeLocalSandboxProvider(root: string): HarnessV1SandboxProvider; // root is the session's default working directory and the cwd for run/spawn when none is given
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared test doubles for the harness backends: a fake harness adapter and a
|
|
3
|
+
* sandbox provider over a local directory. Together they exercise the entire
|
|
4
|
+
* generic backend path (staging, transcript, mirror-back, event chain)
|
|
5
|
+
* through the real `HarnessAgent`, replacing only what would otherwise need
|
|
6
|
+
* cloud credentials or a microVM.
|
|
7
|
+
*/
|
|
8
|
+
import { execFile, spawn as spawnChild } from "node:child_process";
|
|
9
|
+
import { createReadStream } from "node:fs";
|
|
10
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
11
|
+
import { dirname } from "node:path";
|
|
12
|
+
import { Readable } from "node:stream";
|
|
13
|
+
import { promisify } from "node:util";
|
|
14
|
+
// Promise-returning form of execFile.
const execFileAsync = promisify(execFile);
// Fixed token accounting reported on the finish parts the fake harness emits.
const usage = {
    inputTokens: {
        total: 7,
        noCache: 7,
        cacheRead: undefined,
        cacheWrite: undefined
    },
    outputTokens: {
        total: 3,
        text: 3,
        reasoning: undefined
    }
};
|
|
19
|
+
/**
 * Create a fresh harness log with nothing recorded yet.
 *
 * @returns A new object with empty `prompts`, `envSeen` and `workDirs`
 *   arrays and a `destroyed` count of 0.
 */
export function emptyHarnessLog() {
    const freshLog = {
        prompts: [],
        envSeen: [],
        workDirs: [],
        destroyed: 0
    };
    return freshLog;
}
|
|
22
|
+
/**
|
|
23
|
+
* A fake harness adapter that reads a staged workspace file and writes a
|
|
24
|
+
* result file through the sandbox surface, then emits a clean structured
|
|
25
|
+
* stream. `harnessId` lets a test label it as claude-code, pi, etc.; the
|
|
26
|
+
* behavior is identical because the generic backend treats every binding the
|
|
27
|
+
* same way.
|
|
28
|
+
*/
|
|
29
|
+
export function fakeHarness(log, harnessId = "fake-harness") {
|
|
30
|
+
const resumeState = {
|
|
31
|
+
harnessId,
|
|
32
|
+
specificationVersion: "harness-v1",
|
|
33
|
+
data: {}
|
|
34
|
+
};
|
|
35
|
+
return {
|
|
36
|
+
specificationVersion: "harness-v1",
|
|
37
|
+
harnessId,
|
|
38
|
+
builtinTools: {},
|
|
39
|
+
doStart: async (start) => {
|
|
40
|
+
const sandbox = start.sandboxSession.restricted();
|
|
41
|
+
log.workDirs.push(start.sessionWorkDir);
|
|
42
|
+
const session = {
|
|
43
|
+
sessionId: start.sessionId,
|
|
44
|
+
isResume: false,
|
|
45
|
+
doPromptTurn: async ({ prompt, emit }) => {
|
|
46
|
+
const promptText = typeof prompt === "string" ? prompt : JSON.stringify(prompt);
|
|
47
|
+
log.prompts.push(promptText);
|
|
48
|
+
const staged = await sandbox.readTextFile({
|
|
49
|
+
path: `${start.sessionWorkDir}/data.txt`
|
|
50
|
+
});
|
|
51
|
+
await sandbox.writeTextFile({
|
|
52
|
+
path: `${start.sessionWorkDir}/result.txt`,
|
|
53
|
+
content: `lines=${(staged ?? "").trim().split("\n").length}\n`
|
|
54
|
+
});
|
|
55
|
+
const parts = [
|
|
56
|
+
{ type: "stream-start", warnings: [] },
|
|
57
|
+
{ type: "text-start", id: "t1" },
|
|
58
|
+
{ type: "text-delta", id: "t1", delta: "governed harness turn" },
|
|
59
|
+
{ type: "text-end", id: "t1" },
|
|
60
|
+
{ type: "file-change", event: "create", path: "result.txt" },
|
|
61
|
+
{ type: "finish-step", finishReason: { unified: "stop", raw: "end_turn" }, usage },
|
|
62
|
+
{ type: "finish", finishReason: { unified: "stop", raw: "end_turn" }, totalUsage: usage }
|
|
63
|
+
];
|
|
64
|
+
for (const part of parts)
|
|
65
|
+
emit(part);
|
|
66
|
+
return {
|
|
67
|
+
submitToolResult: async () => undefined,
|
|
68
|
+
done: Promise.resolve()
|
|
69
|
+
};
|
|
70
|
+
},
|
|
71
|
+
doCompact: async () => {
|
|
72
|
+
throw new Error("compaction unsupported by the fake harness");
|
|
73
|
+
},
|
|
74
|
+
doContinueTurn: async ({ emit }) => {
|
|
75
|
+
emit({ type: "finish", finishReason: { unified: "stop", raw: "end_turn" }, totalUsage: usage });
|
|
76
|
+
return { submitToolResult: async () => undefined, done: Promise.resolve() };
|
|
77
|
+
},
|
|
78
|
+
doSuspendTurn: async () => ({ ...resumeState, type: "continue-turn" }),
|
|
79
|
+
doDetach: async () => ({ ...resumeState, type: "resume-session" }),
|
|
80
|
+
doStop: async () => ({ ...resumeState, type: "resume-session" }),
|
|
81
|
+
doDestroy: async () => {
|
|
82
|
+
log.destroyed += 1;
|
|
83
|
+
}
|
|
84
|
+
};
|
|
85
|
+
return session;
|
|
86
|
+
}
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* A sandbox provider over a local directory: `run`/`spawn` execute through
|
|
91
|
+
* /bin/sh and the file surface is node:fs. Implements the same
|
|
92
|
+
* `HarnessV1SandboxProvider` contract as the real providers, so the generic
|
|
93
|
+
* backend's staging and mirror-back run unchanged.
|
|
94
|
+
*/
|
|
95
|
+
export function fakeLocalSandboxProvider(root) {
|
|
96
|
+
async function runCommand(command, workingDirectory) {
|
|
97
|
+
try {
|
|
98
|
+
const { stdout, stderr } = await execFileAsync("/bin/sh", ["-c", command], {
|
|
99
|
+
cwd: workingDirectory ?? root
|
|
100
|
+
});
|
|
101
|
+
return { exitCode: 0, stdout, stderr };
|
|
102
|
+
}
|
|
103
|
+
catch (error) {
|
|
104
|
+
const failure = error;
|
|
105
|
+
return {
|
|
106
|
+
exitCode: typeof failure.code === "number" ? failure.code : 1,
|
|
107
|
+
stdout: failure.stdout ?? "",
|
|
108
|
+
stderr: failure.stderr ?? ""
|
|
109
|
+
};
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
const session = {
|
|
113
|
+
id: "fake-local-sandbox",
|
|
114
|
+
description: `fake local sandbox at ${root}`,
|
|
115
|
+
defaultWorkingDirectory: root,
|
|
116
|
+
ports: [4000],
|
|
117
|
+
getPortUrl: async ({ port, protocol }) => `${protocol ?? "http"}://127.0.0.1:${port}/`,
|
|
118
|
+
stop: async () => undefined,
|
|
119
|
+
restricted: () => session,
|
|
120
|
+
readFile: async ({ path }) => {
|
|
121
|
+
try {
|
|
122
|
+
return Readable.toWeb(createReadStream(path));
|
|
123
|
+
}
|
|
124
|
+
catch {
|
|
125
|
+
return null;
|
|
126
|
+
}
|
|
127
|
+
},
|
|
128
|
+
readBinaryFile: async ({ path }) => {
|
|
129
|
+
try {
|
|
130
|
+
return new Uint8Array(await readFile(path));
|
|
131
|
+
}
|
|
132
|
+
catch {
|
|
133
|
+
return null;
|
|
134
|
+
}
|
|
135
|
+
},
|
|
136
|
+
readTextFile: async ({ path }) => {
|
|
137
|
+
try {
|
|
138
|
+
return await readFile(path, "utf8");
|
|
139
|
+
}
|
|
140
|
+
catch {
|
|
141
|
+
return null;
|
|
142
|
+
}
|
|
143
|
+
},
|
|
144
|
+
writeFile: async ({ path, content }) => {
|
|
145
|
+
const chunks = [];
|
|
146
|
+
const reader = content.getReader();
|
|
147
|
+
for (;;) {
|
|
148
|
+
const { done, value } = await reader.read();
|
|
149
|
+
if (done)
|
|
150
|
+
break;
|
|
151
|
+
chunks.push(value);
|
|
152
|
+
}
|
|
153
|
+
await mkdir(dirname(path), { recursive: true });
|
|
154
|
+
await writeFile(path, Buffer.concat(chunks));
|
|
155
|
+
},
|
|
156
|
+
writeBinaryFile: async ({ path, content }) => {
|
|
157
|
+
await mkdir(dirname(path), { recursive: true });
|
|
158
|
+
await writeFile(path, content);
|
|
159
|
+
},
|
|
160
|
+
writeTextFile: async ({ path, content }) => {
|
|
161
|
+
await mkdir(dirname(path), { recursive: true });
|
|
162
|
+
await writeFile(path, content, "utf8");
|
|
163
|
+
},
|
|
164
|
+
run: async ({ command, workingDirectory }) => runCommand(command, workingDirectory),
|
|
165
|
+
spawn: async ({ command, workingDirectory }) => {
|
|
166
|
+
const child = spawnChild("/bin/sh", ["-c", command], {
|
|
167
|
+
cwd: workingDirectory ?? root
|
|
168
|
+
});
|
|
169
|
+
return {
|
|
170
|
+
...(child.pid !== undefined ? { pid: child.pid } : {}),
|
|
171
|
+
stdout: Readable.toWeb(child.stdout),
|
|
172
|
+
stderr: Readable.toWeb(child.stderr),
|
|
173
|
+
wait: () => new Promise((resolve) => {
|
|
174
|
+
child.on("close", (code) => resolve({ exitCode: code ?? 0 }));
|
|
175
|
+
}),
|
|
176
|
+
kill: async () => {
|
|
177
|
+
child.kill();
|
|
178
|
+
}
|
|
179
|
+
};
|
|
180
|
+
}
|
|
181
|
+
};
|
|
182
|
+
return {
|
|
183
|
+
specificationVersion: "harness-sandbox-v1",
|
|
184
|
+
providerId: "fake-local",
|
|
185
|
+
createSession: async () => session
|
|
186
|
+
};
|
|
187
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
import assert from "node:assert/strict";
|
|
2
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { after, before, test } from "node:test";
|
|
6
|
+
import { verifyReceiptBundle } from "@fusionkit/protocol";
|
|
7
|
+
import { CapabilityMismatchError, prepareExecution } from "@fusionkit/runner";
|
|
8
|
+
import { makeRepo, startStack } from "@fusionkit/testkit";
|
|
9
|
+
import { captureWorkspace } from "@fusionkit/workspace";
|
|
10
|
+
import { aiSdkHarnessBackend, claudeCodeAuthFromEnv, isClaudeCodeAgentRun, TranscriptRecorder } from "../index.js";
|
|
11
|
+
import { emptyHarnessLog, fakeHarness, fakeLocalSandboxProvider } from "./fakes.js";
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// auth: explicit credentials only, fail closed on everything else
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Asserts that the env is rejected with a CapabilityMismatchError whose
// message matches `messagePattern`.
const assertAuthRejected = (env, messagePattern) => {
    assert.throws(() => claudeCodeAuthFromEnv(env), (error) => error instanceof CapabilityMismatchError && messagePattern.test(error.message));
};
test("auth: anthropic credentials map to explicit settings with host fallback suppressed", () => {
    const expected = {
        anthropic: { apiKey: "sk-ant-test", authToken: "", baseUrl: "" }
    };
    assert.deepEqual(claudeCodeAuthFromEnv({ ANTHROPIC_API_KEY: "sk-ant-test" }), expected);
});
test("auth: an auth token alone still occupies the api-key slot with an empty string", () => {
    const env = {
        ANTHROPIC_AUTH_TOKEN: "tok",
        ANTHROPIC_BASE_URL: "https://proxy.example.com"
    };
    const expected = {
        anthropic: { apiKey: "", authToken: "tok", baseUrl: "https://proxy.example.com" }
    };
    assert.deepEqual(claudeCodeAuthFromEnv(env), expected);
});
test("auth: a gateway key wins and the default base url is pinned explicitly", () => {
    const env = {
        AI_GATEWAY_API_KEY: "gw-key",
        ANTHROPIC_API_KEY: "sk-ant-test"
    };
    const expected = {
        gateway: { apiKey: "gw-key", baseUrl: "https://ai-gateway.vercel.sh" }
    };
    assert.deepEqual(claudeCodeAuthFromEnv(env), expected);
});
test("auth: no credential in the session env fails closed", () => {
    assertAuthRejected({}, /refusing to fall back/);
});
test("auth: env vars the harness path cannot deliver fail closed", () => {
    assertAuthRejected({ ANTHROPIC_API_KEY: "k", CUSTOM_FLAG: "1" }, /CUSTOM_FLAG/);
});
|
|
42
|
+
test("auth: env vars the harness path cannot deliver fail closed", () => {
|
|
43
|
+
assert.throws(() => claudeCodeAuthFromEnv({ ANTHROPIC_API_KEY: "k", CUSTOM_FLAG: "1" }), (error) => error instanceof CapabilityMismatchError && /CUSTOM_FLAG/.test(error.message));
|
|
44
|
+
});
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
// transcript: structured stream parts become JSONL evidence
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
function transcriptLines(recorder) {
|
|
49
|
+
const body = recorder.toBuffer().toString("utf8").trim();
|
|
50
|
+
if (body.length === 0)
|
|
51
|
+
return [];
|
|
52
|
+
return body.split("\n").map((line) => JSON.parse(line));
|
|
53
|
+
}
|
|
54
|
+
// Feeds the parts to the recorder one at a time, in order.
const ingestAll = (recorder, parts) => {
    for (const part of parts) {
        recorder.ingest(part);
    }
};
test("transcript: text deltas aggregate, tool calls and finish are recorded", () => {
    const recorder = new TranscriptRecorder();
    ingestAll(recorder, [
        { type: "stream-start", modelId: "claude-sonnet-4-6" },
        { type: "text-start", id: "t1" },
        { type: "text-delta", id: "t1", delta: "hello " },
        { type: "text-delta", id: "t1", delta: "world" },
        { type: "text-end", id: "t1" },
        { type: "tool-call", toolCallId: "c1", toolName: "bash", input: { command: "npm test" } },
        { type: "tool-result", toolCallId: "c1", toolName: "bash", output: { exitCode: 0 } },
        { type: "file-change", event: "modify", path: "src/app.ts" },
        { type: "finish", finishReason: "stop" }
    ]);
    const lines = transcriptLines(recorder);
    const partNames = lines.map((line) => line.part);
    // The two deltas collapse into one "text" entry.
    assert.deepEqual(partNames, ["stream-start", "text", "tool-call", "tool-result", "file-change", "finish"]);
    assert.equal(lines[1]?.text, "hello world");
    assert.deepEqual(lines[2]?.input, { command: "npm test" });
    assert.equal(lines[4]?.path, "src/app.ts");
    assert.equal(recorder.exitCode(), 0);
});
test("transcript: the AI SDK field spellings (text, output) are accepted too", () => {
    const recorder = new TranscriptRecorder();
    ingestAll(recorder, [
        { type: "text-start", id: "a" },
        { type: "text-delta", id: "a", text: "via-ai-sdk" },
        { type: "text-end", id: "a" },
        { type: "tool-result", toolCallId: "c", toolName: "bash", result: { ok: true } }
    ]);
    const [textLine, resultLine] = transcriptLines(recorder);
    assert.equal(textLine?.text, "via-ai-sdk");
    assert.deepEqual(resultLine?.output, { ok: true });
});
test("transcript: error parts and turn failures produce a non-zero exit code", () => {
    // An explicit error part.
    const errored = new TranscriptRecorder();
    errored.ingest({ type: "error", error: new Error("bridge died") });
    assert.equal(errored.exitCode(), 1);
    assert.equal(transcriptLines(errored)[0]?.error, "bridge died");
    // A finish part whose reason is "error".
    const failed = new TranscriptRecorder();
    failed.ingest({ type: "finish", finishReason: "error" });
    assert.equal(failed.exitCode(), 1);
    // A failure reported through fail() rather than a stream part.
    const thrown = new TranscriptRecorder();
    thrown.fail(new Error("turn exploded"));
    assert.equal(thrown.exitCode(), 1);
    assert.equal(transcriptLines(thrown)[0]?.part, "turn-failed");
});
test("transcript: unknown part types are recorded by name without payload", () => {
    const recorder = new TranscriptRecorder();
    const oversized = "x".repeat(4096);
    recorder.ingest({ type: "some-novel-part", giant: oversized });
    assert.deepEqual(transcriptLines(recorder), [{ part: "some-novel-part" }]);
});
test("transcript: the log honors the contract's max-bytes cap", () => {
    const recorder = new TranscriptRecorder();
    ingestAll(recorder, [
        { type: "text-start", id: "t" },
        { type: "text-delta", id: "t", delta: "y".repeat(1000) },
        { type: "text-end", id: "t" }
    ]);
    const capped = recorder.toBuffer(64);
    assert.ok(capped.byteLength <= 64);
});
|
|
122
|
+
// ---------------------------------------------------------------------------
|
|
123
|
+
// delegation: non-claude-code executions go to the fallback backend untouched
|
|
124
|
+
// ---------------------------------------------------------------------------
|
|
125
|
+
/**
 * Build a minimal contract fixture for the delegation test.
 *
 * @param agentKind Value stored at `agent.kind`.
 * @param prompt Value stored at `task.prompt`.
 * @returns A contract object issued now and expiring 60 seconds from now,
 *   with all-zero hashes and no signatures.
 */
function contractFor(agentKind, prompt) {
    const zeroRef = "0".repeat(40);
    const zeroHash = "0".repeat(64);
    const issuedAt = new Date().toISOString();
    const expiresAt = new Date(Date.now() + 60_000).toISOString();
    const workspace = {
        version: "warrant.manifest.v1",
        baseRef: zeroRef,
        bundleHash: zeroHash,
        untrackedFiles: [],
        deniedPatterns: [],
        deniedPaths: []
    };
    return {
        version: "warrant.contract.v1",
        runId: "run_test",
        issuedAt,
        issuer: { keyId: "k", role: "plane" },
        requestedBy: { kind: "human", id: "tester" },
        agent: { kind: agentKind },
        task: { prompt },
        runner: { pool: "eng-prod" },
        workspace,
        policyHash: zeroHash,
        secrets: [],
        network: { defaultDeny: true, allowHosts: [] },
        budget: {},
        disclosure: "minimal-context",
        isolation: "vercel-sandbox",
        expiresAt,
        signatures: []
    };
}
|
|
153
|
+
test("delegation: command contracts are executed by the fallback backend", async () => {
    // Agent kinds the fallback was asked to execute, in call order.
    const seen = [];
    const fallback = {
        isolation: "vercel-sandbox",
        supports: () => true,
        execute: async (input) => {
            seen.push(input.contract.agent.kind);
            return { exitCode: 0, log: Buffer.from("fallback ran") };
        }
    };
    const backend = aiSdkHarnessBackend({ fallback });
    // A plain command contract is not a claude-code run and goes to the fallback.
    const commandContract = contractFor("command", "echo hi");
    assert.equal(isClaudeCodeAgentRun(commandContract), false);
    assert.equal(backend.supports("shell", commandContract), true);
    const execution = prepareExecution({ contract: commandContract, mockScriptPath: "unused" });
    const result = await backend.execute({
        contract: commandContract,
        repoDir: ".",
        secrets: [],
        execution,
        emit: () => undefined
    });
    assert.equal(result.log.toString("utf8"), "fallback ran");
    assert.deepEqual(seen, ["command"]);
    // A claude-code contract is recognized as an agent run and is supported.
    const agentContract = contractFor("claude-code", "fix it");
    assert.equal(isClaudeCodeAgentRun(agentContract), true);
    assert.equal(backend.supports("argv", agentContract), true);
});
|
|
180
|
+
// ---------------------------------------------------------------------------
|
|
181
|
+
// end to end: a governed run through the real HarnessAgent
|
|
182
|
+
//
|
|
183
|
+
// The fakes (shared in ./fakes.ts) replace only what needs credentials: the
|
|
184
|
+
// harness adapter (in place of the claude-code bridge) and the sandbox
|
|
185
|
+
// provider (a local directory in place of a Firecracker microVM). Everything
|
|
186
|
+
// between the signed contract and the receipt is real: plane, runner,
|
|
187
|
+
// workspace materialization, the HarnessAgent orchestration, staging,
|
|
188
|
+
// mirror-back, event chain, and offline verification.
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
const POOL = "eng-prod";
// Assigned in before() and released in after().
let stack, repoDir, sandboxRoot;
const harnessLog = emptyHarnessLog();
before(async () => {
    sandboxRoot = mkdtempSync(join(tmpdir(), "warrant-fake-sandbox-"));
    // The backend under test, wired to the fakes instead of the real bridge
    // and sandbox.
    const harnessBackend = aiSdkHarnessBackend({
        createHarness: ({ env }) => {
            // Record the env the backend hands over so the test can inspect it.
            harnessLog.envSeen.push(env);
            return fakeHarness(harnessLog, "fake-claude-code");
        },
        createSandboxProvider: () => fakeLocalSandboxProvider(sandboxRoot)
    });
    stack = await startStack({
        pool: POOL,
        startRunner: true,
        backends: [harnessBackend],
        policy: (policy) => {
            policy.agents.allow = ["claude-code"];
        }
    });
    const fixtureFiles = {
        "README.md": "# harness fixture\n",
        "data.txt": "one\ntwo\nthree\n"
    };
    repoDir = makeRepo({ files: fixtureFiles });
});
after(async () => {
    await stack.stop();
    for (const dir of [repoDir, sandboxRoot]) {
        rmSync(dir, { recursive: true, force: true });
    }
});
|
|
222
|
+
test("e2e: a claude-code contract runs through the real HarnessAgent and yields a verifiable receipt", async () => {
    const task = "count the lines in data.txt";
    const eventsOfType = (events, type) => events.filter((e) => e.event.type === type);
    // Upload the captured workspace, then request and execute one run.
    const captured = captureWorkspace(repoDir);
    await stack.client.putBlob(captured.bundle);
    if (captured.dirtyDiff) {
        await stack.client.putBlob(captured.dirtyDiff);
    }
    const created = await stack.client.requestRun({
        requestedBy: { kind: "human", id: "harness-tester" },
        agentKind: "claude-code",
        prompt: task,
        pool: POOL,
        secretNames: [],
        workspace: captured.manifest,
        network: { defaultDeny: true, allowHosts: [] },
        budget: {},
        disclosure: "minimal-context",
        isolation: "vercel-sandbox",
        execution: {
            kind: "agent",
            agent: { kind: "claude-code" },
            prompt: task,
            env: { vars: { ANTHROPIC_API_KEY: "test-key-not-real" } }
        }
    });
    const executedRunId = await stack.runOnce();
    assert.equal(executedRunId, created.runId);
    const bundle = await stack.client.getBundle(created.runId);
    const { receipt } = bundle;
    // The receipt records the tier honestly and verifies offline.
    assert.equal(receipt.status, "completed");
    assert.equal(receipt.runner.isolation, "vercel-sandbox");
    assert.deepEqual(verifyReceiptBundle(bundle).problems, []);
    // The harness saw the prompt and the broker-resolved env; never the host env.
    assert.deepEqual(harnessLog.prompts, [task]);
    assert.deepEqual(harnessLog.envSeen, [{ ANTHROPIC_API_KEY: "test-key-not-real" }]);
    assert.equal(harnessLog.destroyed, 1);
    // The workspace was staged into the session workdir and the result file
    // mirrored back into the runner's checkout (visible in the git diff).
    assert.ok(receipt.workspaceOut.diffHash, "expected a workspace diff");
    const diff = await stack.client.getBlob(receipt.workspaceOut.diffHash);
    assert.ok(diff.toString("utf8").includes("result.txt"));
    // The session log artifact is the structured JSONL transcript.
    const logEvent = bundle.events.find((e) => e.event.type === "artifact.created" && e.event.kind === "log");
    assert.ok(logEvent && logEvent.event.type === "artifact.created");
    const logBlob = await stack.client.getBlob(logEvent.event.hash);
    const log = logBlob.toString("utf8");
    const lines = log.trim().split("\n").map((line) => JSON.parse(line));
    const hasAggregatedText = lines.some((line) => line.part === "text" && line.text === "governed harness turn");
    assert.ok(hasAggregatedText, `expected the aggregated text part in: ${log}`);
    const hasFinish = lines.some((line) => line.part === "finish" && line.finishReason === "stop");
    assert.ok(hasFinish, `expected the finish part in: ${log}`);
    // The boundary event chain saw exactly one executed command for the turn.
    const commandEvents = eventsOfType(bundle.events, "command.executed");
    assert.equal(commandEvents.length, 1);
    const fileEvents = eventsOfType(bundle.events, "file.changed");
    assert.ok(fileEvents.some((e) => e.event.type === "file.changed" && e.event.path === "result.txt"), "expected the mirrored result.txt in the boundary file events");
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|