@purista/harness 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +23 -0
  3. package/dist/agents/index.d.ts +34 -0
  4. package/dist/agents/index.js +301 -0
  5. package/dist/errors/catalog.d.ts +185 -0
  6. package/dist/errors/catalog.js +144 -0
  7. package/dist/errors/harness-error.d.ts +64 -0
  8. package/dist/errors/harness-error.js +58 -0
  9. package/dist/errors/index.d.ts +3 -0
  10. package/dist/errors/index.js +3 -0
  11. package/dist/errors/redaction.d.ts +5 -0
  12. package/dist/errors/redaction.js +64 -0
  13. package/dist/harness/defineHarness.d.ts +640 -0
  14. package/dist/harness/defineHarness.js +176 -0
  15. package/dist/harness/errors.d.ts +62 -0
  16. package/dist/harness/errors.js +67 -0
  17. package/dist/harness/types.d.ts +27 -0
  18. package/dist/harness/types.js +1 -0
  19. package/dist/index.d.ts +14 -0
  20. package/dist/index.js +12 -0
  21. package/dist/logger/index.d.ts +2 -0
  22. package/dist/logger/index.js +2 -0
  23. package/dist/logger/json-logger.d.ts +31 -0
  24. package/dist/logger/json-logger.js +65 -0
  25. package/dist/logger/logger.d.ts +31 -0
  26. package/dist/logger/logger.js +1 -0
  27. package/dist/models/json.d.ts +6 -0
  28. package/dist/models/json.js +1 -0
  29. package/dist/models/registry.d.ts +112 -0
  30. package/dist/models/registry.js +286 -0
  31. package/dist/models/state.d.ts +64 -0
  32. package/dist/models/state.js +1 -0
  33. package/dist/ports/base-model-provider.d.ts +56 -0
  34. package/dist/ports/base-model-provider.js +343 -0
  35. package/dist/ports/capabilities.d.ts +70 -0
  36. package/dist/ports/capabilities.js +38 -0
  37. package/dist/ports/feedback.d.ts +29 -0
  38. package/dist/ports/feedback.js +1 -0
  39. package/dist/ports/harness-context.d.ts +20 -0
  40. package/dist/ports/harness-context.js +1 -0
  41. package/dist/ports/index.d.ts +6 -0
  42. package/dist/ports/index.js +6 -0
  43. package/dist/ports/model-provider.d.ts +280 -0
  44. package/dist/ports/model-provider.js +1 -0
  45. package/dist/ports/state.d.ts +72 -0
  46. package/dist/ports/state.js +24 -0
  47. package/dist/runtime/durable.d.ts +134 -0
  48. package/dist/runtime/durable.js +185 -0
  49. package/dist/runtime/index.d.ts +2 -0
  50. package/dist/runtime/index.js +2 -0
  51. package/dist/runtime/steps.d.ts +22 -0
  52. package/dist/runtime/steps.js +51 -0
  53. package/dist/sandbox/index.d.ts +111 -0
  54. package/dist/sandbox/index.js +165 -0
  55. package/dist/sessions/index.d.ts +23 -0
  56. package/dist/sessions/index.js +718 -0
  57. package/dist/skills/index.d.ts +8 -0
  58. package/dist/skills/index.js +88 -0
  59. package/dist/state/in-memory.d.ts +35 -0
  60. package/dist/state/in-memory.js +140 -0
  61. package/dist/telemetry/index.d.ts +1 -0
  62. package/dist/telemetry/index.js +1 -0
  63. package/dist/telemetry/shim.d.ts +26 -0
  64. package/dist/telemetry/shim.js +120 -0
  65. package/dist/testing/capabilities.d.ts +11 -0
  66. package/dist/testing/capabilities.js +20 -0
  67. package/dist/testing/fakeModelProvider.d.ts +25 -0
  68. package/dist/testing/fakeModelProvider.js +79 -0
  69. package/dist/testing/feedback.d.ts +10 -0
  70. package/dist/testing/feedback.js +24 -0
  71. package/dist/testing/fixtures/mcp/fake-http-server.d.ts +8 -0
  72. package/dist/testing/fixtures/mcp/fake-http-server.js +95 -0
  73. package/dist/testing/index.d.ts +8 -0
  74. package/dist/testing/index.js +11 -0
  75. package/dist/testing/sandboxContract.d.ts +4 -0
  76. package/dist/testing/sandboxContract.js +74 -0
  77. package/dist/testing/sandboxSnapshot.d.ts +7 -0
  78. package/dist/testing/sandboxSnapshot.js +201 -0
  79. package/dist/testing/stateStoreContract.d.ts +2 -0
  80. package/dist/testing/stateStoreContract.js +109 -0
  81. package/dist/tools/index.d.ts +9 -0
  82. package/dist/tools/index.js +123 -0
  83. package/dist/tools/mcp/http.d.ts +2 -0
  84. package/dist/tools/mcp/http.js +109 -0
  85. package/dist/tools/mcp/index.d.ts +2 -0
  86. package/dist/tools/mcp/index.js +2 -0
  87. package/dist/tools/mcp/runner.d.ts +74 -0
  88. package/dist/tools/mcp/runner.js +238 -0
  89. package/dist/tools/mcp/schema.d.ts +41 -0
  90. package/dist/tools/mcp/schema.js +251 -0
  91. package/dist/tools/mcp/stdio.d.ts +2 -0
  92. package/dist/tools/mcp/stdio.js +122 -0
  93. package/dist/ulid/index.d.ts +6 -0
  94. package/dist/ulid/index.js +35 -0
  95. package/dist/workflows/index.d.ts +8 -0
  96. package/dist/workflows/index.js +26 -0
  97. package/package.json +75 -0
@@ -0,0 +1,95 @@
1
+ import { randomUUID } from 'node:crypto';
2
+ import { createServer } from 'node:http';
3
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
4
+ import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
5
+ import { isInitializeRequest } from '@modelcontextprotocol/sdk/types.js';
6
+ import * as z from 'zod/v4';
7
/**
 * Starts a loopback-only HTTP server that serves the fake MCP endpoint at `/mcp`.
 *
 * Request gating, in order: 401 when a header listed in `options.requiredHeaders`
 * is missing or differs, 404 for any URL other than `/mcp`, 405 for GET and for
 * every method other than POST/DELETE. Remaining requests go to `handleMcpRequest`.
 *
 * @param options Optional settings; `requiredHeaders` maps header names to the
 *   exact value each request must carry.
 * @returns An object with the endpoint `url` and an async `close()` that closes
 *   every registered transport and then the HTTP server.
 * @throws Error when listening fails or the server did not bind to a TCP port.
 */
export async function startFakeHttpMcpServer(options = {}) {
    const transports = new Map();
    const server = createServer(async (req, res) => {
        try {
            if (!headersMatch(req, options.requiredHeaders ?? {})) {
                res.writeHead(401, { 'content-type': 'application/json' });
                res.end(JSON.stringify({ error: 'unauthorized' }));
                return;
            }
            if (req.url !== '/mcp') {
                res.writeHead(404).end();
                return;
            }
            if (req.method === 'GET') {
                res.writeHead(405, { allow: 'POST' }).end('Method Not Allowed');
                return;
            }
            if (req.method !== 'POST' && req.method !== 'DELETE') {
                res.writeHead(405, { allow: 'POST, DELETE' }).end('Method Not Allowed');
                return;
            }
            await handleMcpRequest(req, res, transports);
        }
        catch (error) {
            // node:http ignores the promise returned by an async listener, so a
            // rejection here would be unhandled and the client would wait forever.
            if (!res.headersSent) {
                res.writeHead(500, { 'content-type': 'application/json' });
                res.end(JSON.stringify({ error: 'internal error' }));
            }
            else if (!res.writableEnded) {
                res.end();
            }
        }
    });
    await new Promise((resolve, reject) => {
        // Surface bind failures instead of leaving the caller awaiting forever.
        server.once('error', reject);
        server.listen(0, '127.0.0.1', () => {
            server.off('error', reject);
            resolve();
        });
    });
    const address = server.address();
    if (!address || typeof address === 'string')
        throw new Error('Fake MCP server did not bind to a TCP port.');
    return {
        url: `http://127.0.0.1:${address.port}/mcp`,
        close: async () => {
            await Promise.allSettled([...transports.values()].map((transport) => transport.close()));
            await closeServer(server);
        }
    };
}
41
/**
 * Routes one POST/DELETE request on `/mcp` to the transport of its MCP session.
 *
 * A POST body is parsed as JSON. When no transport is registered for the
 * `mcp-session-id` header and the body is an initialize request, a new
 * transport is created, connected to a fresh fake MCP server, and stored in
 * `transports` once the session id is assigned.
 *
 * @param req Incoming HTTP request.
 * @param res HTTP response to write to.
 * @param transports Map of session id to transport, shared across requests.
 */
async function handleMcpRequest(req, res, transports) {
    let parsedBody;
    if (req.method === 'POST') {
        try {
            parsedBody = await readJson(req);
        }
        catch (error) {
            // Only malformed JSON is a client error; stream failures still propagate.
            if (!(error instanceof SyntaxError))
                throw error;
            res.writeHead(400, { 'content-type': 'application/json' });
            res.end(JSON.stringify({ error: 'invalid JSON body' }));
            return;
        }
    }
    const sessionId = req.headers['mcp-session-id'];
    let transport = typeof sessionId === 'string' ? transports.get(sessionId) : undefined;
    if (!transport && parsedBody !== undefined && isInitializeRequest(parsedBody)) {
        transport = new StreamableHTTPServerTransport({
            sessionIdGenerator: () => randomUUID(),
            enableJsonResponse: true,
            // Register the transport only once the session id is known.
            onsessioninitialized: (id) => {
                if (transport)
                    transports.set(id, transport);
            }
        });
        await createFakeMcpServer().connect(transport);
    }
    if (!transport) {
        res.writeHead(400, { 'content-type': 'application/json' });
        res.end(JSON.stringify({ error: 'missing or invalid MCP session' }));
        return;
    }
    await transport.handleRequest(req, res, parsedBody);
}
61
/**
 * Builds the fake MCP server, which exposes a single `echo` tool.
 *
 * The tool returns `{ echo: message }` both as structured content and as a
 * JSON text part, optionally waiting `delayMs` milliseconds first.
 */
function createFakeMcpServer() {
    const echoDefinition = {
        description: 'Echoes a message as structured content.',
        inputSchema: {
            message: z.string(),
            delayMs: z.number().optional()
        },
        outputSchema: {
            echo: z.string()
        }
    };
    const echoHandler = async ({ message, delayMs }) => {
        if (delayMs) {
            await new Promise((resolve) => setTimeout(resolve, delayMs));
        }
        const structuredContent = { echo: message };
        const textPart = { type: 'text', text: JSON.stringify(structuredContent) };
        return { content: [textPart], structuredContent };
    };
    const fake = new McpServer({ name: 'purista-fake-http-mcp', version: '0.0.0' });
    fake.registerTool('echo', echoDefinition, echoHandler);
    return fake;
}
83
/**
 * Collects the whole request body and parses it as JSON.
 *
 * @param req Async iterable of body chunks (buffers or values accepted by `Buffer.from`).
 * @returns The parsed value, or `undefined` when the body is empty.
 */
async function readJson(req) {
    const pieces = [];
    for await (const piece of req) {
        pieces.push(Buffer.isBuffer(piece) ? piece : Buffer.from(piece));
    }
    const text = Buffer.concat(pieces).toString('utf8');
    if (!text) {
        return undefined;
    }
    return JSON.parse(text);
}
90
/**
 * Checks that every required header is present on the request with exactly the
 * expected value. Required names are lower-cased before the lookup.
 *
 * @param req Request exposing a `headers` object.
 * @param required Map of header name to required value.
 * @returns `true` when all required headers match (also when none are required).
 */
function headersMatch(req, required) {
    for (const [name, expected] of Object.entries(required)) {
        if (req.headers[name.toLowerCase()] !== expected) {
            return false;
        }
    }
    return true;
}
93
/**
 * Promise wrapper around `server.close(callback)`.
 *
 * @param server Object with a callback-style `close` method.
 * @returns Promise resolved when the callback reports no error, rejected with the error otherwise.
 */
function closeServer(server) {
    return new Promise((resolve, reject) => {
        server.close((error) => {
            if (error) {
                reject(error);
                return;
            }
            resolve();
        });
    });
}
@@ -0,0 +1,8 @@
1
+ export { FakeModelProvider } from './fakeModelProvider.js';
2
+ export { adapterCapabilitiesContract, fakeCapabilityAdapter, type FakeCapabilityAdapter } from './capabilities.js';
3
+ export { createInMemoryFeedbackRecorder } from './feedback.js';
4
+ export { sandboxContract } from './sandboxContract.js';
5
+ export { fakeSnapshotSandbox, sandboxSnapshotContract } from './sandboxSnapshot.js';
6
+ export { stateStoreContract } from './stateStoreContract.js';
7
+ /** Returns a fresh harness builder for tests, typed as `HarnessBuilder<{}>` from `../harness/defineHarness.js`. */
8
+ export declare function makeHarness(): import("../harness/defineHarness.js").HarnessBuilder<{}>;
@@ -0,0 +1,11 @@
1
+ import { defineHarness } from '../harness/defineHarness.js';
2
+ export { FakeModelProvider } from './fakeModelProvider.js';
3
+ export { adapterCapabilitiesContract, fakeCapabilityAdapter } from './capabilities.js';
4
+ export { createInMemoryFeedbackRecorder } from './feedback.js';
5
+ export { sandboxContract } from './sandboxContract.js';
6
+ export { fakeSnapshotSandbox, sandboxSnapshotContract } from './sandboxSnapshot.js';
7
+ export { stateStoreContract } from './stateStoreContract.js';
8
+ /** Returns a fresh harness builder for tests by calling `defineHarness()` with no arguments. */
9
+ export function makeHarness() {
10
+ return defineHarness();
11
+ }
@@ -0,0 +1,4 @@
1
+ import type { Sandbox } from '../sandbox/index.js';
2
+ /** Registers the sandbox contract test suite; `make` supplies the sandbox under test and `opts.executor` states whether its sessions are expected to run commands. */ export declare function sandboxContract(make: () => Sandbox | Promise<Sandbox>, opts: {
3
+ executor: 'available' | 'unavailable';
4
+ }): void;
@@ -0,0 +1,74 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { OperationCancelledError, OperationTimeoutError, SandboxNoExecutorError } from '../errors/index.js';
3
/**
 * Registers the vitest contract suite for sandbox adapters.
 *
 * @param make Factory called once per test to obtain a sandbox (may be async).
 * @param opts `executor` is the value opened sessions must report; with
 *   'unavailable' `exec` must reject with SandboxNoExecutorError, with
 *   'available' the additional exec option tests are registered.
 */
export function sandboxContract(make, opts) {
    // Every test builds its own sandbox and opens a session with the same fixed ids.
    const openSession = async () => {
        const sandbox = await make();
        return sandbox.open({ sessionId: 's1', runId: 'r1' });
    };
    describe(`sandboxContract (${opts.executor})`, () => {
        it('open returns expected executor', async () => {
            const opened = await openSession();
            expect(opened.executor).toBe(opts.executor);
        });
        it('read/write/list/stat/exists/remove roundtrip', async () => {
            const target = '/workspace/a.txt';
            const opened = await openSession();
            await opened.write(target, 'hello');
            expect(await opened.readText(target)).toBe('hello');
            expect(await opened.exists(target)).toBe(true);
            const entries = await opened.list('/workspace');
            expect(entries.some((entry) => entry.path === target)).toBe(true);
            const info = await opened.stat(target);
            expect(info.kind).toBe('file');
            await opened.remove(target);
            expect(await opened.exists(target)).toBe(false);
        });
        it('mount works', async () => {
            const opened = await openSession();
            const files = new Map([['SKILL.md', 'abc']]);
            await opened.mount(files, '/skills/foo');
            expect(await opened.readText('/skills/foo/SKILL.md')).toBe('abc');
        });
        it('rejects relative paths', async () => {
            const opened = await openSession();
            await expect(opened.write('relative.txt', 'x')).rejects.toThrow();
        });
        it('exec availability semantics', async () => {
            const opened = await openSession();
            if (opts.executor === 'unavailable') {
                await expect(opened.exec('echo hi')).rejects.toBeInstanceOf(SandboxNoExecutorError);
                return;
            }
            const outcome = await opened.exec('echo hi');
            expect(outcome.stdout).toBe('hi\n');
            expect(outcome.stderr).toBe('');
            expect(outcome.exitCode).toBe(0);
            expect(outcome.durationSeconds).toBeGreaterThanOrEqual(0);
        });
        // The remaining tests only make sense when commands can actually run.
        if (opts.executor !== 'available') {
            return;
        }
        it('exec honors stdin, env, and cwd options', async () => {
            const opened = await openSession();
            const execOptions = {
                cwd: '/workspace',
                env: { GREETING: 'hello' },
                stdin: 'input'
            };
            const outcome = await opened.exec('printf "$GREETING:"; printf "$PWD"; printf ":"; cat', execOptions);
            expect(outcome.stdout).toBe('hello:/workspace:input');
            expect(outcome.exitCode).toBe(0);
        });
        it('exec honors timeoutMs', async () => {
            const opened = await openSession();
            await expect(opened.exec('sleep 1', { timeoutMs: 10 })).rejects.toBeInstanceOf(OperationTimeoutError);
        });
        it('exec honors pre-aborted signals', async () => {
            const opened = await openSession();
            const aborter = new AbortController();
            aborter.abort();
            await expect(opened.exec('echo hi', { signal: aborter.signal })).rejects.toBeInstanceOf(OperationCancelledError);
        });
    });
}
@@ -0,0 +1,7 @@
1
+ import type { HibernateCapableSandbox, ResumeCapableSandbox, Sandbox, SnapshotCapableSandbox } from '../sandbox/index.js';
2
+ /** Sandbox declaring the fs, snapshot, resume and hibernate capabilities, intersected with the three matching capability interfaces. */ type SnapshotSandbox = Sandbox<readonly ['sandbox.fs', 'sandbox.snapshot', 'sandbox.resume', 'sandbox.hibernate']> & SnapshotCapableSandbox & ResumeCapableSandbox & HibernateCapableSandbox;
3
+ /** Deterministic in-memory sandbox fixture that implements snapshot/resume/hibernate; takes no arguments and returns a `SnapshotSandbox`. */
4
+ export declare function fakeSnapshotSandbox(): SnapshotSandbox;
5
+ /** Contract tests for adapters that opt into sandbox snapshot/resume support; `make` supplies the sandbox under test and may be async. */
6
+ export declare function sandboxSnapshotContract(make: () => SnapshotSandbox | Promise<SnapshotSandbox>): void;
7
+ export {};
@@ -0,0 +1,201 @@
1
+ import path from 'node:path';
2
+ import { describe, expect, it } from 'vitest';
3
+ import { SandboxError, SandboxNoExecutorError } from '../errors/index.js';
4
/** Returns the current time formatted with `Date.prototype.toISOString`. */
function now() {
    const current = new Date();
    return current.toISOString();
}
5
/**
 * Copies one filesystem node. File bytes are duplicated into a new
 * `Uint8Array`; any node that is not a file is copied as a directory.
 */
function cloneNode(node) {
    const { modifiedAt } = node;
    if (node.kind === 'file') {
        return { kind: 'file', data: new Uint8Array(node.data), modifiedAt };
    }
    return { kind: 'directory', modifiedAt };
}
10
/** Returns a new Map with the same keys, where every node is copied through `cloneNode`. */
function cloneFs(fs) {
    const copy = new Map();
    for (const [entryPath, node] of fs.entries()) {
        copy.set(entryPath, cloneNode(node));
    }
    return copy;
}
13
/**
 * Validates and canonicalises an absolute sandbox path.
 *
 * @param input Path that must start with `/`.
 * @returns The POSIX-normalised path without a trailing slash (the root stays `/`).
 * @throws SandboxError with reason `invalid_path` when the path is not absolute.
 */
function normalizePath(input) {
    if (!input.startsWith('/'))
        throw new SandboxError('Invalid path', { reason: 'invalid_path' });
    const normalized = path.posix.normalize(input);
    if (!normalized.startsWith('/'))
        throw new SandboxError('Invalid path', { reason: 'invalid_path' });
    // posix.normalize preserves a trailing slash; drop it so '/dir/' and '/dir'
    // resolve to the same filesystem key.
    return normalized.length > 1 && normalized.endsWith('/') ? normalized.slice(0, -1) : normalized;
}
21
/**
 * In-memory sandbox session backed by a Map of absolute path to node
 * (`{ kind: 'file', data, modifiedAt }` or `{ kind: 'directory', modifiedAt }`).
 * It has no executor: `exec` always rejects with SandboxNoExecutorError.
 * Apart from `close`, every operation throws SandboxError (`session_closed`) once
 * the session has been closed.
 */
class FakeSnapshotSandboxSession {
    sessionId;
    runId;
    executor = 'unavailable';
    closed = false;
    fs;
    /**
     * @param sessionId Session identifier exposed on the instance.
     * @param runId Run identifier exposed on the instance.
     * @param fs Optional filesystem to start from; it is cloned, so the caller's
     *   Map is never mutated. Without it the session starts with only `/`.
     */
    constructor(sessionId, runId, fs) {
        this.sessionId = sessionId;
        this.runId = runId;
        this.fs = fs ? cloneFs(fs) : new Map([['/', { kind: 'directory', modifiedAt: now() }]]);
    }
    /**
     * Translates a glob into a RegExp. Only `*` (any run of characters) and `?`
     * (one character) are wildcards; every other character is matched literally,
     * so patterns such as `*.c++` or `file(1).txt` neither throw nor get
     * reinterpreted as regex syntax. The result is unanchored, as before.
     */
    static globToRegExp(glob) {
        const source = glob
            .replace(/[.+^${}()|[\]\\]/g, '\\$&')
            .replaceAll('*', '.*')
            .replaceAll('?', '.');
        return new RegExp(source);
    }
    /** Returns an independent copy of the current filesystem. */
    snapshotFs() {
        this.assertOpen();
        return cloneFs(this.fs);
    }
    /** Throws SandboxError (`session_closed`) when the session was closed. */
    assertOpen() {
        if (this.closed)
            throw new SandboxError('Sandbox session is closed.', { reason: 'session_closed' });
    }
    /** Creates every missing ancestor directory of `filePath`. */
    ensureParent(filePath) {
        const parts = normalizePath(filePath).split('/').filter(Boolean);
        let current = '/';
        for (let i = 0; i < parts.length - 1; i += 1) {
            current = current === '/' ? `/${parts[i]}` : `${current}/${parts[i]}`;
            if (!this.fs.has(current))
                this.fs.set(current, { kind: 'directory', modifiedAt: now() });
        }
    }
    /** Returns a copy of the file's bytes; throws SandboxError (`fs_failed`) when the path is not a file. */
    async read(filePath) {
        this.assertOpen();
        const node = this.fs.get(normalizePath(filePath));
        if (!node || node.kind !== 'file')
            throw new SandboxError('File not found', { reason: 'fs_failed' });
        return new Uint8Array(node.data);
    }
    /** Reads a file and decodes it with the default `TextDecoder`. */
    async readText(filePath) {
        return new TextDecoder().decode(await this.read(filePath));
    }
    /** Writes (or overwrites) a file, creating parent directories; strings are encoded with `TextEncoder`. */
    async write(filePath, data) {
        this.assertOpen();
        const p = normalizePath(filePath);
        this.ensureParent(p);
        const bytes = typeof data === 'string' ? new TextEncoder().encode(data) : new Uint8Array(data);
        this.fs.set(p, { kind: 'file', data: bytes, modifiedAt: now() });
    }
    /** Deletes a path; with `opts.recursive` also deletes every entry below it. */
    async remove(filePath, opts) {
        this.assertOpen();
        const p = normalizePath(filePath);
        if (opts?.recursive) {
            // Iterate over a copy of the keys because entries are deleted while looping.
            for (const key of [...this.fs.keys()]) {
                if (key === p || key.startsWith(`${p}/`))
                    this.fs.delete(key);
            }
            return;
        }
        this.fs.delete(p);
    }
    /**
     * Lists entries below `rootPath`, sorted by path.
     *
     * @param rootPath Absolute directory path; the root itself is never listed.
     * @param opts `recursive` includes nested entries; `glob` keeps only entries
     *   whose full path matches the pattern (see `globToRegExp`).
     */
    async list(rootPath, opts) {
        this.assertOpen();
        const root = normalizePath(rootPath);
        const prefix = root === '/' ? '/' : `${root}/`;
        // Compile the filter once instead of once per entry.
        const matcher = opts?.glob ? FakeSnapshotSandboxSession.globToRegExp(opts.glob) : undefined;
        const out = [];
        for (const [entryPath, node] of this.fs.entries()) {
            if (entryPath === root)
                continue;
            if (!entryPath.startsWith(prefix))
                continue;
            const relative = entryPath.slice(prefix.length);
            if (!opts?.recursive && relative.includes('/'))
                continue;
            if (matcher && !matcher.test(entryPath))
                continue;
            out.push({
                name: entryPath.split('/').at(-1) ?? '',
                path: entryPath,
                kind: node.kind,
                ...(node.kind === 'file' ? { size: node.data.byteLength } : {})
            });
        }
        return out.sort((a, b) => a.path.localeCompare(b.path));
    }
    /** Returns kind, size (0 for directories) and modification time; throws SandboxError (`fs_failed`) for unknown paths. */
    async stat(filePath) {
        this.assertOpen();
        const node = this.fs.get(normalizePath(filePath));
        if (!node)
            throw new SandboxError('Path not found', { reason: 'fs_failed' });
        return { kind: node.kind, size: node.kind === 'file' ? node.data.byteLength : 0, modifiedAt: node.modifiedAt };
    }
    /** Reports whether a file or directory exists at the path. */
    async exists(filePath) {
        this.assertOpen();
        return this.fs.has(normalizePath(filePath));
    }
    /** Writes every `[relativePath, data]` entry of `files` below `atPath`; a leading `/` on a relative path is ignored. */
    async mount(files, atPath) {
        this.assertOpen();
        const base = normalizePath(atPath);
        for (const [rel, data] of files.entries()) {
            const relNorm = rel.startsWith('/') ? rel.slice(1) : rel;
            await this.write(`${base}/${relNorm}`, data);
        }
    }
    /** Always rejects with SandboxNoExecutorError: this fixture cannot run commands. */
    async exec() {
        this.assertOpen();
        throw new SandboxNoExecutorError('Sandbox executor unavailable.', { session_id: this.sessionId });
    }
    /** Marks the session closed; later operations throw. */
    async close() {
        this.closed = true;
    }
}
128
/**
 * Deterministic in-memory sandbox fixture that implements snapshot/resume/hibernate.
 *
 * Snapshot ids are handed out sequentially (`snapshot_1`, `snapshot_2`, ...) per
 * fixture instance, and snapshots are kept in a Map private to that instance.
 */
export function fakeSnapshotSandbox() {
    const stored = new Map();
    let counter = 1;
    // Snapshots need access to the fake session's filesystem, so foreign sessions are refused.
    const requireFakeSession = (candidate) => {
        if (candidate instanceof FakeSnapshotSandboxSession) {
            return candidate;
        }
        throw new SandboxError('Snapshot helper received an unknown session implementation.', { reason: 'invalid_session' });
    };
    return {
        capabilities: ['sandbox.fs', 'sandbox.snapshot', 'sandbox.resume', 'sandbox.hibernate'],
        async open(opts) {
            const { sessionId, runId } = opts;
            return new FakeSnapshotSandboxSession(sessionId, runId);
        },
        async snapshot(session) {
            const source = requireFakeSession(session);
            // The id is consumed before the filesystem is copied.
            const snapshotId = `snapshot_${counter++}`;
            const metadata = { sessionId: source.sessionId, runId: source.runId };
            stored.set(snapshotId, { fs: source.snapshotFs(), metadata });
            return { snapshotId, metadata };
        },
        async resume(opts) {
            const found = stored.get(opts.snapshotId);
            if (found) {
                return new FakeSnapshotSandboxSession(opts.sessionId, opts.runId, found.fs);
            }
            throw new SandboxError('Snapshot not found.', { reason: 'unknown_snapshot' });
        },
        async hibernate(session) {
            // Snapshot first, then close, so the stored copy reflects the live state.
            const taken = await this.snapshot(session);
            await session.close();
            return taken;
        }
    };
}
165
/**
 * Contract tests for adapters that opt into sandbox snapshot/resume support.
 *
 * @param make Factory called once per test to obtain the sandbox under test (may be async).
 */
export function sandboxSnapshotContract(make) {
    // Shared arrangement: new sandbox, one open session, one file written.
    const openWithFile = async () => {
        const sandbox = await make();
        const session = await sandbox.open({ sessionId: 'contract-s1', runId: 'contract-r1' });
        await session.write('/workspace/a.txt', 'hello');
        return { sandbox, session };
    };
    const resumeTarget = (snapshotId) => ({ snapshotId, sessionId: 'contract-s2', runId: 'contract-r2' });
    describe('sandboxSnapshotContract', () => {
        it('creates snapshot ids', async () => {
            const { sandbox, session } = await openWithFile();
            const { snapshotId } = await sandbox.snapshot(session);
            expect(snapshotId).toEqual(expect.any(String));
            expect(snapshotId.length).toBeGreaterThan(0);
        });
        it('resumes a usable session from a snapshot', async () => {
            const { sandbox, session } = await openWithFile();
            const taken = await sandbox.snapshot(session);
            const revived = await sandbox.resume(resumeTarget(taken.snapshotId));
            expect(await revived.readText('/workspace/a.txt')).toBe('hello');
            await revived.write('/workspace/b.txt', 'world');
            expect(await revived.readText('/workspace/b.txt')).toBe('world');
        });
        it('throws SandboxError for unknown snapshots', async () => {
            const sandbox = await make();
            const attempt = sandbox.resume({ snapshotId: 'snapshot_missing', sessionId: 'contract-s1', runId: 'contract-r1' });
            await expect(attempt).rejects.toBeInstanceOf(SandboxError);
        });
        it('hibernates by snapshotting and closing the active session', async () => {
            const { sandbox, session } = await openWithFile();
            const taken = await sandbox.hibernate(session);
            // The hibernated session must no longer be usable.
            await expect(session.readText('/workspace/a.txt')).rejects.toBeInstanceOf(SandboxError);
            const revived = await sandbox.resume(resumeTarget(taken.snapshotId));
            expect(await revived.readText('/workspace/a.txt')).toBe('hello');
        });
    });
}
@@ -0,0 +1,2 @@
1
+ import type { StateStore } from '../ports/state.js';
2
+ /** Registers the state-store contract test suite; `make` supplies the store under test and may be async. */ export declare function stateStoreContract(make: () => StateStore | Promise<StateStore>): void;
@@ -0,0 +1,109 @@
1
+ import { describe, expect, it } from 'vitest';
2
+ import { StateError } from '../errors/index.js';
3
// Session record used as the owner of every message and run fixture below.
const session = {
    id: 'session_1',
    createdAt: '2026-01-01T00:00:00.000Z',
    updatedAt: '2026-01-01T00:00:00.000Z',
    runCount: 0
};
// Three messages with ascending ids and timestamps, one second apart.
const messages = [
    ['01A', 'user', 'a', '2026-01-01T00:00:00.000Z'],
    ['01B', 'assistant', 'b', '2026-01-01T00:00:01.000Z'],
    ['01C', 'assistant', 'c', '2026-01-01T00:00:02.000Z']
].map(([id, role, content, timestamp]) => ({ id, sessionId: session.id, role, content, timestamp }));
const [m1, m2, m3] = messages;
// A workflow run that is still in progress.
const run = {
    id: 'run_1',
    sessionId: session.id,
    kind: 'workflow',
    target: 'wf',
    startedAt: '2026-01-01T00:00:00.000Z',
    status: 'running'
};
// First event of the run above.
const event = {
    id: '01EVT',
    runId: run.id,
    at: '2026-01-01T00:00:00.000Z',
    type: 'run.started',
    payload: { ok: true }
};
30
/**
 * Registers the vitest contract suite for state-store adapters.
 *
 * @param make Factory called once per test to obtain an empty store (may be async).
 */
export function stateStoreContract(make) {
    // Arrangement helpers; each one starts from a brand-new store.
    const storeWithMessages = async () => {
        const store = await make();
        await store.appendMessages(session.id, messages);
        return store;
    };
    const storeWithRun = async () => {
        const store = await make();
        await store.createRun(run);
        return store;
    };
    describe('stateStoreContract', () => {
        it('getSession returns undefined for unknown id', async () => {
            const subject = await make();
            await expect(subject.getSession('missing')).resolves.toBeUndefined();
        });
        it('upsertSession and getSession round-trip', async () => {
            const subject = await make();
            await subject.upsertSession(session);
            await expect(subject.getSession(session.id)).resolves.toEqual(session);
        });
        it('appendMessages preserves order across calls', async () => {
            const subject = await make();
            await subject.appendMessages(session.id, [m1]);
            await subject.appendMessages(session.id, [m2, m3]);
            await expect(subject.listMessages(session.id)).resolves.toEqual(messages);
        });
        it('listMessages honors limit and before cursor', async () => {
            const subject = await storeWithMessages();
            // `limit` keeps the newest messages; `before` excludes the cursor itself.
            await expect(subject.listMessages(session.id, { limit: 2 })).resolves.toEqual([m2, m3]);
            await expect(subject.listMessages(session.id, { before: '01C' })).resolves.toEqual([m1, m2]);
        });
        it('clearMessages removes all messages', async () => {
            const subject = await storeWithMessages();
            await subject.clearMessages(session.id);
            await expect(subject.listMessages(session.id)).resolves.toEqual([]);
        });
        it('createRun and getRun round-trip', async () => {
            const subject = await storeWithRun();
            await expect(subject.getRun(run.id)).resolves.toEqual(run);
        });
        it('finishRun updates patch fields only', async () => {
            const subject = await storeWithRun();
            const patch = {
                status: 'succeeded',
                finishedAt: '2026-01-01T00:00:03.000Z',
                output: { ok: true }
            };
            await subject.finishRun(run.id, patch);
            await expect(subject.getRun(run.id)).resolves.toMatchObject({
                id: run.id,
                status: 'succeeded',
                finishedAt: '2026-01-01T00:00:03.000Z',
                output: { ok: true }
            });
        });
        it('listRuns sorted descending by startedAt then id', async () => {
            const subject = await storeWithRun();
            const laterStart = '2026-01-01T00:00:05.000Z';
            await subject.createRun({ ...run, id: 'run_2', startedAt: laterStart });
            await subject.createRun({ ...run, id: 'run_3', startedAt: laterStart });
            const expectedOrder = ['run_3', 'run_2', 'run_1'].map((id) => expect.objectContaining({ id }));
            await expect(subject.listRuns(session.id)).resolves.toEqual(expectedOrder);
        });
        it('appendEvents and listEvents round-trip with after cursor', async () => {
            const subject = await make();
            const secondEvent = { ...event, id: '01EVT2', payload: { ok: 2 } };
            await subject.appendEvents(run.id, [event, secondEvent]);
            await expect(subject.listEvents(run.id)).resolves.toHaveLength(2);
            await expect(subject.listEvents(run.id, { after: '01EVT' })).resolves.toEqual([
                expect.objectContaining({ id: '01EVT2' })
            ]);
        });
        it('duplicate message id throws StateError', async () => {
            const subject = await make();
            await subject.appendMessages(session.id, [m1]);
            await expect(subject.appendMessages(session.id, [m1])).rejects.toBeInstanceOf(StateError);
        });
        it('duplicate message ids in the same append batch throw StateError', async () => {
            const subject = await make();
            const batch = [m1, { ...m1 }];
            await expect(subject.appendMessages(session.id, batch)).rejects.toBeInstanceOf(StateError);
            // A rejected batch must not leave partial writes behind.
            await expect(subject.listMessages(session.id)).resolves.toEqual([]);
        });
    });
}
@@ -0,0 +1,9 @@
1
+ import type { JsonValue } from '../models/json.js';
2
+ import type { Message } from '../models/state.js';
3
+ import type { BuiltinToolName } from '../harness/defineHarness.js';
4
+ import type { ModelToolSpec } from '../ports/model-provider.js';
5
+ import type { SandboxSession } from '../sandbox/index.js';
6
+ /** Maps each accepted built-in tool name or alias to its canonical tool name. */ export declare const BUILTIN_ALIAS_TO_CANONICAL: Record<string, BuiltinToolName>;
7
+ /** Builds the model tool specs for the enabled built-in tools; `bash` is left out when the session's executor is unavailable. */ export declare function getBuiltinToolSpecs(enabled: readonly BuiltinToolName[], session: SandboxSession): ModelToolSpec[];
8
+ /** Resolves a built-in tool by name or alias, validates `input`, and runs the tool against the sandbox session; `signal` is forwarded to `bash` execution. */ export declare function invokeBuiltinTool(nameOrAlias: string, input: unknown, session: SandboxSession, signal?: AbortSignal): Promise<JsonValue>;
9
+ /** Wraps an error as a `tool` role message holding one tool result (the serialized error) for `toolCallId`. */ export declare function toToolErrorMessage(toolCallId: string, error: unknown): Message;
@@ -0,0 +1,123 @@
1
+ import { z } from 'zod';
2
+ import { SandboxNoExecutorError, ToolNotFoundError, ValidationError, serializeError } from '../errors/index.js';
3
/**
 * Accepted spellings of every built-in tool, mapped to the canonical name.
 * Each tool is reachable by its lower-case and capitalised name; `list` is
 * additionally reachable as `LS`.
 */
export const BUILTIN_ALIAS_TO_CANONICAL = {
    bash: 'bash',
    Bash: 'bash',
    read: 'read',
    Read: 'read',
    write: 'write',
    Write: 'write',
    edit: 'edit',
    Edit: 'edit',
    glob: 'glob',
    Glob: 'glob',
    grep: 'grep',
    Grep: 'grep',
    list: 'list',
    List: 'list',
    LS: 'list'
};
12
/**
 * Per-tool definition: zod schema for the input, zod schema for the output,
 * and the description advertised to the model.
 */
const schemas = {
    bash: {
        input: z.object({
            command: z.string().min(1),
            cwd: z.string().optional(),
            timeoutMs: z.number().int().positive().optional()
        }),
        output: z.object({
            stdout: z.string(),
            stderr: z.string(),
            exitCode: z.number().int()
        }),
        description: 'Run a shell command in the sandbox. Returns stdout, stderr, exitCode.'
    },
    read: {
        input: z.object({
            path: z.string().min(1),
            encoding: z.literal('utf-8').default('utf-8')
        }),
        output: z.object({ content: z.string() }),
        description: 'Read a text file from the sandbox.'
    },
    write: {
        input: z.object({
            path: z.string().min(1),
            content: z.string()
        }),
        output: z.object({ bytesWritten: z.number().int().nonnegative() }),
        description: 'Write or overwrite a text file in the sandbox.'
    },
    edit: {
        input: z.object({
            path: z.string().min(1),
            old_string: z.string().min(1),
            new_string: z.string()
        }),
        output: z.object({ replaced: z.literal(1) }),
        description: 'Replace exactly one occurrence of old_string with new_string in the given file.'
    },
    glob: {
        input: z.object({
            pattern: z.string().min(1),
            root: z.string().default('/')
        }),
        output: z.object({ paths: z.array(z.string()) }),
        description: 'List files matching a glob pattern under root (recursive).'
    },
    grep: {
        input: z.object({
            pattern: z.string().min(1),
            path: z.string().default('/'),
            maxResults: z.number().int().positive().default(100)
        }),
        output: z.object({
            matches: z.array(z.object({
                path: z.string(),
                line: z.number().int(),
                text: z.string()
            }))
        }),
        description: 'Search file contents for a regex pattern. Returns matching lines with paths and line numbers.'
    },
    list: {
        input: z.object({ path: z.string().min(1) }),
        output: z.object({
            entries: z.array(z.object({
                name: z.string(),
                kind: z.enum(['file', 'directory']),
                size: z.number().int().optional()
            }))
        }),
        description: 'List directory entries (non-recursive).'
    }
};
21
/**
 * Builds the tool specs advertised to the model for the enabled built-in tools.
 *
 * @param enabled Canonical names of the enabled built-in tools.
 * @param session Sandbox session; `bash` is skipped when its executor is 'unavailable'.
 * @returns One `{ name, description, parameters }` spec per remaining tool, where
 *   `parameters` is the JSON Schema of the tool's input schema.
 */
export function getBuiltinToolSpecs(enabled, session) {
    const specs = [];
    for (const name of enabled) {
        // Do not offer a shell when the sandbox cannot execute commands.
        if (name === 'bash' && session.executor === 'unavailable') {
            continue;
        }
        const definition = schemas[name];
        specs.push({
            name,
            description: definition.description,
            parameters: z.toJSONSchema(definition.input)
        });
    }
    return specs;
}
28
/**
 * Runs a built-in tool against a sandbox session.
 *
 * @param nameOrAlias Tool name or alias as listed in BUILTIN_ALIAS_TO_CANONICAL.
 * @param input Raw tool input; validated with the tool's input schema.
 * @param session Sandbox session the tool operates on.
 * @param signal Optional abort signal, forwarded to `bash` execution.
 * @returns The tool's output, validated with the tool's output schema.
 * @throws ToolNotFoundError when the name is not a known tool or alias.
 * @throws ValidationError when the input fails validation, when `edit` does not
 *   find exactly one match, or when the `grep` pattern is not a valid regex.
 * @throws SandboxNoExecutorError when `bash` is invoked without an executor.
 */
export async function invokeBuiltinTool(nameOrAlias, input, session, signal) {
    // Own-property lookup only: inherited keys such as 'constructor' or
    // 'toString' are not aliases and must be reported as unknown tools.
    const canonical = Object.prototype.hasOwnProperty.call(BUILTIN_ALIAS_TO_CANONICAL, nameOrAlias)
        ? BUILTIN_ALIAS_TO_CANONICAL[nameOrAlias]
        : undefined;
    if (!canonical)
        throw new ToolNotFoundError('Built-in tool was not found.', { tool_id: nameOrAlias, where: 'model_response' });
    const name = canonical;
    try {
        switch (name) {
            case 'bash': {
                if (session.executor === 'unavailable')
                    throw new SandboxNoExecutorError('Sandbox executor unavailable.', { session_id: 'unknown' });
                const parsed = schemas.bash.input.parse(input);
                // Only pass the options that were actually provided.
                const res = await session.exec(parsed.command, {
                    ...(parsed.cwd !== undefined ? { cwd: parsed.cwd } : {}),
                    ...(parsed.timeoutMs !== undefined ? { timeoutMs: parsed.timeoutMs } : {}),
                    ...(signal ? { signal } : {})
                });
                return schemas.bash.output.parse({ stdout: res.stdout, stderr: res.stderr, exitCode: res.exitCode });
            }
            case 'read': {
                const parsed = schemas.read.input.parse(input);
                return schemas.read.output.parse({ content: await session.readText(parsed.path, parsed.encoding) });
            }
            case 'write': {
                const parsed = schemas.write.input.parse(input);
                await session.write(parsed.path, parsed.content);
                return schemas.write.output.parse({ bytesWritten: new TextEncoder().encode(parsed.content).byteLength });
            }
            case 'edit': {
                const parsed = schemas.edit.input.parse(input);
                const content = await session.readText(parsed.path);
                const count = content.split(parsed.old_string).length - 1;
                if (count !== 1)
                    throw new ValidationError('edit requires exactly one match', { where: 'tool_input', issues: { path: parsed.path, matches: count } });
                // A replacer function inserts new_string verbatim; a replacement
                // *string* would expand patterns such as `$&`, `$1` or `$$`.
                await session.write(parsed.path, content.replace(parsed.old_string, () => parsed.new_string));
                return { replaced: 1 };
            }
            case 'glob': {
                const parsed = schemas.glob.input.parse(input);
                const files = await session.list(parsed.root, { recursive: true, glob: parsed.pattern });
                return schemas.glob.output.parse({ paths: files.map((f) => f.path) });
            }
            case 'grep': {
                const parsed = schemas.grep.input.parse(input);
                let rx;
                try {
                    rx = new RegExp(parsed.pattern);
                }
                catch (error) {
                    throw new ValidationError('grep pattern must be a valid regular expression', {
                        where: 'tool_input',
                        issues: [{ path: 'pattern', message: error instanceof Error ? error.message : 'Invalid regular expression' }]
                    });
                }
                const entries = await session.list(parsed.path, { recursive: true });
                const matches = [];
                for (const entry of entries) {
                    if (entry.kind !== 'file')
                        continue;
                    const lines = (await session.readText(entry.path)).split('\n');
                    for (let i = 0; i < lines.length; i += 1) {
                        const currentLine = lines[i];
                        if (currentLine !== undefined && rx.test(currentLine))
                            matches.push({ path: entry.path, line: i + 1, text: currentLine });
                        // Stop scanning as soon as the result cap is reached.
                        if (matches.length >= parsed.maxResults)
                            return schemas.grep.output.parse({ matches });
                    }
                }
                return schemas.grep.output.parse({ matches });
            }
            case 'list': {
                const parsed = schemas.list.input.parse(input);
                const entries = await session.list(parsed.path);
                return schemas.list.output.parse({
                    entries: entries.map((entry) => ({ name: entry.name, kind: entry.kind, ...(entry.size !== undefined ? { size: entry.size } : {}) }))
                });
            }
            default:
                throw new ToolNotFoundError('Built-in tool was not found.', { tool_id: name, where: 'registry' });
        }
    }
    catch (error) {
        // Convert schema failures into the package's ValidationError with plain-JSON issues.
        if (error instanceof z.ZodError)
            throw new ValidationError('Tool input validation failed', { where: 'tool_input', issues: JSON.parse(JSON.stringify(error.issues)) });
        throw error;
    }
}
114
/**
 * Wraps an error as a `tool` role message.
 *
 * The message has an empty `sessionId` and `content`, an id of the form
 * `msg_<epoch milliseconds>`, and one tool result carrying the serialized error.
 * NOTE(review): two calls within the same millisecond produce the same id;
 * confirm whether callers assign their own id before persisting.
 *
 * @param toolCallId Id of the tool call that failed.
 * @param error The failure to serialize.
 */
export function toToolErrorMessage(toolCallId, error) {
    const id = `msg_${Date.now()}`;
    const toolResults = [{ toolCallId, error: serializeError(error) }];
    const timestamp = new Date().toISOString();
    return { id, sessionId: '', role: 'tool', content: '', toolResults, timestamp };
}
@@ -0,0 +1,2 @@
1
+ import type { ResolvedMcpHttpTool, McpTransportRunner } from './runner.js';
2
+ /** Returns an `McpTransportRunner` for the given resolved HTTP MCP tool configuration. */ export declare function createHttpMcpTransportRunner(config: ResolvedMcpHttpTool): McpTransportRunner;