@purista/harness 1.2.6 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/agents/index.d.ts +7 -1
- package/dist/agents/index.js +126 -44
- package/dist/errors/catalog.d.ts +18 -2
- package/dist/errors/catalog.js +10 -0
- package/dist/eval/index.d.ts +3 -3
- package/dist/eval/index.js +15 -1
- package/dist/harness/defineHarness.d.ts +149 -3
- package/dist/harness/defineHarness.js +110 -1
- package/dist/index.d.ts +38 -18
- package/dist/index.js +30 -16
- package/dist/local/index.d.ts +36 -0
- package/dist/local/index.js +24 -0
- package/dist/local/local-sandbox.d.ts +25 -0
- package/dist/local/local-sandbox.js +368 -0
- package/dist/local/local-workspace.d.ts +56 -0
- package/dist/local/local-workspace.js +496 -0
- package/dist/local/ref-hash.d.ts +6 -0
- package/dist/local/ref-hash.js +9 -0
- package/dist/local/sqlite-storage.d.ts +106 -0
- package/dist/local/sqlite-storage.js +680 -0
- package/dist/models/adapter-utils.d.ts +52 -0
- package/dist/models/adapter-utils.js +81 -0
- package/dist/models/registry.js +28 -37
- package/dist/models/stream-pump.d.ts +16 -0
- package/dist/models/stream-pump.js +77 -0
- package/dist/ports/base-model-provider.d.ts +7 -1
- package/dist/ports/base-model-provider.js +384 -87
- package/dist/ports/capabilities.d.ts +16 -2
- package/dist/ports/context-checkpoints.d.ts +63 -0
- package/dist/ports/context-checkpoints.js +33 -0
- package/dist/ports/index.d.ts +1 -0
- package/dist/ports/index.js +1 -0
- package/dist/ports/model-provider.d.ts +94 -0
- package/dist/runtime/durable.d.ts +11 -0
- package/dist/runtime/durable.js +15 -2
- package/dist/runtime/sessionDurable.js +47 -21
- package/dist/runtime/steps.d.ts +22 -1
- package/dist/runtime/steps.js +53 -2
- package/dist/sessions/index.d.ts +17 -6
- package/dist/sessions/index.js +345 -84
- package/dist/skills/index.d.ts +0 -2
- package/dist/skills/index.js +0 -8
- package/dist/state/in-memory.js +6 -6
- package/dist/telemetry/shim.js +2 -6
- package/dist/telemetry/span-attrs.d.ts +9 -0
- package/dist/telemetry/span-attrs.js +27 -0
- package/dist/testing/durableWorkspaceStoreContract.js +69 -0
- package/dist/testing/fakeLogger.d.ts +29 -0
- package/dist/testing/fakeLogger.js +47 -0
- package/dist/testing/fakeSandbox.d.ts +27 -0
- package/dist/testing/fakeSandbox.js +153 -0
- package/dist/testing/fakeStateStore.d.ts +36 -0
- package/dist/testing/fakeStateStore.js +66 -0
- package/dist/testing/index.d.ts +10 -4
- package/dist/testing/index.js +14 -4
- package/dist/testing/loggerContract.d.ts +9 -0
- package/dist/testing/loggerContract.js +62 -0
- package/dist/testing/modelProviderContract.d.ts +12 -0
- package/dist/testing/modelProviderContract.js +222 -0
- package/dist/testing/recordEvents.d.ts +3 -0
- package/dist/testing/recordEvents.js +8 -0
- package/dist/testing/stateStoreContract.js +27 -0
- package/dist/tools/index.js +26 -1
- package/dist/tools/mcp/http.d.ts +2 -0
- package/dist/tools/mcp/http.js +34 -21
- package/dist/tools/mcp/runner.d.ts +4 -0
- package/dist/tools/mcp/runner.js +75 -21
- package/dist/tools/mcp/stdio.d.ts +7 -1
- package/dist/tools/mcp/stdio.js +102 -23
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workspace/in-memory.d.ts +1 -0
- package/dist/workspace/in-memory.js +47 -12
- package/package.json +5 -4
package/dist/skills/index.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { JsonValue } from '../models/json.js';
|
|
2
1
|
import type { DiscoveredSkills, DiscoverSkillsOptions, ResolvedSkill, SkillDefinition } from '../harness/defineHarness.js';
|
|
3
2
|
import type { SandboxSession } from '../sandbox/index.js';
|
|
4
3
|
export declare function loadSkillsSync(skills: Record<string, SkillDefinition>): Record<string, ResolvedSkill>;
|
|
@@ -6,4 +5,3 @@ export declare function loadSkills(skills: Record<string, SkillDefinition>): Pro
|
|
|
6
5
|
export declare function mountSkillsOnce(session: SandboxSession, mounted: Set<string>, skills: Record<string, ResolvedSkill>, skillIds: readonly string[]): Promise<void>;
|
|
7
6
|
export declare function buildSkillIndex(skills: Record<string, ResolvedSkill>, ids: readonly string[]): string;
|
|
8
7
|
export declare function discoverSkills(options?: DiscoverSkillsOptions): Promise<DiscoveredSkills>;
|
|
9
|
-
export declare function assertSerializable(value: unknown): asserts value is JsonValue;
|
package/dist/skills/index.js
CHANGED
|
@@ -319,11 +319,3 @@ export async function discoverSkills(options = {}) {
|
|
|
319
319
|
}
|
|
320
320
|
return { skills, diagnostics };
|
|
321
321
|
}
|
|
322
|
-
export function assertSerializable(value) {
|
|
323
|
-
try {
|
|
324
|
-
JSON.stringify(value);
|
|
325
|
-
}
|
|
326
|
-
catch {
|
|
327
|
-
throw new SkillManifestError('Non-serializable value', { reason: 'invalid_frontmatter', directory: '' });
|
|
328
|
-
}
|
|
329
|
-
}
|
package/dist/state/in-memory.js
CHANGED
|
@@ -41,7 +41,7 @@ export class InMemoryStateStore {
|
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
43
|
async appendMessages(sessionId, messages) {
|
|
44
|
-
return this.withMessageLock(sessionId, async () => {
|
|
44
|
+
return this.withMessageLock(sessionId, 'appendMessages', async () => {
|
|
45
45
|
const current = this.messages.get(sessionId) ?? [];
|
|
46
46
|
const ids = new Set(current.map((msg) => msg.id));
|
|
47
47
|
for (const message of messages) {
|
|
@@ -68,16 +68,16 @@ export class InMemoryStateStore {
|
|
|
68
68
|
return rows;
|
|
69
69
|
}
|
|
70
70
|
async clearMessages(sessionId) {
|
|
71
|
-
return this.withMessageLock(sessionId, async () => {
|
|
71
|
+
return this.withMessageLock(sessionId, 'clearMessages', async () => {
|
|
72
72
|
this.messages.delete(sessionId);
|
|
73
73
|
});
|
|
74
74
|
}
|
|
75
75
|
async replaceMessages(sessionId, messages) {
|
|
76
|
-
return this.withMessageLock(sessionId, async () => {
|
|
76
|
+
return this.withMessageLock(sessionId, 'replaceMessages', async () => {
|
|
77
77
|
const ids = new Set();
|
|
78
78
|
for (const message of messages) {
|
|
79
79
|
if (ids.has(message.id)) {
|
|
80
|
-
throw new StateError('Duplicate message id.', { op: '
|
|
80
|
+
throw new StateError('Duplicate message id.', { op: 'replaceMessages', reason: 'duplicate_message_id' });
|
|
81
81
|
}
|
|
82
82
|
ids.add(message.id);
|
|
83
83
|
}
|
|
@@ -137,7 +137,7 @@ export class InMemoryStateStore {
|
|
|
137
137
|
this.events.clear();
|
|
138
138
|
this.messageLocks.clear();
|
|
139
139
|
}
|
|
140
|
-
async withMessageLock(sessionId, fn) {
|
|
140
|
+
async withMessageLock(sessionId, op, fn) {
|
|
141
141
|
let lock = this.messageLocks.get(sessionId);
|
|
142
142
|
if (!lock) {
|
|
143
143
|
lock = new Mutex();
|
|
@@ -149,7 +149,7 @@ export class InMemoryStateStore {
|
|
|
149
149
|
catch (error) {
|
|
150
150
|
if (error instanceof StateError)
|
|
151
151
|
throw error;
|
|
152
|
-
throw new StateError('State store operation failed.', { op
|
|
152
|
+
throw new StateError('State store operation failed.', { op }, error);
|
|
153
153
|
}
|
|
154
154
|
}
|
|
155
155
|
}
|
package/dist/telemetry/shim.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { SpanStatusCode, context, metrics, propagation, trace } from '@opentelemetry/api';
|
|
2
2
|
import { ATTR_ERROR_TYPE } from '@opentelemetry/semantic-conventions';
|
|
3
3
|
import { HarnessError } from '../errors/index.js';
|
|
4
|
-
import {
|
|
4
|
+
import { sanitizeProviderBody } from '../errors/redaction.js';
|
|
5
5
|
import { HARNESS_VERSION } from '../version.js';
|
|
6
6
|
function sanitizeAttrs(attrs) {
|
|
7
7
|
const out = {};
|
|
@@ -83,11 +83,7 @@ export class OtelTelemetryShim {
|
|
|
83
83
|
}
|
|
84
84
|
catch (error) {
|
|
85
85
|
span.setAttributes(sanitizeAttrs(errorAttributes(error)));
|
|
86
|
-
const recordedError = error instanceof
|
|
87
|
-
? new Error(error.message)
|
|
88
|
-
: error instanceof Error
|
|
89
|
-
? new Error(error.message)
|
|
90
|
-
: new Error(String(error));
|
|
86
|
+
const recordedError = new Error(error instanceof Error ? error.message : String(error));
|
|
91
87
|
span.recordException(recordedError);
|
|
92
88
|
span.setStatus({ code: SpanStatusCode.ERROR, message: recordedError.message });
|
|
93
89
|
throw error;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { JsonValue } from '../models/json.js';
|
|
2
|
+
/**
 * Converts caller-supplied invoke metadata into namespaced span attributes.
 *
 * Only scalar values survive: strings up to 256 chars, finite numbers, and
 * booleans. Keys must look like attribute identifiers; everything else is
 * dropped so unvetted metadata can never grow spans without bound.
 *
 * @param metadata - Caller-supplied metadata; `undefined` is accepted.
 * @returns The span attributes built from the entries that passed the key
 *   and value filters described above.
 */
export declare function metadataSpanAttrs(metadata: Readonly<Record<string, JsonValue>> | undefined): Record<string, string | number | boolean | undefined>;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Converts caller-supplied invoke metadata into namespaced span attributes.
|
|
3
|
+
*
|
|
4
|
+
* Only scalar values survive: strings up to 256 chars, finite numbers, and
|
|
5
|
+
* booleans. Keys must look like attribute identifiers; everything else is
|
|
6
|
+
* dropped so unvetted metadata can never grow spans without bound.
|
|
7
|
+
*/
|
|
8
|
+
export function metadataSpanAttrs(metadata) {
|
|
9
|
+
const attrs = {};
|
|
10
|
+
for (const [key, value] of Object.entries(metadata ?? {})) {
|
|
11
|
+
if (!/^[a-zA-Z][a-zA-Z0-9_.-]{0,63}$/.test(key))
|
|
12
|
+
continue;
|
|
13
|
+
if (typeof value === 'string') {
|
|
14
|
+
if (value.length <= 256)
|
|
15
|
+
attrs[`harness.metadata.${key}`] = value;
|
|
16
|
+
continue;
|
|
17
|
+
}
|
|
18
|
+
if (typeof value === 'number' && Number.isFinite(value)) {
|
|
19
|
+
attrs[`harness.metadata.${key}`] = value;
|
|
20
|
+
continue;
|
|
21
|
+
}
|
|
22
|
+
if (typeof value === 'boolean') {
|
|
23
|
+
attrs[`harness.metadata.${key}`] = value;
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
return attrs;
|
|
27
|
+
}
|
|
@@ -25,6 +25,75 @@ export function durableWorkspaceStoreContract(make) {
|
|
|
25
25
|
meta: { reason: 'idempotency_conflict' }
|
|
26
26
|
});
|
|
27
27
|
});
|
|
28
|
+
it('replays pause and resume results for repeated idempotency keys', async () => {
|
|
29
|
+
const adapter = await make();
|
|
30
|
+
const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
|
|
31
|
+
const checkpoint = await adapter.pauseWorkspace({ handle, stepId: 'step-1', sequence: 1, attempt: 1, reason: 'step_completed', idempotencyKey: 'pause', signal });
|
|
32
|
+
const replayedCheckpoint = await adapter.pauseWorkspace({ handle, stepId: 'step-1', sequence: 1, attempt: 1, reason: 'step_completed', idempotencyKey: 'pause', signal });
|
|
33
|
+
expect(replayedCheckpoint.checkpointRef).toBe(checkpoint.checkpointRef);
|
|
34
|
+
expect(replayedCheckpoint.committedAt).toBe(checkpoint.committedAt);
|
|
35
|
+
const resumed = await adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, sessionId: 's', runId: 'r2', attempt: 2, idempotencyKey: 'resume', signal });
|
|
36
|
+
const replayedResume = await adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, sessionId: 's', runId: 'r2', attempt: 2, idempotencyKey: 'resume', signal });
|
|
37
|
+
expect(replayedResume.workspaceRef).toBe(resumed.workspaceRef);
|
|
38
|
+
expect(replayedResume.startedAt).toBe(resumed.startedAt);
|
|
39
|
+
});
|
|
40
|
+
it('conflicts when pause reuses a key under a different run/session', async () => {
|
|
41
|
+
const adapter = await make();
|
|
42
|
+
const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
|
|
43
|
+
await adapter.pauseWorkspace({ handle, stepId: 'step-1', sequence: 1, attempt: 1, reason: 'step_completed', idempotencyKey: 'op', signal });
|
|
44
|
+
const otherHandle = { ...handle, runId: 'r2', sessionId: 's2' };
|
|
45
|
+
await expect(adapter.pauseWorkspace({ handle: otherHandle, stepId: 'step-1', sequence: 1, attempt: 1, reason: 'step_completed', idempotencyKey: 'op', signal })).rejects.toMatchObject({
|
|
46
|
+
constructor: WorkspaceError,
|
|
47
|
+
meta: { reason: 'idempotency_conflict' }
|
|
48
|
+
});
|
|
49
|
+
});
|
|
50
|
+
it('conflicts when resume reuses a key under a different run/session', async () => {
|
|
51
|
+
const adapter = await make();
|
|
52
|
+
const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
|
|
53
|
+
await adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, sessionId: 's', runId: 'r2', attempt: 2, idempotencyKey: 'op', signal });
|
|
54
|
+
await expect(adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, sessionId: 's3', runId: 'r3', attempt: 2, idempotencyKey: 'op', signal })).rejects.toMatchObject({
|
|
55
|
+
constructor: WorkspaceError,
|
|
56
|
+
meta: { reason: 'idempotency_conflict' }
|
|
57
|
+
});
|
|
58
|
+
});
|
|
59
|
+
it('conflicts when abort reuses a key under a different run/session', async () => {
|
|
60
|
+
const adapter = await make();
|
|
61
|
+
const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
|
|
62
|
+
await adapter.abortWorkspace?.({ workspaceRef: handle.workspaceRef, runId: 'r', sessionId: 's', reason: 'cancelled', idempotencyKey: 'op', signal });
|
|
63
|
+
await expect(adapter.abortWorkspace?.({ workspaceRef: handle.workspaceRef, runId: 'r2', sessionId: 's2', reason: 'cancelled', idempotencyKey: 'op', signal })).rejects.toMatchObject({
|
|
64
|
+
constructor: WorkspaceError,
|
|
65
|
+
meta: { reason: 'idempotency_conflict' }
|
|
66
|
+
});
|
|
67
|
+
});
|
|
68
|
+
it('conflicts when a key crosses operation kinds (pause then resume)', async () => {
|
|
69
|
+
const adapter = await make();
|
|
70
|
+
const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
|
|
71
|
+
await adapter.pauseWorkspace({ handle, stepId: 'step-1', sequence: 1, attempt: 1, reason: 'step_completed', idempotencyKey: 'shared', signal });
|
|
72
|
+
await expect(adapter.resumeWorkspace({ workspaceRef: handle.workspaceRef, sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'shared', signal })).rejects.toMatchObject({
|
|
73
|
+
constructor: WorkspaceError,
|
|
74
|
+
meta: { reason: 'idempotency_conflict' }
|
|
75
|
+
});
|
|
76
|
+
});
|
|
77
|
+
it('inspects a workspace through one of its checkpoint refs', async () => {
|
|
78
|
+
const adapter = await make();
|
|
79
|
+
const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
|
|
80
|
+
const checkpoint = await adapter.pauseWorkspace({ handle, stepId: 'step-1', sequence: 1, attempt: 1, reason: 'step_completed', idempotencyKey: 'pause', signal });
|
|
81
|
+
const inspection = await adapter.inspectWorkspace?.({ checkpointRef: checkpoint.checkpointRef, signal });
|
|
82
|
+
expect(inspection?.workspaceRef).toBe(handle.workspaceRef);
|
|
83
|
+
expect(inspection?.checkpoints.map((item) => item.checkpointRef)).toContain(checkpoint.checkpointRef);
|
|
84
|
+
});
|
|
85
|
+
it('rejects pause on an aborted workspace and replays the abort result', async () => {
|
|
86
|
+
const adapter = await make();
|
|
87
|
+
const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
|
|
88
|
+
const aborted = await adapter.abortWorkspace?.({ workspaceRef: handle.workspaceRef, runId: 'r', sessionId: 's', reason: 'cancelled', idempotencyKey: 'abort', signal });
|
|
89
|
+
const abortedAgain = await adapter.abortWorkspace?.({ workspaceRef: handle.workspaceRef, runId: 'r', sessionId: 's', reason: 'cancelled', idempotencyKey: 'abort', signal });
|
|
90
|
+
expect(abortedAgain?.state).toBe('aborted');
|
|
91
|
+
expect(abortedAgain?.abortedAt).toBe(aborted?.abortedAt);
|
|
92
|
+
await expect(adapter.pauseWorkspace({ handle, stepId: 'step-1', sequence: 1, attempt: 1, reason: 'step_completed', idempotencyKey: 'pause', signal })).rejects.toMatchObject({
|
|
93
|
+
constructor: WorkspaceError,
|
|
94
|
+
meta: { reason: 'aborted' }
|
|
95
|
+
});
|
|
96
|
+
});
|
|
28
97
|
it('blocks resume after abort and is idempotent on repeated cleanup', async () => {
|
|
29
98
|
const adapter = await make();
|
|
30
99
|
const handle = await adapter.startWorkspace({ sessionId: 's', runId: 'r', attempt: 1, idempotencyKey: 'start', signal });
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { Logger, LogLevel } from '../logger/index.js';
|
|
2
|
+
/** Single log record captured by {@link FakeLogger}. */
export interface FakeLogRecord {
    /** Level of the logger method that produced the record. */
    level: LogLevel;
    /** Message text passed to the logger method. */
    msg: string;
    /** RFC3339 timestamp of the record. */
    time: string;
    /** Effective bindings (parent bindings merged with child bindings). */
    bindings: Record<string, unknown>;
    /** Structured fields passed with the log call; absent when none were supplied. */
    fields?: Record<string, unknown>;
}
|
|
12
|
+
/** Logger that captures records in memory for assertions. Child loggers share the parent's record list. */
export declare class FakeLogger implements Logger {
    /** Bindings attached to every record captured through this logger. */
    private readonly bindings;
    /** Captured records in call order. */
    readonly records: FakeLogRecord[];
    /**
     * @param bindings - Bindings attached to every captured record.
     * @param records - Existing record list to append to; optional.
     */
    constructor(bindings?: Record<string, unknown>, records?: FakeLogRecord[]);
    /** Captures a record at `trace` level. */
    trace(msg: string, fields?: Record<string, unknown>): void;
    /** Captures a record at `debug` level. */
    debug(msg: string, fields?: Record<string, unknown>): void;
    /** Captures a record at `info` level. */
    info(msg: string, fields?: Record<string, unknown>): void;
    /** Captures a record at `warn` level. */
    warn(msg: string, fields?: Record<string, unknown>): void;
    /** Captures a record at `error` level. */
    error(msg: string, fields?: Record<string, unknown>): void;
    /** Captures a record at `fatal` level. */
    fatal(msg: string, fields?: Record<string, unknown>): void;
    /** Returns a child logger with additional bindings; it shares this logger's record list. */
    child(bindings: Record<string, unknown>): Logger;
    /** Returns captured records filtered by level. */
    recordsAt(level: LogLevel): FakeLogRecord[];
    /** Clears all captured records. */
    clear(): void;
    /** Internal helper behind the level methods. */
    private capture;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/** Logger that captures records in memory for assertions. Child loggers share the parent's record list. */
|
|
2
|
+
export class FakeLogger {
|
|
3
|
+
bindings;
|
|
4
|
+
records;
|
|
5
|
+
constructor(bindings = {}, records) {
|
|
6
|
+
this.bindings = bindings;
|
|
7
|
+
this.records = records ?? [];
|
|
8
|
+
}
|
|
9
|
+
trace(msg, fields) {
|
|
10
|
+
this.capture('trace', msg, fields);
|
|
11
|
+
}
|
|
12
|
+
debug(msg, fields) {
|
|
13
|
+
this.capture('debug', msg, fields);
|
|
14
|
+
}
|
|
15
|
+
info(msg, fields) {
|
|
16
|
+
this.capture('info', msg, fields);
|
|
17
|
+
}
|
|
18
|
+
warn(msg, fields) {
|
|
19
|
+
this.capture('warn', msg, fields);
|
|
20
|
+
}
|
|
21
|
+
error(msg, fields) {
|
|
22
|
+
this.capture('error', msg, fields);
|
|
23
|
+
}
|
|
24
|
+
fatal(msg, fields) {
|
|
25
|
+
this.capture('fatal', msg, fields);
|
|
26
|
+
}
|
|
27
|
+
child(bindings) {
|
|
28
|
+
return new FakeLogger({ ...this.bindings, ...bindings }, this.records);
|
|
29
|
+
}
|
|
30
|
+
/** Returns captured records filtered by level. */
|
|
31
|
+
recordsAt(level) {
|
|
32
|
+
return this.records.filter((record) => record.level === level);
|
|
33
|
+
}
|
|
34
|
+
/** Clears all captured records. */
|
|
35
|
+
clear() {
|
|
36
|
+
this.records.length = 0;
|
|
37
|
+
}
|
|
38
|
+
capture(level, msg, fields) {
|
|
39
|
+
this.records.push({
|
|
40
|
+
level,
|
|
41
|
+
msg,
|
|
42
|
+
time: new Date().toISOString(),
|
|
43
|
+
bindings: { ...this.bindings },
|
|
44
|
+
...(fields ? { fields } : {})
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { ExecOptions, ExecResult } from '../harness/types.js';
|
|
2
|
+
import type { AdapterCapability } from '../ports/capabilities.js';
|
|
3
|
+
import type { ExecCapableSandboxSession, Sandbox } from '../sandbox/index.js';
|
|
4
|
+
/** Options for {@link FakeSandbox}. */
export interface FakeSandboxOptions {
    /** Whether sessions report an available command executor. Default: `'available'`. */
    executor?: 'available' | 'unavailable';
    /**
     * Optional scripted exec handler. Defaults to a deterministic `echo`-only executor.
     * The handler may return its result synchronously or as a promise.
     */
    exec?: (command: string, opts?: ExecOptions) => ExecResult | Promise<ExecResult>;
}
|
|
11
|
+
/**
 * Deterministic in-memory sandbox fake with a configurable executor flag.
 *
 * Sessions are typed exec-capable; when constructed with
 * `executor: 'unavailable'` dynamically widened `exec(...)` calls throw
 * `SandboxNoExecutorError`, matching the sandbox contract.
 */
export declare class FakeSandbox implements Sandbox {
    /** Options supplied at construction. */
    private readonly options;
    /** Capabilities advertised by this sandbox. */
    readonly capabilities: readonly AdapterCapability[];
    /** @param options - Executor availability and optional scripted exec handler. */
    constructor(options?: FakeSandboxOptions);
    /**
     * Opens a sandbox session.
     *
     * @param opts - Identifiers of the session and run, plus an optional abort signal.
     * @returns An exec-capable sandbox session.
     */
    open(opts: {
        sessionId: string;
        runId: string;
        signal?: AbortSignal;
    }): Promise<ExecCapableSandboxSession>;
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { OperationCancelledError, SandboxError, SandboxNoExecutorError } from '../errors/index.js';
|
|
3
|
+
/** Returns the current time formatted as an ISO-8601 UTC string. */
function now() {
    const instant = new Date();
    return instant.toISOString();
}
|
|
6
|
+
/**
 * Validates and canonicalises an absolute sandbox path.
 *
 * The input must start with `/`. `.`/`..` segments and repeated slashes are
 * resolved with POSIX rules, and a trailing slash is dropped (except for the
 * root itself) so that `/dir/` and `/dir` address the same entry.
 *
 * @param input Absolute path supplied by the caller.
 * @returns The canonical absolute path.
 * @throws SandboxError with `reason: 'invalid_path'` when the path is not absolute.
 */
function normalizePath(input) {
    if (!input.startsWith('/'))
        throw new SandboxError('Invalid path', { reason: 'invalid_path' });
    const normalized = path.posix.normalize(input);
    if (!normalized.startsWith('/'))
        throw new SandboxError('Invalid path', { reason: 'invalid_path' });
    // posix.normalize preserves one trailing slash; strip it so lookups keyed
    // by the slash-less form (how entries are stored) succeed.
    if (normalized.length > 1 && normalized.endsWith('/'))
        return normalized.slice(0, -1);
    return normalized;
}
|
|
14
|
+
/**
 * Deterministic default executor: supports `echo <text>`; everything else exits 127.
 *
 * The command name is the first whitespace-delimited token, so `echo` followed
 * by a tab (or any other whitespace) is treated the same as `echo` followed by
 * a space.
 *
 * @param command Raw command line.
 * @returns For `echo`: the trimmed remainder of the line plus a newline on
 *   stdout with exit code 0. Otherwise: `command not found: <name>` on stderr
 *   with exit code 127. `durationSeconds` is always 0.
 */
function defaultExec(command) {
    const trimmed = command.trim();
    const [name = ''] = trimmed.split(/\s+/);
    if (name === 'echo') {
        return { stdout: `${trimmed.slice(name.length).trim()}\n`, stderr: '', exitCode: 0, durationSeconds: 0 };
    }
    return { stdout: '', stderr: `command not found: ${name}\n`, exitCode: 127, durationSeconds: 0 };
}
|
|
23
|
+
/**
 * In-memory sandbox session used by the sandbox fake.
 *
 * The filesystem is a flat map from normalised absolute path to a node
 * (`directory`, or `file` with its bytes). Every operation except `close()`
 * first checks that the session is still open.
 */
class FakeSandboxSession {
    sessionId;
    options;
    executor;
    closed = false;
    // Seeded with the root directory.
    fs = new Map([['/', { kind: 'directory', modifiedAt: now() }]]);
    /**
     * @param sessionId Identifier reported in executor-unavailable errors.
     * @param options Sandbox options; `executor` defaults to `'available'`.
     */
    constructor(sessionId, options) {
        this.sessionId = sessionId;
        this.options = options;
        this.executor = options.executor ?? 'available';
    }
    /** Returns a copy of the file's bytes; throws `SandboxError` (`fs_failed`) when the path is not a file. */
    async read(filePath) {
        this.assertOpen();
        const node = this.fs.get(normalizePath(filePath));
        if (!node || node.kind !== 'file')
            throw new SandboxError('File not found', { reason: 'fs_failed' });
        return new Uint8Array(node.data);
    }
    /** Reads a file and decodes it with `TextDecoder` defaults. */
    async readText(filePath) {
        return new TextDecoder().decode(await this.read(filePath));
    }
    /** Writes (or overwrites) a file, creating missing parent directories. Strings are encoded with `TextEncoder`. */
    async write(filePath, data) {
        this.assertOpen();
        const target = normalizePath(filePath);
        this.ensureParent(target);
        // Copy binary input so later mutation by the caller cannot change the stored file.
        const bytes = typeof data === 'string' ? new TextEncoder().encode(data) : new Uint8Array(data);
        this.fs.set(target, { kind: 'file', data: bytes, modifiedAt: now() });
    }
    /**
     * Removes an entry. With `opts.recursive` the entry and everything below
     * it is removed; otherwise only the entry itself. Missing paths are ignored.
     */
    async remove(filePath, opts) {
        this.assertOpen();
        const target = normalizePath(filePath);
        if (opts?.recursive) {
            // For the root the descendant prefix is "/" itself; "//" would match nothing.
            const prefix = target === '/' ? '/' : `${target}/`;
            for (const key of [...this.fs.keys()]) {
                if (key === target || key.startsWith(prefix))
                    this.fs.delete(key);
            }
            return;
        }
        this.fs.delete(target);
    }
    /**
     * Lists entries below `rootPath`, sorted by path.
     *
     * Only direct children are returned unless `opts.recursive` is set.
     * `opts.glob` filters on the absolute entry path: `*` matches any run of
     * characters, every other character is matched literally, and the pattern
     * may match anywhere in the path.
     */
    async list(rootPath, opts) {
        this.assertOpen();
        const root = normalizePath(rootPath);
        const prefix = root === '/' ? '/' : `${root}/`;
        // Build the matcher once; escape every regex metacharacter except `*`
        // so globs such as "c++/*.h" or "file(1).txt" neither throw nor mis-match.
        const matcher = opts?.glob
            ? new RegExp(opts.glob.replace(/[.+?^${}()|[\]\\]/g, '\\$&').replace(/\*/g, '.*'))
            : undefined;
        const out = [];
        for (const [entryPath, node] of this.fs.entries()) {
            if (entryPath === root)
                continue;
            if (!entryPath.startsWith(prefix))
                continue;
            const relative = entryPath.slice(prefix.length);
            if (!opts?.recursive && relative.includes('/'))
                continue;
            if (matcher && !matcher.test(entryPath))
                continue;
            out.push({
                name: entryPath.split('/').at(-1) ?? '',
                path: entryPath,
                kind: node.kind,
                ...(node.kind === 'file' ? { size: node.data.byteLength } : {})
            });
        }
        return out.sort((a, b) => a.path.localeCompare(b.path));
    }
    /** Returns kind, size (0 for directories) and modification time; throws `SandboxError` (`fs_failed`) when missing. */
    async stat(filePath) {
        this.assertOpen();
        const node = this.fs.get(normalizePath(filePath));
        if (!node)
            throw new SandboxError('Path not found', { reason: 'fs_failed' });
        return { kind: node.kind, size: node.kind === 'file' ? node.data.byteLength : 0, modifiedAt: node.modifiedAt };
    }
    /** Reports whether a file or directory exists at the path. */
    async exists(filePath) {
        this.assertOpen();
        return this.fs.has(normalizePath(filePath));
    }
    /** Writes every entry of `files` (relative path -> data) below `atPath`. */
    async mount(files, atPath) {
        this.assertOpen();
        const base = normalizePath(atPath);
        for (const [rel, data] of files.entries()) {
            // A leading slash on the relative path is tolerated.
            const relNorm = rel.startsWith('/') ? rel.slice(1) : rel;
            await this.write(`${base}/${relNorm}`, data);
        }
    }
    /**
     * Runs a command through the scripted handler from the options, or the
     * default executor when none was supplied.
     *
     * @throws SandboxNoExecutorError when the executor is not `'available'`.
     * @throws OperationCancelledError when `opts.signal` is already aborted.
     */
    async exec(command, opts) {
        this.assertOpen();
        if (this.executor !== 'available') {
            throw new SandboxNoExecutorError('Sandbox executor unavailable.', { session_id: this.sessionId });
        }
        if (opts?.signal?.aborted) {
            throw new OperationCancelledError('Sandbox exec was cancelled.', { scope: 'sandbox' });
        }
        return (this.options.exec ?? defaultExec)(command, opts);
    }
    /** Marks the session closed; later operations throw `SandboxError` (`session_closed`). */
    async close() {
        this.closed = true;
    }
    /** Throws when the session has been closed. */
    assertOpen() {
        if (this.closed)
            throw new SandboxError('Sandbox session is closed.', { reason: 'session_closed' });
    }
    /** Creates a directory node for every missing ancestor of `filePath`. */
    ensureParent(filePath) {
        const parts = normalizePath(filePath).split('/').filter(Boolean);
        let current = '/';
        // Stop before the last segment: that one is the entry being written.
        for (let i = 0; i < parts.length - 1; i += 1) {
            current = current === '/' ? `/${parts[i]}` : `${current}/${parts[i]}`;
            if (!this.fs.has(current))
                this.fs.set(current, { kind: 'directory', modifiedAt: now() });
        }
    }
}
|
|
132
|
+
/**
|
|
133
|
+
* Deterministic in-memory sandbox fake with a configurable executor flag.
|
|
134
|
+
*
|
|
135
|
+
* Sessions are typed exec-capable; when constructed with
|
|
136
|
+
* `executor: 'unavailable'` dynamically widened `exec(...)` calls throw
|
|
137
|
+
* `SandboxNoExecutorError`, matching the sandbox contract.
|
|
138
|
+
*/
|
|
139
|
+
export class FakeSandbox {
|
|
140
|
+
options;
|
|
141
|
+
capabilities;
|
|
142
|
+
constructor(options = {}) {
|
|
143
|
+
this.options = options;
|
|
144
|
+
this.capabilities = (options.executor ?? 'available') === 'available'
|
|
145
|
+
? ['sandbox.fs', 'sandbox.exec']
|
|
146
|
+
: ['sandbox.fs'];
|
|
147
|
+
}
|
|
148
|
+
async open(opts) {
|
|
149
|
+
// The session reports the configured executor flag at runtime; the static
|
|
150
|
+
// exec-capable session type mirrors `SandboxSession` dynamic widening.
|
|
151
|
+
return new FakeSandboxSession(opts.sessionId, this.options);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { InMemoryStateStore } from '../state/in-memory.js';
|
|
2
|
+
import type { Message, PersistedRunEvent, RunRecord, SessionRecord } from '../models/state.js';
|
|
3
|
+
import type { FinishRunPatch } from '../ports/state.js';
|
|
4
|
+
/**
 * Operation names recorded by {@link FakeStateStore}.
 * Each name is identical to the store method that records it.
 */
export type FakeStateStoreOp = 'getSession' | 'upsertSession' | 'closeSession' | 'appendMessages' | 'listMessages' | 'clearMessages' | 'replaceMessages' | 'createRun' | 'finishRun' | 'getRun' | 'listRuns' | 'appendEvents' | 'listEvents';
|
|
6
|
+
/**
 * In-memory state store that records every invoked operation for test inspection.
 *
 * Every method declared below overrides the {@link InMemoryStateStore}
 * method of the same name; the names recorded in {@link FakeStateStore.ops}
 * are those listed in {@link FakeStateStoreOp}.
 */
export declare class FakeStateStore extends InMemoryStateStore {
    /** Ordered list of operations invoked on this store. */
    readonly ops: FakeStateStoreOp[];
    /** Recorded as `'getSession'`. */
    getSession(id: string): Promise<SessionRecord | undefined>;
    /** Recorded as `'upsertSession'`. */
    upsertSession(record: SessionRecord): Promise<void>;
    /** Recorded as `'closeSession'`. */
    closeSession(id: string): Promise<void>;
    /** Recorded as `'appendMessages'`. */
    appendMessages(sessionId: string, messages: Message[]): Promise<void>;
    /** Recorded as `'listMessages'`. */
    listMessages(sessionId: string, opts?: {
        limit?: number;
        before?: string;
    }): Promise<Message[]>;
    /** Recorded as `'clearMessages'`. */
    clearMessages(sessionId: string): Promise<void>;
    /** Recorded as `'replaceMessages'`. */
    replaceMessages(sessionId: string, messages: Message[]): Promise<void>;
    /** Recorded as `'createRun'`. */
    createRun(record: RunRecord): Promise<void>;
    /** Recorded as `'finishRun'`. */
    finishRun(runId: string, patch: FinishRunPatch): Promise<void>;
    /** Recorded as `'getRun'`. */
    getRun(runId: string): Promise<RunRecord | undefined>;
    /** Recorded as `'listRuns'`. */
    listRuns(sessionId: string, opts?: {
        limit?: number;
        before?: string;
    }): Promise<RunRecord[]>;
    /** Recorded as `'appendEvents'`. */
    appendEvents(runId: string, events: PersistedRunEvent[]): Promise<void>;
    /** Recorded as `'listEvents'`. */
    listEvents(runId: string, opts?: {
        limit?: number;
        after?: string;
    }): Promise<PersistedRunEvent[]>;
    /** Returns how often the given operation was invoked. */
    opCount(op: FakeStateStoreOp): number;
    /** Clears the recorded operation log without touching stored data. */
    resetOps(): void;
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { InMemoryStateStore } from '../state/in-memory.js';
|
|
2
|
+
/**
 * State store fake for tests: behaves like `InMemoryStateStore` and keeps an
 * ordered log of which operations were called.
 *
 * Each override appends its own name to `ops` first and then delegates to the
 * parent implementation, so an operation is logged even when the parent
 * call subsequently rejects.
 */
export class FakeStateStore extends InMemoryStateStore {
    /** Names of the invoked operations, oldest first. */
    ops = [];
    /** Logs `getSession`, then delegates. */
    async getSession(id) {
        this.ops.push('getSession');
        return super.getSession(id);
    }
    /** Logs `upsertSession`, then delegates. */
    async upsertSession(record) {
        this.ops.push('upsertSession');
        return super.upsertSession(record);
    }
    /** Logs `closeSession`, then delegates. */
    async closeSession(id) {
        this.ops.push('closeSession');
        return super.closeSession(id);
    }
    /** Logs `appendMessages`, then delegates. */
    async appendMessages(sessionId, messages) {
        this.ops.push('appendMessages');
        return super.appendMessages(sessionId, messages);
    }
    /** Logs `listMessages`, then delegates. */
    async listMessages(sessionId, opts = {}) {
        this.ops.push('listMessages');
        return super.listMessages(sessionId, opts);
    }
    /** Logs `clearMessages`, then delegates. */
    async clearMessages(sessionId) {
        this.ops.push('clearMessages');
        return super.clearMessages(sessionId);
    }
    /** Logs `replaceMessages`, then delegates. */
    async replaceMessages(sessionId, messages) {
        this.ops.push('replaceMessages');
        return super.replaceMessages(sessionId, messages);
    }
    /** Logs `createRun`, then delegates. */
    async createRun(record) {
        this.ops.push('createRun');
        return super.createRun(record);
    }
    /** Logs `finishRun`, then delegates. */
    async finishRun(runId, patch) {
        this.ops.push('finishRun');
        return super.finishRun(runId, patch);
    }
    /** Logs `getRun`, then delegates. */
    async getRun(runId) {
        this.ops.push('getRun');
        return super.getRun(runId);
    }
    /** Logs `listRuns`, then delegates. */
    async listRuns(sessionId, opts = {}) {
        this.ops.push('listRuns');
        return super.listRuns(sessionId, opts);
    }
    /** Logs `appendEvents`, then delegates. */
    async appendEvents(runId, events) {
        this.ops.push('appendEvents');
        return super.appendEvents(runId, events);
    }
    /** Logs `listEvents`, then delegates. */
    async listEvents(runId, opts = {}) {
        this.ops.push('listEvents');
        return super.listEvents(runId, opts);
    }
    /** Counts how many logged operations equal `op`. */
    opCount(op) {
        let total = 0;
        for (const entry of this.ops) {
            if (entry === op)
                total += 1;
        }
        return total;
    }
    /** Empties the operation log in place; stored sessions, messages, runs and events are untouched. */
    resetOps() {
        this.ops.splice(0, this.ops.length);
    }
}
|
package/dist/testing/index.d.ts
CHANGED
|
@@ -1,13 +1,19 @@
|
|
|
1
1
|
export { FakeModelProvider } from './fakeModelProvider.js';
|
|
2
|
+
export { FakeStateStore, type FakeStateStoreOp } from './fakeStateStore.js';
|
|
3
|
+
export { FakeSandbox, type FakeSandboxOptions } from './fakeSandbox.js';
|
|
4
|
+
export { FakeLogger, type FakeLogRecord } from './fakeLogger.js';
|
|
2
5
|
export { FakeMemoryAdapter, memoryAdapterContract } from './fakeMemoryAdapter.js';
|
|
3
6
|
export { InMemoryDurableWorkspaceStore, inMemoryDurableWorkspaceStore } from '../workspace/index.js';
|
|
4
|
-
export { durableWorkspaceStoreContract } from './durableWorkspaceStoreContract.js';
|
|
5
7
|
export { adapterCapabilitiesContract, fakeCapabilityAdapter, type FakeCapabilityAdapter } from './capabilities.js';
|
|
8
|
+
export { fakeSnapshotSandbox, sandboxSnapshotContract } from './sandboxSnapshot.js';
|
|
9
|
+
export { stateStoreContract } from './stateStoreContract.js';
|
|
10
|
+
export { sandboxContract } from './sandboxContract.js';
|
|
11
|
+
export { modelProviderContract } from './modelProviderContract.js';
|
|
12
|
+
export { loggerContract } from './loggerContract.js';
|
|
13
|
+
export { durableWorkspaceStoreContract } from './durableWorkspaceStoreContract.js';
|
|
14
|
+
export { recordEvents } from './recordEvents.js';
|
|
6
15
|
export { createInMemoryFeedbackRecorder } from './feedback.js';
|
|
7
16
|
export { evaluateDeterministicScorer } from '../eval/index.js';
|
|
8
17
|
export type { DeterministicScorerDefinition, ScorerResult, ScorerTarget } from '../eval/index.js';
|
|
9
|
-
export { sandboxContract } from './sandboxContract.js';
|
|
10
|
-
export { fakeSnapshotSandbox, sandboxSnapshotContract } from './sandboxSnapshot.js';
|
|
11
|
-
export { stateStoreContract } from './stateStoreContract.js';
|
|
12
18
|
/** Returns a fresh harness builder for tests. */
|
|
13
19
|
export declare function makeHarness(): import("../harness/defineHarness.js").HarnessBuilder<{}>;
|
package/dist/testing/index.js
CHANGED
|
@@ -1,14 +1,24 @@
|
|
|
1
1
|
import { defineHarness } from '../harness/defineHarness.js';
|
|
2
|
+
// Fakes
|
|
2
3
|
export { FakeModelProvider } from './fakeModelProvider.js';
|
|
4
|
+
export { FakeStateStore } from './fakeStateStore.js';
|
|
5
|
+
export { FakeSandbox } from './fakeSandbox.js';
|
|
6
|
+
export { FakeLogger } from './fakeLogger.js';
|
|
3
7
|
export { FakeMemoryAdapter, memoryAdapterContract } from './fakeMemoryAdapter.js';
|
|
4
8
|
export { InMemoryDurableWorkspaceStore, inMemoryDurableWorkspaceStore } from '../workspace/index.js';
|
|
5
|
-
export { durableWorkspaceStoreContract } from './durableWorkspaceStoreContract.js';
|
|
6
9
|
export { adapterCapabilitiesContract, fakeCapabilityAdapter } from './capabilities.js';
|
|
7
|
-
export { createInMemoryFeedbackRecorder } from './feedback.js';
|
|
8
|
-
export { evaluateDeterministicScorer } from '../eval/index.js';
|
|
9
|
-
export { sandboxContract } from './sandboxContract.js';
|
|
10
10
|
export { fakeSnapshotSandbox, sandboxSnapshotContract } from './sandboxSnapshot.js';
|
|
11
|
+
// Contract suites
|
|
11
12
|
export { stateStoreContract } from './stateStoreContract.js';
|
|
13
|
+
export { sandboxContract } from './sandboxContract.js';
|
|
14
|
+
export { modelProviderContract } from './modelProviderContract.js';
|
|
15
|
+
export { loggerContract } from './loggerContract.js';
|
|
16
|
+
export { durableWorkspaceStoreContract } from './durableWorkspaceStoreContract.js';
|
|
17
|
+
// Helpers
|
|
18
|
+
export { recordEvents } from './recordEvents.js';
|
|
19
|
+
export { createInMemoryFeedbackRecorder } from './feedback.js';
|
|
20
|
+
// AI eval test helpers (re-exported from the main entry for test ergonomics)
|
|
21
|
+
export { evaluateDeterministicScorer } from '../eval/index.js';
|
|
12
22
|
/** Returns a fresh harness builder for tests. */
|
|
13
23
|
export function makeHarness() {
|
|
14
24
|
return defineHarness();
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Logger } from '../logger/index.js';
|
|
2
|
+
/**
|
|
3
|
+
* Shared contract for `Logger` implementations.
|
|
4
|
+
*
|
|
5
|
+
* Record-shape assertions require a capturing logger that exposes its emitted
|
|
6
|
+
* records via a `records` array (e.g. `FakeLogger`); non-capturing loggers are
|
|
7
|
+
* verified for the behavioral contract only.
|
|
8
|
+
*/
|
|
9
|
+
export declare function loggerContract(make: () => Logger): void;
|