@purista/harness 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/index.d.ts +1 -0
- package/dist/agents/index.js +276 -141
- package/dist/errors/catalog.d.ts +4 -3
- package/dist/harness/defineHarness.d.ts +26 -2
- package/dist/harness/defineHarness.js +51 -2
- package/dist/index.d.ts +1 -1
- package/dist/memory/sandbox/index.js +7 -1
- package/dist/models/registry.js +45 -3
- package/dist/ports/base-model-provider.js +2 -0
- package/dist/ports/capabilities.d.ts +2 -0
- package/dist/ports/harness-context.d.ts +1 -0
- package/dist/ports/model-provider.d.ts +4 -0
- package/dist/ports/state.d.ts +6 -0
- package/dist/runtime/abort.d.ts +5 -0
- package/dist/runtime/abort.js +33 -0
- package/dist/runtime/durable.d.ts +2 -0
- package/dist/runtime/durable.js +6 -2
- package/dist/runtime/sessionDurable.d.ts +49 -0
- package/dist/runtime/sessionDurable.js +135 -0
- package/dist/runtime/steps.d.ts +19 -1
- package/dist/runtime/steps.js +21 -3
- package/dist/sandbox/index.d.ts +34 -0
- package/dist/sandbox/index.js +40 -3
- package/dist/sessions/index.d.ts +15 -2
- package/dist/sessions/index.js +212 -99
- package/dist/skills/index.js +19 -6
- package/dist/state/in-memory.d.ts +1 -0
- package/dist/state/in-memory.js +15 -0
- package/dist/telemetry/shim.js +9 -4
- package/dist/testing/durableWorkspaceStoreContract.d.ts +1 -1
- package/dist/testing/durableWorkspaceStoreContract.js +64 -28
- package/dist/tools/index.d.ts +2 -0
- package/dist/tools/index.js +15 -1
- package/dist/tools/mcp/runner.js +11 -6
- package/dist/tools/mcp/stdio.js +170 -1
- package/dist/ulid/index.d.ts +6 -1
- package/dist/ulid/index.js +31 -13
- package/dist/version.d.ts +2 -0
- package/dist/version.js +2 -0
- package/dist/workflows/index.js +7 -1
- package/dist/workspace/in-memory.d.ts +9 -10
- package/dist/workspace/in-memory.js +191 -48
- package/package.json +1 -1
- package/dist/harness/errors.d.ts +0 -62
- package/dist/harness/errors.js +0 -67
package/dist/sandbox/index.js
CHANGED
|
@@ -2,6 +2,10 @@ import { createRequire } from 'node:module';
|
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { OperationCancelledError, OperationTimeoutError, HarnessConfigError, SandboxError, SandboxNoExecutorError } from '../errors/index.js';
|
|
4
4
|
const require = createRequire(import.meta.url);
|
|
5
|
+
/**
 * Returns true when a sandbox session can spawn long-lived processes.
 *
 * A session counts as spawn-capable when it exposes a callable `spawn`
 * member. `null` and `undefined` are reported as not capable instead of
 * throwing, so the guard can be used on optional session references.
 *
 * @param session - The sandbox session (or nothing) to probe.
 * @returns `true` only when `session.spawn` is a function.
 */
export function isSpawnCapableSession(session) {
    return session != null && typeof session.spawn === 'function';
}
|
|
5
9
|
/** Current wall-clock time as an ISO-8601 string. */
function now() {
    const stamp = new Date();
    return stamp.toISOString();
}
|
|
6
10
|
function normalizePath(input) {
|
|
7
11
|
if (!input.startsWith('/'))
|
|
@@ -63,7 +67,7 @@ class MemorySandboxSession {
|
|
|
63
67
|
const relative = root === '/' ? k.slice(1) : k.slice(root.length + 1);
|
|
64
68
|
if (!opts?.recursive && relative.includes('/'))
|
|
65
69
|
continue;
|
|
66
|
-
if (opts?.glob && !
|
|
70
|
+
if (opts?.glob && !globToRegExp(opts.glob).test(k))
|
|
67
71
|
continue;
|
|
68
72
|
out.push({ name: k.split('/').at(-1) ?? '', path: k, kind: v.kind, ...(v.kind === 'file' ? { size: v.data.byteLength } : {}) });
|
|
69
73
|
}
|
|
@@ -155,11 +159,44 @@ export function bashSandbox(opts) {
|
|
|
155
159
|
}
|
|
156
160
|
};
|
|
157
161
|
}
|
|
162
|
+
/**
 * Translate a glob to a fully-anchored RegExp matched against the absolute
 * path.
 *
 * - `*` and `**` (and any longer run of `*`) match any sequence of characters,
 *   including `/` and line breaks.
 * - `?` matches exactly one character.
 * - Every other regex metacharacter is escaped to a literal, so a pattern can
 *   never throw a `SyntaxError`.
 *
 * A run of adjacent `*` is collapsed into a single `.*`. Emitting one `.*`
 * per pair (the previous behaviour) stacked quantifiers for inputs such as
 * `******x`, which backtracks super-linearly on a non-matching path.
 * Wildcards separated by literals (`*a*b*`) still cost more per extra
 * wildcard; that is inherent to this translation.
 *
 * Anchoring both ends prevents over-matching (e.g. `*.ts` does not match
 * `a.tsx`).
 *
 * @param glob - Glob pattern to translate.
 * @returns A RegExp anchored at both ends, with the dotAll flag set.
 */
function globToRegExp(glob) {
    // Characters that carry meaning in a RegExp source and must match literally.
    const specials = '.+^${}()|[]\\';
    let out = '^';
    for (let i = 0; i < glob.length; i += 1) {
        const char = glob[i];
        if (char === '*') {
            // Swallow the whole run of stars so only one `.*` is emitted.
            while (glob[i + 1] === '*') {
                i += 1;
            }
            out += '.*';
        }
        else if (char === '?') {
            out += '.';
        }
        else if (specials.includes(char)) {
            out += `\\${char}`;
        }
        else {
            out += char;
        }
    }
    // dotAll: a path segment may legally contain a newline, which plain `.`
    // would refuse to match.
    return new RegExp(`${out}$`, 's');
}
|
|
158
190
|
export function autoDetectSandbox() {
|
|
159
191
|
try {
|
|
160
192
|
return bashSandbox();
|
|
161
193
|
}
|
|
162
|
-
catch {
|
|
163
|
-
|
|
194
|
+
catch (error) {
|
|
195
|
+
// Only fall back to the no-executor sandbox when just-bash is absent.
|
|
196
|
+
// A real configuration/init error must surface, not silently downgrade.
|
|
197
|
+
if (error instanceof HarnessConfigError && error.meta?.reason === 'just_bash_not_installed') {
|
|
198
|
+
return inMemorySandbox();
|
|
199
|
+
}
|
|
200
|
+
throw error;
|
|
164
201
|
}
|
|
165
202
|
}
|
package/dist/sessions/index.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import type { Logger } from '../logger/index.js';
|
|
2
|
-
import type { Harness, HarnessDefaults, BuilderState, TelemetryOptions } from '../harness/defineHarness.js';
|
|
2
|
+
import type { RunEvent, Harness, HarnessDefaults, BuilderState, TelemetryOptions } from '../harness/defineHarness.js';
|
|
3
3
|
import type { MemoryAdapter } from '../ports/memory.js';
|
|
4
|
-
import type { HarnessInspection } from '../ports/capabilities.js';
|
|
4
|
+
import type { DurableRuntimeAdapter, HarnessInspection } from '../ports/capabilities.js';
|
|
5
|
+
import type { DurableWorkspaceStore } from '../ports/workspace.js';
|
|
5
6
|
import type { Sandbox } from '../sandbox/index.js';
|
|
6
7
|
import type { StateStore } from '../ports/state.js';
|
|
7
8
|
import { type TelemetryShim } from '../telemetry/index.js';
|
|
@@ -13,6 +14,8 @@ type HarnessDefinition<S extends BuilderState> = {
|
|
|
13
14
|
state: StateStore;
|
|
14
15
|
sandbox: Sandbox;
|
|
15
16
|
memory: MemoryAdapter;
|
|
17
|
+
runtime?: DurableRuntimeAdapter;
|
|
18
|
+
workspaceStore?: DurableWorkspaceStore;
|
|
16
19
|
defaults: HarnessDefaults;
|
|
17
20
|
models: NonNullable<S['models']>;
|
|
18
21
|
tools: NonNullable<S['tools']>;
|
|
@@ -21,5 +24,15 @@ type HarnessDefinition<S extends BuilderState> = {
|
|
|
21
24
|
workflows: NonNullable<S['workflows']>;
|
|
22
25
|
inspection: HarnessInspection;
|
|
23
26
|
};
|
|
27
|
+
/**
|
|
28
|
+
* Relay run events from an in-process run to a stream consumer.
|
|
29
|
+
*
|
|
30
|
+
* The unread events live in a bounded queue: consumed events are removed (no
|
|
31
|
+
* growing cursor over a shared array), and on overflow the oldest non-terminal
|
|
32
|
+
* unread event is dropped and counted, so a slow consumer never silently skips
|
|
33
|
+
* an unread event. Delivery is promise-notified rather than time-polled, so
|
|
34
|
+
* there is no fixed per-event latency or periodic timer.
|
|
35
|
+
*/
|
|
36
|
+
export declare function relayRunEvents(run: (onEvent: (event: RunEvent) => Promise<void>) => Promise<unknown>): AsyncIterable<RunEvent>;
|
|
24
37
|
export declare function createSessionHarness<S extends BuilderState>(definition: HarnessDefinition<S>): Harness<S>;
|
|
25
38
|
export {};
|
package/dist/sessions/index.js
CHANGED
|
@@ -3,6 +3,8 @@ import { ulid } from '../ulid/index.js';
|
|
|
3
3
|
import { runDefaultAgent } from '../agents/index.js';
|
|
4
4
|
import { runWorkflow } from '../workflows/index.js';
|
|
5
5
|
import { createMemoryFacade, createSessionMemory } from '../ports/memory.js';
|
|
6
|
+
import { beginDurableWorkflow, DURABLE_RUN_ID_PATTERN, isExecutableDurableRuntime } from '../runtime/sessionDurable.js';
|
|
7
|
+
import { HarnessConfigError } from '../errors/catalog.js';
|
|
6
8
|
import { loadSkillsSync } from '../skills/index.js';
|
|
7
9
|
import { createModelRegistry } from '../models/registry.js';
|
|
8
10
|
import { createMetrics, createTelemetryShim } from '../telemetry/index.js';
|
|
@@ -11,6 +13,82 @@ const NEVER_ABORT_SIGNAL = new AbortController().signal;
|
|
|
11
13
|
/** Current wall-clock time as an ISO-8601 string. */
function now() {
    const stamp = new Date();
    return stamp.toISOString();
}
|
|
16
|
+
const STREAM_MAX_BUFFERED_EVENTS = 1024;
|
|
17
|
+
const STREAM_TERMINAL_EVENT_TYPES = new Set(['run.finished', 'agent.finished']);
|
|
18
|
+
/**
|
|
19
|
+
* Relay run events from an in-process run to a stream consumer.
|
|
20
|
+
*
|
|
21
|
+
* The unread events live in a bounded queue: consumed events are removed (no
|
|
22
|
+
* growing cursor over a shared array), and on overflow the oldest non-terminal
|
|
23
|
+
* unread event is dropped and counted, so a slow consumer never silently skips
|
|
24
|
+
* an unread event. Delivery is promise-notified rather than time-polled, so
|
|
25
|
+
* there is no fixed per-event latency or periodic timer.
|
|
26
|
+
*/
|
|
27
|
+
export async function* relayRunEvents(run) {
|
|
28
|
+
const queue = [];
|
|
29
|
+
let dropped = 0;
|
|
30
|
+
let liveRunId = 'unknown';
|
|
31
|
+
let done = false;
|
|
32
|
+
let failure;
|
|
33
|
+
let wake;
|
|
34
|
+
const notify = () => {
|
|
35
|
+
const resolve = wake;
|
|
36
|
+
wake = undefined;
|
|
37
|
+
resolve?.();
|
|
38
|
+
};
|
|
39
|
+
const result = run((event) => {
|
|
40
|
+
if ('runId' in event)
|
|
41
|
+
liveRunId = event.runId;
|
|
42
|
+
if (queue.length >= STREAM_MAX_BUFFERED_EVENTS) {
|
|
43
|
+
const dropIndex = queue.findIndex((candidate) => !STREAM_TERMINAL_EVENT_TYPES.has(candidate.type));
|
|
44
|
+
if (dropIndex >= 0) {
|
|
45
|
+
queue.splice(dropIndex, 1);
|
|
46
|
+
dropped += 1;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
queue.push(event);
|
|
50
|
+
notify();
|
|
51
|
+
return Promise.resolve();
|
|
52
|
+
})
|
|
53
|
+
.catch((error) => {
|
|
54
|
+
failure = error;
|
|
55
|
+
return undefined;
|
|
56
|
+
})
|
|
57
|
+
.finally(() => {
|
|
58
|
+
done = true;
|
|
59
|
+
notify();
|
|
60
|
+
});
|
|
61
|
+
try {
|
|
62
|
+
while (true) {
|
|
63
|
+
if (dropped > 0) {
|
|
64
|
+
const droppedCount = dropped;
|
|
65
|
+
dropped = 0;
|
|
66
|
+
yield { type: 'stream.overflow', runId: liveRunId, at: now(), dropped: droppedCount };
|
|
67
|
+
}
|
|
68
|
+
while (queue.length > 0) {
|
|
69
|
+
yield queue.shift();
|
|
70
|
+
// Surface a fresh overflow notice promptly between events.
|
|
71
|
+
if (dropped > 0)
|
|
72
|
+
break;
|
|
73
|
+
}
|
|
74
|
+
if (queue.length === 0 && dropped === 0) {
|
|
75
|
+
if (done) {
|
|
76
|
+
break;
|
|
77
|
+
}
|
|
78
|
+
// No await between the empty check and installing `wake`, so a producer
|
|
79
|
+
// push cannot be lost between them.
|
|
80
|
+
await new Promise((resolve) => {
|
|
81
|
+
wake = resolve;
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
finally {
|
|
87
|
+
await result.catch(() => undefined);
|
|
88
|
+
}
|
|
89
|
+
if (failure)
|
|
90
|
+
throw failure;
|
|
91
|
+
}
|
|
14
92
|
function validateInvokeOptions(opts) {
|
|
15
93
|
if (opts?.historyWindow !== undefined && opts.historyWindow < 0) {
|
|
16
94
|
throw new ValidationError('Invoke options are invalid.', { where: 'invoke_options', issues: { historyWindow: opts.historyWindow } });
|
|
@@ -30,6 +108,12 @@ function normalizeMessage(message, sessionId) {
|
|
|
30
108
|
export function createSessionHarness(definition) {
|
|
31
109
|
const resolvedSkills = loadSkillsSync(definition.skills);
|
|
32
110
|
const sessionStates = new Map();
|
|
111
|
+
// In-flight session-state creations, memoized so concurrent first-time callers
|
|
112
|
+
// share one sandbox open (no orphaned sessions) and one SessionState object
|
|
113
|
+
// (so the synchronous busy check/set below serializes runs correctly).
|
|
114
|
+
const sessionStateOpenings = new Map();
|
|
115
|
+
// Stable per-harness-instance worker id used as the default durable lease owner.
|
|
116
|
+
const durableWorkerId = `worker_${ulid()}`;
|
|
33
117
|
const contentCaptureMode = resolveContentCaptureMode(definition.telemetry);
|
|
34
118
|
const telemetry = withTelemetryFlavor(definition.telemetryShim ?? createTelemetryShim(), definition.telemetry);
|
|
35
119
|
const adapterMetrics = createMetrics(telemetry, { 'harness.name': definition.name });
|
|
@@ -45,6 +129,7 @@ export function createSessionHarness(definition) {
|
|
|
45
129
|
toolTimeoutMs: definition.defaults.toolTimeoutMs ?? 120_000,
|
|
46
130
|
skillTimeoutMs: definition.defaults.skillTimeoutMs ?? 60_000,
|
|
47
131
|
modelTimeoutMs: definition.defaults.modelTimeoutMs ?? 300_000,
|
|
132
|
+
maxParallelToolCalls: definition.defaults.maxParallelToolCalls ?? 8,
|
|
48
133
|
...(definition.defaults.historyWindow !== undefined ? { historyWindow: definition.defaults.historyWindow } : {})
|
|
49
134
|
}
|
|
50
135
|
};
|
|
@@ -56,24 +141,36 @@ export function createSessionHarness(definition) {
|
|
|
56
141
|
if (existing) {
|
|
57
142
|
return existing;
|
|
58
143
|
}
|
|
144
|
+
const createdAt = now();
|
|
59
145
|
const created = {
|
|
60
146
|
id: sessionId,
|
|
61
|
-
createdAt
|
|
62
|
-
updatedAt:
|
|
147
|
+
createdAt,
|
|
148
|
+
updatedAt: createdAt,
|
|
63
149
|
runCount: 0
|
|
64
150
|
};
|
|
65
151
|
await definition.state.upsertSession(created);
|
|
66
152
|
return created;
|
|
67
153
|
}
|
|
68
|
-
|
|
154
|
+
function getSessionState(sessionId) {
|
|
69
155
|
const existing = sessionStates.get(sessionId);
|
|
70
156
|
if (existing) {
|
|
71
|
-
return existing;
|
|
157
|
+
return Promise.resolve(existing);
|
|
72
158
|
}
|
|
73
|
-
const
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
159
|
+
const pending = sessionStateOpenings.get(sessionId);
|
|
160
|
+
if (pending) {
|
|
161
|
+
return pending;
|
|
162
|
+
}
|
|
163
|
+
const opening = (async () => {
|
|
164
|
+
const sandboxSession = await definition.sandbox.open({ sessionId, runId: `init_${ulid()}` });
|
|
165
|
+
const created = { busy: false, sandboxSession, mountedSkills: new Set() };
|
|
166
|
+
sessionStates.set(sessionId, created);
|
|
167
|
+
sessionStateOpenings.delete(sessionId);
|
|
168
|
+
return created;
|
|
169
|
+
})();
|
|
170
|
+
// Let a failed open be retried instead of caching the rejection forever.
|
|
171
|
+
opening.catch(() => sessionStateOpenings.delete(sessionId));
|
|
172
|
+
sessionStateOpenings.set(sessionId, opening);
|
|
173
|
+
return opening;
|
|
77
174
|
}
|
|
78
175
|
async function appendEvents(runId, events) {
|
|
79
176
|
try {
|
|
@@ -141,6 +238,21 @@ export function createSessionHarness(definition) {
|
|
|
141
238
|
function memoryFacade(opts) {
|
|
142
239
|
return createMemoryFacade(memoryOptions(opts.sessionId, opts.sandboxSession, opts.signal, opts));
|
|
143
240
|
}
|
|
241
|
+
/**
 * Validates `opts.durable` and returns the executable durable runtime, or
 * `undefined` for an ephemeral run. Throws before any run record is created.
 *
 * @param opts - Invoke options; only `opts.durable` is inspected.
 * @returns `definition.runtime` when durable execution was requested and the
 *   configured runtime is executable, otherwise `undefined`.
 * @throws ValidationError when `durable.runId` is not a string or does not
 *   match `DURABLE_RUN_ID_PATTERN`.
 * @throws HarnessConfigError when no executable runtime adapter is configured.
 */
function resolveDurableRuntime(opts) {
    const durable = opts?.durable;
    if (!durable)
        return undefined;
    const runId = durable.runId;
    // `RegExp.prototype.test` stringifies its argument, so a missing or
    // non-string id (e.g. `undefined` -> "undefined") may satisfy the pattern.
    // Reject anything that is not a string up front.
    if (typeof runId !== 'string' || !DURABLE_RUN_ID_PATTERN.test(runId)) {
        throw new ValidationError('Durable run id is invalid.', { where: 'invoke_options', issues: { 'durable.runId': runId } });
    }
    if (!isExecutableDurableRuntime(definition.runtime)) {
        throw new HarnessConfigError('Durable execution requires an executable .runtime(...) adapter.', { reason: 'durable_runtime_required', path: 'runtime' });
    }
    return definition.runtime;
}
|
|
144
256
|
return {
|
|
145
257
|
inspect() {
|
|
146
258
|
return definition.inspection;
|
|
@@ -202,14 +314,21 @@ export function createSessionHarness(definition) {
|
|
|
202
314
|
throw new ValidationError('Session history replacement failed validation.', { where: 'session_history', issues: { message } }, error);
|
|
203
315
|
}
|
|
204
316
|
});
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
317
|
+
if (definition.state.replaceMessages) {
|
|
318
|
+
await definition.state.replaceMessages(sessionId, parsed);
|
|
319
|
+
}
|
|
320
|
+
else {
|
|
321
|
+
// Non-atomic fallback for adapters without atomic replace.
|
|
322
|
+
await definition.state.clearMessages(sessionId);
|
|
323
|
+
if (parsed.length > 0) {
|
|
324
|
+
await definition.state.appendMessages(sessionId, parsed);
|
|
325
|
+
}
|
|
208
326
|
}
|
|
209
327
|
},
|
|
210
328
|
async close() {
|
|
211
329
|
await definition.state.closeSession(sessionId);
|
|
212
330
|
sessionStates.delete(sessionId);
|
|
331
|
+
sessionStateOpenings.delete(sessionId);
|
|
213
332
|
await state.sandboxSession.close();
|
|
214
333
|
}
|
|
215
334
|
};
|
|
@@ -248,51 +367,13 @@ export function createSessionHarness(definition) {
|
|
|
248
367
|
$infer: {}
|
|
249
368
|
};
|
|
250
369
|
async function* streamAgentCall(sessionId, agentId, agent, input, opts) {
|
|
251
|
-
|
|
252
|
-
const maxBufferedEvents = 1024;
|
|
253
|
-
let dropped = 0;
|
|
254
|
-
let done = false;
|
|
255
|
-
let failure;
|
|
256
|
-
let liveRunId = 'unknown';
|
|
257
|
-
const result = runAgentCall(sessionId, agentId, agent, input, opts, (event) => {
|
|
258
|
-
if ('runId' in event)
|
|
259
|
-
liveRunId = event.runId;
|
|
260
|
-
if (buffer.length >= maxBufferedEvents) {
|
|
261
|
-
const dropIndex = buffer.findIndex((candidate) => candidate.type !== 'run.finished');
|
|
262
|
-
if (dropIndex >= 0) {
|
|
263
|
-
buffer.splice(dropIndex, 1);
|
|
264
|
-
dropped += 1;
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
buffer.push(event);
|
|
268
|
-
return Promise.resolve();
|
|
269
|
-
}).catch((error) => {
|
|
270
|
-
failure = error;
|
|
271
|
-
return undefined;
|
|
272
|
-
}).finally(() => {
|
|
273
|
-
done = true;
|
|
274
|
-
});
|
|
275
|
-
let cursor = 0;
|
|
276
|
-
while (true) {
|
|
277
|
-
if (dropped > 0) {
|
|
278
|
-
yield { type: 'stream.overflow', runId: liveRunId, at: now(), dropped };
|
|
279
|
-
dropped = 0;
|
|
280
|
-
}
|
|
281
|
-
while (cursor < buffer.length) {
|
|
282
|
-
yield buffer[cursor];
|
|
283
|
-
cursor += 1;
|
|
284
|
-
}
|
|
285
|
-
if (done) {
|
|
286
|
-
await result.catch(() => undefined);
|
|
287
|
-
if (failure)
|
|
288
|
-
throw failure;
|
|
289
|
-
return;
|
|
290
|
-
}
|
|
291
|
-
await new Promise((resolve) => setTimeout(resolve, 5));
|
|
292
|
-
}
|
|
370
|
+
yield* relayRunEvents((onEvent) => runAgentCall(sessionId, agentId, agent, input, opts, onEvent));
|
|
293
371
|
}
|
|
294
372
|
async function runAgentCall(sessionId, agentId, agent, input, opts, onEvent) {
|
|
295
373
|
validateInvokeOptions(opts);
|
|
374
|
+
if (opts?.durable) {
|
|
375
|
+
throw new ValidationError('Durable execution is only supported for workflow runs.', { where: 'invoke_options', issues: { durable: 'agent_run' } });
|
|
376
|
+
}
|
|
296
377
|
if (opts?.signal?.aborted) {
|
|
297
378
|
throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
|
|
298
379
|
}
|
|
@@ -363,6 +444,7 @@ export function createSessionHarness(definition) {
|
|
|
363
444
|
maxSteps: definition.defaults.agentMaxIterations ?? 16,
|
|
364
445
|
signal: runSignal.signal,
|
|
365
446
|
toolTimeoutMs: definition.defaults.toolTimeoutMs ?? 120_000,
|
|
447
|
+
maxParallelToolCalls: definition.defaults.maxParallelToolCalls ?? 8,
|
|
366
448
|
logger: definition.logger,
|
|
367
449
|
telemetry,
|
|
368
450
|
emitEvent: emit,
|
|
@@ -418,51 +500,11 @@ export function createSessionHarness(definition) {
|
|
|
418
500
|
}
|
|
419
501
|
}
|
|
420
502
|
async function* streamWorkflowCall(sessionId, workflowId, workflow, input, opts) {
|
|
421
|
-
|
|
422
|
-
const maxBufferedEvents = 1024;
|
|
423
|
-
let dropped = 0;
|
|
424
|
-
let done = false;
|
|
425
|
-
let failure;
|
|
426
|
-
let liveRunId = 'unknown';
|
|
427
|
-
const result = runWorkflowCall(sessionId, workflowId, workflow, input, opts, (event) => {
|
|
428
|
-
if ('runId' in event)
|
|
429
|
-
liveRunId = event.runId;
|
|
430
|
-
if (buffer.length >= maxBufferedEvents) {
|
|
431
|
-
const dropIndex = buffer.findIndex((candidate) => candidate.type !== 'run.finished');
|
|
432
|
-
if (dropIndex >= 0) {
|
|
433
|
-
buffer.splice(dropIndex, 1);
|
|
434
|
-
dropped += 1;
|
|
435
|
-
}
|
|
436
|
-
}
|
|
437
|
-
buffer.push(event);
|
|
438
|
-
return Promise.resolve();
|
|
439
|
-
}).catch((error) => {
|
|
440
|
-
failure = error;
|
|
441
|
-
return undefined;
|
|
442
|
-
}).finally(() => {
|
|
443
|
-
done = true;
|
|
444
|
-
});
|
|
445
|
-
let cursor = 0;
|
|
446
|
-
while (true) {
|
|
447
|
-
if (dropped > 0) {
|
|
448
|
-
yield { type: 'stream.overflow', runId: liveRunId, at: now(), dropped };
|
|
449
|
-
dropped = 0;
|
|
450
|
-
}
|
|
451
|
-
while (cursor < buffer.length) {
|
|
452
|
-
yield buffer[cursor];
|
|
453
|
-
cursor += 1;
|
|
454
|
-
}
|
|
455
|
-
if (done) {
|
|
456
|
-
await result.catch(() => undefined);
|
|
457
|
-
if (failure)
|
|
458
|
-
throw failure;
|
|
459
|
-
return;
|
|
460
|
-
}
|
|
461
|
-
await new Promise((resolve) => setTimeout(resolve, 5));
|
|
462
|
-
}
|
|
503
|
+
yield* relayRunEvents((onEvent) => runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent));
|
|
463
504
|
}
|
|
464
505
|
async function runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent) {
|
|
465
506
|
validateInvokeOptions(opts);
|
|
507
|
+
const durableRuntime = resolveDurableRuntime(opts);
|
|
466
508
|
if (opts?.signal?.aborted) {
|
|
467
509
|
throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
|
|
468
510
|
}
|
|
@@ -473,7 +515,7 @@ export function createSessionHarness(definition) {
|
|
|
473
515
|
}
|
|
474
516
|
state.busy = true;
|
|
475
517
|
const startedAt = now();
|
|
476
|
-
const runId = ulid();
|
|
518
|
+
const runId = opts?.durable ? opts.durable.runId : ulid();
|
|
477
519
|
const memory = memoryFacade({
|
|
478
520
|
sessionId,
|
|
479
521
|
runId,
|
|
@@ -503,7 +545,22 @@ export function createSessionHarness(definition) {
|
|
|
503
545
|
state.busy = false;
|
|
504
546
|
throw error;
|
|
505
547
|
}
|
|
548
|
+
let durableBinding;
|
|
506
549
|
try {
|
|
550
|
+
if (durableRuntime && opts?.durable) {
|
|
551
|
+
durableBinding = await beginDurableWorkflow({
|
|
552
|
+
runtime: durableRuntime,
|
|
553
|
+
...(definition.workspaceStore ? { workspaceStore: definition.workspaceStore } : {}),
|
|
554
|
+
durable: opts.durable,
|
|
555
|
+
defaultWorkerId: durableWorkerId,
|
|
556
|
+
sessionId,
|
|
557
|
+
workflowId,
|
|
558
|
+
input: input,
|
|
559
|
+
signal: runSignal.signal,
|
|
560
|
+
logger: definition.logger,
|
|
561
|
+
harnessName: definition.name
|
|
562
|
+
});
|
|
563
|
+
}
|
|
507
564
|
const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.prompt', {
|
|
508
565
|
'harness.name': definition.name,
|
|
509
566
|
'harness.session.id': sessionId,
|
|
@@ -532,6 +589,7 @@ export function createSessionHarness(definition) {
|
|
|
532
589
|
metadata: opts?.metadata ?? {},
|
|
533
590
|
metrics: workflowMetrics,
|
|
534
591
|
memory,
|
|
592
|
+
step: durableBinding ? durableBinding.step : passthroughStep,
|
|
535
593
|
agents: Object.fromEntries(Object.entries(definition.agents).map(([agentId, agent]) => [
|
|
536
594
|
agentId,
|
|
537
595
|
async (agentInput, agentOpts) => {
|
|
@@ -568,6 +626,7 @@ export function createSessionHarness(definition) {
|
|
|
568
626
|
maxSteps: definition.defaults.agentMaxIterations ?? 16,
|
|
569
627
|
signal: agentSignal.signal,
|
|
570
628
|
toolTimeoutMs: definition.defaults.toolTimeoutMs ?? 120_000,
|
|
629
|
+
maxParallelToolCalls: definition.defaults.maxParallelToolCalls ?? 8,
|
|
571
630
|
logger: definition.logger,
|
|
572
631
|
telemetry,
|
|
573
632
|
emitEvent: emit,
|
|
@@ -597,6 +656,9 @@ export function createSessionHarness(definition) {
|
|
|
597
656
|
}));
|
|
598
657
|
}));
|
|
599
658
|
const finishedAt = now();
|
|
659
|
+
if (durableBinding) {
|
|
660
|
+
await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_success' }, () => durableBinding.finishSuccess(result));
|
|
661
|
+
}
|
|
600
662
|
const runFinished = { type: 'run.finished', runId, at: finishedAt, output: result };
|
|
601
663
|
await emit(runFinished);
|
|
602
664
|
await definition.state.finishRun(runId, { status: 'succeeded', finishedAt, output: result });
|
|
@@ -608,6 +670,9 @@ export function createSessionHarness(definition) {
|
|
|
608
670
|
const finalError = normalizeRunError(error, runSignal.signal);
|
|
609
671
|
const finishedAt = now();
|
|
610
672
|
const serialized = serializeError(finalError);
|
|
673
|
+
if (durableBinding && finalError instanceof OperationCancelledError) {
|
|
674
|
+
await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_cancelled' }, () => durableBinding.finishCancelled(finalError));
|
|
675
|
+
}
|
|
611
676
|
const log = finalError instanceof OperationCancelledError ? definition.logger.warn.bind(definition.logger) : definition.logger.error.bind(definition.logger);
|
|
612
677
|
log('Harness workflow run failed.', {
|
|
613
678
|
harness: definition.name,
|
|
@@ -637,10 +702,41 @@ export function createSessionHarness(definition) {
|
|
|
637
702
|
throw finalError;
|
|
638
703
|
}
|
|
639
704
|
finally {
|
|
705
|
+
// Releases the lease for a non-cancel failure so a retry with the same run
|
|
706
|
+
// id can resume; a no-op once the run was settled (success/cancel).
|
|
707
|
+
if (durableBinding)
|
|
708
|
+
await durableBinding.dispose();
|
|
640
709
|
runSignal.cleanup();
|
|
641
710
|
state.busy = false;
|
|
642
711
|
}
|
|
643
712
|
}
|
|
713
|
+
/**
 * Step runner for workflows executing without durable execution: the step id
 * is ignored and the step body is invoked directly, returning whatever it
 * returns.
 */
function passthroughStep(_stepId, fn) {
    const outcome = fn();
    return outcome;
}
|
|
717
|
+
/**
 * Runs a durable finalization side effect (runtime finish / workspace lifecycle)
 * and absorbs any error it raises, so the primary run outcome is what the
 * caller ends up reporting (spec 21 §16.1 step 7).
 *
 * On failure the error is counted under `harness.runs.durable_errors` and
 * logged with the run identifiers from `args`; nothing is rethrown.
 */
async function guardDurableStep(args, step) {
    try {
        await step();
        return;
    }
    catch (error) {
        const harness = definition.name;
        const counterAttributes = {
            harness,
            'harness.run.durable.operation': args.operation
        };
        telemetry.recordCounter('harness.runs.durable_errors', 1, counterAttributes);
        const logFields = {
            harness,
            session_id: args.sessionId,
            run_id: args.runId,
            workflow_id: args.workflowId,
            operation: args.operation,
            error: serializeError(error)
        };
        definition.logger.error('Durable finalization step failed; preserving run outcome.', logFields);
    }
}
|
|
644
740
|
async function terminalizeFailedRun(args) {
|
|
645
741
|
await runFailureTerminalizationStep(args, 'emit_run_finished', args.emitRunFinished);
|
|
646
742
|
await runFailureTerminalizationStep(args, 'finish_run', args.finishRun);
|
|
@@ -869,6 +965,12 @@ function sanitizeEventForPersistence(event) {
|
|
|
869
965
|
};
|
|
870
966
|
case 'stream.overflow':
|
|
871
967
|
return { dropped: event.dropped };
|
|
968
|
+
default: {
|
|
969
|
+
// Exhaustiveness guard: adding a RunEvent variant without updating this
|
|
970
|
+
// sanitizer becomes a compile error instead of silently persisting undefined.
|
|
971
|
+
event;
|
|
972
|
+
return {};
|
|
973
|
+
}
|
|
872
974
|
}
|
|
873
975
|
}
|
|
874
976
|
function isJsonRecord(value) {
|
|
@@ -891,9 +993,11 @@ function normalizeSerializedRunError(error) {
|
|
|
891
993
|
}
|
|
892
994
|
function createRunSignal(parent, timeoutMs) {
|
|
893
995
|
const controller = new AbortController();
|
|
894
|
-
const relay = () => controller.abort(parent?.reason);
|
|
996
|
+
const relay = () => controller.abort(runAbortReason(parent?.reason));
|
|
895
997
|
if (parent)
|
|
896
998
|
parent.addEventListener('abort', relay, { once: true });
|
|
999
|
+
if (parent?.aborted)
|
|
1000
|
+
relay();
|
|
897
1001
|
const timeout = timeoutMs && timeoutMs > 0
|
|
898
1002
|
? setTimeout(() => controller.abort(new OperationTimeoutError('Run timed out.', { scope: 'run', timeout_ms: timeoutMs })), timeoutMs)
|
|
899
1003
|
: undefined;
|
|
@@ -911,10 +1015,14 @@ function combineSignals(primary, secondary) {
|
|
|
911
1015
|
if (!secondary)
|
|
912
1016
|
return { signal: primary, cleanup: () => undefined };
|
|
913
1017
|
const controller = new AbortController();
|
|
914
|
-
const relayPrimary = () => controller.abort(primary.reason);
|
|
915
|
-
const relaySecondary = () => controller.abort(secondary.reason);
|
|
1018
|
+
const relayPrimary = () => controller.abort(runAbortReason(primary.reason));
|
|
1019
|
+
const relaySecondary = () => controller.abort(runAbortReason(secondary.reason));
|
|
916
1020
|
primary.addEventListener('abort', relayPrimary, { once: true });
|
|
917
1021
|
secondary.addEventListener('abort', relaySecondary, { once: true });
|
|
1022
|
+
if (primary.aborted)
|
|
1023
|
+
relayPrimary();
|
|
1024
|
+
else if (secondary.aborted)
|
|
1025
|
+
relaySecondary();
|
|
918
1026
|
return {
|
|
919
1027
|
signal: controller.signal,
|
|
920
1028
|
cleanup: () => {
|
|
@@ -923,3 +1031,8 @@ function combineSignals(primary, secondary) {
|
|
|
923
1031
|
}
|
|
924
1032
|
};
|
|
925
1033
|
}
|
|
1034
|
+
/**
 * Normalizes an abort reason: cancellation and timeout errors pass through
 * untouched; any other reason is wrapped in an `OperationCancelledError`,
 * with the original reason passed as the third constructor argument.
 */
function runAbortReason(reason) {
    const alreadyTyped = reason instanceof OperationCancelledError || reason instanceof OperationTimeoutError;
    return alreadyTyped
        ? reason
        : new OperationCancelledError('Run was cancelled.', { scope: 'run' }, reason);
}
|
package/dist/skills/index.js
CHANGED
|
@@ -178,16 +178,29 @@ export function loadSkillsSync(skills) {
|
|
|
178
178
|
/** Promise-returning wrapper around `loadSkillsSync`. */
export async function loadSkills(skills) {
    const loaded = loadSkillsSync(skills);
    return loaded;
}
|
|
181
|
-
|
|
181
|
+
const SKILL_MOUNT_MAX_FILES = 5_000;
|
|
182
|
+
const SKILL_MOUNT_MAX_BYTES = 100_000_000;
|
|
183
|
+
async function readDirRecursive(root, skillId) {
|
|
182
184
|
const files = new Map();
|
|
185
|
+
let totalBytes = 0;
|
|
183
186
|
const walk = async (dir) => {
|
|
184
187
|
const entries = await fsp.readdir(dir, { withFileTypes: true });
|
|
185
188
|
for (const entry of entries) {
|
|
186
189
|
const abs = path.join(dir, entry.name);
|
|
187
|
-
if (entry.isDirectory())
|
|
190
|
+
if (entry.isDirectory()) {
|
|
188
191
|
await walk(abs);
|
|
189
|
-
|
|
190
|
-
|
|
192
|
+
}
|
|
193
|
+
else if (entry.isFile()) {
|
|
194
|
+
if (files.size >= SKILL_MOUNT_MAX_FILES) {
|
|
195
|
+
throw new SkillManifestError('Skill exceeds the mount file-count limit.', { reason: 'scan_limit_reached', skill_id: skillId, directory: root });
|
|
196
|
+
}
|
|
197
|
+
const data = await fsp.readFile(abs);
|
|
198
|
+
totalBytes += data.byteLength;
|
|
199
|
+
if (totalBytes > SKILL_MOUNT_MAX_BYTES) {
|
|
200
|
+
throw new SkillManifestError('Skill exceeds the mount byte limit.', { reason: 'scan_limit_reached', skill_id: skillId, directory: root });
|
|
201
|
+
}
|
|
202
|
+
files.set(path.posix.normalize(path.relative(root, abs).split(path.sep).join('/')), data);
|
|
203
|
+
}
|
|
191
204
|
}
|
|
192
205
|
};
|
|
193
206
|
await walk(root);
|
|
@@ -195,7 +208,7 @@ async function readDirRecursive(root) {
|
|
|
195
208
|
}
|
|
196
209
|
export async function mountSkillsOnce(session, mounted, skills, skillIds) {
|
|
197
210
|
if (skillIds.length > 0 && typeof session.mount !== 'function') {
|
|
198
|
-
throw new SkillManifestError('Sandbox does not support skill mounting.', { reason: '
|
|
211
|
+
throw new SkillManifestError('Sandbox does not support skill mounting.', { reason: 'skill_sandbox_unsupported' });
|
|
199
212
|
}
|
|
200
213
|
for (const skillId of skillIds) {
|
|
201
214
|
if (mounted.has(skillId))
|
|
@@ -203,7 +216,7 @@ export async function mountSkillsOnce(session, mounted, skills, skillIds) {
|
|
|
203
216
|
const skill = skills[skillId];
|
|
204
217
|
if (!skill)
|
|
205
218
|
throw new SkillNotFoundError('Skill not found.', { skill_id: skillId });
|
|
206
|
-
const files = await readDirRecursive(skill.directory);
|
|
219
|
+
const files = await readDirRecursive(skill.directory, skillId);
|
|
207
220
|
await session.mount(files, skill.mountPath);
|
|
208
221
|
mounted.add(skillId);
|
|
209
222
|
}
|
|
@@ -18,6 +18,7 @@ export declare class InMemoryStateStore implements StateStore {
|
|
|
18
18
|
before?: string;
|
|
19
19
|
}): Promise<Message[]>;
|
|
20
20
|
clearMessages(sessionId: string): Promise<void>;
|
|
21
|
+
replaceMessages(sessionId: string, messages: Message[]): Promise<void>;
|
|
21
22
|
createRun(record: RunRecord): Promise<void>;
|
|
22
23
|
finishRun(runId: string, patch: FinishRunPatch): Promise<void>;
|
|
23
24
|
getRun(runId: string): Promise<RunRecord | undefined>;
|
package/dist/state/in-memory.js
CHANGED
|
@@ -32,6 +32,7 @@ export class InMemoryStateStore {
|
|
|
32
32
|
async closeSession(id) {
|
|
33
33
|
this.sessions.delete(id);
|
|
34
34
|
this.messages.delete(id);
|
|
35
|
+
this.messageLocks.delete(id);
|
|
35
36
|
for (const [runId, run] of this.runs) {
|
|
36
37
|
if (run.sessionId === id) {
|
|
37
38
|
this.runs.delete(runId);
|
|
@@ -71,6 +72,20 @@ export class InMemoryStateStore {
|
|
|
71
72
|
this.messages.delete(sessionId);
|
|
72
73
|
});
|
|
73
74
|
}
|
|
75
|
+
async replaceMessages(sessionId, messages) {
|
|
76
|
+
return this.withMessageLock(sessionId, async () => {
|
|
77
|
+
const ids = new Set();
|
|
78
|
+
for (const message of messages) {
|
|
79
|
+
if (ids.has(message.id)) {
|
|
80
|
+
throw new StateError('Duplicate message id.', { op: 'appendMessages', reason: 'duplicate_message_id' });
|
|
81
|
+
}
|
|
82
|
+
ids.add(message.id);
|
|
83
|
+
}
|
|
84
|
+
// Atomic clear+append under one lock: validate first, then commit so a
|
|
85
|
+
// failure never leaves history partially replaced.
|
|
86
|
+
this.messages.set(sessionId, [...messages]);
|
|
87
|
+
});
|
|
88
|
+
}
|
|
74
89
|
async createRun(record) {
|
|
75
90
|
this.runs.set(record.id, record);
|
|
76
91
|
}
|