@purista/harness 1.2.6 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/agents/index.d.ts +7 -1
- package/dist/agents/index.js +126 -44
- package/dist/errors/catalog.d.ts +18 -2
- package/dist/errors/catalog.js +10 -0
- package/dist/eval/index.d.ts +3 -3
- package/dist/eval/index.js +15 -1
- package/dist/harness/defineHarness.d.ts +149 -3
- package/dist/harness/defineHarness.js +110 -1
- package/dist/index.d.ts +38 -18
- package/dist/index.js +30 -16
- package/dist/local/index.d.ts +36 -0
- package/dist/local/index.js +24 -0
- package/dist/local/local-sandbox.d.ts +25 -0
- package/dist/local/local-sandbox.js +368 -0
- package/dist/local/local-workspace.d.ts +56 -0
- package/dist/local/local-workspace.js +496 -0
- package/dist/local/ref-hash.d.ts +6 -0
- package/dist/local/ref-hash.js +9 -0
- package/dist/local/sqlite-storage.d.ts +106 -0
- package/dist/local/sqlite-storage.js +680 -0
- package/dist/models/adapter-utils.d.ts +52 -0
- package/dist/models/adapter-utils.js +81 -0
- package/dist/models/registry.js +28 -37
- package/dist/models/stream-pump.d.ts +16 -0
- package/dist/models/stream-pump.js +77 -0
- package/dist/ports/base-model-provider.d.ts +7 -1
- package/dist/ports/base-model-provider.js +384 -87
- package/dist/ports/capabilities.d.ts +16 -2
- package/dist/ports/context-checkpoints.d.ts +63 -0
- package/dist/ports/context-checkpoints.js +33 -0
- package/dist/ports/index.d.ts +1 -0
- package/dist/ports/index.js +1 -0
- package/dist/ports/model-provider.d.ts +94 -0
- package/dist/runtime/durable.d.ts +11 -0
- package/dist/runtime/durable.js +15 -2
- package/dist/runtime/sessionDurable.js +47 -21
- package/dist/runtime/steps.d.ts +22 -1
- package/dist/runtime/steps.js +53 -2
- package/dist/sessions/index.d.ts +17 -6
- package/dist/sessions/index.js +345 -84
- package/dist/skills/index.d.ts +0 -2
- package/dist/skills/index.js +0 -8
- package/dist/state/in-memory.js +6 -6
- package/dist/telemetry/shim.js +2 -6
- package/dist/telemetry/span-attrs.d.ts +9 -0
- package/dist/telemetry/span-attrs.js +27 -0
- package/dist/testing/durableWorkspaceStoreContract.js +69 -0
- package/dist/testing/fakeLogger.d.ts +29 -0
- package/dist/testing/fakeLogger.js +47 -0
- package/dist/testing/fakeSandbox.d.ts +27 -0
- package/dist/testing/fakeSandbox.js +153 -0
- package/dist/testing/fakeStateStore.d.ts +36 -0
- package/dist/testing/fakeStateStore.js +66 -0
- package/dist/testing/index.d.ts +10 -4
- package/dist/testing/index.js +14 -4
- package/dist/testing/loggerContract.d.ts +9 -0
- package/dist/testing/loggerContract.js +62 -0
- package/dist/testing/modelProviderContract.d.ts +12 -0
- package/dist/testing/modelProviderContract.js +222 -0
- package/dist/testing/recordEvents.d.ts +3 -0
- package/dist/testing/recordEvents.js +8 -0
- package/dist/testing/stateStoreContract.js +27 -0
- package/dist/tools/index.js +26 -1
- package/dist/tools/mcp/http.d.ts +2 -0
- package/dist/tools/mcp/http.js +34 -21
- package/dist/tools/mcp/runner.d.ts +4 -0
- package/dist/tools/mcp/runner.js +75 -21
- package/dist/tools/mcp/stdio.d.ts +7 -1
- package/dist/tools/mcp/stdio.js +102 -23
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workspace/in-memory.d.ts +1 -0
- package/dist/workspace/in-memory.js +47 -12
- package/package.json +5 -4
package/dist/sessions/index.js
CHANGED
|
@@ -1,28 +1,58 @@
|
|
|
1
|
-
import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, serializeError } from '../errors/index.js';
|
|
1
|
+
import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, DelegationPolicyError, serializeError } from '../errors/index.js';
|
|
2
2
|
import { ulid } from '../ulid/index.js';
|
|
3
3
|
import { runDefaultAgent } from '../agents/index.js';
|
|
4
4
|
import { runWorkflow } from '../workflows/index.js';
|
|
5
5
|
import { createMemoryFacade, createSessionMemory } from '../ports/memory.js';
|
|
6
6
|
import { beginDurableWorkflow, DURABLE_RUN_ID_PATTERN, isExecutableDurableRuntime } from '../runtime/sessionDurable.js';
|
|
7
|
+
import { runStepWithRetry } from '../runtime/steps.js';
|
|
7
8
|
import { HarnessConfigError } from '../errors/catalog.js';
|
|
8
9
|
import { loadSkillsSync } from '../skills/index.js';
|
|
9
10
|
import { createModelRegistry } from '../models/registry.js';
|
|
10
11
|
import { createMetrics, createTelemetryShim } from '../telemetry/index.js';
|
|
12
|
+
import { metadataSpanAttrs } from '../telemetry/span-attrs.js';
|
|
13
|
+
import { abortError } from '../runtime/abort.js';
|
|
11
14
|
import { createMcpRunnerRegistry } from '../tools/mcp/runner.js';
|
|
12
15
|
const NEVER_ABORT_SIGNAL = new AbortController().signal;
|
|
16
|
+
const DEFAULT_MAX_CHILD_AGENT_CALLS = 32;
|
|
17
|
+
const DEFAULT_MAX_PARALLEL_CHILD_AGENT_CALLS = 8;
|
|
18
|
+
const DEFAULT_MAX_DELEGATION_DEPTH = 1;
|
|
19
|
+
/**
|
|
20
|
+
* Workflows invoke leaf agents directly, so every child-agent call runs at
|
|
21
|
+
* depth 1 (spec 10 "Delegation policy": `maxDepth` default `1`, `0` disables
|
|
22
|
+
* child-agent delegation).
|
|
23
|
+
*/
|
|
24
|
+
const CHILD_DELEGATION_DEPTH = 1;
|
|
13
25
|
/**
 * Current wall-clock time as an ISO-8601 UTC timestamp.
 *
 * @returns {string} The value of `Date.prototype.toISOString()` for the moment of the call.
 */
function now() {
    return new Date().toISOString();
}
|
|
16
28
|
const STREAM_MAX_BUFFERED_EVENTS = 1024;
|
|
17
|
-
|
|
29
|
+
/**
|
|
30
|
+
* Event types that must never be dropped from the relay queue.
|
|
31
|
+
*
|
|
32
|
+
* Only `run.finished` qualifies: it occurs at most once per run and is the
|
|
33
|
+
* terminal event consumers key off to know the run is complete. `agent.finished`
|
|
34
|
+
* is emitted once per agent invocation (including every child-agent delegation
|
|
35
|
+
* call), so it can appear many times and must remain droppable to keep the
|
|
36
|
+
* queue bounded when a slow consumer falls behind during a delegation-heavy run.
|
|
37
|
+
*/
|
|
38
|
+
const STREAM_UNDROPPABLE_EVENT_TYPES = new Set(['run.finished']);
|
|
18
39
|
/**
|
|
19
40
|
* Relay run events from an in-process run to a stream consumer.
|
|
20
41
|
*
|
|
21
|
-
* The unread events live in a bounded queue:
|
|
22
|
-
* growing cursor over a shared array), and on
|
|
23
|
-
* unread event is dropped and counted, so a slow
|
|
24
|
-
*
|
|
25
|
-
*
|
|
42
|
+
* The unread events live in a bounded queue (cap: STREAM_MAX_BUFFERED_EVENTS):
|
|
43
|
+
* consumed events are removed (no growing cursor over a shared array), and on
|
|
44
|
+
* overflow the oldest droppable unread event is dropped and counted, so a slow
|
|
45
|
+
* consumer never silently skips an event without an accompanying
|
|
46
|
+
* `stream.overflow` notice. Only `run.finished` is undroppable; all other
|
|
47
|
+
* event types — including `agent.finished` — may be evicted under pressure.
|
|
48
|
+
* If no droppable event exists when the queue is full, the incoming event is
|
|
49
|
+
* discarded (counted) rather than growing the queue past the cap. Delivery is
|
|
50
|
+
* promise-notified rather than time-polled, so there is no fixed per-event
|
|
51
|
+
* latency or periodic timer.
|
|
52
|
+
*
|
|
53
|
+
* Abandoning the stream (`break` / `iterator.return()`) only detaches that
|
|
54
|
+
* consumer. It does not abort `relaySignal`; callers must pass `opts.signal`
|
|
55
|
+
* when they intend to cancel the underlying run.
|
|
26
56
|
*/
|
|
27
57
|
export async function* relayRunEvents(run) {
|
|
28
58
|
const queue = [];
|
|
@@ -31,6 +61,8 @@ export async function* relayRunEvents(run) {
|
|
|
31
61
|
let done = false;
|
|
32
62
|
let failure;
|
|
33
63
|
let wake;
|
|
64
|
+
const relayController = new AbortController();
|
|
65
|
+
let completedNormally = false;
|
|
34
66
|
const notify = () => {
|
|
35
67
|
const resolve = wake;
|
|
36
68
|
wake = undefined;
|
|
@@ -40,16 +72,23 @@ export async function* relayRunEvents(run) {
|
|
|
40
72
|
if ('runId' in event)
|
|
41
73
|
liveRunId = event.runId;
|
|
42
74
|
if (queue.length >= STREAM_MAX_BUFFERED_EVENTS) {
|
|
43
|
-
const dropIndex = queue.findIndex((candidate) => !
|
|
75
|
+
const dropIndex = queue.findIndex((candidate) => !STREAM_UNDROPPABLE_EVENT_TYPES.has(candidate.type));
|
|
44
76
|
if (dropIndex >= 0) {
|
|
45
77
|
queue.splice(dropIndex, 1);
|
|
46
78
|
dropped += 1;
|
|
47
79
|
}
|
|
80
|
+
else {
|
|
81
|
+
// Every queued event is undroppable; discard the incoming event to keep
|
|
82
|
+
// the queue bounded rather than growing past the cap.
|
|
83
|
+
dropped += 1;
|
|
84
|
+
notify();
|
|
85
|
+
return Promise.resolve();
|
|
86
|
+
}
|
|
48
87
|
}
|
|
49
88
|
queue.push(event);
|
|
50
89
|
notify();
|
|
51
90
|
return Promise.resolve();
|
|
52
|
-
})
|
|
91
|
+
}, relayController.signal)
|
|
53
92
|
.catch((error) => {
|
|
54
93
|
failure = error;
|
|
55
94
|
return undefined;
|
|
@@ -73,6 +112,7 @@ export async function* relayRunEvents(run) {
|
|
|
73
112
|
}
|
|
74
113
|
if (queue.length === 0 && dropped === 0) {
|
|
75
114
|
if (done) {
|
|
115
|
+
completedNormally = true;
|
|
76
116
|
break;
|
|
77
117
|
}
|
|
78
118
|
// No await between the empty check and installing `wake`, so a producer
|
|
@@ -84,7 +124,12 @@ export async function* relayRunEvents(run) {
|
|
|
84
124
|
}
|
|
85
125
|
}
|
|
86
126
|
finally {
|
|
87
|
-
|
|
127
|
+
if (completedNormally) {
|
|
128
|
+
await result.catch(() => undefined);
|
|
129
|
+
}
|
|
130
|
+
else {
|
|
131
|
+
void result.catch(() => undefined);
|
|
132
|
+
}
|
|
88
133
|
}
|
|
89
134
|
if (failure)
|
|
90
135
|
throw failure;
|
|
@@ -133,7 +178,7 @@ export function createSessionHarness(definition) {
|
|
|
133
178
|
...(definition.defaults.historyWindow !== undefined ? { historyWindow: definition.defaults.historyWindow } : {})
|
|
134
179
|
}
|
|
135
180
|
};
|
|
136
|
-
configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools);
|
|
181
|
+
configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools, definition.runtime, definition.workspaceStore, definition.checkpoints);
|
|
137
182
|
const modelRegistry = createModelRegistry(definition.models, { telemetry, harnessName: definition.name });
|
|
138
183
|
const mcpRegistry = createMcpRunnerRegistry();
|
|
139
184
|
async function ensureSessionRecord(sessionId) {
|
|
@@ -253,6 +298,54 @@ export function createSessionHarness(definition) {
|
|
|
253
298
|
}
|
|
254
299
|
return definition.runtime;
|
|
255
300
|
}
|
|
301
|
+
/**
 * Build the context-checkpoint facade handed to one run (or to one child
 * agent inside a workflow run).
 *
 * The configured store (`definition.checkpoints`) is resolved lazily: creating
 * the facade never throws, but every operation that reaches the store throws a
 * `ValidationError` (`context_checkpoint_store_missing`) when none is
 * configured.
 *
 * @param {object} args
 * @param {string} args.runId Run the checkpoints belong to.
 * @param {string} args.sessionId Session the run belongs to.
 * @param {string} [args.workflowId] Added to written checkpoints and list queries when truthy.
 * @param {string} [args.agentId] Added to written checkpoints and list queries when truthy.
 * @param {AbortSignal} [args.signal] Forwarded to the store's `write` and `list` calls.
 * @returns {{ write: Function, list: Function, read: Function, delete: Function }}
 */
function createContextCheckpoints(args) {
    const store = definition.checkpoints;
    const requireStore = () => {
        if (!store) {
            throw new ValidationError('No context checkpoint store is configured.', {
                where: 'invoke_options',
                issues: { reason: 'context_checkpoint_store_missing' }
            });
        }
        return store;
    };
    // Single construction site for the "payload is not JSON" error so both
    // failure modes (stringify throws / stringify yields undefined) agree.
    const nonSerializablePayload = (detail) => new ValidationError('Context checkpoint payload must be JSON-serializable.', {
        where: 'invoke_options',
        issues: {
            reason: 'non_json_context_checkpoint_payload',
            ...(detail !== undefined ? { detail } : {})
        }
    });
    // Identity fields stamped on written checkpoints and used as list defaults.
    const baseQuery = {
        runId: args.runId,
        sessionId: args.sessionId,
        ...(args.workflowId ? { workflowId: args.workflowId } : {}),
        ...(args.agentId ? { agentId: args.agentId } : {})
    };
    // `read` and `delete` address a checkpoint by run, session, sequence and kind only.
    const toRef = (ref) => ({ runId: args.runId, sessionId: args.sessionId, sequence: ref.sequence, kind: ref.kind });
    return {
        /**
         * Persist one checkpoint. The payload is validated before the store
         * is looked up.
         *
         * @throws {ValidationError} When the payload cannot be serialized to
         *   JSON, or when no store is configured.
         */
        async write(input) {
            let json;
            try {
                json = JSON.stringify(input.payload);
            }
            catch (error) {
                // JSON.stringify throws TypeError for cyclic structures and
                // BigInt values; report those as the same validation failure
                // instead of leaking a raw TypeError to the caller.
                throw nonSerializablePayload(error instanceof Error ? error.message : String(error));
            }
            if (json === undefined) {
                // undefined / function / symbol payloads serialize to nothing.
                throw nonSerializablePayload();
            }
            const checkpoint = {
                ...baseQuery,
                sequence: input.sequence,
                kind: input.kind,
                payload: input.payload,
                // Size of the serialized payload in UTF-8 bytes, not characters.
                payloadSizeBytes: Buffer.byteLength(json, 'utf8'),
                createdAt: now(),
                ...(input.metadata ? { metadata: input.metadata } : {})
            };
            await requireStore().write(checkpoint, { signal: args.signal });
        },
        /** List checkpoints; fields in `query` override the run-scoped defaults. */
        async list(query = {}) {
            return requireStore().list({ ...baseQuery, ...query, signal: args.signal });
        },
        /** Read the checkpoint identified by `ref.sequence` and `ref.kind` within this run. */
        async read(ref) {
            return requireStore().read(toRef(ref));
        },
        /** Delete the checkpoint identified by `ref.sequence` and `ref.kind` within this run. */
        async delete(ref) {
            await requireStore().delete(toRef(ref));
        }
    };
}
|
|
256
349
|
return {
|
|
257
350
|
inspect() {
|
|
258
351
|
return definition.inspection;
|
|
@@ -326,9 +419,13 @@ export function createSessionHarness(definition) {
|
|
|
326
419
|
}
|
|
327
420
|
},
|
|
328
421
|
async close() {
|
|
422
|
+
if (state.busy) {
|
|
423
|
+
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
|
|
424
|
+
}
|
|
329
425
|
await definition.state.closeSession(sessionId);
|
|
330
426
|
sessionStates.delete(sessionId);
|
|
331
427
|
sessionStateOpenings.delete(sessionId);
|
|
428
|
+
await mcpRegistry.closeForSandboxKey(sessionId);
|
|
332
429
|
await state.sandboxSession.close();
|
|
333
430
|
}
|
|
334
431
|
};
|
|
@@ -367,7 +464,11 @@ export function createSessionHarness(definition) {
|
|
|
367
464
|
$infer: {}
|
|
368
465
|
};
|
|
369
466
|
/**
 * Stream the run events of a single agent call.
 *
 * The relay supplies its own abort signal; it is combined with the caller's
 * `opts.signal` (if any) and the combined signal is what the run receives.
 *
 * @param {string} sessionId Session the run executes in.
 * @param {string} agentId Identifier of the agent being invoked.
 * @param {object} agent Agent definition passed through to `runAgentCall`.
 * @param {unknown} input Agent input passed through to `runAgentCall`.
 * @param {object} [opts] Invoke options; `signal` is replaced by the combined signal.
 * @returns {AsyncGenerator} Events relayed by `relayRunEvents`.
 */
async function* streamAgentCall(sessionId, agentId, agent, input, opts) {
    // `async` + try/finally: a synchronous throw while combining signals turns
    // into a rejection of the returned promise, and the combined signal's
    // listeners are always released once the run settles.
    yield* relayRunEvents(async (onEvent, relaySignal) => {
        const combined = combineSignals(relaySignal, opts?.signal);
        try {
            return await runAgentCall(sessionId, agentId, agent, input, { ...opts, signal: combined.signal }, onEvent);
        }
        finally {
            combined.cleanup();
        }
    });
}
|
|
372
473
|
async function runAgentCall(sessionId, agentId, agent, input, opts, onEvent) {
|
|
373
474
|
validateInvokeOptions(opts);
|
|
@@ -377,44 +478,43 @@ export function createSessionHarness(definition) {
|
|
|
377
478
|
if (opts?.signal?.aborted) {
|
|
378
479
|
throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
|
|
379
480
|
}
|
|
380
|
-
|
|
481
|
+
// Busy check precedes createRunSignal so an early SessionBusyError cannot
|
|
482
|
+
// leak the run-timeout timer or the caller-signal abort listener.
|
|
381
483
|
const state = await getSessionState(sessionId);
|
|
382
484
|
if (state.busy) {
|
|
383
485
|
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
|
|
384
486
|
}
|
|
385
487
|
state.busy = true;
|
|
488
|
+
const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
|
|
386
489
|
const startedAt = now();
|
|
387
490
|
const runId = ulid();
|
|
388
|
-
const memory = memoryFacade({
|
|
389
|
-
sessionId,
|
|
390
|
-
runId,
|
|
391
|
-
agentId,
|
|
392
|
-
signal: runSignal.signal,
|
|
393
|
-
sandboxSession: state.sandboxSession,
|
|
394
|
-
metadata: opts?.metadata ?? {}
|
|
395
|
-
});
|
|
396
|
-
const runRecord = {
|
|
397
|
-
id: runId,
|
|
398
|
-
sessionId,
|
|
399
|
-
kind: 'agent',
|
|
400
|
-
target: agentId,
|
|
401
|
-
startedAt,
|
|
402
|
-
status: 'running',
|
|
403
|
-
input: input
|
|
404
|
-
};
|
|
405
491
|
const emit = async (event) => {
|
|
406
492
|
const eventAt = 'at' in event ? event.at : now();
|
|
407
493
|
await onEvent?.(event);
|
|
408
494
|
await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
|
|
409
495
|
};
|
|
496
|
+
let runCreated = false;
|
|
410
497
|
try {
|
|
498
|
+
const memory = memoryFacade({
|
|
499
|
+
sessionId,
|
|
500
|
+
runId,
|
|
501
|
+
agentId,
|
|
502
|
+
signal: runSignal.signal,
|
|
503
|
+
sandboxSession: state.sandboxSession,
|
|
504
|
+
metadata: opts?.metadata ?? {}
|
|
505
|
+
});
|
|
506
|
+
const checkpoints = createContextCheckpoints({ sessionId, runId, agentId, signal: runSignal.signal });
|
|
507
|
+
const runRecord = {
|
|
508
|
+
id: runId,
|
|
509
|
+
sessionId,
|
|
510
|
+
kind: 'agent',
|
|
511
|
+
target: agentId,
|
|
512
|
+
startedAt,
|
|
513
|
+
status: 'running',
|
|
514
|
+
input: input
|
|
515
|
+
};
|
|
411
516
|
await definition.state.createRun(runRecord);
|
|
412
|
-
|
|
413
|
-
catch (error) {
|
|
414
|
-
state.busy = false;
|
|
415
|
-
throw error;
|
|
416
|
-
}
|
|
417
|
-
try {
|
|
517
|
+
runCreated = true;
|
|
418
518
|
const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.agent_prompt', {
|
|
419
519
|
'harness.name': definition.name,
|
|
420
520
|
'harness.session.id': sessionId,
|
|
@@ -444,6 +544,7 @@ export function createSessionHarness(definition) {
|
|
|
444
544
|
mcpRegistry,
|
|
445
545
|
session: state.sandboxSession,
|
|
446
546
|
memory,
|
|
547
|
+
checkpoints,
|
|
447
548
|
mountedSkills: state.mountedSkills,
|
|
448
549
|
...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
|
|
449
550
|
maxSteps: definition.defaults.agentMaxIterations ?? 16,
|
|
@@ -469,6 +570,9 @@ export function createSessionHarness(definition) {
|
|
|
469
570
|
}
|
|
470
571
|
catch (error) {
|
|
471
572
|
const finalError = normalizeRunError(error, runSignal.signal);
|
|
573
|
+
if (!runCreated) {
|
|
574
|
+
throw finalError;
|
|
575
|
+
}
|
|
472
576
|
const finishedAt = now();
|
|
473
577
|
const serialized = serializeError(finalError);
|
|
474
578
|
const log = finalError instanceof OperationCancelledError ? definition.logger.warn.bind(definition.logger) : definition.logger.error.bind(definition.logger);
|
|
@@ -505,7 +609,11 @@ export function createSessionHarness(definition) {
|
|
|
505
609
|
}
|
|
506
610
|
}
|
|
507
611
|
/**
 * Stream the run events of a single workflow call.
 *
 * The relay supplies its own abort signal; it is combined with the caller's
 * `opts.signal` (if any) and the combined signal is what the run receives.
 *
 * @param {string} sessionId Session the run executes in.
 * @param {string} workflowId Identifier of the workflow being invoked.
 * @param {object} workflow Workflow definition passed through to `runWorkflowCall`.
 * @param {unknown} input Workflow input passed through to `runWorkflowCall`.
 * @param {object} [opts] Invoke options; `signal` is replaced by the combined signal.
 * @returns {AsyncGenerator} Events relayed by `relayRunEvents`.
 */
async function* streamWorkflowCall(sessionId, workflowId, workflow, input, opts) {
    // `async` + try/finally: a synchronous throw while combining signals turns
    // into a rejection of the returned promise, and the combined signal's
    // listeners are always released once the run settles.
    yield* relayRunEvents(async (onEvent, relaySignal) => {
        const combined = combineSignals(relaySignal, opts?.signal);
        try {
            return await runWorkflowCall(sessionId, workflowId, workflow, input, { ...opts, signal: combined.signal }, onEvent);
        }
        finally {
            combined.cleanup();
        }
    });
}
|
|
510
618
|
async function runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent) {
|
|
511
619
|
validateInvokeOptions(opts);
|
|
@@ -513,22 +621,16 @@ export function createSessionHarness(definition) {
|
|
|
513
621
|
if (opts?.signal?.aborted) {
|
|
514
622
|
throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
|
|
515
623
|
}
|
|
516
|
-
|
|
624
|
+
// Busy check precedes createRunSignal so an early SessionBusyError cannot
|
|
625
|
+
// leak the run-timeout timer or the caller-signal abort listener.
|
|
517
626
|
const state = await getSessionState(sessionId);
|
|
518
627
|
if (state.busy) {
|
|
519
628
|
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
|
|
520
629
|
}
|
|
521
630
|
state.busy = true;
|
|
631
|
+
const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
|
|
522
632
|
const startedAt = now();
|
|
523
633
|
const runId = opts?.durable ? opts.durable.runId : ulid();
|
|
524
|
-
const memory = memoryFacade({
|
|
525
|
-
sessionId,
|
|
526
|
-
runId,
|
|
527
|
-
workflowId,
|
|
528
|
-
signal: runSignal.signal,
|
|
529
|
-
sandboxSession: state.sandboxSession,
|
|
530
|
-
metadata: opts?.metadata ?? {}
|
|
531
|
-
});
|
|
532
634
|
const runRecord = {
|
|
533
635
|
id: runId,
|
|
534
636
|
sessionId,
|
|
@@ -543,14 +645,16 @@ export function createSessionHarness(definition) {
|
|
|
543
645
|
await onEvent?.(event);
|
|
544
646
|
await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
|
|
545
647
|
};
|
|
546
|
-
try {
|
|
547
|
-
await definition.state.createRun(runRecord);
|
|
548
|
-
}
|
|
549
|
-
catch (error) {
|
|
550
|
-
state.busy = false;
|
|
551
|
-
throw error;
|
|
552
|
-
}
|
|
553
648
|
let durableBinding;
|
|
649
|
+
let runSandboxSession = state.sandboxSession;
|
|
650
|
+
let runMountedSkills = state.mountedSkills;
|
|
651
|
+
let closeRunSandbox = false;
|
|
652
|
+
let runCreated = false;
|
|
653
|
+
const delegationState = {
|
|
654
|
+
totalChildAgentCalls: 0,
|
|
655
|
+
activeChildAgentCalls: 0,
|
|
656
|
+
inFlightChildCalls: new Set()
|
|
657
|
+
};
|
|
554
658
|
try {
|
|
555
659
|
if (durableRuntime && opts?.durable) {
|
|
556
660
|
durableBinding = await beginDurableWorkflow({
|
|
@@ -565,7 +669,23 @@ export function createSessionHarness(definition) {
|
|
|
565
669
|
logger: definition.logger,
|
|
566
670
|
harnessName: definition.name
|
|
567
671
|
});
|
|
672
|
+
if (definition.workspaceStore) {
|
|
673
|
+
runSandboxSession = await definition.sandbox.open({ sessionId, runId, signal: runSignal.signal });
|
|
674
|
+
runMountedSkills = new Set();
|
|
675
|
+
closeRunSandbox = true;
|
|
676
|
+
}
|
|
568
677
|
}
|
|
678
|
+
const memory = memoryFacade({
|
|
679
|
+
sessionId,
|
|
680
|
+
runId,
|
|
681
|
+
workflowId,
|
|
682
|
+
signal: runSignal.signal,
|
|
683
|
+
sandboxSession: runSandboxSession,
|
|
684
|
+
metadata: opts?.metadata ?? {}
|
|
685
|
+
});
|
|
686
|
+
const checkpoints = createContextCheckpoints({ sessionId, runId, workflowId, signal: runSignal.signal });
|
|
687
|
+
await definition.state.createRun(runRecord);
|
|
688
|
+
runCreated = true;
|
|
569
689
|
const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.prompt', {
|
|
570
690
|
'harness.name': definition.name,
|
|
571
691
|
'harness.session.id': sessionId,
|
|
@@ -582,11 +702,13 @@ export function createSessionHarness(definition) {
|
|
|
582
702
|
'harness.run.id': runId,
|
|
583
703
|
'harness.workflow.id': workflowId
|
|
584
704
|
});
|
|
705
|
+
const delegationPolicy = resolveDelegationPolicy(workflow);
|
|
585
706
|
const workflowArgs = {
|
|
586
707
|
workflowId,
|
|
587
708
|
workflow,
|
|
588
709
|
input,
|
|
589
710
|
ctx: {
|
|
711
|
+
log: definition.logger,
|
|
590
712
|
signal: runSignal.signal,
|
|
591
713
|
runId,
|
|
592
714
|
sessionId,
|
|
@@ -599,12 +721,47 @@ export function createSessionHarness(definition) {
|
|
|
599
721
|
metadata: opts?.metadata ?? {},
|
|
600
722
|
metrics: workflowMetrics,
|
|
601
723
|
memory,
|
|
724
|
+
checkpoints,
|
|
602
725
|
step: durableBinding ? durableBinding.step : passthroughStep,
|
|
603
726
|
agents: Object.fromEntries(Object.entries(definition.agents).map(([agentId, agent]) => [
|
|
604
727
|
agentId,
|
|
605
728
|
async (agentInput, agentOpts) => {
|
|
606
|
-
|
|
607
|
-
|
|
729
|
+
// Spec 10 "Cancellation": starting a child-agent call after
|
|
730
|
+
// abort throws OperationCancelledError synchronously, before
|
|
731
|
+
// policy checks run or budgets are consumed.
|
|
732
|
+
if (runSignal.signal.aborted) {
|
|
733
|
+
throw abortError(runSignal.signal, 'run', 'Run was cancelled.');
|
|
734
|
+
}
|
|
735
|
+
if (agentOpts?.signal?.aborted) {
|
|
736
|
+
throw new OperationCancelledError('Child-agent call was cancelled before start.', { scope: 'run' }, agentOpts.signal.reason);
|
|
737
|
+
}
|
|
738
|
+
validateInvokeOptions(agentOpts);
|
|
739
|
+
if (agentOpts?.durable) {
|
|
740
|
+
throw new ValidationError('Durable execution is only supported for workflow runs.', { where: 'invoke_options', issues: { durable: 'agent_run' } });
|
|
741
|
+
}
|
|
742
|
+
// An unknown per-call model alias is an invoke-option mistake;
|
|
743
|
+
// it must not pass the delegation gate or consume call budget.
|
|
744
|
+
if (agentOpts?.model !== undefined && !(agentOpts.model in definition.models)) {
|
|
745
|
+
throw new ValidationError('Unknown model alias for child-agent call.', { where: 'invoke_options', issues: { model: agentOpts.model } });
|
|
746
|
+
}
|
|
747
|
+
const selectedModelAlias = agentOpts?.model ?? agent.model;
|
|
748
|
+
assertDelegationAllowed({
|
|
749
|
+
policy: delegationPolicy,
|
|
750
|
+
state: delegationState,
|
|
751
|
+
workflowId,
|
|
752
|
+
agentId,
|
|
753
|
+
modelAlias: selectedModelAlias
|
|
754
|
+
});
|
|
755
|
+
// Compose signals before consuming budget so a composition
|
|
756
|
+
// failure can never leak an active delegation slot.
|
|
757
|
+
const combinedSignal = combineSignals(runSignal.signal, agentOpts?.signal);
|
|
758
|
+
const agentSignal = agentOpts?.timeoutMs !== undefined
|
|
759
|
+
? createRunSignal(combinedSignal.signal, agentOpts.timeoutMs)
|
|
760
|
+
: combinedSignal;
|
|
761
|
+
delegationState.totalChildAgentCalls += 1;
|
|
762
|
+
delegationState.activeChildAgentCalls += 1;
|
|
763
|
+
const delegationCallId = `delegate_${ulid()}`;
|
|
764
|
+
const childCall = (async () => {
|
|
608
765
|
const resolvedHistoryWindow = agentOpts?.historyWindow ?? opts?.historyWindow ?? definition.defaults.historyWindow;
|
|
609
766
|
const agentMetadata = { ...(opts?.metadata ?? {}), ...(agentOpts?.metadata ?? {}) };
|
|
610
767
|
const agentMemory = memoryFacade({
|
|
@@ -613,31 +770,37 @@ export function createSessionHarness(definition) {
|
|
|
613
770
|
workflowId,
|
|
614
771
|
agentId,
|
|
615
772
|
signal: agentSignal.signal,
|
|
616
|
-
sandboxSession:
|
|
773
|
+
sandboxSession: runSandboxSession,
|
|
617
774
|
metadata: agentMetadata
|
|
618
775
|
});
|
|
776
|
+
const agentCheckpoints = createContextCheckpoints({ sessionId, runId, workflowId, agentId, signal: agentSignal.signal });
|
|
619
777
|
const run = await runDefaultAgent({
|
|
620
778
|
harnessName: definition.name,
|
|
621
779
|
agentId,
|
|
622
780
|
runId,
|
|
623
781
|
sessionId,
|
|
624
782
|
workflowId,
|
|
783
|
+
delegationCallId,
|
|
784
|
+
delegationDepth: CHILD_DELEGATION_DEPTH,
|
|
625
785
|
input: agentInput,
|
|
626
786
|
history: await definition.state.listMessages(sessionId),
|
|
627
787
|
agent: agent,
|
|
788
|
+
modelAlias: selectedModelAlias,
|
|
628
789
|
models: withRunEventModelRegistry(modelRegistry, {
|
|
629
790
|
harnessName: definition.name,
|
|
630
791
|
sessionId,
|
|
631
792
|
runId,
|
|
632
793
|
workflowId,
|
|
633
|
-
agentId
|
|
794
|
+
agentId,
|
|
795
|
+
modelAlias: selectedModelAlias
|
|
634
796
|
}, emit),
|
|
635
797
|
skills: resolvedSkills,
|
|
636
798
|
customTools: definition.tools,
|
|
637
799
|
mcpRegistry,
|
|
638
|
-
session:
|
|
800
|
+
session: runSandboxSession,
|
|
639
801
|
memory: agentMemory,
|
|
640
|
-
|
|
802
|
+
checkpoints: agentCheckpoints,
|
|
803
|
+
mountedSkills: runMountedSkills,
|
|
641
804
|
...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
|
|
642
805
|
maxSteps: definition.defaults.agentMaxIterations ?? 16,
|
|
643
806
|
signal: agentSignal.signal,
|
|
@@ -652,9 +815,17 @@ export function createSessionHarness(definition) {
|
|
|
652
815
|
await definition.state.appendMessages(sessionId, run.emitted);
|
|
653
816
|
}
|
|
654
817
|
return run.output;
|
|
818
|
+
})();
|
|
819
|
+
delegationState.inFlightChildCalls.add(childCall);
|
|
820
|
+
try {
|
|
821
|
+
return await childCall;
|
|
655
822
|
}
|
|
656
823
|
finally {
|
|
824
|
+
delegationState.inFlightChildCalls.delete(childCall);
|
|
825
|
+
delegationState.activeChildAgentCalls -= 1;
|
|
657
826
|
agentSignal.cleanup();
|
|
827
|
+
if (agentSignal !== combinedSignal)
|
|
828
|
+
combinedSignal.cleanup();
|
|
658
829
|
}
|
|
659
830
|
}
|
|
660
831
|
]))
|
|
@@ -671,6 +842,11 @@ export function createSessionHarness(definition) {
|
|
|
671
842
|
...(opts ? { opts: { ...opts, signal: runSignal.signal } } : { opts: { signal: runSignal.signal } })
|
|
672
843
|
}));
|
|
673
844
|
}));
|
|
845
|
+
// A resolved handler may still have child-agent calls in flight; settle
|
|
846
|
+
// them before terminalizing so no run events trail run.finished.
|
|
847
|
+
if (delegationState.inFlightChildCalls.size > 0) {
|
|
848
|
+
await Promise.allSettled([...delegationState.inFlightChildCalls]);
|
|
849
|
+
}
|
|
674
850
|
const finishedAt = now();
|
|
675
851
|
if (durableBinding) {
|
|
676
852
|
await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_success' }, () => durableBinding.finishSuccess(result));
|
|
@@ -684,8 +860,18 @@ export function createSessionHarness(definition) {
|
|
|
684
860
|
}
|
|
685
861
|
catch (error) {
|
|
686
862
|
const finalError = normalizeRunError(error, runSignal.signal);
|
|
863
|
+
// A handler rejection mid-Promise.all must not orphan in-flight child
|
|
864
|
+
// agents: cancel them through the run signal and await settlement before
|
|
865
|
+
// run.finished is emitted and the session busy lock is released.
|
|
866
|
+
if (delegationState.inFlightChildCalls.size > 0) {
|
|
867
|
+
runSignal.abort(finalError);
|
|
868
|
+
await Promise.allSettled([...delegationState.inFlightChildCalls]);
|
|
869
|
+
}
|
|
687
870
|
const finishedAt = now();
|
|
688
871
|
const serialized = serializeError(finalError);
|
|
872
|
+
if (!runCreated) {
|
|
873
|
+
throw finalError;
|
|
874
|
+
}
|
|
689
875
|
if (durableBinding && finalError instanceof OperationCancelledError) {
|
|
690
876
|
await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_cancelled' }, () => durableBinding.finishCancelled(finalError));
|
|
691
877
|
}
|
|
@@ -722,13 +908,91 @@ export function createSessionHarness(definition) {
|
|
|
722
908
|
// id can resume; a no-op once the run was settled (success/cancel).
|
|
723
909
|
if (durableBinding)
|
|
724
910
|
await durableBinding.dispose();
|
|
911
|
+
if (closeRunSandbox) {
|
|
912
|
+
try {
|
|
913
|
+
await runSandboxSession.close();
|
|
914
|
+
}
|
|
915
|
+
catch (error) {
|
|
916
|
+
definition.logger.warn('Failed to close durable run sandbox.', {
|
|
917
|
+
harness: definition.name,
|
|
918
|
+
session_id: sessionId,
|
|
919
|
+
run_id: runId,
|
|
920
|
+
workflow_id: workflowId,
|
|
921
|
+
error: serializeError(error)
|
|
922
|
+
});
|
|
923
|
+
}
|
|
924
|
+
}
|
|
725
925
|
runSignal.cleanup();
|
|
726
926
|
state.busy = false;
|
|
727
927
|
}
|
|
728
928
|
}
|
|
729
929
|
/**
 * Pass-through step used when a workflow runs without durable execution.
 *
 * The step id is ignored; the step function is handed to `runStepWithRetry`
 * together with the optional retry settings.
 *
 * @param {string} _stepId Step identifier (unused here).
 * @param {Function} fn Step body.
 * @param {{ retry?: unknown } | null} [options] Step options; only `retry` is read.
 * @returns {*} Whatever `runStepWithRetry` returns.
 */
function passthroughStep(_stepId, fn, options = {}) {
    // `?.` keeps an explicit `null` options argument from throwing; the default
    // parameter only covers `undefined`.
    return runStepWithRetry(fn, options?.retry);
}
|
|
933
|
+
/**
 * Resolve the effective child-agent delegation policy for a workflow.
 *
 * Precedence for each numeric limit: the workflow's own `delegation` value,
 * then `definition.defaults.delegation`, then the module-level default.
 *
 * `enabled` is resolved differently: a workflow that declares a `delegation`
 * object is enabled unless it sets `enabled: false`; a workflow without one is
 * enabled only when the harness defaults set `enabled: true`.
 *
 * @param {{ delegation?: object }} workflow Workflow definition.
 * @returns {object} Policy with `enabled`, the three limits,
 *   `agentModelAliases` (Map of agent id to Set of aliases, possibly empty),
 *   and `allowedAgents` / `modelAliases` Sets only when the workflow's
 *   `delegation.agents` / `delegation.modelAliases` are set.
 */
function resolveDelegationPolicy(workflow) {
    const configured = workflow.delegation;
    const policy = configured ?? {};
    const defaults = definition.defaults.delegation;
    const enabled = configured ? policy.enabled !== false : defaults?.enabled === true;
    const perAgentAliases = Object.entries(policy.agentModelAliases ?? {})
        .map(([agentId, aliases]) => [agentId, new Set(aliases)]);
    return {
        enabled,
        ...(policy.agents ? { allowedAgents: new Set(policy.agents) } : {}),
        maxChildAgentCalls: policy.maxChildAgentCalls ?? defaults?.maxChildAgentCalls ?? DEFAULT_MAX_CHILD_AGENT_CALLS,
        maxParallelChildAgentCalls: policy.maxParallelChildAgentCalls ?? defaults?.maxParallelChildAgentCalls ?? DEFAULT_MAX_PARALLEL_CHILD_AGENT_CALLS,
        maxDepth: policy.maxDepth ?? defaults?.maxDepth ?? DEFAULT_MAX_DELEGATION_DEPTH,
        ...(policy.modelAliases ? { modelAliases: new Set(policy.modelAliases) } : {}),
        agentModelAliases: new Map(perAgentAliases)
    };
}
|
|
947
|
+
/**
 * Validates a child-agent call against the resolved delegation policy.
 *
 * Checks run in a fixed order and the first failing one wins: delegation
 * enabled, agent allow-list, delegation depth, total call budget, parallel
 * call budget, and finally the model-alias allow-list (per-agent list first,
 * falling back to the policy-wide list).
 *
 * @param args - `{ policy, state, workflowId, agentId, modelAlias }`.
 * @throws DelegationPolicyError carrying `workflow_id`, `agent_id`, `reason`
 *   and, depending on the check, `limit` or `model_alias`.
 */
function assertDelegationAllowed(args) {
    const { policy, state, workflowId, agentId, modelAlias } = args;
    let message;
    let detail;
    if (!policy.enabled) {
        message = 'Workflow child-agent delegation is disabled.';
        detail = { reason: 'delegation_disabled' };
    }
    else if (policy.allowedAgents && !policy.allowedAgents.has(agentId)) {
        message = 'Workflow is not allowed to invoke this child agent.';
        detail = { reason: 'agent_not_allowed' };
    }
    else if (CHILD_DELEGATION_DEPTH > policy.maxDepth) {
        message = 'Workflow child-agent delegation depth exceeded.';
        detail = { reason: 'max_delegation_depth_exceeded', limit: policy.maxDepth };
    }
    else if (state.totalChildAgentCalls >= policy.maxChildAgentCalls) {
        message = 'Workflow child-agent call budget exceeded.';
        detail = { reason: 'max_child_agent_calls_exceeded', limit: policy.maxChildAgentCalls };
    }
    else if (state.activeChildAgentCalls >= policy.maxParallelChildAgentCalls) {
        message = 'Workflow parallel child-agent call budget exceeded.';
        detail = { reason: 'max_parallel_child_agent_calls_exceeded', limit: policy.maxParallelChildAgentCalls };
    }
    else {
        // A per-agent alias list takes precedence over the policy-wide one.
        const allowedModels = policy.agentModelAliases.get(agentId) ?? policy.modelAliases;
        if (allowedModels && !allowedModels.has(modelAlias)) {
            message = 'Workflow is not allowed to invoke this child agent with the selected model alias.';
            detail = { reason: 'model_alias_not_allowed', model_alias: modelAlias };
        }
    }
    if (message === undefined) {
        return;
    }
    throw new DelegationPolicyError(message, {
        workflow_id: workflowId,
        agent_id: agentId,
        ...detail
    });
}
|
|
733
997
|
/**
|
|
734
998
|
* Runs a durable finalization side effect (runtime finish / workspace lifecycle)
|
|
@@ -874,7 +1138,7 @@ function isObjectPartialChunk(chunk) {
|
|
|
874
1138
|
/**
 * Reports whether a stream chunk is a `finish` chunk that carries its own
 * `object` property (the value itself may be anything, including undefined).
 *
 * @param chunk - Candidate stream chunk of any type.
 * @returns `true` only for a truthy object whose `kind` is `'finish'` and
 *   which has an own `object` property; `false` otherwise.
 */
function isObjectFinishChunk(chunk) {
    if (!chunk || typeof chunk !== 'object') {
        return false;
    }
    if (chunk.kind !== 'finish') {
        return false;
    }
    // Own-property check: an inherited `object` key does not count.
    return Object.prototype.hasOwnProperty.call(chunk, 'object');
}
|
|
877
|
-
function configureHarnessAdapters(context, models, state, sandbox, memory, tools) {
|
|
1141
|
+
function configureHarnessAdapters(context, models, state, sandbox, memory, tools, runtime, workspaceStore, checkpoints) {
|
|
878
1142
|
const seen = new Set();
|
|
879
1143
|
for (const alias of Object.values(models)) {
|
|
880
1144
|
configureOne(alias.provider, context, seen);
|
|
@@ -882,11 +1146,16 @@ function configureHarnessAdapters(context, models, state, sandbox, memory, tools
|
|
|
882
1146
|
configureOne(state, context, seen);
|
|
883
1147
|
configureOne(sandbox, context, seen);
|
|
884
1148
|
configureOne(memory, context, seen);
|
|
1149
|
+
configureOne(runtime, context, seen);
|
|
1150
|
+
configureOne(workspaceStore, context, seen);
|
|
1151
|
+
configureOne(checkpoints, context, seen);
|
|
885
1152
|
for (const tool of Object.values(tools)) {
|
|
886
1153
|
configureOne(tool, context, seen);
|
|
887
1154
|
}
|
|
888
1155
|
}
|
|
889
1156
|
function configureOne(adapter, context, seen) {
|
|
1157
|
+
if (!adapter)
|
|
1158
|
+
return;
|
|
890
1159
|
const configurable = adapter;
|
|
891
1160
|
if (!configurable.configureHarnessContext || seen.has(adapter))
|
|
892
1161
|
return;
|
|
@@ -933,26 +1202,6 @@ function resolveContentCaptureMode(options) {
|
|
|
933
1202
|
return envValue;
|
|
934
1203
|
return 'NO_CONTENT';
|
|
935
1204
|
}
|
|
936
|
-
function metadataSpanAttrs(metadata) {
|
|
937
|
-
const attrs = {};
|
|
938
|
-
for (const [key, value] of Object.entries(metadata ?? {})) {
|
|
939
|
-
if (!/^[a-zA-Z][a-zA-Z0-9_.-]{0,63}$/.test(key))
|
|
940
|
-
continue;
|
|
941
|
-
if (typeof value === 'string') {
|
|
942
|
-
if (value.length <= 256)
|
|
943
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
944
|
-
continue;
|
|
945
|
-
}
|
|
946
|
-
if (typeof value === 'number' && Number.isFinite(value)) {
|
|
947
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
948
|
-
continue;
|
|
949
|
-
}
|
|
950
|
-
if (typeof value === 'boolean') {
|
|
951
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
952
|
-
}
|
|
953
|
-
}
|
|
954
|
-
return attrs;
|
|
955
|
-
}
|
|
956
1205
|
function isValidTraceparent(traceparent) {
|
|
957
1206
|
const match = /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$/.exec(traceparent);
|
|
958
1207
|
if (!match)
|
|
@@ -1030,10 +1279,10 @@ function sanitizeEventForPersistence(event) {
|
|
|
1030
1279
|
...(event.error ? { error: event.error } : {})
|
|
1031
1280
|
};
|
|
1032
1281
|
case 'agent.started':
|
|
1033
|
-
return
|
|
1282
|
+
return agentRunEventMeta(event);
|
|
1034
1283
|
case 'agent.finished':
|
|
1035
1284
|
return {
|
|
1036
|
-
|
|
1285
|
+
...agentRunEventMeta(event),
|
|
1037
1286
|
...(event.output !== undefined ? { output: '[redacted]' } : {}),
|
|
1038
1287
|
...(event.error ? { error: event.error } : {})
|
|
1039
1288
|
};
|
|
@@ -1091,6 +1340,16 @@ function modelStreamEventMeta(event) {
|
|
|
1091
1340
|
...(event.streamId ? { streamId: event.streamId } : {})
|
|
1092
1341
|
};
|
|
1093
1342
|
}
|
|
1343
|
+
/**
 * Extracts the identifying metadata of an agent run event, leaving every
 * other field of the event behind.
 *
 * `agentId` is always copied. `workflowId`, `parentAgentId`,
 * `delegationCallId` and `modelAlias` are copied only when truthy;
 * `delegationDepth` is copied whenever it is not `undefined` (so `0` is kept).
 *
 * @param event - Agent event to read the metadata fields from.
 * @returns A new object containing only the metadata fields described above.
 */
function agentRunEventMeta(event) {
    const meta = { agentId: event.agentId };
    if (event.workflowId) {
        meta.workflowId = event.workflowId;
    }
    if (event.parentAgentId) {
        meta.parentAgentId = event.parentAgentId;
    }
    if (event.delegationCallId) {
        meta.delegationCallId = event.delegationCallId;
    }
    if (event.delegationDepth !== undefined) {
        meta.delegationDepth = event.delegationDepth;
    }
    if (event.modelAlias) {
        meta.modelAlias = event.modelAlias;
    }
    return meta;
}
|
|
1094
1353
|
/**
 * Reports whether a value is a non-null, non-array object.
 *
 * @param value - Any value.
 * @returns `true` when `typeof value` is `'object'` and the value is neither
 *   `null` nor an array; `false` otherwise.
 */
function isJsonRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
|
|
@@ -1121,6 +1380,8 @@ function createRunSignal(parent, timeoutMs) {
|
|
|
1121
1380
|
: undefined;
|
|
1122
1381
|
return {
|
|
1123
1382
|
signal: controller.signal,
|
|
1383
|
+
/** Harness-initiated abort, e.g. to cancel in-flight child-agent calls. */
|
|
1384
|
+
abort: (reason) => controller.abort(runAbortReason(reason)),
|
|
1124
1385
|
cleanup: () => {
|
|
1125
1386
|
if (timeout)
|
|
1126
1387
|
clearTimeout(timeout);
|