@purista/harness 1.2.6 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/agents/index.d.ts +7 -1
- package/dist/agents/index.js +56 -38
- package/dist/errors/catalog.d.ts +18 -2
- package/dist/errors/catalog.js +10 -0
- package/dist/eval/index.d.ts +3 -3
- package/dist/eval/index.js +15 -1
- package/dist/harness/defineHarness.d.ts +91 -1
- package/dist/harness/defineHarness.js +110 -1
- package/dist/index.d.ts +37 -17
- package/dist/index.js +30 -16
- package/dist/local/index.d.ts +36 -0
- package/dist/local/index.js +24 -0
- package/dist/local/local-sandbox.d.ts +25 -0
- package/dist/local/local-sandbox.js +368 -0
- package/dist/local/local-workspace.d.ts +56 -0
- package/dist/local/local-workspace.js +496 -0
- package/dist/local/ref-hash.d.ts +6 -0
- package/dist/local/ref-hash.js +9 -0
- package/dist/local/sqlite-storage.d.ts +106 -0
- package/dist/local/sqlite-storage.js +680 -0
- package/dist/models/adapter-utils.d.ts +52 -0
- package/dist/models/adapter-utils.js +81 -0
- package/dist/models/registry.js +28 -37
- package/dist/models/stream-pump.d.ts +16 -0
- package/dist/models/stream-pump.js +77 -0
- package/dist/ports/base-model-provider.d.ts +7 -1
- package/dist/ports/base-model-provider.js +384 -87
- package/dist/ports/capabilities.d.ts +16 -2
- package/dist/ports/context-checkpoints.d.ts +63 -0
- package/dist/ports/context-checkpoints.js +33 -0
- package/dist/ports/index.d.ts +1 -0
- package/dist/ports/index.js +1 -0
- package/dist/ports/model-provider.d.ts +94 -0
- package/dist/runtime/durable.d.ts +11 -0
- package/dist/runtime/durable.js +15 -2
- package/dist/runtime/sessionDurable.js +47 -21
- package/dist/sessions/index.d.ts +17 -6
- package/dist/sessions/index.js +337 -81
- package/dist/skills/index.d.ts +0 -2
- package/dist/skills/index.js +0 -8
- package/dist/state/in-memory.js +6 -6
- package/dist/telemetry/shim.js +2 -6
- package/dist/telemetry/span-attrs.d.ts +9 -0
- package/dist/telemetry/span-attrs.js +27 -0
- package/dist/testing/durableWorkspaceStoreContract.js +69 -0
- package/dist/testing/fakeLogger.d.ts +29 -0
- package/dist/testing/fakeLogger.js +47 -0
- package/dist/testing/fakeSandbox.d.ts +27 -0
- package/dist/testing/fakeSandbox.js +153 -0
- package/dist/testing/fakeStateStore.d.ts +36 -0
- package/dist/testing/fakeStateStore.js +66 -0
- package/dist/testing/index.d.ts +10 -4
- package/dist/testing/index.js +14 -4
- package/dist/testing/loggerContract.d.ts +9 -0
- package/dist/testing/loggerContract.js +62 -0
- package/dist/testing/modelProviderContract.d.ts +12 -0
- package/dist/testing/modelProviderContract.js +222 -0
- package/dist/testing/recordEvents.d.ts +3 -0
- package/dist/testing/recordEvents.js +8 -0
- package/dist/testing/stateStoreContract.js +27 -0
- package/dist/tools/index.js +26 -1
- package/dist/tools/mcp/http.d.ts +2 -0
- package/dist/tools/mcp/http.js +34 -21
- package/dist/tools/mcp/runner.d.ts +4 -0
- package/dist/tools/mcp/runner.js +75 -21
- package/dist/tools/mcp/stdio.d.ts +7 -1
- package/dist/tools/mcp/stdio.js +102 -23
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/workspace/in-memory.d.ts +1 -0
- package/dist/workspace/in-memory.js +47 -12
- package/package.json +2 -1
package/dist/sessions/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, serializeError } from '../errors/index.js';
|
|
1
|
+
import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, DelegationPolicyError, serializeError } from '../errors/index.js';
|
|
2
2
|
import { ulid } from '../ulid/index.js';
|
|
3
3
|
import { runDefaultAgent } from '../agents/index.js';
|
|
4
4
|
import { runWorkflow } from '../workflows/index.js';
|
|
@@ -8,21 +8,50 @@ import { HarnessConfigError } from '../errors/catalog.js';
|
|
|
8
8
|
import { loadSkillsSync } from '../skills/index.js';
|
|
9
9
|
import { createModelRegistry } from '../models/registry.js';
|
|
10
10
|
import { createMetrics, createTelemetryShim } from '../telemetry/index.js';
|
|
11
|
+
import { metadataSpanAttrs } from '../telemetry/span-attrs.js';
|
|
12
|
+
import { abortError } from '../runtime/abort.js';
|
|
11
13
|
import { createMcpRunnerRegistry } from '../tools/mcp/runner.js';
|
|
12
14
|
const NEVER_ABORT_SIGNAL = new AbortController().signal;
|
|
15
|
+
const DEFAULT_MAX_CHILD_AGENT_CALLS = 32;
|
|
16
|
+
const DEFAULT_MAX_PARALLEL_CHILD_AGENT_CALLS = 8;
|
|
17
|
+
const DEFAULT_MAX_DELEGATION_DEPTH = 1;
|
|
18
|
+
/**
|
|
19
|
+
* Workflows invoke leaf agents directly, so every child-agent call runs at
|
|
20
|
+
* depth 1 (spec 10 "Delegation policy": `maxDepth` default `1`, `0` disables
|
|
21
|
+
* child-agent delegation).
|
|
22
|
+
*/
|
|
23
|
+
const CHILD_DELEGATION_DEPTH = 1;
|
|
13
24
|
function now() {
|
|
14
25
|
return new Date().toISOString();
|
|
15
26
|
}
|
|
16
27
|
const STREAM_MAX_BUFFERED_EVENTS = 1024;
|
|
17
|
-
|
|
28
|
+
/**
|
|
29
|
+
* Event types that must never be dropped from the relay queue.
|
|
30
|
+
*
|
|
31
|
+
* Only `run.finished` qualifies: it occurs at most once per run and is the
|
|
32
|
+
* terminal event consumers key off to know the run is complete. `agent.finished`
|
|
33
|
+
* is emitted once per agent invocation (including every child-agent delegation
|
|
34
|
+
* call), so it can appear many times and must remain droppable to keep the
|
|
35
|
+
* queue bounded when a slow consumer falls behind during a delegation-heavy run.
|
|
36
|
+
*/
|
|
37
|
+
const STREAM_UNDROPPABLE_EVENT_TYPES = new Set(['run.finished']);
|
|
18
38
|
/**
|
|
19
39
|
* Relay run events from an in-process run to a stream consumer.
|
|
20
40
|
*
|
|
21
|
-
* The unread events live in a bounded queue:
|
|
22
|
-
* growing cursor over a shared array), and on
|
|
23
|
-
* unread event is dropped and counted, so a slow
|
|
24
|
-
*
|
|
25
|
-
*
|
|
41
|
+
* The unread events live in a bounded queue (cap: STREAM_MAX_BUFFERED_EVENTS):
|
|
42
|
+
* consumed events are removed (no growing cursor over a shared array), and on
|
|
43
|
+
* overflow the oldest droppable unread event is dropped and counted, so a slow
|
|
44
|
+
* consumer never silently skips an event without an accompanying
|
|
45
|
+
* `stream.overflow` notice. Only `run.finished` is undroppable; all other
|
|
46
|
+
* event types — including `agent.finished` — may be evicted under pressure.
|
|
47
|
+
* If no droppable event exists when the queue is full, the incoming event is
|
|
48
|
+
* discarded (counted) rather than growing the queue past the cap. Delivery is
|
|
49
|
+
* promise-notified rather than time-polled, so there is no fixed per-event
|
|
50
|
+
* latency or periodic timer.
|
|
51
|
+
*
|
|
52
|
+
* Abandoning the stream (`break` / `iterator.return()`) aborts `relaySignal`,
|
|
53
|
+
* so a run wired to it is cancelled promptly instead of blocking the consumer
|
|
54
|
+
* until the run finishes on its own.
|
|
26
55
|
*/
|
|
27
56
|
export async function* relayRunEvents(run) {
|
|
28
57
|
const queue = [];
|
|
@@ -31,6 +60,7 @@ export async function* relayRunEvents(run) {
|
|
|
31
60
|
let done = false;
|
|
32
61
|
let failure;
|
|
33
62
|
let wake;
|
|
63
|
+
const relayController = new AbortController();
|
|
34
64
|
const notify = () => {
|
|
35
65
|
const resolve = wake;
|
|
36
66
|
wake = undefined;
|
|
@@ -40,16 +70,23 @@ export async function* relayRunEvents(run) {
|
|
|
40
70
|
if ('runId' in event)
|
|
41
71
|
liveRunId = event.runId;
|
|
42
72
|
if (queue.length >= STREAM_MAX_BUFFERED_EVENTS) {
|
|
43
|
-
const dropIndex = queue.findIndex((candidate) => !
|
|
73
|
+
const dropIndex = queue.findIndex((candidate) => !STREAM_UNDROPPABLE_EVENT_TYPES.has(candidate.type));
|
|
44
74
|
if (dropIndex >= 0) {
|
|
45
75
|
queue.splice(dropIndex, 1);
|
|
46
76
|
dropped += 1;
|
|
47
77
|
}
|
|
78
|
+
else {
|
|
79
|
+
// Every queued event is undroppable; discard the incoming event to keep
|
|
80
|
+
// the queue bounded rather than growing past the cap.
|
|
81
|
+
dropped += 1;
|
|
82
|
+
notify();
|
|
83
|
+
return Promise.resolve();
|
|
84
|
+
}
|
|
48
85
|
}
|
|
49
86
|
queue.push(event);
|
|
50
87
|
notify();
|
|
51
88
|
return Promise.resolve();
|
|
52
|
-
})
|
|
89
|
+
}, relayController.signal)
|
|
53
90
|
.catch((error) => {
|
|
54
91
|
failure = error;
|
|
55
92
|
return undefined;
|
|
@@ -84,6 +121,9 @@ export async function* relayRunEvents(run) {
|
|
|
84
121
|
}
|
|
85
122
|
}
|
|
86
123
|
finally {
|
|
124
|
+
// Cancel the run before awaiting it so an abandoned stream does not block
|
|
125
|
+
// `iterator.return()` until the run finishes or times out.
|
|
126
|
+
relayController.abort(new OperationCancelledError('Run event stream was abandoned by the consumer.', { scope: 'run' }));
|
|
87
127
|
await result.catch(() => undefined);
|
|
88
128
|
}
|
|
89
129
|
if (failure)
|
|
@@ -133,7 +173,7 @@ export function createSessionHarness(definition) {
|
|
|
133
173
|
...(definition.defaults.historyWindow !== undefined ? { historyWindow: definition.defaults.historyWindow } : {})
|
|
134
174
|
}
|
|
135
175
|
};
|
|
136
|
-
configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools);
|
|
176
|
+
configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools, definition.runtime, definition.workspaceStore, definition.checkpoints);
|
|
137
177
|
const modelRegistry = createModelRegistry(definition.models, { telemetry, harnessName: definition.name });
|
|
138
178
|
const mcpRegistry = createMcpRunnerRegistry();
|
|
139
179
|
async function ensureSessionRecord(sessionId) {
|
|
@@ -253,6 +293,54 @@ export function createSessionHarness(definition) {
|
|
|
253
293
|
}
|
|
254
294
|
return definition.runtime;
|
|
255
295
|
}
|
|
296
|
+
function createContextCheckpoints(args) {
|
|
297
|
+
const store = definition.checkpoints;
|
|
298
|
+
const requireStore = () => {
|
|
299
|
+
if (!store) {
|
|
300
|
+
throw new ValidationError('No context checkpoint store is configured.', {
|
|
301
|
+
where: 'invoke_options',
|
|
302
|
+
issues: { reason: 'context_checkpoint_store_missing' }
|
|
303
|
+
});
|
|
304
|
+
}
|
|
305
|
+
return store;
|
|
306
|
+
};
|
|
307
|
+
const baseQuery = {
|
|
308
|
+
runId: args.runId,
|
|
309
|
+
sessionId: args.sessionId,
|
|
310
|
+
...(args.workflowId ? { workflowId: args.workflowId } : {}),
|
|
311
|
+
...(args.agentId ? { agentId: args.agentId } : {})
|
|
312
|
+
};
|
|
313
|
+
return {
|
|
314
|
+
async write(input) {
|
|
315
|
+
const json = JSON.stringify(input.payload);
|
|
316
|
+
if (json === undefined) {
|
|
317
|
+
throw new ValidationError('Context checkpoint payload must be JSON-serializable.', {
|
|
318
|
+
where: 'invoke_options',
|
|
319
|
+
issues: { reason: 'non_json_context_checkpoint_payload' }
|
|
320
|
+
});
|
|
321
|
+
}
|
|
322
|
+
const checkpoint = {
|
|
323
|
+
...baseQuery,
|
|
324
|
+
sequence: input.sequence,
|
|
325
|
+
kind: input.kind,
|
|
326
|
+
payload: input.payload,
|
|
327
|
+
payloadSizeBytes: Buffer.byteLength(json, 'utf8'),
|
|
328
|
+
createdAt: now(),
|
|
329
|
+
...(input.metadata ? { metadata: input.metadata } : {})
|
|
330
|
+
};
|
|
331
|
+
await requireStore().write(checkpoint, { signal: args.signal });
|
|
332
|
+
},
|
|
333
|
+
async list(query = {}) {
|
|
334
|
+
return requireStore().list({ ...baseQuery, ...query, signal: args.signal });
|
|
335
|
+
},
|
|
336
|
+
async read(ref) {
|
|
337
|
+
return requireStore().read({ runId: args.runId, sessionId: args.sessionId, sequence: ref.sequence, kind: ref.kind });
|
|
338
|
+
},
|
|
339
|
+
async delete(ref) {
|
|
340
|
+
await requireStore().delete({ runId: args.runId, sessionId: args.sessionId, sequence: ref.sequence, kind: ref.kind });
|
|
341
|
+
}
|
|
342
|
+
};
|
|
343
|
+
}
|
|
256
344
|
return {
|
|
257
345
|
inspect() {
|
|
258
346
|
return definition.inspection;
|
|
@@ -326,9 +414,13 @@ export function createSessionHarness(definition) {
|
|
|
326
414
|
}
|
|
327
415
|
},
|
|
328
416
|
async close() {
|
|
417
|
+
if (state.busy) {
|
|
418
|
+
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
|
|
419
|
+
}
|
|
329
420
|
await definition.state.closeSession(sessionId);
|
|
330
421
|
sessionStates.delete(sessionId);
|
|
331
422
|
sessionStateOpenings.delete(sessionId);
|
|
423
|
+
await mcpRegistry.closeForSandboxKey(sessionId);
|
|
332
424
|
await state.sandboxSession.close();
|
|
333
425
|
}
|
|
334
426
|
};
|
|
@@ -367,7 +459,11 @@ export function createSessionHarness(definition) {
|
|
|
367
459
|
$infer: {}
|
|
368
460
|
};
|
|
369
461
|
async function* streamAgentCall(sessionId, agentId, agent, input, opts) {
|
|
370
|
-
yield* relayRunEvents((onEvent) =>
|
|
462
|
+
yield* relayRunEvents((onEvent, relaySignal) => {
|
|
463
|
+
const combined = combineSignals(relaySignal, opts?.signal);
|
|
464
|
+
return runAgentCall(sessionId, agentId, agent, input, { ...opts, signal: combined.signal }, onEvent)
|
|
465
|
+
.finally(() => combined.cleanup());
|
|
466
|
+
});
|
|
371
467
|
}
|
|
372
468
|
async function runAgentCall(sessionId, agentId, agent, input, opts, onEvent) {
|
|
373
469
|
validateInvokeOptions(opts);
|
|
@@ -377,44 +473,43 @@ export function createSessionHarness(definition) {
|
|
|
377
473
|
if (opts?.signal?.aborted) {
|
|
378
474
|
throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
|
|
379
475
|
}
|
|
380
|
-
|
|
476
|
+
// Busy check precedes createRunSignal so an early SessionBusyError cannot
|
|
477
|
+
// leak the run-timeout timer or the caller-signal abort listener.
|
|
381
478
|
const state = await getSessionState(sessionId);
|
|
382
479
|
if (state.busy) {
|
|
383
480
|
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
|
|
384
481
|
}
|
|
385
482
|
state.busy = true;
|
|
483
|
+
const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
|
|
386
484
|
const startedAt = now();
|
|
387
485
|
const runId = ulid();
|
|
388
|
-
const memory = memoryFacade({
|
|
389
|
-
sessionId,
|
|
390
|
-
runId,
|
|
391
|
-
agentId,
|
|
392
|
-
signal: runSignal.signal,
|
|
393
|
-
sandboxSession: state.sandboxSession,
|
|
394
|
-
metadata: opts?.metadata ?? {}
|
|
395
|
-
});
|
|
396
|
-
const runRecord = {
|
|
397
|
-
id: runId,
|
|
398
|
-
sessionId,
|
|
399
|
-
kind: 'agent',
|
|
400
|
-
target: agentId,
|
|
401
|
-
startedAt,
|
|
402
|
-
status: 'running',
|
|
403
|
-
input: input
|
|
404
|
-
};
|
|
405
486
|
const emit = async (event) => {
|
|
406
487
|
const eventAt = 'at' in event ? event.at : now();
|
|
407
488
|
await onEvent?.(event);
|
|
408
489
|
await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
|
|
409
490
|
};
|
|
491
|
+
let runCreated = false;
|
|
410
492
|
try {
|
|
493
|
+
const memory = memoryFacade({
|
|
494
|
+
sessionId,
|
|
495
|
+
runId,
|
|
496
|
+
agentId,
|
|
497
|
+
signal: runSignal.signal,
|
|
498
|
+
sandboxSession: state.sandboxSession,
|
|
499
|
+
metadata: opts?.metadata ?? {}
|
|
500
|
+
});
|
|
501
|
+
const checkpoints = createContextCheckpoints({ sessionId, runId, agentId, signal: runSignal.signal });
|
|
502
|
+
const runRecord = {
|
|
503
|
+
id: runId,
|
|
504
|
+
sessionId,
|
|
505
|
+
kind: 'agent',
|
|
506
|
+
target: agentId,
|
|
507
|
+
startedAt,
|
|
508
|
+
status: 'running',
|
|
509
|
+
input: input
|
|
510
|
+
};
|
|
411
511
|
await definition.state.createRun(runRecord);
|
|
412
|
-
|
|
413
|
-
catch (error) {
|
|
414
|
-
state.busy = false;
|
|
415
|
-
throw error;
|
|
416
|
-
}
|
|
417
|
-
try {
|
|
512
|
+
runCreated = true;
|
|
418
513
|
const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.agent_prompt', {
|
|
419
514
|
'harness.name': definition.name,
|
|
420
515
|
'harness.session.id': sessionId,
|
|
@@ -444,6 +539,7 @@ export function createSessionHarness(definition) {
|
|
|
444
539
|
mcpRegistry,
|
|
445
540
|
session: state.sandboxSession,
|
|
446
541
|
memory,
|
|
542
|
+
checkpoints,
|
|
447
543
|
mountedSkills: state.mountedSkills,
|
|
448
544
|
...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
|
|
449
545
|
maxSteps: definition.defaults.agentMaxIterations ?? 16,
|
|
@@ -469,6 +565,9 @@ export function createSessionHarness(definition) {
|
|
|
469
565
|
}
|
|
470
566
|
catch (error) {
|
|
471
567
|
const finalError = normalizeRunError(error, runSignal.signal);
|
|
568
|
+
if (!runCreated) {
|
|
569
|
+
throw finalError;
|
|
570
|
+
}
|
|
472
571
|
const finishedAt = now();
|
|
473
572
|
const serialized = serializeError(finalError);
|
|
474
573
|
const log = finalError instanceof OperationCancelledError ? definition.logger.warn.bind(definition.logger) : definition.logger.error.bind(definition.logger);
|
|
@@ -505,7 +604,11 @@ export function createSessionHarness(definition) {
|
|
|
505
604
|
}
|
|
506
605
|
}
|
|
507
606
|
async function* streamWorkflowCall(sessionId, workflowId, workflow, input, opts) {
|
|
508
|
-
yield* relayRunEvents((onEvent) =>
|
|
607
|
+
yield* relayRunEvents((onEvent, relaySignal) => {
|
|
608
|
+
const combined = combineSignals(relaySignal, opts?.signal);
|
|
609
|
+
return runWorkflowCall(sessionId, workflowId, workflow, input, { ...opts, signal: combined.signal }, onEvent)
|
|
610
|
+
.finally(() => combined.cleanup());
|
|
611
|
+
});
|
|
509
612
|
}
|
|
510
613
|
async function runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent) {
|
|
511
614
|
validateInvokeOptions(opts);
|
|
@@ -513,22 +616,16 @@ export function createSessionHarness(definition) {
|
|
|
513
616
|
if (opts?.signal?.aborted) {
|
|
514
617
|
throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
|
|
515
618
|
}
|
|
516
|
-
|
|
619
|
+
// Busy check precedes createRunSignal so an early SessionBusyError cannot
|
|
620
|
+
// leak the run-timeout timer or the caller-signal abort listener.
|
|
517
621
|
const state = await getSessionState(sessionId);
|
|
518
622
|
if (state.busy) {
|
|
519
623
|
throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
|
|
520
624
|
}
|
|
521
625
|
state.busy = true;
|
|
626
|
+
const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
|
|
522
627
|
const startedAt = now();
|
|
523
628
|
const runId = opts?.durable ? opts.durable.runId : ulid();
|
|
524
|
-
const memory = memoryFacade({
|
|
525
|
-
sessionId,
|
|
526
|
-
runId,
|
|
527
|
-
workflowId,
|
|
528
|
-
signal: runSignal.signal,
|
|
529
|
-
sandboxSession: state.sandboxSession,
|
|
530
|
-
metadata: opts?.metadata ?? {}
|
|
531
|
-
});
|
|
532
629
|
const runRecord = {
|
|
533
630
|
id: runId,
|
|
534
631
|
sessionId,
|
|
@@ -543,14 +640,16 @@ export function createSessionHarness(definition) {
|
|
|
543
640
|
await onEvent?.(event);
|
|
544
641
|
await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
|
|
545
642
|
};
|
|
546
|
-
try {
|
|
547
|
-
await definition.state.createRun(runRecord);
|
|
548
|
-
}
|
|
549
|
-
catch (error) {
|
|
550
|
-
state.busy = false;
|
|
551
|
-
throw error;
|
|
552
|
-
}
|
|
553
643
|
let durableBinding;
|
|
644
|
+
let runSandboxSession = state.sandboxSession;
|
|
645
|
+
let runMountedSkills = state.mountedSkills;
|
|
646
|
+
let closeRunSandbox = false;
|
|
647
|
+
let runCreated = false;
|
|
648
|
+
const delegationState = {
|
|
649
|
+
totalChildAgentCalls: 0,
|
|
650
|
+
activeChildAgentCalls: 0,
|
|
651
|
+
inFlightChildCalls: new Set()
|
|
652
|
+
};
|
|
554
653
|
try {
|
|
555
654
|
if (durableRuntime && opts?.durable) {
|
|
556
655
|
durableBinding = await beginDurableWorkflow({
|
|
@@ -565,7 +664,23 @@ export function createSessionHarness(definition) {
|
|
|
565
664
|
logger: definition.logger,
|
|
566
665
|
harnessName: definition.name
|
|
567
666
|
});
|
|
667
|
+
if (definition.workspaceStore) {
|
|
668
|
+
runSandboxSession = await definition.sandbox.open({ sessionId, runId, signal: runSignal.signal });
|
|
669
|
+
runMountedSkills = new Set();
|
|
670
|
+
closeRunSandbox = true;
|
|
671
|
+
}
|
|
568
672
|
}
|
|
673
|
+
const memory = memoryFacade({
|
|
674
|
+
sessionId,
|
|
675
|
+
runId,
|
|
676
|
+
workflowId,
|
|
677
|
+
signal: runSignal.signal,
|
|
678
|
+
sandboxSession: runSandboxSession,
|
|
679
|
+
metadata: opts?.metadata ?? {}
|
|
680
|
+
});
|
|
681
|
+
const checkpoints = createContextCheckpoints({ sessionId, runId, workflowId, signal: runSignal.signal });
|
|
682
|
+
await definition.state.createRun(runRecord);
|
|
683
|
+
runCreated = true;
|
|
569
684
|
const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.prompt', {
|
|
570
685
|
'harness.name': definition.name,
|
|
571
686
|
'harness.session.id': sessionId,
|
|
@@ -582,11 +697,13 @@ export function createSessionHarness(definition) {
|
|
|
582
697
|
'harness.run.id': runId,
|
|
583
698
|
'harness.workflow.id': workflowId
|
|
584
699
|
});
|
|
700
|
+
const delegationPolicy = resolveDelegationPolicy(workflow);
|
|
585
701
|
const workflowArgs = {
|
|
586
702
|
workflowId,
|
|
587
703
|
workflow,
|
|
588
704
|
input,
|
|
589
705
|
ctx: {
|
|
706
|
+
log: definition.logger,
|
|
590
707
|
signal: runSignal.signal,
|
|
591
708
|
runId,
|
|
592
709
|
sessionId,
|
|
@@ -599,12 +716,47 @@ export function createSessionHarness(definition) {
|
|
|
599
716
|
metadata: opts?.metadata ?? {},
|
|
600
717
|
metrics: workflowMetrics,
|
|
601
718
|
memory,
|
|
719
|
+
checkpoints,
|
|
602
720
|
step: durableBinding ? durableBinding.step : passthroughStep,
|
|
603
721
|
agents: Object.fromEntries(Object.entries(definition.agents).map(([agentId, agent]) => [
|
|
604
722
|
agentId,
|
|
605
723
|
async (agentInput, agentOpts) => {
|
|
606
|
-
|
|
607
|
-
|
|
724
|
+
// Spec 10 "Cancellation": starting a child-agent call after
|
|
725
|
+
// abort throws OperationCancelledError synchronously, before
|
|
726
|
+
// policy checks run or budgets are consumed.
|
|
727
|
+
if (runSignal.signal.aborted) {
|
|
728
|
+
throw abortError(runSignal.signal, 'run', 'Run was cancelled.');
|
|
729
|
+
}
|
|
730
|
+
if (agentOpts?.signal?.aborted) {
|
|
731
|
+
throw new OperationCancelledError('Child-agent call was cancelled before start.', { scope: 'run' }, agentOpts.signal.reason);
|
|
732
|
+
}
|
|
733
|
+
validateInvokeOptions(agentOpts);
|
|
734
|
+
if (agentOpts?.durable) {
|
|
735
|
+
throw new ValidationError('Durable execution is only supported for workflow runs.', { where: 'invoke_options', issues: { durable: 'agent_run' } });
|
|
736
|
+
}
|
|
737
|
+
// An unknown per-call model alias is an invoke-option mistake;
|
|
738
|
+
// it must not pass the delegation gate or consume call budget.
|
|
739
|
+
if (agentOpts?.model !== undefined && !(agentOpts.model in definition.models)) {
|
|
740
|
+
throw new ValidationError('Unknown model alias for child-agent call.', { where: 'invoke_options', issues: { model: agentOpts.model } });
|
|
741
|
+
}
|
|
742
|
+
const selectedModelAlias = agentOpts?.model ?? agent.model;
|
|
743
|
+
assertDelegationAllowed({
|
|
744
|
+
policy: delegationPolicy,
|
|
745
|
+
state: delegationState,
|
|
746
|
+
workflowId,
|
|
747
|
+
agentId,
|
|
748
|
+
modelAlias: selectedModelAlias
|
|
749
|
+
});
|
|
750
|
+
// Compose signals before consuming budget so a composition
|
|
751
|
+
// failure can never leak an active delegation slot.
|
|
752
|
+
const combinedSignal = combineSignals(runSignal.signal, agentOpts?.signal);
|
|
753
|
+
const agentSignal = agentOpts?.timeoutMs !== undefined
|
|
754
|
+
? createRunSignal(combinedSignal.signal, agentOpts.timeoutMs)
|
|
755
|
+
: combinedSignal;
|
|
756
|
+
delegationState.totalChildAgentCalls += 1;
|
|
757
|
+
delegationState.activeChildAgentCalls += 1;
|
|
758
|
+
const delegationCallId = `delegate_${ulid()}`;
|
|
759
|
+
const childCall = (async () => {
|
|
608
760
|
const resolvedHistoryWindow = agentOpts?.historyWindow ?? opts?.historyWindow ?? definition.defaults.historyWindow;
|
|
609
761
|
const agentMetadata = { ...(opts?.metadata ?? {}), ...(agentOpts?.metadata ?? {}) };
|
|
610
762
|
const agentMemory = memoryFacade({
|
|
@@ -613,31 +765,37 @@ export function createSessionHarness(definition) {
|
|
|
613
765
|
workflowId,
|
|
614
766
|
agentId,
|
|
615
767
|
signal: agentSignal.signal,
|
|
616
|
-
sandboxSession:
|
|
768
|
+
sandboxSession: runSandboxSession,
|
|
617
769
|
metadata: agentMetadata
|
|
618
770
|
});
|
|
771
|
+
const agentCheckpoints = createContextCheckpoints({ sessionId, runId, workflowId, agentId, signal: agentSignal.signal });
|
|
619
772
|
const run = await runDefaultAgent({
|
|
620
773
|
harnessName: definition.name,
|
|
621
774
|
agentId,
|
|
622
775
|
runId,
|
|
623
776
|
sessionId,
|
|
624
777
|
workflowId,
|
|
778
|
+
delegationCallId,
|
|
779
|
+
delegationDepth: CHILD_DELEGATION_DEPTH,
|
|
625
780
|
input: agentInput,
|
|
626
781
|
history: await definition.state.listMessages(sessionId),
|
|
627
782
|
agent: agent,
|
|
783
|
+
modelAlias: selectedModelAlias,
|
|
628
784
|
models: withRunEventModelRegistry(modelRegistry, {
|
|
629
785
|
harnessName: definition.name,
|
|
630
786
|
sessionId,
|
|
631
787
|
runId,
|
|
632
788
|
workflowId,
|
|
633
|
-
agentId
|
|
789
|
+
agentId,
|
|
790
|
+
modelAlias: selectedModelAlias
|
|
634
791
|
}, emit),
|
|
635
792
|
skills: resolvedSkills,
|
|
636
793
|
customTools: definition.tools,
|
|
637
794
|
mcpRegistry,
|
|
638
|
-
session:
|
|
795
|
+
session: runSandboxSession,
|
|
639
796
|
memory: agentMemory,
|
|
640
|
-
|
|
797
|
+
checkpoints: agentCheckpoints,
|
|
798
|
+
mountedSkills: runMountedSkills,
|
|
641
799
|
...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
|
|
642
800
|
maxSteps: definition.defaults.agentMaxIterations ?? 16,
|
|
643
801
|
signal: agentSignal.signal,
|
|
@@ -652,9 +810,17 @@ export function createSessionHarness(definition) {
|
|
|
652
810
|
await definition.state.appendMessages(sessionId, run.emitted);
|
|
653
811
|
}
|
|
654
812
|
return run.output;
|
|
813
|
+
})();
|
|
814
|
+
delegationState.inFlightChildCalls.add(childCall);
|
|
815
|
+
try {
|
|
816
|
+
return await childCall;
|
|
655
817
|
}
|
|
656
818
|
finally {
|
|
819
|
+
delegationState.inFlightChildCalls.delete(childCall);
|
|
820
|
+
delegationState.activeChildAgentCalls -= 1;
|
|
657
821
|
agentSignal.cleanup();
|
|
822
|
+
if (agentSignal !== combinedSignal)
|
|
823
|
+
combinedSignal.cleanup();
|
|
658
824
|
}
|
|
659
825
|
}
|
|
660
826
|
]))
|
|
@@ -671,6 +837,11 @@ export function createSessionHarness(definition) {
|
|
|
671
837
|
...(opts ? { opts: { ...opts, signal: runSignal.signal } } : { opts: { signal: runSignal.signal } })
|
|
672
838
|
}));
|
|
673
839
|
}));
|
|
840
|
+
// A resolved handler may still have child-agent calls in flight; settle
|
|
841
|
+
// them before terminalizing so no run events trail run.finished.
|
|
842
|
+
if (delegationState.inFlightChildCalls.size > 0) {
|
|
843
|
+
await Promise.allSettled([...delegationState.inFlightChildCalls]);
|
|
844
|
+
}
|
|
674
845
|
const finishedAt = now();
|
|
675
846
|
if (durableBinding) {
|
|
676
847
|
await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_success' }, () => durableBinding.finishSuccess(result));
|
|
@@ -684,8 +855,18 @@ export function createSessionHarness(definition) {
|
|
|
684
855
|
}
|
|
685
856
|
catch (error) {
|
|
686
857
|
const finalError = normalizeRunError(error, runSignal.signal);
|
|
858
|
+
// A handler rejection mid-Promise.all must not orphan in-flight child
|
|
859
|
+
// agents: cancel them through the run signal and await settlement before
|
|
860
|
+
// run.finished is emitted and the session busy lock is released.
|
|
861
|
+
if (delegationState.inFlightChildCalls.size > 0) {
|
|
862
|
+
runSignal.abort(finalError);
|
|
863
|
+
await Promise.allSettled([...delegationState.inFlightChildCalls]);
|
|
864
|
+
}
|
|
687
865
|
const finishedAt = now();
|
|
688
866
|
const serialized = serializeError(finalError);
|
|
867
|
+
if (!runCreated) {
|
|
868
|
+
throw finalError;
|
|
869
|
+
}
|
|
689
870
|
if (durableBinding && finalError instanceof OperationCancelledError) {
|
|
690
871
|
await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_cancelled' }, () => durableBinding.finishCancelled(finalError));
|
|
691
872
|
}
|
|
@@ -722,6 +903,20 @@ export function createSessionHarness(definition) {
|
|
|
722
903
|
// id can resume; a no-op once the run was settled (success/cancel).
|
|
723
904
|
if (durableBinding)
|
|
724
905
|
await durableBinding.dispose();
|
|
906
|
+
if (closeRunSandbox) {
|
|
907
|
+
try {
|
|
908
|
+
await runSandboxSession.close();
|
|
909
|
+
}
|
|
910
|
+
catch (error) {
|
|
911
|
+
definition.logger.warn('Failed to close durable run sandbox.', {
|
|
912
|
+
harness: definition.name,
|
|
913
|
+
session_id: sessionId,
|
|
914
|
+
run_id: runId,
|
|
915
|
+
workflow_id: workflowId,
|
|
916
|
+
error: serializeError(error)
|
|
917
|
+
});
|
|
918
|
+
}
|
|
919
|
+
}
|
|
725
920
|
runSignal.cleanup();
|
|
726
921
|
state.busy = false;
|
|
727
922
|
}
|
|
@@ -730,6 +925,70 @@ export function createSessionHarness(definition) {
|
|
|
730
925
|
function passthroughStep(_stepId, fn) {
|
|
731
926
|
return fn();
|
|
732
927
|
}
|
|
928
|
+
function resolveDelegationPolicy(workflow) {
|
|
929
|
+
const configured = workflow.delegation;
|
|
930
|
+
const policy = configured ?? {};
|
|
931
|
+
const enabled = configured ? policy.enabled !== false : definition.defaults.delegation?.enabled === true;
|
|
932
|
+
return {
|
|
933
|
+
enabled,
|
|
934
|
+
...(policy.agents ? { allowedAgents: new Set(policy.agents) } : {}),
|
|
935
|
+
maxChildAgentCalls: policy.maxChildAgentCalls ?? definition.defaults.delegation?.maxChildAgentCalls ?? DEFAULT_MAX_CHILD_AGENT_CALLS,
|
|
936
|
+
maxParallelChildAgentCalls: policy.maxParallelChildAgentCalls ?? definition.defaults.delegation?.maxParallelChildAgentCalls ?? DEFAULT_MAX_PARALLEL_CHILD_AGENT_CALLS,
|
|
937
|
+
maxDepth: policy.maxDepth ?? definition.defaults.delegation?.maxDepth ?? DEFAULT_MAX_DELEGATION_DEPTH,
|
|
938
|
+
...(policy.modelAliases ? { modelAliases: new Set(policy.modelAliases) } : {}),
|
|
939
|
+
agentModelAliases: new Map(Object.entries(policy.agentModelAliases ?? {}).map(([agentId, aliases]) => [agentId, new Set(aliases)]))
|
|
940
|
+
};
|
|
941
|
+
}
|
|
942
|
+
function assertDelegationAllowed(args) {
|
|
943
|
+
const { policy, state, workflowId, agentId, modelAlias } = args;
|
|
944
|
+
if (!policy.enabled) {
|
|
945
|
+
throw new DelegationPolicyError('Workflow child-agent delegation is disabled.', {
|
|
946
|
+
workflow_id: workflowId,
|
|
947
|
+
agent_id: agentId,
|
|
948
|
+
reason: 'delegation_disabled'
|
|
949
|
+
});
|
|
950
|
+
}
|
|
951
|
+
if (policy.allowedAgents && !policy.allowedAgents.has(agentId)) {
|
|
952
|
+
throw new DelegationPolicyError('Workflow is not allowed to invoke this child agent.', {
|
|
953
|
+
workflow_id: workflowId,
|
|
954
|
+
agent_id: agentId,
|
|
955
|
+
reason: 'agent_not_allowed'
|
|
956
|
+
});
|
|
957
|
+
}
|
|
958
|
+
if (CHILD_DELEGATION_DEPTH > policy.maxDepth) {
|
|
959
|
+
throw new DelegationPolicyError('Workflow child-agent delegation depth exceeded.', {
|
|
960
|
+
workflow_id: workflowId,
|
|
961
|
+
agent_id: agentId,
|
|
962
|
+
reason: 'max_delegation_depth_exceeded',
|
|
963
|
+
limit: policy.maxDepth
|
|
964
|
+
});
|
|
965
|
+
}
|
|
966
|
+
if (state.totalChildAgentCalls >= policy.maxChildAgentCalls) {
|
|
967
|
+
throw new DelegationPolicyError('Workflow child-agent call budget exceeded.', {
|
|
968
|
+
workflow_id: workflowId,
|
|
969
|
+
agent_id: agentId,
|
|
970
|
+
reason: 'max_child_agent_calls_exceeded',
|
|
971
|
+
limit: policy.maxChildAgentCalls
|
|
972
|
+
});
|
|
973
|
+
}
|
|
974
|
+
if (state.activeChildAgentCalls >= policy.maxParallelChildAgentCalls) {
|
|
975
|
+
throw new DelegationPolicyError('Workflow parallel child-agent call budget exceeded.', {
|
|
976
|
+
workflow_id: workflowId,
|
|
977
|
+
agent_id: agentId,
|
|
978
|
+
reason: 'max_parallel_child_agent_calls_exceeded',
|
|
979
|
+
limit: policy.maxParallelChildAgentCalls
|
|
980
|
+
});
|
|
981
|
+
}
|
|
982
|
+
const allowedModels = policy.agentModelAliases.get(agentId) ?? policy.modelAliases;
|
|
983
|
+
if (allowedModels && !allowedModels.has(modelAlias)) {
|
|
984
|
+
throw new DelegationPolicyError('Workflow is not allowed to invoke this child agent with the selected model alias.', {
|
|
985
|
+
workflow_id: workflowId,
|
|
986
|
+
agent_id: agentId,
|
|
987
|
+
reason: 'model_alias_not_allowed',
|
|
988
|
+
model_alias: modelAlias
|
|
989
|
+
});
|
|
990
|
+
}
|
|
991
|
+
}
|
|
733
992
|
/**
|
|
734
993
|
* Runs a durable finalization side effect (runtime finish / workspace lifecycle)
|
|
735
994
|
* without ever masking the primary run outcome (spec 21 §16.1 step 7).
|
|
@@ -874,7 +1133,7 @@ function isObjectPartialChunk(chunk) {
|
|
|
874
1133
|
function isObjectFinishChunk(chunk) {
|
|
875
1134
|
return Boolean(chunk && typeof chunk === 'object' && chunk.kind === 'finish' && Object.prototype.hasOwnProperty.call(chunk, 'object'));
|
|
876
1135
|
}
|
|
877
|
-
function configureHarnessAdapters(context, models, state, sandbox, memory, tools) {
|
|
1136
|
+
function configureHarnessAdapters(context, models, state, sandbox, memory, tools, runtime, workspaceStore, checkpoints) {
|
|
878
1137
|
const seen = new Set();
|
|
879
1138
|
for (const alias of Object.values(models)) {
|
|
880
1139
|
configureOne(alias.provider, context, seen);
|
|
@@ -882,11 +1141,16 @@ function configureHarnessAdapters(context, models, state, sandbox, memory, tools
|
|
|
882
1141
|
configureOne(state, context, seen);
|
|
883
1142
|
configureOne(sandbox, context, seen);
|
|
884
1143
|
configureOne(memory, context, seen);
|
|
1144
|
+
configureOne(runtime, context, seen);
|
|
1145
|
+
configureOne(workspaceStore, context, seen);
|
|
1146
|
+
configureOne(checkpoints, context, seen);
|
|
885
1147
|
for (const tool of Object.values(tools)) {
|
|
886
1148
|
configureOne(tool, context, seen);
|
|
887
1149
|
}
|
|
888
1150
|
}
|
|
889
1151
|
function configureOne(adapter, context, seen) {
|
|
1152
|
+
if (!adapter)
|
|
1153
|
+
return;
|
|
890
1154
|
const configurable = adapter;
|
|
891
1155
|
if (!configurable.configureHarnessContext || seen.has(adapter))
|
|
892
1156
|
return;
|
|
@@ -933,26 +1197,6 @@ function resolveContentCaptureMode(options) {
|
|
|
933
1197
|
return envValue;
|
|
934
1198
|
return 'NO_CONTENT';
|
|
935
1199
|
}
|
|
936
|
-
function metadataSpanAttrs(metadata) {
|
|
937
|
-
const attrs = {};
|
|
938
|
-
for (const [key, value] of Object.entries(metadata ?? {})) {
|
|
939
|
-
if (!/^[a-zA-Z][a-zA-Z0-9_.-]{0,63}$/.test(key))
|
|
940
|
-
continue;
|
|
941
|
-
if (typeof value === 'string') {
|
|
942
|
-
if (value.length <= 256)
|
|
943
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
944
|
-
continue;
|
|
945
|
-
}
|
|
946
|
-
if (typeof value === 'number' && Number.isFinite(value)) {
|
|
947
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
948
|
-
continue;
|
|
949
|
-
}
|
|
950
|
-
if (typeof value === 'boolean') {
|
|
951
|
-
attrs[`harness.metadata.${key}`] = value;
|
|
952
|
-
}
|
|
953
|
-
}
|
|
954
|
-
return attrs;
|
|
955
|
-
}
|
|
956
1200
|
function isValidTraceparent(traceparent) {
|
|
957
1201
|
const match = /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$/.exec(traceparent);
|
|
958
1202
|
if (!match)
|
|
@@ -1030,10 +1274,10 @@ function sanitizeEventForPersistence(event) {
|
|
|
1030
1274
|
...(event.error ? { error: event.error } : {})
|
|
1031
1275
|
};
|
|
1032
1276
|
case 'agent.started':
|
|
1033
|
-
return
|
|
1277
|
+
return agentRunEventMeta(event);
|
|
1034
1278
|
case 'agent.finished':
|
|
1035
1279
|
return {
|
|
1036
|
-
|
|
1280
|
+
...agentRunEventMeta(event),
|
|
1037
1281
|
...(event.output !== undefined ? { output: '[redacted]' } : {}),
|
|
1038
1282
|
...(event.error ? { error: event.error } : {})
|
|
1039
1283
|
};
|
|
@@ -1091,6 +1335,16 @@ function modelStreamEventMeta(event) {
|
|
|
1091
1335
|
...(event.streamId ? { streamId: event.streamId } : {})
|
|
1092
1336
|
};
|
|
1093
1337
|
}
|
|
1338
|
+
function agentRunEventMeta(event) {
|
|
1339
|
+
return {
|
|
1340
|
+
agentId: event.agentId,
|
|
1341
|
+
...(event.workflowId ? { workflowId: event.workflowId } : {}),
|
|
1342
|
+
...(event.parentAgentId ? { parentAgentId: event.parentAgentId } : {}),
|
|
1343
|
+
...(event.delegationCallId ? { delegationCallId: event.delegationCallId } : {}),
|
|
1344
|
+
...(event.delegationDepth !== undefined ? { delegationDepth: event.delegationDepth } : {}),
|
|
1345
|
+
...(event.modelAlias ? { modelAlias: event.modelAlias } : {})
|
|
1346
|
+
};
|
|
1347
|
+
}
|
|
1094
1348
|
function isJsonRecord(value) {
|
|
1095
1349
|
return value !== null && typeof value === 'object' && !Array.isArray(value);
|
|
1096
1350
|
}
|
|
@@ -1121,6 +1375,8 @@ function createRunSignal(parent, timeoutMs) {
|
|
|
1121
1375
|
: undefined;
|
|
1122
1376
|
return {
|
|
1123
1377
|
signal: controller.signal,
|
|
1378
|
+
/** Harness-initiated abort, e.g. to cancel in-flight child-agent calls. */
|
|
1379
|
+
abort: (reason) => controller.abort(runAbortReason(reason)),
|
|
1124
1380
|
cleanup: () => {
|
|
1125
1381
|
if (timeout)
|
|
1126
1382
|
clearTimeout(timeout);
|