@purista/harness 1.2.6 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/README.md +6 -0
  2. package/dist/agents/index.d.ts +7 -1
  3. package/dist/agents/index.js +56 -38
  4. package/dist/errors/catalog.d.ts +18 -2
  5. package/dist/errors/catalog.js +10 -0
  6. package/dist/eval/index.d.ts +3 -3
  7. package/dist/eval/index.js +15 -1
  8. package/dist/harness/defineHarness.d.ts +91 -1
  9. package/dist/harness/defineHarness.js +110 -1
  10. package/dist/index.d.ts +37 -17
  11. package/dist/index.js +30 -16
  12. package/dist/local/index.d.ts +36 -0
  13. package/dist/local/index.js +24 -0
  14. package/dist/local/local-sandbox.d.ts +25 -0
  15. package/dist/local/local-sandbox.js +368 -0
  16. package/dist/local/local-workspace.d.ts +56 -0
  17. package/dist/local/local-workspace.js +496 -0
  18. package/dist/local/ref-hash.d.ts +6 -0
  19. package/dist/local/ref-hash.js +9 -0
  20. package/dist/local/sqlite-storage.d.ts +106 -0
  21. package/dist/local/sqlite-storage.js +680 -0
  22. package/dist/models/adapter-utils.d.ts +52 -0
  23. package/dist/models/adapter-utils.js +81 -0
  24. package/dist/models/registry.js +28 -37
  25. package/dist/models/stream-pump.d.ts +16 -0
  26. package/dist/models/stream-pump.js +77 -0
  27. package/dist/ports/base-model-provider.d.ts +7 -1
  28. package/dist/ports/base-model-provider.js +384 -87
  29. package/dist/ports/capabilities.d.ts +16 -2
  30. package/dist/ports/context-checkpoints.d.ts +63 -0
  31. package/dist/ports/context-checkpoints.js +33 -0
  32. package/dist/ports/index.d.ts +1 -0
  33. package/dist/ports/index.js +1 -0
  34. package/dist/ports/model-provider.d.ts +94 -0
  35. package/dist/runtime/durable.d.ts +11 -0
  36. package/dist/runtime/durable.js +15 -2
  37. package/dist/runtime/sessionDurable.js +47 -21
  38. package/dist/sessions/index.d.ts +17 -6
  39. package/dist/sessions/index.js +337 -81
  40. package/dist/skills/index.d.ts +0 -2
  41. package/dist/skills/index.js +0 -8
  42. package/dist/state/in-memory.js +6 -6
  43. package/dist/telemetry/shim.js +2 -6
  44. package/dist/telemetry/span-attrs.d.ts +9 -0
  45. package/dist/telemetry/span-attrs.js +27 -0
  46. package/dist/testing/durableWorkspaceStoreContract.js +69 -0
  47. package/dist/testing/fakeLogger.d.ts +29 -0
  48. package/dist/testing/fakeLogger.js +47 -0
  49. package/dist/testing/fakeSandbox.d.ts +27 -0
  50. package/dist/testing/fakeSandbox.js +153 -0
  51. package/dist/testing/fakeStateStore.d.ts +36 -0
  52. package/dist/testing/fakeStateStore.js +66 -0
  53. package/dist/testing/index.d.ts +10 -4
  54. package/dist/testing/index.js +14 -4
  55. package/dist/testing/loggerContract.d.ts +9 -0
  56. package/dist/testing/loggerContract.js +62 -0
  57. package/dist/testing/modelProviderContract.d.ts +12 -0
  58. package/dist/testing/modelProviderContract.js +222 -0
  59. package/dist/testing/recordEvents.d.ts +3 -0
  60. package/dist/testing/recordEvents.js +8 -0
  61. package/dist/testing/stateStoreContract.js +27 -0
  62. package/dist/tools/index.js +26 -1
  63. package/dist/tools/mcp/http.d.ts +2 -0
  64. package/dist/tools/mcp/http.js +34 -21
  65. package/dist/tools/mcp/runner.d.ts +4 -0
  66. package/dist/tools/mcp/runner.js +75 -21
  67. package/dist/tools/mcp/stdio.d.ts +7 -1
  68. package/dist/tools/mcp/stdio.js +102 -23
  69. package/dist/version.d.ts +1 -1
  70. package/dist/version.js +1 -1
  71. package/dist/workspace/in-memory.d.ts +1 -0
  72. package/dist/workspace/in-memory.js +47 -12
  73. package/package.json +2 -1
@@ -1,4 +1,4 @@
1
- import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, serializeError } from '../errors/index.js';
1
+ import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, DelegationPolicyError, serializeError } from '../errors/index.js';
2
2
  import { ulid } from '../ulid/index.js';
3
3
  import { runDefaultAgent } from '../agents/index.js';
4
4
  import { runWorkflow } from '../workflows/index.js';
@@ -8,21 +8,50 @@ import { HarnessConfigError } from '../errors/catalog.js';
8
8
  import { loadSkillsSync } from '../skills/index.js';
9
9
  import { createModelRegistry } from '../models/registry.js';
10
10
  import { createMetrics, createTelemetryShim } from '../telemetry/index.js';
11
+ import { metadataSpanAttrs } from '../telemetry/span-attrs.js';
12
+ import { abortError } from '../runtime/abort.js';
11
13
  import { createMcpRunnerRegistry } from '../tools/mcp/runner.js';
12
14
  const NEVER_ABORT_SIGNAL = new AbortController().signal;
15
+ const DEFAULT_MAX_CHILD_AGENT_CALLS = 32;
16
+ const DEFAULT_MAX_PARALLEL_CHILD_AGENT_CALLS = 8;
17
+ const DEFAULT_MAX_DELEGATION_DEPTH = 1;
18
+ /**
19
+ * Workflows invoke leaf agents directly, so every child-agent call runs at
20
+ * depth 1 (spec 10 "Delegation policy": `maxDepth` default `1`, `0` disables
21
+ * child-agent delegation).
22
+ */
23
+ const CHILD_DELEGATION_DEPTH = 1;
13
24
  function now() {
14
25
  return new Date().toISOString();
15
26
  }
16
27
  const STREAM_MAX_BUFFERED_EVENTS = 1024;
17
- const STREAM_TERMINAL_EVENT_TYPES = new Set(['run.finished', 'agent.finished']);
28
+ /**
29
+ * Event types that must never be dropped from the relay queue.
30
+ *
31
+ * Only `run.finished` qualifies: it occurs at most once per run and is the
32
+ * terminal event consumers key off to know the run is complete. `agent.finished`
33
+ * is emitted once per agent invocation (including every child-agent delegation
34
+ * call), so it can appear many times and must remain droppable to keep the
35
+ * queue bounded when a slow consumer falls behind during a delegation-heavy run.
36
+ */
37
+ const STREAM_UNDROPPABLE_EVENT_TYPES = new Set(['run.finished']);
18
38
  /**
19
39
  * Relay run events from an in-process run to a stream consumer.
20
40
  *
21
- * The unread events live in a bounded queue: consumed events are removed (no
22
- * growing cursor over a shared array), and on overflow the oldest non-terminal
23
- * unread event is dropped and counted, so a slow consumer never silently skips
24
- * an unread event. Delivery is promise-notified rather than time-polled, so
25
- * there is no fixed per-event latency or periodic timer.
41
+ * The unread events live in a bounded queue (cap: STREAM_MAX_BUFFERED_EVENTS):
42
+ * consumed events are removed (no growing cursor over a shared array), and on
43
+ * overflow the oldest droppable unread event is dropped and counted, so a slow
44
+ * consumer never silently skips an event without an accompanying
45
+ * `stream.overflow` notice. Only `run.finished` is undroppable; all other
46
+ * event types — including `agent.finished` — may be evicted under pressure.
47
+ * If no droppable event exists when the queue is full, the incoming event is
48
+ * discarded (counted) rather than growing the queue past the cap. Delivery is
49
+ * promise-notified rather than time-polled, so there is no fixed per-event
50
+ * latency or periodic timer.
51
+ *
52
+ * Abandoning the stream (`break` / `iterator.return()`) aborts `relaySignal`,
53
+ * so a run wired to it is cancelled promptly instead of blocking the consumer
54
+ * until the run finishes on its own.
26
55
  */
27
56
  export async function* relayRunEvents(run) {
28
57
  const queue = [];
@@ -31,6 +60,7 @@ export async function* relayRunEvents(run) {
31
60
  let done = false;
32
61
  let failure;
33
62
  let wake;
63
+ const relayController = new AbortController();
34
64
  const notify = () => {
35
65
  const resolve = wake;
36
66
  wake = undefined;
@@ -40,16 +70,23 @@ export async function* relayRunEvents(run) {
40
70
  if ('runId' in event)
41
71
  liveRunId = event.runId;
42
72
  if (queue.length >= STREAM_MAX_BUFFERED_EVENTS) {
43
- const dropIndex = queue.findIndex((candidate) => !STREAM_TERMINAL_EVENT_TYPES.has(candidate.type));
73
+ const dropIndex = queue.findIndex((candidate) => !STREAM_UNDROPPABLE_EVENT_TYPES.has(candidate.type));
44
74
  if (dropIndex >= 0) {
45
75
  queue.splice(dropIndex, 1);
46
76
  dropped += 1;
47
77
  }
78
+ else {
79
+ // Every queued event is undroppable; discard the incoming event to keep
80
+ // the queue bounded rather than growing past the cap.
81
+ dropped += 1;
82
+ notify();
83
+ return Promise.resolve();
84
+ }
48
85
  }
49
86
  queue.push(event);
50
87
  notify();
51
88
  return Promise.resolve();
52
- })
89
+ }, relayController.signal)
53
90
  .catch((error) => {
54
91
  failure = error;
55
92
  return undefined;
@@ -84,6 +121,9 @@ export async function* relayRunEvents(run) {
84
121
  }
85
122
  }
86
123
  finally {
124
+ // Cancel the run before awaiting it so an abandoned stream does not block
125
+ // `iterator.return()` until the run finishes or times out.
126
+ relayController.abort(new OperationCancelledError('Run event stream was abandoned by the consumer.', { scope: 'run' }));
87
127
  await result.catch(() => undefined);
88
128
  }
89
129
  if (failure)
@@ -133,7 +173,7 @@ export function createSessionHarness(definition) {
133
173
  ...(definition.defaults.historyWindow !== undefined ? { historyWindow: definition.defaults.historyWindow } : {})
134
174
  }
135
175
  };
136
- configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools);
176
+ configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools, definition.runtime, definition.workspaceStore, definition.checkpoints);
137
177
  const modelRegistry = createModelRegistry(definition.models, { telemetry, harnessName: definition.name });
138
178
  const mcpRegistry = createMcpRunnerRegistry();
139
179
  async function ensureSessionRecord(sessionId) {
@@ -253,6 +293,54 @@ export function createSessionHarness(definition) {
253
293
  }
254
294
  return definition.runtime;
255
295
  }
296
/**
 * Build the context-checkpoint facade handed to a single run (agent run,
 * workflow run, or child-agent call).
 *
 * `write` and `list` are scoped with `runId` / `sessionId` plus `workflowId`
 * and `agentId` when those are present on `args`; `read` and `delete` address
 * a checkpoint by `runId` / `sessionId` / `sequence` / `kind` only.
 *
 * @param args `{ sessionId, runId, workflowId?, agentId?, signal }`.
 * @returns An object with async `write`, `list`, `read` and `delete` methods.
 * @throws {ValidationError} From any method when `definition.checkpoints` is
 *   not configured, and from `write` when the payload cannot be serialized
 *   to JSON.
 */
function createContextCheckpoints(args) {
    const store = definition.checkpoints;
    // Resolved lazily: a harness without a checkpoint store only fails once a
    // checkpoint operation is actually attempted.
    const requireStore = () => {
        if (!store) {
            throw new ValidationError('No context checkpoint store is configured.', {
                where: 'invoke_options',
                issues: { reason: 'context_checkpoint_store_missing' }
            });
        }
        return store;
    };
    // JSON.stringify signals "not serializable" in two ways: it returns
    // `undefined` (undefined / function / symbol payloads) or it throws
    // (circular structures, BigInt values). Both are reported to the caller as
    // the same ValidationError instead of leaking a raw TypeError.
    const serializePayload = (payload) => {
        let json;
        try {
            json = JSON.stringify(payload);
        }
        catch {
            json = undefined;
        }
        if (json === undefined) {
            throw new ValidationError('Context checkpoint payload must be JSON-serializable.', {
                where: 'invoke_options',
                issues: { reason: 'non_json_context_checkpoint_payload' }
            });
        }
        return json;
    };
    const baseQuery = {
        runId: args.runId,
        sessionId: args.sessionId,
        ...(args.workflowId ? { workflowId: args.workflowId } : {}),
        ...(args.agentId ? { agentId: args.agentId } : {})
    };
    const refQuery = (ref) => ({
        runId: args.runId,
        sessionId: args.sessionId,
        sequence: ref.sequence,
        kind: ref.kind
    });
    return {
        async write(input) {
            // Serialize before touching the store so an invalid payload is
            // rejected even when no store is configured.
            const json = serializePayload(input.payload);
            const checkpoint = {
                ...baseQuery,
                sequence: input.sequence,
                kind: input.kind,
                payload: input.payload,
                payloadSizeBytes: Buffer.byteLength(json, 'utf8'),
                createdAt: now(),
                ...(input.metadata ? { metadata: input.metadata } : {})
            };
            await requireStore().write(checkpoint, { signal: args.signal });
        },
        async list(query = {}) {
            // NOTE(review): caller-supplied keys are spread after baseQuery, so
            // a query can override runId/sessionId — confirm this is intended.
            return requireStore().list({ ...baseQuery, ...query, signal: args.signal });
        },
        async read(ref) {
            return requireStore().read(refQuery(ref));
        },
        async delete(ref) {
            await requireStore().delete(refQuery(ref));
        }
    };
}
256
344
  return {
257
345
  inspect() {
258
346
  return definition.inspection;
@@ -326,9 +414,13 @@ export function createSessionHarness(definition) {
326
414
  }
327
415
  },
328
416
  async close() {
417
+ if (state.busy) {
418
+ throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
419
+ }
329
420
  await definition.state.closeSession(sessionId);
330
421
  sessionStates.delete(sessionId);
331
422
  sessionStateOpenings.delete(sessionId);
423
+ await mcpRegistry.closeForSandboxKey(sessionId);
332
424
  await state.sandboxSession.close();
333
425
  }
334
426
  };
@@ -367,7 +459,11 @@ export function createSessionHarness(definition) {
367
459
  $infer: {}
368
460
  };
369
461
  async function* streamAgentCall(sessionId, agentId, agent, input, opts) {
370
- yield* relayRunEvents((onEvent) => runAgentCall(sessionId, agentId, agent, input, opts, onEvent));
462
+ yield* relayRunEvents((onEvent, relaySignal) => {
463
+ const combined = combineSignals(relaySignal, opts?.signal);
464
+ return runAgentCall(sessionId, agentId, agent, input, { ...opts, signal: combined.signal }, onEvent)
465
+ .finally(() => combined.cleanup());
466
+ });
371
467
  }
372
468
  async function runAgentCall(sessionId, agentId, agent, input, opts, onEvent) {
373
469
  validateInvokeOptions(opts);
@@ -377,44 +473,43 @@ export function createSessionHarness(definition) {
377
473
  if (opts?.signal?.aborted) {
378
474
  throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
379
475
  }
380
- const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
476
+ // Busy check precedes createRunSignal so an early SessionBusyError cannot
477
+ // leak the run-timeout timer or the caller-signal abort listener.
381
478
  const state = await getSessionState(sessionId);
382
479
  if (state.busy) {
383
480
  throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
384
481
  }
385
482
  state.busy = true;
483
+ const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
386
484
  const startedAt = now();
387
485
  const runId = ulid();
388
- const memory = memoryFacade({
389
- sessionId,
390
- runId,
391
- agentId,
392
- signal: runSignal.signal,
393
- sandboxSession: state.sandboxSession,
394
- metadata: opts?.metadata ?? {}
395
- });
396
- const runRecord = {
397
- id: runId,
398
- sessionId,
399
- kind: 'agent',
400
- target: agentId,
401
- startedAt,
402
- status: 'running',
403
- input: input
404
- };
405
486
  const emit = async (event) => {
406
487
  const eventAt = 'at' in event ? event.at : now();
407
488
  await onEvent?.(event);
408
489
  await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
409
490
  };
491
+ let runCreated = false;
410
492
  try {
493
+ const memory = memoryFacade({
494
+ sessionId,
495
+ runId,
496
+ agentId,
497
+ signal: runSignal.signal,
498
+ sandboxSession: state.sandboxSession,
499
+ metadata: opts?.metadata ?? {}
500
+ });
501
+ const checkpoints = createContextCheckpoints({ sessionId, runId, agentId, signal: runSignal.signal });
502
+ const runRecord = {
503
+ id: runId,
504
+ sessionId,
505
+ kind: 'agent',
506
+ target: agentId,
507
+ startedAt,
508
+ status: 'running',
509
+ input: input
510
+ };
411
511
  await definition.state.createRun(runRecord);
412
- }
413
- catch (error) {
414
- state.busy = false;
415
- throw error;
416
- }
417
- try {
512
+ runCreated = true;
418
513
  const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.agent_prompt', {
419
514
  'harness.name': definition.name,
420
515
  'harness.session.id': sessionId,
@@ -444,6 +539,7 @@ export function createSessionHarness(definition) {
444
539
  mcpRegistry,
445
540
  session: state.sandboxSession,
446
541
  memory,
542
+ checkpoints,
447
543
  mountedSkills: state.mountedSkills,
448
544
  ...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
449
545
  maxSteps: definition.defaults.agentMaxIterations ?? 16,
@@ -469,6 +565,9 @@ export function createSessionHarness(definition) {
469
565
  }
470
566
  catch (error) {
471
567
  const finalError = normalizeRunError(error, runSignal.signal);
568
+ if (!runCreated) {
569
+ throw finalError;
570
+ }
472
571
  const finishedAt = now();
473
572
  const serialized = serializeError(finalError);
474
573
  const log = finalError instanceof OperationCancelledError ? definition.logger.warn.bind(definition.logger) : definition.logger.error.bind(definition.logger);
@@ -505,7 +604,11 @@ export function createSessionHarness(definition) {
505
604
  }
506
605
  }
507
606
  async function* streamWorkflowCall(sessionId, workflowId, workflow, input, opts) {
508
- yield* relayRunEvents((onEvent) => runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent));
607
+ yield* relayRunEvents((onEvent, relaySignal) => {
608
+ const combined = combineSignals(relaySignal, opts?.signal);
609
+ return runWorkflowCall(sessionId, workflowId, workflow, input, { ...opts, signal: combined.signal }, onEvent)
610
+ .finally(() => combined.cleanup());
611
+ });
509
612
  }
510
613
  async function runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent) {
511
614
  validateInvokeOptions(opts);
@@ -513,22 +616,16 @@ export function createSessionHarness(definition) {
513
616
  if (opts?.signal?.aborted) {
514
617
  throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
515
618
  }
516
- const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
619
+ // Busy check precedes createRunSignal so an early SessionBusyError cannot
620
+ // leak the run-timeout timer or the caller-signal abort listener.
517
621
  const state = await getSessionState(sessionId);
518
622
  if (state.busy) {
519
623
  throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
520
624
  }
521
625
  state.busy = true;
626
+ const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
522
627
  const startedAt = now();
523
628
  const runId = opts?.durable ? opts.durable.runId : ulid();
524
- const memory = memoryFacade({
525
- sessionId,
526
- runId,
527
- workflowId,
528
- signal: runSignal.signal,
529
- sandboxSession: state.sandboxSession,
530
- metadata: opts?.metadata ?? {}
531
- });
532
629
  const runRecord = {
533
630
  id: runId,
534
631
  sessionId,
@@ -543,14 +640,16 @@ export function createSessionHarness(definition) {
543
640
  await onEvent?.(event);
544
641
  await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
545
642
  };
546
- try {
547
- await definition.state.createRun(runRecord);
548
- }
549
- catch (error) {
550
- state.busy = false;
551
- throw error;
552
- }
553
643
  let durableBinding;
644
+ let runSandboxSession = state.sandboxSession;
645
+ let runMountedSkills = state.mountedSkills;
646
+ let closeRunSandbox = false;
647
+ let runCreated = false;
648
+ const delegationState = {
649
+ totalChildAgentCalls: 0,
650
+ activeChildAgentCalls: 0,
651
+ inFlightChildCalls: new Set()
652
+ };
554
653
  try {
555
654
  if (durableRuntime && opts?.durable) {
556
655
  durableBinding = await beginDurableWorkflow({
@@ -565,7 +664,23 @@ export function createSessionHarness(definition) {
565
664
  logger: definition.logger,
566
665
  harnessName: definition.name
567
666
  });
667
+ if (definition.workspaceStore) {
668
+ runSandboxSession = await definition.sandbox.open({ sessionId, runId, signal: runSignal.signal });
669
+ runMountedSkills = new Set();
670
+ closeRunSandbox = true;
671
+ }
568
672
  }
673
+ const memory = memoryFacade({
674
+ sessionId,
675
+ runId,
676
+ workflowId,
677
+ signal: runSignal.signal,
678
+ sandboxSession: runSandboxSession,
679
+ metadata: opts?.metadata ?? {}
680
+ });
681
+ const checkpoints = createContextCheckpoints({ sessionId, runId, workflowId, signal: runSignal.signal });
682
+ await definition.state.createRun(runRecord);
683
+ runCreated = true;
569
684
  const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.prompt', {
570
685
  'harness.name': definition.name,
571
686
  'harness.session.id': sessionId,
@@ -582,11 +697,13 @@ export function createSessionHarness(definition) {
582
697
  'harness.run.id': runId,
583
698
  'harness.workflow.id': workflowId
584
699
  });
700
+ const delegationPolicy = resolveDelegationPolicy(workflow);
585
701
  const workflowArgs = {
586
702
  workflowId,
587
703
  workflow,
588
704
  input,
589
705
  ctx: {
706
+ log: definition.logger,
590
707
  signal: runSignal.signal,
591
708
  runId,
592
709
  sessionId,
@@ -599,12 +716,47 @@ export function createSessionHarness(definition) {
599
716
  metadata: opts?.metadata ?? {},
600
717
  metrics: workflowMetrics,
601
718
  memory,
719
+ checkpoints,
602
720
  step: durableBinding ? durableBinding.step : passthroughStep,
603
721
  agents: Object.fromEntries(Object.entries(definition.agents).map(([agentId, agent]) => [
604
722
  agentId,
605
723
  async (agentInput, agentOpts) => {
606
- const agentSignal = combineSignals(runSignal.signal, agentOpts?.signal);
607
- try {
724
+ // Spec 10 "Cancellation": starting a child-agent call after
725
+ // abort throws OperationCancelledError synchronously, before
726
+ // policy checks run or budgets are consumed.
727
+ if (runSignal.signal.aborted) {
728
+ throw abortError(runSignal.signal, 'run', 'Run was cancelled.');
729
+ }
730
+ if (agentOpts?.signal?.aborted) {
731
+ throw new OperationCancelledError('Child-agent call was cancelled before start.', { scope: 'run' }, agentOpts.signal.reason);
732
+ }
733
+ validateInvokeOptions(agentOpts);
734
+ if (agentOpts?.durable) {
735
+ throw new ValidationError('Durable execution is only supported for workflow runs.', { where: 'invoke_options', issues: { durable: 'agent_run' } });
736
+ }
737
+ // An unknown per-call model alias is an invoke-option mistake;
738
+ // it must not pass the delegation gate or consume call budget.
739
+ if (agentOpts?.model !== undefined && !(agentOpts.model in definition.models)) {
740
+ throw new ValidationError('Unknown model alias for child-agent call.', { where: 'invoke_options', issues: { model: agentOpts.model } });
741
+ }
742
+ const selectedModelAlias = agentOpts?.model ?? agent.model;
743
+ assertDelegationAllowed({
744
+ policy: delegationPolicy,
745
+ state: delegationState,
746
+ workflowId,
747
+ agentId,
748
+ modelAlias: selectedModelAlias
749
+ });
750
+ // Compose signals before consuming budget so a composition
751
+ // failure can never leak an active delegation slot.
752
+ const combinedSignal = combineSignals(runSignal.signal, agentOpts?.signal);
753
+ const agentSignal = agentOpts?.timeoutMs !== undefined
754
+ ? createRunSignal(combinedSignal.signal, agentOpts.timeoutMs)
755
+ : combinedSignal;
756
+ delegationState.totalChildAgentCalls += 1;
757
+ delegationState.activeChildAgentCalls += 1;
758
+ const delegationCallId = `delegate_${ulid()}`;
759
+ const childCall = (async () => {
608
760
  const resolvedHistoryWindow = agentOpts?.historyWindow ?? opts?.historyWindow ?? definition.defaults.historyWindow;
609
761
  const agentMetadata = { ...(opts?.metadata ?? {}), ...(agentOpts?.metadata ?? {}) };
610
762
  const agentMemory = memoryFacade({
@@ -613,31 +765,37 @@ export function createSessionHarness(definition) {
613
765
  workflowId,
614
766
  agentId,
615
767
  signal: agentSignal.signal,
616
- sandboxSession: state.sandboxSession,
768
+ sandboxSession: runSandboxSession,
617
769
  metadata: agentMetadata
618
770
  });
771
+ const agentCheckpoints = createContextCheckpoints({ sessionId, runId, workflowId, agentId, signal: agentSignal.signal });
619
772
  const run = await runDefaultAgent({
620
773
  harnessName: definition.name,
621
774
  agentId,
622
775
  runId,
623
776
  sessionId,
624
777
  workflowId,
778
+ delegationCallId,
779
+ delegationDepth: CHILD_DELEGATION_DEPTH,
625
780
  input: agentInput,
626
781
  history: await definition.state.listMessages(sessionId),
627
782
  agent: agent,
783
+ modelAlias: selectedModelAlias,
628
784
  models: withRunEventModelRegistry(modelRegistry, {
629
785
  harnessName: definition.name,
630
786
  sessionId,
631
787
  runId,
632
788
  workflowId,
633
- agentId
789
+ agentId,
790
+ modelAlias: selectedModelAlias
634
791
  }, emit),
635
792
  skills: resolvedSkills,
636
793
  customTools: definition.tools,
637
794
  mcpRegistry,
638
- session: state.sandboxSession,
795
+ session: runSandboxSession,
639
796
  memory: agentMemory,
640
- mountedSkills: state.mountedSkills,
797
+ checkpoints: agentCheckpoints,
798
+ mountedSkills: runMountedSkills,
641
799
  ...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
642
800
  maxSteps: definition.defaults.agentMaxIterations ?? 16,
643
801
  signal: agentSignal.signal,
@@ -652,9 +810,17 @@ export function createSessionHarness(definition) {
652
810
  await definition.state.appendMessages(sessionId, run.emitted);
653
811
  }
654
812
  return run.output;
813
+ })();
814
+ delegationState.inFlightChildCalls.add(childCall);
815
+ try {
816
+ return await childCall;
655
817
  }
656
818
  finally {
819
+ delegationState.inFlightChildCalls.delete(childCall);
820
+ delegationState.activeChildAgentCalls -= 1;
657
821
  agentSignal.cleanup();
822
+ if (agentSignal !== combinedSignal)
823
+ combinedSignal.cleanup();
658
824
  }
659
825
  }
660
826
  ]))
@@ -671,6 +837,11 @@ export function createSessionHarness(definition) {
671
837
  ...(opts ? { opts: { ...opts, signal: runSignal.signal } } : { opts: { signal: runSignal.signal } })
672
838
  }));
673
839
  }));
840
+ // A resolved handler may still have child-agent calls in flight; settle
841
+ // them before terminalizing so no run events trail run.finished.
842
+ if (delegationState.inFlightChildCalls.size > 0) {
843
+ await Promise.allSettled([...delegationState.inFlightChildCalls]);
844
+ }
674
845
  const finishedAt = now();
675
846
  if (durableBinding) {
676
847
  await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_success' }, () => durableBinding.finishSuccess(result));
@@ -684,8 +855,18 @@ export function createSessionHarness(definition) {
684
855
  }
685
856
  catch (error) {
686
857
  const finalError = normalizeRunError(error, runSignal.signal);
858
+ // A handler rejection mid-Promise.all must not orphan in-flight child
859
+ // agents: cancel them through the run signal and await settlement before
860
+ // run.finished is emitted and the session busy lock is released.
861
+ if (delegationState.inFlightChildCalls.size > 0) {
862
+ runSignal.abort(finalError);
863
+ await Promise.allSettled([...delegationState.inFlightChildCalls]);
864
+ }
687
865
  const finishedAt = now();
688
866
  const serialized = serializeError(finalError);
867
+ if (!runCreated) {
868
+ throw finalError;
869
+ }
689
870
  if (durableBinding && finalError instanceof OperationCancelledError) {
690
871
  await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_cancelled' }, () => durableBinding.finishCancelled(finalError));
691
872
  }
@@ -722,6 +903,20 @@ export function createSessionHarness(definition) {
722
903
  // id can resume; a no-op once the run was settled (success/cancel).
723
904
  if (durableBinding)
724
905
  await durableBinding.dispose();
906
+ if (closeRunSandbox) {
907
+ try {
908
+ await runSandboxSession.close();
909
+ }
910
+ catch (error) {
911
+ definition.logger.warn('Failed to close durable run sandbox.', {
912
+ harness: definition.name,
913
+ session_id: sessionId,
914
+ run_id: runId,
915
+ workflow_id: workflowId,
916
+ error: serializeError(error)
917
+ });
918
+ }
919
+ }
725
920
  runSignal.cleanup();
726
921
  state.busy = false;
727
922
  }
@@ -730,6 +925,70 @@ export function createSessionHarness(definition) {
730
925
  function passthroughStep(_stepId, fn) {
731
926
  return fn();
732
927
  }
928
/**
 * Resolve the effective child-agent delegation policy for a workflow.
 *
 * A workflow that declares `delegation` is enabled unless it sets
 * `enabled: false`; a workflow without one is enabled only when the harness
 * default sets `enabled: true`. Each numeric limit falls back from the
 * workflow policy to `definition.defaults.delegation` to the module default.
 *
 * @param workflow Workflow definition; only `workflow.delegation` is read.
 * @returns Policy with `enabled`, the three numeric limits, an
 *   `agentModelAliases` Map of agent id to Set of aliases, and — only when
 *   configured — `allowedAgents` and `modelAliases` Sets.
 */
function resolveDelegationPolicy(workflow) {
    const declared = workflow.delegation;
    const source = declared ?? {};
    // Read lazily so harness defaults are only touched when actually needed.
    const harnessDefault = (key) => definition.defaults.delegation?.[key];
    let enabled;
    if (declared) {
        enabled = source.enabled !== false;
    }
    else {
        enabled = definition.defaults.delegation?.enabled === true;
    }
    const resolved = { enabled };
    if (source.agents) {
        resolved.allowedAgents = new Set(source.agents);
    }
    resolved.maxChildAgentCalls = source.maxChildAgentCalls ?? harnessDefault('maxChildAgentCalls') ?? DEFAULT_MAX_CHILD_AGENT_CALLS;
    resolved.maxParallelChildAgentCalls = source.maxParallelChildAgentCalls ?? harnessDefault('maxParallelChildAgentCalls') ?? DEFAULT_MAX_PARALLEL_CHILD_AGENT_CALLS;
    resolved.maxDepth = source.maxDepth ?? harnessDefault('maxDepth') ?? DEFAULT_MAX_DELEGATION_DEPTH;
    if (source.modelAliases) {
        resolved.modelAliases = new Set(source.modelAliases);
    }
    const perAgentAliases = new Map();
    for (const [agentId, aliases] of Object.entries(source.agentModelAliases ?? {})) {
        perAgentAliases.set(agentId, new Set(aliases));
    }
    resolved.agentModelAliases = perAgentAliases;
    return resolved;
}
942
/**
 * Gate a child-agent call against the resolved delegation policy.
 *
 * Checks run in a fixed order and the first failing one throws: delegation
 * enabled, agent allow-list, delegation depth, total call budget, parallel
 * call budget, model-alias allow-list (the per-agent list takes precedence
 * over the workflow-wide one). The function only reads `state`; it never
 * updates the counters.
 *
 * @param args `{ policy, state, workflowId, agentId, modelAlias }`.
 * @returns {void}
 * @throws {DelegationPolicyError} With `workflow_id`, `agent_id`, `reason`
 *   and, where applicable, `limit` or `model_alias`.
 */
function assertDelegationAllowed(args) {
    const { policy, state, workflowId, agentId, modelAlias } = args;
    const deny = (message, reason, extra = {}) => {
        throw new DelegationPolicyError(message, {
            workflow_id: workflowId,
            agent_id: agentId,
            reason,
            ...extra
        });
    };
    if (!policy.enabled) {
        deny('Workflow child-agent delegation is disabled.', 'delegation_disabled');
    }
    const agentListed = !policy.allowedAgents || policy.allowedAgents.has(agentId);
    if (!agentListed) {
        deny('Workflow is not allowed to invoke this child agent.', 'agent_not_allowed');
    }
    if (CHILD_DELEGATION_DEPTH > policy.maxDepth) {
        deny('Workflow child-agent delegation depth exceeded.', 'max_delegation_depth_exceeded', { limit: policy.maxDepth });
    }
    if (state.totalChildAgentCalls >= policy.maxChildAgentCalls) {
        deny('Workflow child-agent call budget exceeded.', 'max_child_agent_calls_exceeded', { limit: policy.maxChildAgentCalls });
    }
    if (state.activeChildAgentCalls >= policy.maxParallelChildAgentCalls) {
        deny('Workflow parallel child-agent call budget exceeded.', 'max_parallel_child_agent_calls_exceeded', { limit: policy.maxParallelChildAgentCalls });
    }
    const permittedAliases = policy.agentModelAliases.get(agentId) ?? policy.modelAliases;
    if (permittedAliases && !permittedAliases.has(modelAlias)) {
        deny('Workflow is not allowed to invoke this child agent with the selected model alias.', 'model_alias_not_allowed', { model_alias: modelAlias });
    }
}
733
992
  /**
734
993
  * Runs a durable finalization side effect (runtime finish / workspace lifecycle)
735
994
  * without ever masking the primary run outcome (spec 21 §16.1 step 7).
@@ -874,7 +1133,7 @@ function isObjectPartialChunk(chunk) {
874
1133
  function isObjectFinishChunk(chunk) {
875
1134
  return Boolean(chunk && typeof chunk === 'object' && chunk.kind === 'finish' && Object.prototype.hasOwnProperty.call(chunk, 'object'));
876
1135
  }
877
- function configureHarnessAdapters(context, models, state, sandbox, memory, tools) {
1136
+ function configureHarnessAdapters(context, models, state, sandbox, memory, tools, runtime, workspaceStore, checkpoints) {
878
1137
  const seen = new Set();
879
1138
  for (const alias of Object.values(models)) {
880
1139
  configureOne(alias.provider, context, seen);
@@ -882,11 +1141,16 @@ function configureHarnessAdapters(context, models, state, sandbox, memory, tools
882
1141
  configureOne(state, context, seen);
883
1142
  configureOne(sandbox, context, seen);
884
1143
  configureOne(memory, context, seen);
1144
+ configureOne(runtime, context, seen);
1145
+ configureOne(workspaceStore, context, seen);
1146
+ configureOne(checkpoints, context, seen);
885
1147
  for (const tool of Object.values(tools)) {
886
1148
  configureOne(tool, context, seen);
887
1149
  }
888
1150
  }
889
1151
  function configureOne(adapter, context, seen) {
1152
+ if (!adapter)
1153
+ return;
890
1154
  const configurable = adapter;
891
1155
  if (!configurable.configureHarnessContext || seen.has(adapter))
892
1156
  return;
@@ -933,26 +1197,6 @@ function resolveContentCaptureMode(options) {
933
1197
  return envValue;
934
1198
  return 'NO_CONTENT';
935
1199
  }
936
- function metadataSpanAttrs(metadata) {
937
- const attrs = {};
938
- for (const [key, value] of Object.entries(metadata ?? {})) {
939
- if (!/^[a-zA-Z][a-zA-Z0-9_.-]{0,63}$/.test(key))
940
- continue;
941
- if (typeof value === 'string') {
942
- if (value.length <= 256)
943
- attrs[`harness.metadata.${key}`] = value;
944
- continue;
945
- }
946
- if (typeof value === 'number' && Number.isFinite(value)) {
947
- attrs[`harness.metadata.${key}`] = value;
948
- continue;
949
- }
950
- if (typeof value === 'boolean') {
951
- attrs[`harness.metadata.${key}`] = value;
952
- }
953
- }
954
- return attrs;
955
- }
956
1200
  function isValidTraceparent(traceparent) {
957
1201
  const match = /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$/.exec(traceparent);
958
1202
  if (!match)
@@ -1030,10 +1274,10 @@ function sanitizeEventForPersistence(event) {
1030
1274
  ...(event.error ? { error: event.error } : {})
1031
1275
  };
1032
1276
  case 'agent.started':
1033
- return { agentId: event.agentId };
1277
+ return agentRunEventMeta(event);
1034
1278
  case 'agent.finished':
1035
1279
  return {
1036
- agentId: event.agentId,
1280
+ ...agentRunEventMeta(event),
1037
1281
  ...(event.output !== undefined ? { output: '[redacted]' } : {}),
1038
1282
  ...(event.error ? { error: event.error } : {})
1039
1283
  };
@@ -1091,6 +1335,16 @@ function modelStreamEventMeta(event) {
1091
1335
  ...(event.streamId ? { streamId: event.streamId } : {})
1092
1336
  };
1093
1337
  }
1338
/**
 * Extract the persistable identity fields of an agent run event.
 *
 * `agentId` is always copied. `workflowId`, `parentAgentId`,
 * `delegationCallId` and `modelAlias` are copied only when truthy;
 * `delegationDepth` is copied whenever it is not `undefined`, so a depth of
 * `0` is kept.
 *
 * @param event Agent run event.
 * @returns Plain object holding the selected fields.
 */
function agentRunEventMeta(event) {
    const meta = { agentId: event.agentId };
    for (const field of ['workflowId', 'parentAgentId', 'delegationCallId']) {
        if (event[field]) {
            meta[field] = event[field];
        }
    }
    if (event.delegationDepth !== undefined) {
        meta.delegationDepth = event.delegationDepth;
    }
    if (event.modelAlias) {
        meta.modelAlias = event.modelAlias;
    }
    return meta;
}
1094
1348
  function isJsonRecord(value) {
1095
1349
  return value !== null && typeof value === 'object' && !Array.isArray(value);
1096
1350
  }
@@ -1121,6 +1375,8 @@ function createRunSignal(parent, timeoutMs) {
1121
1375
  : undefined;
1122
1376
  return {
1123
1377
  signal: controller.signal,
1378
+ /** Harness-initiated abort, e.g. to cancel in-flight child-agent calls. */
1379
+ abort: (reason) => controller.abort(runAbortReason(reason)),
1124
1380
  cleanup: () => {
1125
1381
  if (timeout)
1126
1382
  clearTimeout(timeout);