@purista/harness 1.2.6 → 1.5.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (75)
  1. package/README.md +6 -0
  2. package/dist/agents/index.d.ts +7 -1
  3. package/dist/agents/index.js +126 -44
  4. package/dist/errors/catalog.d.ts +18 -2
  5. package/dist/errors/catalog.js +10 -0
  6. package/dist/eval/index.d.ts +3 -3
  7. package/dist/eval/index.js +15 -1
  8. package/dist/harness/defineHarness.d.ts +149 -3
  9. package/dist/harness/defineHarness.js +110 -1
  10. package/dist/index.d.ts +38 -18
  11. package/dist/index.js +30 -16
  12. package/dist/local/index.d.ts +36 -0
  13. package/dist/local/index.js +24 -0
  14. package/dist/local/local-sandbox.d.ts +25 -0
  15. package/dist/local/local-sandbox.js +368 -0
  16. package/dist/local/local-workspace.d.ts +56 -0
  17. package/dist/local/local-workspace.js +496 -0
  18. package/dist/local/ref-hash.d.ts +6 -0
  19. package/dist/local/ref-hash.js +9 -0
  20. package/dist/local/sqlite-storage.d.ts +106 -0
  21. package/dist/local/sqlite-storage.js +680 -0
  22. package/dist/models/adapter-utils.d.ts +52 -0
  23. package/dist/models/adapter-utils.js +81 -0
  24. package/dist/models/registry.js +28 -37
  25. package/dist/models/stream-pump.d.ts +16 -0
  26. package/dist/models/stream-pump.js +77 -0
  27. package/dist/ports/base-model-provider.d.ts +7 -1
  28. package/dist/ports/base-model-provider.js +384 -87
  29. package/dist/ports/capabilities.d.ts +16 -2
  30. package/dist/ports/context-checkpoints.d.ts +63 -0
  31. package/dist/ports/context-checkpoints.js +33 -0
  32. package/dist/ports/index.d.ts +1 -0
  33. package/dist/ports/index.js +1 -0
  34. package/dist/ports/model-provider.d.ts +94 -0
  35. package/dist/runtime/durable.d.ts +11 -0
  36. package/dist/runtime/durable.js +15 -2
  37. package/dist/runtime/sessionDurable.js +47 -21
  38. package/dist/runtime/steps.d.ts +22 -1
  39. package/dist/runtime/steps.js +53 -2
  40. package/dist/sessions/index.d.ts +17 -6
  41. package/dist/sessions/index.js +345 -84
  42. package/dist/skills/index.d.ts +0 -2
  43. package/dist/skills/index.js +0 -8
  44. package/dist/state/in-memory.js +6 -6
  45. package/dist/telemetry/shim.js +2 -6
  46. package/dist/telemetry/span-attrs.d.ts +9 -0
  47. package/dist/telemetry/span-attrs.js +27 -0
  48. package/dist/testing/durableWorkspaceStoreContract.js +69 -0
  49. package/dist/testing/fakeLogger.d.ts +29 -0
  50. package/dist/testing/fakeLogger.js +47 -0
  51. package/dist/testing/fakeSandbox.d.ts +27 -0
  52. package/dist/testing/fakeSandbox.js +153 -0
  53. package/dist/testing/fakeStateStore.d.ts +36 -0
  54. package/dist/testing/fakeStateStore.js +66 -0
  55. package/dist/testing/index.d.ts +10 -4
  56. package/dist/testing/index.js +14 -4
  57. package/dist/testing/loggerContract.d.ts +9 -0
  58. package/dist/testing/loggerContract.js +62 -0
  59. package/dist/testing/modelProviderContract.d.ts +12 -0
  60. package/dist/testing/modelProviderContract.js +222 -0
  61. package/dist/testing/recordEvents.d.ts +3 -0
  62. package/dist/testing/recordEvents.js +8 -0
  63. package/dist/testing/stateStoreContract.js +27 -0
  64. package/dist/tools/index.js +26 -1
  65. package/dist/tools/mcp/http.d.ts +2 -0
  66. package/dist/tools/mcp/http.js +34 -21
  67. package/dist/tools/mcp/runner.d.ts +4 -0
  68. package/dist/tools/mcp/runner.js +75 -21
  69. package/dist/tools/mcp/stdio.d.ts +7 -1
  70. package/dist/tools/mcp/stdio.js +102 -23
  71. package/dist/version.d.ts +1 -1
  72. package/dist/version.js +1 -1
  73. package/dist/workspace/in-memory.d.ts +1 -0
  74. package/dist/workspace/in-memory.js +47 -12
  75. package/package.json +5 -4
@@ -1,28 +1,58 @@
1
- import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, serializeError } from '../errors/index.js';
1
+ import { InternalError, OperationCancelledError, OperationTimeoutError, HarnessError, SessionBusyError, ValidationError, DelegationPolicyError, serializeError } from '../errors/index.js';
2
2
  import { ulid } from '../ulid/index.js';
3
3
  import { runDefaultAgent } from '../agents/index.js';
4
4
  import { runWorkflow } from '../workflows/index.js';
5
5
  import { createMemoryFacade, createSessionMemory } from '../ports/memory.js';
6
6
  import { beginDurableWorkflow, DURABLE_RUN_ID_PATTERN, isExecutableDurableRuntime } from '../runtime/sessionDurable.js';
7
+ import { runStepWithRetry } from '../runtime/steps.js';
7
8
  import { HarnessConfigError } from '../errors/catalog.js';
8
9
  import { loadSkillsSync } from '../skills/index.js';
9
10
  import { createModelRegistry } from '../models/registry.js';
10
11
  import { createMetrics, createTelemetryShim } from '../telemetry/index.js';
12
+ import { metadataSpanAttrs } from '../telemetry/span-attrs.js';
13
+ import { abortError } from '../runtime/abort.js';
11
14
  import { createMcpRunnerRegistry } from '../tools/mcp/runner.js';
12
15
  const NEVER_ABORT_SIGNAL = new AbortController().signal;
16
+ const DEFAULT_MAX_CHILD_AGENT_CALLS = 32;
17
+ const DEFAULT_MAX_PARALLEL_CHILD_AGENT_CALLS = 8;
18
+ const DEFAULT_MAX_DELEGATION_DEPTH = 1;
19
+ /**
20
+ * Workflows invoke leaf agents directly, so every child-agent call runs at
21
+ * depth 1 (spec 10 "Delegation policy": `maxDepth` default `1`, `0` disables
22
+ * child-agent delegation).
23
+ */
24
+ const CHILD_DELEGATION_DEPTH = 1;
13
25
  function now() {
14
26
  return new Date().toISOString();
15
27
  }
16
28
  const STREAM_MAX_BUFFERED_EVENTS = 1024;
17
- const STREAM_TERMINAL_EVENT_TYPES = new Set(['run.finished', 'agent.finished']);
29
+ /**
30
+ * Event types that must never be dropped from the relay queue.
31
+ *
32
+ * Only `run.finished` qualifies: it occurs at most once per run and is the
33
+ * terminal event consumers key off to know the run is complete. `agent.finished`
34
+ * is emitted once per agent invocation (including every child-agent delegation
35
+ * call), so it can appear many times and must remain droppable to keep the
36
+ * queue bounded when a slow consumer falls behind during a delegation-heavy run.
37
+ */
38
+ const STREAM_UNDROPPABLE_EVENT_TYPES = new Set(['run.finished']);
18
39
  /**
19
40
  * Relay run events from an in-process run to a stream consumer.
20
41
  *
21
- * The unread events live in a bounded queue: consumed events are removed (no
22
- * growing cursor over a shared array), and on overflow the oldest non-terminal
23
- * unread event is dropped and counted, so a slow consumer never silently skips
24
- * an unread event. Delivery is promise-notified rather than time-polled, so
25
- * there is no fixed per-event latency or periodic timer.
42
+ * The unread events live in a bounded queue (cap: STREAM_MAX_BUFFERED_EVENTS):
43
+ * consumed events are removed (no growing cursor over a shared array), and on
44
+ * overflow the oldest droppable unread event is dropped and counted, so a slow
45
+ * consumer never silently skips an event without an accompanying
46
+ * `stream.overflow` notice. Only `run.finished` is undroppable; all other
47
+ * event types — including `agent.finished` — may be evicted under pressure.
48
+ * If no droppable event exists when the queue is full, the incoming event is
49
+ * discarded (counted) rather than growing the queue past the cap. Delivery is
50
+ * promise-notified rather than time-polled, so there is no fixed per-event
51
+ * latency or periodic timer.
52
+ *
53
+ * Abandoning the stream (`break` / `iterator.return()`) only detaches that
54
+ * consumer. It does not abort `relaySignal`; callers must pass `opts.signal`
55
+ * when they intend to cancel the underlying run.
26
56
  */
27
57
  export async function* relayRunEvents(run) {
28
58
  const queue = [];
@@ -31,6 +61,8 @@ export async function* relayRunEvents(run) {
31
61
  let done = false;
32
62
  let failure;
33
63
  let wake;
64
+ const relayController = new AbortController();
65
+ let completedNormally = false;
34
66
  const notify = () => {
35
67
  const resolve = wake;
36
68
  wake = undefined;
@@ -40,16 +72,23 @@ export async function* relayRunEvents(run) {
40
72
  if ('runId' in event)
41
73
  liveRunId = event.runId;
42
74
  if (queue.length >= STREAM_MAX_BUFFERED_EVENTS) {
43
- const dropIndex = queue.findIndex((candidate) => !STREAM_TERMINAL_EVENT_TYPES.has(candidate.type));
75
+ const dropIndex = queue.findIndex((candidate) => !STREAM_UNDROPPABLE_EVENT_TYPES.has(candidate.type));
44
76
  if (dropIndex >= 0) {
45
77
  queue.splice(dropIndex, 1);
46
78
  dropped += 1;
47
79
  }
80
+ else {
81
+ // Every queued event is undroppable; discard the incoming event to keep
82
+ // the queue bounded rather than growing past the cap.
83
+ dropped += 1;
84
+ notify();
85
+ return Promise.resolve();
86
+ }
48
87
  }
49
88
  queue.push(event);
50
89
  notify();
51
90
  return Promise.resolve();
52
- })
91
+ }, relayController.signal)
53
92
  .catch((error) => {
54
93
  failure = error;
55
94
  return undefined;
@@ -73,6 +112,7 @@ export async function* relayRunEvents(run) {
73
112
  }
74
113
  if (queue.length === 0 && dropped === 0) {
75
114
  if (done) {
115
+ completedNormally = true;
76
116
  break;
77
117
  }
78
118
  // No await between the empty check and installing `wake`, so a producer
@@ -84,7 +124,12 @@ export async function* relayRunEvents(run) {
84
124
  }
85
125
  }
86
126
  finally {
87
- await result.catch(() => undefined);
127
+ if (completedNormally) {
128
+ await result.catch(() => undefined);
129
+ }
130
+ else {
131
+ void result.catch(() => undefined);
132
+ }
88
133
  }
89
134
  if (failure)
90
135
  throw failure;
@@ -133,7 +178,7 @@ export function createSessionHarness(definition) {
133
178
  ...(definition.defaults.historyWindow !== undefined ? { historyWindow: definition.defaults.historyWindow } : {})
134
179
  }
135
180
  };
136
- configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools);
181
+ configureHarnessAdapters(adapterContext, definition.models, definition.state, definition.sandbox, definition.memory, definition.tools, definition.runtime, definition.workspaceStore, definition.checkpoints);
137
182
  const modelRegistry = createModelRegistry(definition.models, { telemetry, harnessName: definition.name });
138
183
  const mcpRegistry = createMcpRunnerRegistry();
139
184
  async function ensureSessionRecord(sessionId) {
@@ -253,6 +298,54 @@ export function createSessionHarness(definition) {
253
298
  }
254
299
  return definition.runtime;
255
300
  }
301
+ function createContextCheckpoints(args) {
302
+ const store = definition.checkpoints;
303
+ const requireStore = () => {
304
+ if (!store) {
305
+ throw new ValidationError('No context checkpoint store is configured.', {
306
+ where: 'invoke_options',
307
+ issues: { reason: 'context_checkpoint_store_missing' }
308
+ });
309
+ }
310
+ return store;
311
+ };
312
+ const baseQuery = {
313
+ runId: args.runId,
314
+ sessionId: args.sessionId,
315
+ ...(args.workflowId ? { workflowId: args.workflowId } : {}),
316
+ ...(args.agentId ? { agentId: args.agentId } : {})
317
+ };
318
+ return {
319
+ async write(input) {
320
+ const json = JSON.stringify(input.payload);
321
+ if (json === undefined) {
322
+ throw new ValidationError('Context checkpoint payload must be JSON-serializable.', {
323
+ where: 'invoke_options',
324
+ issues: { reason: 'non_json_context_checkpoint_payload' }
325
+ });
326
+ }
327
+ const checkpoint = {
328
+ ...baseQuery,
329
+ sequence: input.sequence,
330
+ kind: input.kind,
331
+ payload: input.payload,
332
+ payloadSizeBytes: Buffer.byteLength(json, 'utf8'),
333
+ createdAt: now(),
334
+ ...(input.metadata ? { metadata: input.metadata } : {})
335
+ };
336
+ await requireStore().write(checkpoint, { signal: args.signal });
337
+ },
338
+ async list(query = {}) {
339
+ return requireStore().list({ ...baseQuery, ...query, signal: args.signal });
340
+ },
341
+ async read(ref) {
342
+ return requireStore().read({ runId: args.runId, sessionId: args.sessionId, sequence: ref.sequence, kind: ref.kind });
343
+ },
344
+ async delete(ref) {
345
+ await requireStore().delete({ runId: args.runId, sessionId: args.sessionId, sequence: ref.sequence, kind: ref.kind });
346
+ }
347
+ };
348
+ }
256
349
  return {
257
350
  inspect() {
258
351
  return definition.inspection;
@@ -326,9 +419,13 @@ export function createSessionHarness(definition) {
326
419
  }
327
420
  },
328
421
  async close() {
422
+ if (state.busy) {
423
+ throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
424
+ }
329
425
  await definition.state.closeSession(sessionId);
330
426
  sessionStates.delete(sessionId);
331
427
  sessionStateOpenings.delete(sessionId);
428
+ await mcpRegistry.closeForSandboxKey(sessionId);
332
429
  await state.sandboxSession.close();
333
430
  }
334
431
  };
@@ -367,7 +464,11 @@ export function createSessionHarness(definition) {
367
464
  $infer: {}
368
465
  };
369
466
  async function* streamAgentCall(sessionId, agentId, agent, input, opts) {
370
- yield* relayRunEvents((onEvent) => runAgentCall(sessionId, agentId, agent, input, opts, onEvent));
467
+ yield* relayRunEvents((onEvent, relaySignal) => {
468
+ const combined = combineSignals(relaySignal, opts?.signal);
469
+ return runAgentCall(sessionId, agentId, agent, input, { ...opts, signal: combined.signal }, onEvent)
470
+ .finally(() => combined.cleanup());
471
+ });
371
472
  }
372
473
  async function runAgentCall(sessionId, agentId, agent, input, opts, onEvent) {
373
474
  validateInvokeOptions(opts);
@@ -377,44 +478,43 @@ export function createSessionHarness(definition) {
377
478
  if (opts?.signal?.aborted) {
378
479
  throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
379
480
  }
380
- const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
481
+ // Busy check precedes createRunSignal so an early SessionBusyError cannot
482
+ // leak the run-timeout timer or the caller-signal abort listener.
381
483
  const state = await getSessionState(sessionId);
382
484
  if (state.busy) {
383
485
  throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
384
486
  }
385
487
  state.busy = true;
488
+ const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
386
489
  const startedAt = now();
387
490
  const runId = ulid();
388
- const memory = memoryFacade({
389
- sessionId,
390
- runId,
391
- agentId,
392
- signal: runSignal.signal,
393
- sandboxSession: state.sandboxSession,
394
- metadata: opts?.metadata ?? {}
395
- });
396
- const runRecord = {
397
- id: runId,
398
- sessionId,
399
- kind: 'agent',
400
- target: agentId,
401
- startedAt,
402
- status: 'running',
403
- input: input
404
- };
405
491
  const emit = async (event) => {
406
492
  const eventAt = 'at' in event ? event.at : now();
407
493
  await onEvent?.(event);
408
494
  await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
409
495
  };
496
+ let runCreated = false;
410
497
  try {
498
+ const memory = memoryFacade({
499
+ sessionId,
500
+ runId,
501
+ agentId,
502
+ signal: runSignal.signal,
503
+ sandboxSession: state.sandboxSession,
504
+ metadata: opts?.metadata ?? {}
505
+ });
506
+ const checkpoints = createContextCheckpoints({ sessionId, runId, agentId, signal: runSignal.signal });
507
+ const runRecord = {
508
+ id: runId,
509
+ sessionId,
510
+ kind: 'agent',
511
+ target: agentId,
512
+ startedAt,
513
+ status: 'running',
514
+ input: input
515
+ };
411
516
  await definition.state.createRun(runRecord);
412
- }
413
- catch (error) {
414
- state.busy = false;
415
- throw error;
416
- }
417
- try {
517
+ runCreated = true;
418
518
  const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.agent_prompt', {
419
519
  'harness.name': definition.name,
420
520
  'harness.session.id': sessionId,
@@ -444,6 +544,7 @@ export function createSessionHarness(definition) {
444
544
  mcpRegistry,
445
545
  session: state.sandboxSession,
446
546
  memory,
547
+ checkpoints,
447
548
  mountedSkills: state.mountedSkills,
448
549
  ...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
449
550
  maxSteps: definition.defaults.agentMaxIterations ?? 16,
@@ -469,6 +570,9 @@ export function createSessionHarness(definition) {
469
570
  }
470
571
  catch (error) {
471
572
  const finalError = normalizeRunError(error, runSignal.signal);
573
+ if (!runCreated) {
574
+ throw finalError;
575
+ }
472
576
  const finishedAt = now();
473
577
  const serialized = serializeError(finalError);
474
578
  const log = finalError instanceof OperationCancelledError ? definition.logger.warn.bind(definition.logger) : definition.logger.error.bind(definition.logger);
@@ -505,7 +609,11 @@ export function createSessionHarness(definition) {
505
609
  }
506
610
  }
507
611
  async function* streamWorkflowCall(sessionId, workflowId, workflow, input, opts) {
508
- yield* relayRunEvents((onEvent) => runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent));
612
+ yield* relayRunEvents((onEvent, relaySignal) => {
613
+ const combined = combineSignals(relaySignal, opts?.signal);
614
+ return runWorkflowCall(sessionId, workflowId, workflow, input, { ...opts, signal: combined.signal }, onEvent)
615
+ .finally(() => combined.cleanup());
616
+ });
509
617
  }
510
618
  async function runWorkflowCall(sessionId, workflowId, workflow, input, opts, onEvent) {
511
619
  validateInvokeOptions(opts);
@@ -513,22 +621,16 @@ export function createSessionHarness(definition) {
513
621
  if (opts?.signal?.aborted) {
514
622
  throw new OperationCancelledError('Run was cancelled before start.', { scope: 'run' });
515
623
  }
516
- const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
624
+ // Busy check precedes createRunSignal so an early SessionBusyError cannot
625
+ // leak the run-timeout timer or the caller-signal abort listener.
517
626
  const state = await getSessionState(sessionId);
518
627
  if (state.busy) {
519
628
  throw new SessionBusyError('Session is busy.', { session_id: sessionId, reason: 'concurrent_run' });
520
629
  }
521
630
  state.busy = true;
631
+ const runSignal = createRunSignal(opts?.signal, opts?.timeoutMs ?? definition.defaults.runTimeoutMs);
522
632
  const startedAt = now();
523
633
  const runId = opts?.durable ? opts.durable.runId : ulid();
524
- const memory = memoryFacade({
525
- sessionId,
526
- runId,
527
- workflowId,
528
- signal: runSignal.signal,
529
- sandboxSession: state.sandboxSession,
530
- metadata: opts?.metadata ?? {}
531
- });
532
634
  const runRecord = {
533
635
  id: runId,
534
636
  sessionId,
@@ -543,14 +645,16 @@ export function createSessionHarness(definition) {
543
645
  await onEvent?.(event);
544
646
  await appendEvents(runId, [{ id: ulid(), runId, at: eventAt, type: event.type, payload: sanitizeEventForPersistence(event) }]);
545
647
  };
546
- try {
547
- await definition.state.createRun(runRecord);
548
- }
549
- catch (error) {
550
- state.busy = false;
551
- throw error;
552
- }
553
648
  let durableBinding;
649
+ let runSandboxSession = state.sandboxSession;
650
+ let runMountedSkills = state.mountedSkills;
651
+ let closeRunSandbox = false;
652
+ let runCreated = false;
653
+ const delegationState = {
654
+ totalChildAgentCalls: 0,
655
+ activeChildAgentCalls: 0,
656
+ inFlightChildCalls: new Set()
657
+ };
554
658
  try {
555
659
  if (durableRuntime && opts?.durable) {
556
660
  durableBinding = await beginDurableWorkflow({
@@ -565,7 +669,23 @@ export function createSessionHarness(definition) {
565
669
  logger: definition.logger,
566
670
  harnessName: definition.name
567
671
  });
672
+ if (definition.workspaceStore) {
673
+ runSandboxSession = await definition.sandbox.open({ sessionId, runId, signal: runSignal.signal });
674
+ runMountedSkills = new Set();
675
+ closeRunSandbox = true;
676
+ }
568
677
  }
678
+ const memory = memoryFacade({
679
+ sessionId,
680
+ runId,
681
+ workflowId,
682
+ signal: runSignal.signal,
683
+ sandboxSession: runSandboxSession,
684
+ metadata: opts?.metadata ?? {}
685
+ });
686
+ const checkpoints = createContextCheckpoints({ sessionId, runId, workflowId, signal: runSignal.signal });
687
+ await definition.state.createRun(runRecord);
688
+ runCreated = true;
569
689
  const result = await withIncomingTraceContext(telemetry, opts, definition.logger, async () => telemetry.span('harness.session.prompt', {
570
690
  'harness.name': definition.name,
571
691
  'harness.session.id': sessionId,
@@ -582,11 +702,13 @@ export function createSessionHarness(definition) {
582
702
  'harness.run.id': runId,
583
703
  'harness.workflow.id': workflowId
584
704
  });
705
+ const delegationPolicy = resolveDelegationPolicy(workflow);
585
706
  const workflowArgs = {
586
707
  workflowId,
587
708
  workflow,
588
709
  input,
589
710
  ctx: {
711
+ log: definition.logger,
590
712
  signal: runSignal.signal,
591
713
  runId,
592
714
  sessionId,
@@ -599,12 +721,47 @@ export function createSessionHarness(definition) {
599
721
  metadata: opts?.metadata ?? {},
600
722
  metrics: workflowMetrics,
601
723
  memory,
724
+ checkpoints,
602
725
  step: durableBinding ? durableBinding.step : passthroughStep,
603
726
  agents: Object.fromEntries(Object.entries(definition.agents).map(([agentId, agent]) => [
604
727
  agentId,
605
728
  async (agentInput, agentOpts) => {
606
- const agentSignal = combineSignals(runSignal.signal, agentOpts?.signal);
607
- try {
729
+ // Spec 10 "Cancellation": starting a child-agent call after
730
+ // abort throws OperationCancelledError synchronously, before
731
+ // policy checks run or budgets are consumed.
732
+ if (runSignal.signal.aborted) {
733
+ throw abortError(runSignal.signal, 'run', 'Run was cancelled.');
734
+ }
735
+ if (agentOpts?.signal?.aborted) {
736
+ throw new OperationCancelledError('Child-agent call was cancelled before start.', { scope: 'run' }, agentOpts.signal.reason);
737
+ }
738
+ validateInvokeOptions(agentOpts);
739
+ if (agentOpts?.durable) {
740
+ throw new ValidationError('Durable execution is only supported for workflow runs.', { where: 'invoke_options', issues: { durable: 'agent_run' } });
741
+ }
742
+ // An unknown per-call model alias is an invoke-option mistake;
743
+ // it must not pass the delegation gate or consume call budget.
744
+ if (agentOpts?.model !== undefined && !(agentOpts.model in definition.models)) {
745
+ throw new ValidationError('Unknown model alias for child-agent call.', { where: 'invoke_options', issues: { model: agentOpts.model } });
746
+ }
747
+ const selectedModelAlias = agentOpts?.model ?? agent.model;
748
+ assertDelegationAllowed({
749
+ policy: delegationPolicy,
750
+ state: delegationState,
751
+ workflowId,
752
+ agentId,
753
+ modelAlias: selectedModelAlias
754
+ });
755
+ // Compose signals before consuming budget so a composition
756
+ // failure can never leak an active delegation slot.
757
+ const combinedSignal = combineSignals(runSignal.signal, agentOpts?.signal);
758
+ const agentSignal = agentOpts?.timeoutMs !== undefined
759
+ ? createRunSignal(combinedSignal.signal, agentOpts.timeoutMs)
760
+ : combinedSignal;
761
+ delegationState.totalChildAgentCalls += 1;
762
+ delegationState.activeChildAgentCalls += 1;
763
+ const delegationCallId = `delegate_${ulid()}`;
764
+ const childCall = (async () => {
608
765
  const resolvedHistoryWindow = agentOpts?.historyWindow ?? opts?.historyWindow ?? definition.defaults.historyWindow;
609
766
  const agentMetadata = { ...(opts?.metadata ?? {}), ...(agentOpts?.metadata ?? {}) };
610
767
  const agentMemory = memoryFacade({
@@ -613,31 +770,37 @@ export function createSessionHarness(definition) {
613
770
  workflowId,
614
771
  agentId,
615
772
  signal: agentSignal.signal,
616
- sandboxSession: state.sandboxSession,
773
+ sandboxSession: runSandboxSession,
617
774
  metadata: agentMetadata
618
775
  });
776
+ const agentCheckpoints = createContextCheckpoints({ sessionId, runId, workflowId, agentId, signal: agentSignal.signal });
619
777
  const run = await runDefaultAgent({
620
778
  harnessName: definition.name,
621
779
  agentId,
622
780
  runId,
623
781
  sessionId,
624
782
  workflowId,
783
+ delegationCallId,
784
+ delegationDepth: CHILD_DELEGATION_DEPTH,
625
785
  input: agentInput,
626
786
  history: await definition.state.listMessages(sessionId),
627
787
  agent: agent,
788
+ modelAlias: selectedModelAlias,
628
789
  models: withRunEventModelRegistry(modelRegistry, {
629
790
  harnessName: definition.name,
630
791
  sessionId,
631
792
  runId,
632
793
  workflowId,
633
- agentId
794
+ agentId,
795
+ modelAlias: selectedModelAlias
634
796
  }, emit),
635
797
  skills: resolvedSkills,
636
798
  customTools: definition.tools,
637
799
  mcpRegistry,
638
- session: state.sandboxSession,
800
+ session: runSandboxSession,
639
801
  memory: agentMemory,
640
- mountedSkills: state.mountedSkills,
802
+ checkpoints: agentCheckpoints,
803
+ mountedSkills: runMountedSkills,
641
804
  ...(resolvedHistoryWindow !== undefined ? { historyWindow: resolvedHistoryWindow } : {}),
642
805
  maxSteps: definition.defaults.agentMaxIterations ?? 16,
643
806
  signal: agentSignal.signal,
@@ -652,9 +815,17 @@ export function createSessionHarness(definition) {
652
815
  await definition.state.appendMessages(sessionId, run.emitted);
653
816
  }
654
817
  return run.output;
818
+ })();
819
+ delegationState.inFlightChildCalls.add(childCall);
820
+ try {
821
+ return await childCall;
655
822
  }
656
823
  finally {
824
+ delegationState.inFlightChildCalls.delete(childCall);
825
+ delegationState.activeChildAgentCalls -= 1;
657
826
  agentSignal.cleanup();
827
+ if (agentSignal !== combinedSignal)
828
+ combinedSignal.cleanup();
658
829
  }
659
830
  }
660
831
  ]))
@@ -671,6 +842,11 @@ export function createSessionHarness(definition) {
671
842
  ...(opts ? { opts: { ...opts, signal: runSignal.signal } } : { opts: { signal: runSignal.signal } })
672
843
  }));
673
844
  }));
845
+ // A resolved handler may still have child-agent calls in flight; settle
846
+ // them before terminalizing so no run events trail run.finished.
847
+ if (delegationState.inFlightChildCalls.size > 0) {
848
+ await Promise.allSettled([...delegationState.inFlightChildCalls]);
849
+ }
674
850
  const finishedAt = now();
675
851
  if (durableBinding) {
676
852
  await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_success' }, () => durableBinding.finishSuccess(result));
@@ -684,8 +860,18 @@ export function createSessionHarness(definition) {
684
860
  }
685
861
  catch (error) {
686
862
  const finalError = normalizeRunError(error, runSignal.signal);
863
+ // A handler rejection mid-Promise.all must not orphan in-flight child
864
+ // agents: cancel them through the run signal and await settlement before
865
+ // run.finished is emitted and the session busy lock is released.
866
+ if (delegationState.inFlightChildCalls.size > 0) {
867
+ runSignal.abort(finalError);
868
+ await Promise.allSettled([...delegationState.inFlightChildCalls]);
869
+ }
687
870
  const finishedAt = now();
688
871
  const serialized = serializeError(finalError);
872
+ if (!runCreated) {
873
+ throw finalError;
874
+ }
689
875
  if (durableBinding && finalError instanceof OperationCancelledError) {
690
876
  await guardDurableStep({ sessionId, runId, workflowId, operation: 'finish_cancelled' }, () => durableBinding.finishCancelled(finalError));
691
877
  }
@@ -722,13 +908,91 @@ export function createSessionHarness(definition) {
722
908
  // id can resume; a no-op once the run was settled (success/cancel).
723
909
  if (durableBinding)
724
910
  await durableBinding.dispose();
911
+ if (closeRunSandbox) {
912
+ try {
913
+ await runSandboxSession.close();
914
+ }
915
+ catch (error) {
916
+ definition.logger.warn('Failed to close durable run sandbox.', {
917
+ harness: definition.name,
918
+ session_id: sessionId,
919
+ run_id: runId,
920
+ workflow_id: workflowId,
921
+ error: serializeError(error)
922
+ });
923
+ }
924
+ }
725
925
  runSignal.cleanup();
726
926
  state.busy = false;
727
927
  }
728
928
  }
729
929
  /** Pass-through step used when a workflow runs without durable execution. */
730
- function passthroughStep(_stepId, fn) {
731
- return fn();
930
+ function passthroughStep(_stepId, fn, options = {}) {
931
+ return runStepWithRetry(fn, options.retry);
932
+ }
933
+ function resolveDelegationPolicy(workflow) {
934
+ const configured = workflow.delegation;
935
+ const policy = configured ?? {};
936
+ const enabled = configured ? policy.enabled !== false : definition.defaults.delegation?.enabled === true;
937
+ return {
938
+ enabled,
939
+ ...(policy.agents ? { allowedAgents: new Set(policy.agents) } : {}),
940
+ maxChildAgentCalls: policy.maxChildAgentCalls ?? definition.defaults.delegation?.maxChildAgentCalls ?? DEFAULT_MAX_CHILD_AGENT_CALLS,
941
+ maxParallelChildAgentCalls: policy.maxParallelChildAgentCalls ?? definition.defaults.delegation?.maxParallelChildAgentCalls ?? DEFAULT_MAX_PARALLEL_CHILD_AGENT_CALLS,
942
+ maxDepth: policy.maxDepth ?? definition.defaults.delegation?.maxDepth ?? DEFAULT_MAX_DELEGATION_DEPTH,
943
+ ...(policy.modelAliases ? { modelAliases: new Set(policy.modelAliases) } : {}),
944
+ agentModelAliases: new Map(Object.entries(policy.agentModelAliases ?? {}).map(([agentId, aliases]) => [agentId, new Set(aliases)]))
945
+ };
946
+ }
947
+ function assertDelegationAllowed(args) {
948
+ const { policy, state, workflowId, agentId, modelAlias } = args;
949
+ if (!policy.enabled) {
950
+ throw new DelegationPolicyError('Workflow child-agent delegation is disabled.', {
951
+ workflow_id: workflowId,
952
+ agent_id: agentId,
953
+ reason: 'delegation_disabled'
954
+ });
955
+ }
956
+ if (policy.allowedAgents && !policy.allowedAgents.has(agentId)) {
957
+ throw new DelegationPolicyError('Workflow is not allowed to invoke this child agent.', {
958
+ workflow_id: workflowId,
959
+ agent_id: agentId,
960
+ reason: 'agent_not_allowed'
961
+ });
962
+ }
963
+ if (CHILD_DELEGATION_DEPTH > policy.maxDepth) {
964
+ throw new DelegationPolicyError('Workflow child-agent delegation depth exceeded.', {
965
+ workflow_id: workflowId,
966
+ agent_id: agentId,
967
+ reason: 'max_delegation_depth_exceeded',
968
+ limit: policy.maxDepth
969
+ });
970
+ }
971
+ if (state.totalChildAgentCalls >= policy.maxChildAgentCalls) {
972
+ throw new DelegationPolicyError('Workflow child-agent call budget exceeded.', {
973
+ workflow_id: workflowId,
974
+ agent_id: agentId,
975
+ reason: 'max_child_agent_calls_exceeded',
976
+ limit: policy.maxChildAgentCalls
977
+ });
978
+ }
979
+ if (state.activeChildAgentCalls >= policy.maxParallelChildAgentCalls) {
980
+ throw new DelegationPolicyError('Workflow parallel child-agent call budget exceeded.', {
981
+ workflow_id: workflowId,
982
+ agent_id: agentId,
983
+ reason: 'max_parallel_child_agent_calls_exceeded',
984
+ limit: policy.maxParallelChildAgentCalls
985
+ });
986
+ }
987
+ const allowedModels = policy.agentModelAliases.get(agentId) ?? policy.modelAliases;
988
+ if (allowedModels && !allowedModels.has(modelAlias)) {
989
+ throw new DelegationPolicyError('Workflow is not allowed to invoke this child agent with the selected model alias.', {
990
+ workflow_id: workflowId,
991
+ agent_id: agentId,
992
+ reason: 'model_alias_not_allowed',
993
+ model_alias: modelAlias
994
+ });
995
+ }
732
996
  }
733
997
  /**
734
998
  * Runs a durable finalization side effect (runtime finish / workspace lifecycle)
@@ -874,7 +1138,7 @@ function isObjectPartialChunk(chunk) {
874
1138
  function isObjectFinishChunk(chunk) {
875
1139
  return Boolean(chunk && typeof chunk === 'object' && chunk.kind === 'finish' && Object.prototype.hasOwnProperty.call(chunk, 'object'));
876
1140
  }
877
- function configureHarnessAdapters(context, models, state, sandbox, memory, tools) {
1141
+ function configureHarnessAdapters(context, models, state, sandbox, memory, tools, runtime, workspaceStore, checkpoints) {
878
1142
  const seen = new Set();
879
1143
  for (const alias of Object.values(models)) {
880
1144
  configureOne(alias.provider, context, seen);
@@ -882,11 +1146,16 @@ function configureHarnessAdapters(context, models, state, sandbox, memory, tools
882
1146
  configureOne(state, context, seen);
883
1147
  configureOne(sandbox, context, seen);
884
1148
  configureOne(memory, context, seen);
1149
+ configureOne(runtime, context, seen);
1150
+ configureOne(workspaceStore, context, seen);
1151
+ configureOne(checkpoints, context, seen);
885
1152
  for (const tool of Object.values(tools)) {
886
1153
  configureOne(tool, context, seen);
887
1154
  }
888
1155
  }
889
1156
  function configureOne(adapter, context, seen) {
1157
+ if (!adapter)
1158
+ return;
890
1159
  const configurable = adapter;
891
1160
  if (!configurable.configureHarnessContext || seen.has(adapter))
892
1161
  return;
@@ -933,26 +1202,6 @@ function resolveContentCaptureMode(options) {
933
1202
  return envValue;
934
1203
  return 'NO_CONTENT';
935
1204
  }
936
- function metadataSpanAttrs(metadata) {
937
- const attrs = {};
938
- for (const [key, value] of Object.entries(metadata ?? {})) {
939
- if (!/^[a-zA-Z][a-zA-Z0-9_.-]{0,63}$/.test(key))
940
- continue;
941
- if (typeof value === 'string') {
942
- if (value.length <= 256)
943
- attrs[`harness.metadata.${key}`] = value;
944
- continue;
945
- }
946
- if (typeof value === 'number' && Number.isFinite(value)) {
947
- attrs[`harness.metadata.${key}`] = value;
948
- continue;
949
- }
950
- if (typeof value === 'boolean') {
951
- attrs[`harness.metadata.${key}`] = value;
952
- }
953
- }
954
- return attrs;
955
- }
956
1205
  function isValidTraceparent(traceparent) {
957
1206
  const match = /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$/.exec(traceparent);
958
1207
  if (!match)
@@ -1030,10 +1279,10 @@ function sanitizeEventForPersistence(event) {
1030
1279
  ...(event.error ? { error: event.error } : {})
1031
1280
  };
1032
1281
  case 'agent.started':
1033
- return { agentId: event.agentId };
1282
+ return agentRunEventMeta(event);
1034
1283
  case 'agent.finished':
1035
1284
  return {
1036
- agentId: event.agentId,
1285
+ ...agentRunEventMeta(event),
1037
1286
  ...(event.output !== undefined ? { output: '[redacted]' } : {}),
1038
1287
  ...(event.error ? { error: event.error } : {})
1039
1288
  };
@@ -1091,6 +1340,16 @@ function modelStreamEventMeta(event) {
1091
1340
  ...(event.streamId ? { streamId: event.streamId } : {})
1092
1341
  };
1093
1342
  }
1343
+ function agentRunEventMeta(event) {
1344
+ return {
1345
+ agentId: event.agentId,
1346
+ ...(event.workflowId ? { workflowId: event.workflowId } : {}),
1347
+ ...(event.parentAgentId ? { parentAgentId: event.parentAgentId } : {}),
1348
+ ...(event.delegationCallId ? { delegationCallId: event.delegationCallId } : {}),
1349
+ ...(event.delegationDepth !== undefined ? { delegationDepth: event.delegationDepth } : {}),
1350
+ ...(event.modelAlias ? { modelAlias: event.modelAlias } : {})
1351
+ };
1352
+ }
1094
1353
  function isJsonRecord(value) {
1095
1354
  return value !== null && typeof value === 'object' && !Array.isArray(value);
1096
1355
  }
@@ -1121,6 +1380,8 @@ function createRunSignal(parent, timeoutMs) {
1121
1380
  : undefined;
1122
1381
  return {
1123
1382
  signal: controller.signal,
1383
+ /** Harness-initiated abort, e.g. to cancel in-flight child-agent calls. */
1384
+ abort: (reason) => controller.abort(runAbortReason(reason)),
1124
1385
  cleanup: () => {
1125
1386
  if (timeout)
1126
1387
  clearTimeout(timeout);