@dogpile/sdk 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/CHANGELOG.md +145 -0
  2. package/README.md +1 -0
  3. package/dist/browser/index.js +2270 -507
  4. package/dist/browser/index.js.map +1 -1
  5. package/dist/index.d.ts +5 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +2 -0
  8. package/dist/index.js.map +1 -1
  9. package/dist/providers/openai-compatible.d.ts +11 -0
  10. package/dist/providers/openai-compatible.d.ts.map +1 -1
  11. package/dist/providers/openai-compatible.js +87 -2
  12. package/dist/providers/openai-compatible.js.map +1 -1
  13. package/dist/runtime/broadcast.d.ts.map +1 -1
  14. package/dist/runtime/broadcast.js +1 -13
  15. package/dist/runtime/broadcast.js.map +1 -1
  16. package/dist/runtime/cancellation.d.ts +26 -0
  17. package/dist/runtime/cancellation.d.ts.map +1 -1
  18. package/dist/runtime/cancellation.js +38 -1
  19. package/dist/runtime/cancellation.js.map +1 -1
  20. package/dist/runtime/coordinator.d.ts +74 -1
  21. package/dist/runtime/coordinator.d.ts.map +1 -1
  22. package/dist/runtime/coordinator.js +929 -34
  23. package/dist/runtime/coordinator.js.map +1 -1
  24. package/dist/runtime/decisions.d.ts +25 -3
  25. package/dist/runtime/decisions.d.ts.map +1 -1
  26. package/dist/runtime/decisions.js +241 -3
  27. package/dist/runtime/decisions.js.map +1 -1
  28. package/dist/runtime/defaults.d.ts +37 -1
  29. package/dist/runtime/defaults.d.ts.map +1 -1
  30. package/dist/runtime/defaults.js +347 -0
  31. package/dist/runtime/defaults.js.map +1 -1
  32. package/dist/runtime/engine.d.ts.map +1 -1
  33. package/dist/runtime/engine.js +254 -24
  34. package/dist/runtime/engine.js.map +1 -1
  35. package/dist/runtime/ids.d.ts +19 -0
  36. package/dist/runtime/ids.d.ts.map +1 -0
  37. package/dist/runtime/ids.js +36 -0
  38. package/dist/runtime/ids.js.map +1 -0
  39. package/dist/runtime/logger.d.ts +61 -0
  40. package/dist/runtime/logger.d.ts.map +1 -0
  41. package/dist/runtime/logger.js +114 -0
  42. package/dist/runtime/logger.js.map +1 -0
  43. package/dist/runtime/retry.d.ts +99 -0
  44. package/dist/runtime/retry.d.ts.map +1 -0
  45. package/dist/runtime/retry.js +181 -0
  46. package/dist/runtime/retry.js.map +1 -0
  47. package/dist/runtime/sequential.d.ts.map +1 -1
  48. package/dist/runtime/sequential.js +9 -11
  49. package/dist/runtime/sequential.js.map +1 -1
  50. package/dist/runtime/shared.d.ts.map +1 -1
  51. package/dist/runtime/shared.js +1 -13
  52. package/dist/runtime/shared.js.map +1 -1
  53. package/dist/runtime/tools/built-in.d.ts +99 -0
  54. package/dist/runtime/tools/built-in.d.ts.map +1 -0
  55. package/dist/runtime/tools/built-in.js +577 -0
  56. package/dist/runtime/tools/built-in.js.map +1 -0
  57. package/dist/runtime/tools/vercel-ai.d.ts +67 -0
  58. package/dist/runtime/tools/vercel-ai.d.ts.map +1 -0
  59. package/dist/runtime/tools/vercel-ai.js +148 -0
  60. package/dist/runtime/tools/vercel-ai.js.map +1 -0
  61. package/dist/runtime/tools.d.ts +5 -268
  62. package/dist/runtime/tools.d.ts.map +1 -1
  63. package/dist/runtime/tools.js +7 -770
  64. package/dist/runtime/tools.js.map +1 -1
  65. package/dist/runtime/validation.d.ts +10 -0
  66. package/dist/runtime/validation.d.ts.map +1 -1
  67. package/dist/runtime/validation.js +73 -0
  68. package/dist/runtime/validation.js.map +1 -1
  69. package/dist/types/benchmark.d.ts +276 -0
  70. package/dist/types/benchmark.d.ts.map +1 -0
  71. package/dist/types/benchmark.js +2 -0
  72. package/dist/types/benchmark.js.map +1 -0
  73. package/dist/types/events.d.ts +816 -0
  74. package/dist/types/events.d.ts.map +1 -0
  75. package/dist/types/events.js +2 -0
  76. package/dist/types/events.js.map +1 -0
  77. package/dist/types/replay.d.ts +173 -0
  78. package/dist/types/replay.d.ts.map +1 -0
  79. package/dist/types/replay.js +2 -0
  80. package/dist/types/replay.js.map +1 -0
  81. package/dist/types.d.ts +135 -938
  82. package/dist/types.d.ts.map +1 -1
  83. package/dist/types.js.map +1 -1
  84. package/package.json +27 -1
  85. package/src/index.ts +14 -0
  86. package/src/providers/openai-compatible.ts +82 -3
  87. package/src/runtime/broadcast.ts +1 -16
  88. package/src/runtime/cancellation.ts +59 -1
  89. package/src/runtime/coordinator.ts +1164 -34
  90. package/src/runtime/decisions.ts +307 -4
  91. package/src/runtime/defaults.ts +376 -0
  92. package/src/runtime/engine.ts +363 -24
  93. package/src/runtime/ids.ts +41 -0
  94. package/src/runtime/logger.ts +152 -0
  95. package/src/runtime/retry.ts +270 -0
  96. package/src/runtime/sequential.ts +10 -13
  97. package/src/runtime/shared.ts +1 -16
  98. package/src/runtime/tools/built-in.ts +875 -0
  99. package/src/runtime/tools/vercel-ai.ts +269 -0
  100. package/src/runtime/tools.ts +60 -1255
  101. package/src/runtime/validation.ts +81 -0
  102. package/src/types/benchmark.ts +300 -0
  103. package/src/types/events.ts +895 -0
  104. package/src/types/replay.ts +212 -0
  105. package/src/types.ts +251 -997
@@ -1,24 +1,34 @@
1
+ import { DogpileError } from "../types.js";
1
2
  import type {
2
3
  AgentSpec,
3
4
  ConfiguredModelProvider,
4
5
  CoordinatorProtocolConfig,
5
6
  CostSummary,
7
+ DelegateAgentDecision,
6
8
  DogpileOptions,
7
9
  JsonObject,
8
10
  JsonValue,
9
11
  ModelRequest,
10
12
  ModelResponse,
13
+ ProtocolSelection,
11
14
  ReplayTraceProtocolDecision,
12
15
  ReplayTraceProviderCall,
13
16
  RuntimeTool,
14
17
  RuntimeToolExecutor,
15
18
  RunEvent,
16
19
  RunResult,
20
+ SubRunBudgetClampedEvent,
21
+ SubRunConcurrencyClampedEvent,
22
+ SubRunFailedEvent,
23
+ SubRunQueuedEvent,
24
+ SubRunParentAbortedEvent,
17
25
  TerminationCondition,
18
26
  TerminationStopRecord,
19
27
  Tier,
28
+ Trace,
20
29
  TranscriptEntry
21
30
  } from "../types.js";
31
+ import { createRunId, elapsedMs, nowMs, providerCallIdFor } from "./ids.js";
22
32
  import {
23
33
  addCost,
24
34
  createReplayTraceBudget,
@@ -33,15 +43,62 @@ import {
33
43
  createRunUsage,
34
44
  createTranscriptLink,
35
45
  emptyCost,
46
+ lastCostBearingEventCost,
36
47
  nextProviderCallId
37
48
  } from "./defaults.js";
38
- import { throwIfAborted } from "./cancellation.js";
39
- import { parseAgentDecision } from "./decisions.js";
49
+ import {
50
+ classifyAbortReason,
51
+ classifyChildTimeoutSource,
52
+ createAbortErrorFromSignal,
53
+ createEngineDeadlineTimeoutError,
54
+ throwIfAborted
55
+ } from "./cancellation.js";
56
+ import { assertDepthWithinLimit, parseAgentDecision } from "./decisions.js";
40
57
  import { generateModelTurn } from "./model.js";
41
58
  import { evaluateTerminationStop, warnOnProtocolTerminationMisconfiguration } from "./termination.js";
42
59
  import { createRuntimeToolExecutor, executeModelResponseToolRequests, runtimeToolAvailability } from "./tools.js";
43
60
  import { createWrapUpHintController } from "./wrap-up.js";
44
61
 
62
+ /**
63
+ * Callback to invoke a child run via the engine's `runProtocol` switch. Passed
64
+ * in by `engine.ts` so coordinator avoids a circular import.
65
+ */
66
+ export type RunProtocolFn = (input: {
67
+ readonly intent: string;
68
+ readonly protocol: ProtocolSelection;
69
+ readonly tier: Tier;
70
+ readonly model: ConfiguredModelProvider;
71
+ readonly agents: readonly AgentSpec[];
72
+ readonly tools: readonly RuntimeTool<JsonObject, JsonValue>[];
73
+ readonly temperature: number;
74
+ readonly budget?: DogpileOptions["budget"];
75
+ readonly seed?: string | number;
76
+ readonly signal?: AbortSignal;
77
+ readonly terminate?: TerminationCondition;
78
+ readonly wrapUpHint?: DogpileOptions["wrapUpHint"];
79
+ readonly emit?: (event: RunEvent) => void;
80
+ readonly streamEvents?: boolean;
81
+ readonly currentDepth?: number;
82
+ readonly effectiveMaxDepth?: number;
83
+ readonly effectiveMaxConcurrentChildren?: number;
84
+ readonly onChildFailure?: DogpileOptions["onChildFailure"];
85
+ /**
86
+ * Root-run deadline (epoch ms). Children inherit `parentDeadlineMs - now()`
87
+ * as their default timeout window so a depth-N child sees the ROOT's deadline,
88
+ * not its immediate parent's freshly-computed value (BUDGET-02 / D-12).
89
+ */
90
+ readonly parentDeadlineMs?: number;
91
+ /**
92
+ * Engine-level fallback sub-run timeout (BUDGET-02 / D-14). Applied only when
93
+ * neither the parent nor the decision specifies a `budget.timeoutMs`.
94
+ */
95
+ readonly defaultSubRunTimeoutMs?: number;
96
+ readonly registerAbortDrain?: (drain: AbortDrainFn) => void;
97
+ readonly failureInstancesByChildRunId?: Map<string, DogpileError>;
98
+ }) => Promise<RunResult>;
99
+
100
+ export type AbortDrainFn = (reason?: unknown) => void;
101
+
45
102
  interface CoordinatorRunOptions {
46
103
  readonly intent: string;
47
104
  readonly protocol: CoordinatorProtocolConfig;
@@ -56,6 +113,121 @@ interface CoordinatorRunOptions {
56
113
  readonly terminate?: TerminationCondition;
57
114
  readonly wrapUpHint?: DogpileOptions["wrapUpHint"];
58
115
  readonly emit?: (event: RunEvent) => void;
116
+ readonly streamEvents?: boolean;
117
+ /**
118
+ * Recursion depth of this coordinator run. Top-level callers pass 0; child
119
+ * sub-runs receive parent depth + 1 from the dispatch loop.
120
+ */
121
+ readonly currentDepth?: number;
122
+ /**
123
+ * Effective max recursion depth resolved at run start. Plan 04 enforces;
124
+ * Plan 03 only plumbs the value.
125
+ */
126
+ readonly effectiveMaxDepth?: number;
127
+ readonly effectiveMaxConcurrentChildren?: number;
128
+ readonly onChildFailure?: DogpileOptions["onChildFailure"];
129
+ /**
130
+ * Engine `runProtocol` callback used by the delegate dispatch loop to
131
+ * recursively run a child protocol. Optional so unit tests that exercise
132
+ * the coordinator without the engine wrapper still typecheck — when omitted,
133
+ * delegate dispatch falls back to throwing `invalid-configuration`.
134
+ */
135
+ readonly runProtocol?: RunProtocolFn;
136
+ /**
137
+ * Root-run deadline (epoch ms) threaded through every recursive coordinator
138
+ * dispatch (BUDGET-02 / D-12). When set, sub-run dispatches compute their
139
+ * `remainingMs = parentDeadlineMs - Date.now()` against this deadline rather
140
+ * than the parent's full `budget.timeoutMs` window.
141
+ */
142
+ readonly parentDeadlineMs?: number;
143
+ /**
144
+ * Engine-level fallback sub-run timeout (BUDGET-02 / D-14). Applied only when
145
+ * neither the parent nor the decision specifies a `budget.timeoutMs`.
146
+ */
147
+ readonly defaultSubRunTimeoutMs?: number;
148
+ readonly registerAbortDrain?: (drain: AbortDrainFn) => void;
149
+ readonly failureInstancesByChildRunId?: Map<string, DogpileError>;
150
+ }
151
+
152
+ /**
153
+ * Hard-coded loop guard for the delegate dispatch in the coordinator plan
154
+ * turn. After this many consecutive delegate decisions the coordinator throws
155
+ * `invalid-configuration` (T-03-01). Not a public option.
156
+ */
157
+ const MAX_DISPATCH_PER_TURN = 8;
158
+ const DEFAULT_MAX_CONCURRENT_CHILDREN = 4;
159
+
160
+ type DispatchWaveFailure = {
161
+ readonly childRunId: string;
162
+ readonly intent: string;
163
+ readonly error: {
164
+ readonly code: string;
165
+ readonly message: string;
166
+ readonly detail?: { readonly reason?: string };
167
+ };
168
+ readonly partialCost: { readonly usd: number };
169
+ };
170
+
171
/**
 * Minimal counting semaphore used to cap how many delegated child runs
 * execute at the same time within one dispatch wave.
 */
interface Semaphore {
  /** Resolves once a slot is held by the caller; queues FIFO when all slots are taken. */
  acquire(): Promise<void>;
  /** Gives a held slot back and hands it to the oldest queued waiter, if any. */
  release(): void;
  /** Number of slots currently held. */
  readonly inFlight: number;
  /** Number of `acquire()` calls still waiting for a slot. */
  readonly queued: number;
}

/**
 * Create a FIFO counting semaphore.
 *
 * @param maxConcurrent - Maximum number of simultaneously held slots. Values
 *   below 1 (including `0`, negatives, and `NaN`) are treated as 1: with the
 *   raw value no `acquire()` could ever succeed and every caller awaiting the
 *   semaphore would hang forever. `Number.POSITIVE_INFINITY` means "no cap".
 * @returns A semaphore whose `inFlight` / `queued` getters reflect live state.
 */
function createSemaphore(maxConcurrent: number): Semaphore {
  // `NaN >= 1` is false, so NaN also falls back to a single slot.
  const limit = maxConcurrent >= 1 ? maxConcurrent : 1;
  let inFlight = 0;
  const waiters: Array<() => void> = [];

  return {
    acquire(): Promise<void> {
      if (inFlight < limit) {
        inFlight += 1;
        return Promise.resolve();
      }
      return new Promise<void>((resolve) => {
        // The slot is counted when the waiter is woken, not when it is queued.
        waiters.push(() => {
          inFlight += 1;
          resolve();
        });
      });
    },
    release(): void {
      if (inFlight === 0) {
        // Unmatched release: nothing is held, so there is nothing to give
        // back. Ignoring it keeps the counter from going negative, which
        // would let more than `limit` holders in afterwards.
        return;
      }
      inFlight -= 1;
      // Hand the freed slot straight to the oldest waiter (FIFO); the waiter
      // re-increments `inFlight` synchronously.
      const next = waiters.shift();
      if (next !== undefined) {
        next();
      }
    },
    get inFlight() {
      return inFlight;
    },
    get queued() {
      return waiters.length;
    }
  };
}
209
+
210
/**
 * Locate the first provider in the coordinator's provider set that reports
 * `metadata.locality === "local"`.
 *
 * Search order: `options.model`, then each entry of `options.agents` in
 * declaration order. `AgentSpec` does not declare a `model` field today
 * (Phase 3 D-11 forward-compat scaffolding), so the agent pass reads the
 * property through a widened view and finds nothing until a later phase
 * adds `AgentSpec.model`.
 *
 * @returns The matching provider, or `undefined` when none is local.
 */
function findFirstLocalProvider(options: CoordinatorRunOptions): ConfiguredModelProvider | undefined {
  const primary = options.model;
  if (primary.metadata?.locality === "local") {
    return primary;
  }

  // Forward-compat view of the agents: `model` is optional and absent today.
  const agentsWithModel = options.agents as readonly { readonly model?: ConfiguredModelProvider }[];
  for (const { model: candidate } of agentsWithModel) {
    if (candidate?.metadata?.locality === "local") {
      return candidate;
    }
  }
  return undefined;
}
60
232
 
61
233
  export async function runCoordinator(options: CoordinatorRunOptions): Promise<RunResult> {
@@ -64,13 +236,16 @@ export async function runCoordinator(options: CoordinatorRunOptions): Promise<Ru
64
236
  const transcript: TranscriptEntry[] = [];
65
237
  const protocolDecisions: ReplayTraceProtocolDecision[] = [];
66
238
  const providerCalls: ReplayTraceProviderCall[] = [];
239
+ const dispatchedChildren = new Map<string, DispatchedChild>();
67
240
  let totalCost = emptyCost();
241
+ let concurrencyClampEmitted = false; // D-12: emit once per run, never per-engine.
68
242
  const maxTurns = options.protocol.maxTurns ?? options.agents.length;
69
243
  const activeAgents = options.agents.slice(0, maxTurns);
70
244
  const coordinator = activeAgents[0];
71
245
  const startedAtMs = nowMs();
72
246
  let stopped = false;
73
247
  let termination: TerminationStopRecord | undefined;
248
+ let triggeringFailureForAbortMode: DispatchWaveFailure | undefined;
74
249
  const wrapUpHint = createWrapUpHintController({
75
250
  protocol: options.protocol,
76
251
  tier: options.tier,
@@ -95,6 +270,63 @@ export async function runCoordinator(options: CoordinatorRunOptions): Promise<Ru
95
270
  );
96
271
  };
97
272
 
273
+ const drainOnParentAbort = (reasonSource?: unknown): void => {
274
+ const reason = classifyAbortReason(reasonSource);
275
+ for (const child of dispatchedChildren.values()) {
276
+ if (child.closed) {
277
+ continue;
278
+ }
279
+ const partialCost = child.started
280
+ ? lastCostBearingEventCost(child.childEvents) ?? emptyCost()
281
+ : emptyCost();
282
+ const partialTrace = buildPartialTrace({
283
+ childRunId: child.childRunId,
284
+ events: [...child.childEvents],
285
+ startedAtMs: child.startedAtMs,
286
+ protocol: child.decision.protocol,
287
+ tier: options.tier,
288
+ modelProviderId: options.model.id,
289
+ agents: options.agents,
290
+ intent: child.decision.intent,
291
+ temperature: options.temperature,
292
+ ...(child.childTimeoutMs !== undefined ? { childTimeoutMs: child.childTimeoutMs } : {}),
293
+ ...(options.seed !== undefined ? { seed: options.seed } : {})
294
+ });
295
+ const failedEvent: SubRunFailedEvent = {
296
+ type: "sub-run-failed",
297
+ runId,
298
+ at: new Date().toISOString(),
299
+ childRunId: child.childRunId,
300
+ parentRunId: runId,
301
+ parentDecisionId: child.parentDecisionId,
302
+ parentDecisionArrayIndex: child.parentDecisionArrayIndex,
303
+ error: child.started
304
+ ? {
305
+ code: "aborted",
306
+ message: "Parent run aborted.",
307
+ detail: {
308
+ reason
309
+ }
310
+ }
311
+ : {
312
+ code: "aborted",
313
+ message: "Sibling delegate failed; queued delegate never started.",
314
+ detail: {
315
+ reason: "sibling-failed"
316
+ }
317
+ },
318
+ partialTrace,
319
+ partialCost
320
+ };
321
+ child.closed = true;
322
+ totalCost = addCost(totalCost, partialCost);
323
+ emit(failedEvent);
324
+ recordProtocolDecision(failedEvent);
325
+ }
326
+ };
327
+
328
+ options.registerAbortDrain?.(drainOnParentAbort);
329
+
98
330
  const toolExecutor = createRuntimeToolExecutor({
99
331
  runId,
100
332
  protocol: "coordinator",
@@ -125,24 +357,280 @@ export async function runCoordinator(options: CoordinatorRunOptions): Promise<Ru
125
357
 
126
358
  if (coordinator) {
127
359
  if (!stopIfNeeded()) {
128
- totalCost = await runCoordinatorTurn({
129
- agent: coordinator,
130
- coordinator,
131
- input: buildCoordinatorPlanInput(options.intent, coordinator),
132
- phase: "plan",
133
- options,
134
- runId,
135
- transcript,
136
- totalCost,
137
- providerCalls,
138
- toolExecutor,
139
- toolAvailability,
140
- events,
141
- startedAtMs,
142
- wrapUpHint,
143
- emit,
144
- recordProtocolDecision
145
- });
360
+ // Delegate dispatch loop (D-11/D-16/D-17/D-18). Phase 1 limits delegation
361
+ // to the coordinator's plan turn; workers cannot delegate. The loop
362
+ // re-issues the coordinator plan turn after each successful sub-run with
363
+ // the projected D-17 result tagged into the next prompt and a synthetic
364
+ // D-18 transcript entry already appended. `partialTrace` for failed
365
+ // sub-runs is captured via a tee'd emit buffer locally — `runProtocol`'s
366
+ // error contract is unchanged.
367
+ let dispatchInput = buildCoordinatorPlanInput(options.intent, coordinator);
368
+ let dispatchCount = 0;
369
+ while (true) {
370
+ const turnOutcome = await runCoordinatorTurn({
371
+ agent: coordinator,
372
+ coordinator,
373
+ input: dispatchInput,
374
+ phase: "plan",
375
+ options,
376
+ runId,
377
+ transcript,
378
+ totalCost,
379
+ providerCalls,
380
+ toolExecutor,
381
+ toolAvailability,
382
+ events,
383
+ startedAtMs,
384
+ wrapUpHint,
385
+ emit,
386
+ recordProtocolDecision
387
+ });
388
+ totalCost = turnOutcome.totalCost;
389
+
390
+ if (turnOutcome.decision === undefined) {
391
+ break;
392
+ }
393
+
394
+ const delegates = Array.isArray(turnOutcome.decision)
395
+ ? turnOutcome.decision
396
+ : turnOutcome.decision.type === "delegate"
397
+ ? [turnOutcome.decision]
398
+ : [];
399
+ if (delegates.length === 0) {
400
+ break;
401
+ }
402
+
403
+ if (dispatchCount + delegates.length > MAX_DISPATCH_PER_TURN) {
404
+ throw new DogpileError({
405
+ code: "invalid-configuration",
406
+ message: `Coordinator plan turn delegated ${delegates.length} more children after ${dispatchCount}; max is ${MAX_DISPATCH_PER_TURN}.`,
407
+ retryable: false,
408
+ detail: {
409
+ kind: "delegate-validation",
410
+ path: "decision",
411
+ reason: "loop-guard-exceeded",
412
+ maxDispatchPerTurn: MAX_DISPATCH_PER_TURN
413
+ }
414
+ });
415
+ }
416
+
417
+ const parentDecisionId = String(events.length - 1);
418
+ const parentDepth = options.currentDepth ?? 0;
419
+ const decisionMax = delegates.reduce(
420
+ (max, delegate) => Math.min(max, delegate.maxConcurrentChildren ?? Number.POSITIVE_INFINITY),
421
+ Number.POSITIVE_INFINITY
422
+ );
423
+ let effectiveForTurn = Math.min(
424
+ options.effectiveMaxConcurrentChildren ?? DEFAULT_MAX_CONCURRENT_CHILDREN,
425
+ decisionMax
426
+ );
427
+ const requestedMax = effectiveForTurn;
428
+ const localProvider = findFirstLocalProvider(options);
429
+ if (localProvider !== undefined) {
430
+ effectiveForTurn = 1;
431
+ if (!concurrencyClampEmitted) {
432
+ const clampEvent: SubRunConcurrencyClampedEvent = {
433
+ type: "sub-run-concurrency-clamped",
434
+ runId,
435
+ at: new Date().toISOString(),
436
+ requestedMax,
437
+ effectiveMax: 1,
438
+ reason: "local-provider-detected",
439
+ providerId: localProvider.id
440
+ };
441
+ emit(clampEvent);
442
+ recordProtocolDecision(clampEvent);
443
+ concurrencyClampEmitted = true;
444
+ }
445
+ }
446
+ const semaphore = createSemaphore(effectiveForTurn);
447
+ const childRunIds = delegates.map(() => createRunId());
448
+ const dispatchedForTurn = delegates.map((delegate, index): DispatchedChild => {
449
+ const childRunId = childRunIds[index];
450
+ if (childRunId === undefined) {
451
+ throw new Error("missing child run id");
452
+ }
453
+ const dispatchedChild: DispatchedChild = {
454
+ childRunId,
455
+ decision: delegate,
456
+ parentDecisionId,
457
+ parentDecisionArrayIndex: index,
458
+ parentDepth,
459
+ controller: new AbortController(),
460
+ removeParentListener: undefined,
461
+ childEvents: [],
462
+ started: false,
463
+ closed: false,
464
+ startedAtMs: Date.now(),
465
+ childTimeoutMs: undefined,
466
+ failure: undefined
467
+ };
468
+ dispatchedChildren.set(childRunId, dispatchedChild);
469
+ return dispatchedChild;
470
+ });
471
+ const dispatchResults: Array<{ readonly index: number; readonly result: DispatchDelegateResult }> = [];
472
+ let firstFailureIndex: number | undefined;
473
+
474
+ const tasks = delegates.map(async (delegate, index) => {
475
+ const childRunId = childRunIds[index];
476
+ if (childRunId === undefined) {
477
+ throw new Error("missing child run id");
478
+ }
479
+ if (semaphore.inFlight >= effectiveForTurn) {
480
+ const queuedEvent: SubRunQueuedEvent = {
481
+ type: "sub-run-queued",
482
+ runId,
483
+ at: new Date().toISOString(),
484
+ childRunId,
485
+ parentRunId: runId,
486
+ parentDecisionId,
487
+ parentDecisionArrayIndex: index,
488
+ protocol: delegate.protocol,
489
+ intent: delegate.intent,
490
+ depth: parentDepth + 1,
491
+ queuePosition: semaphore.queued
492
+ };
493
+ emit(queuedEvent);
494
+ recordProtocolDecision(queuedEvent);
495
+ }
496
+
497
+ await semaphore.acquire();
498
+ try {
499
+ const dispatchedChild = dispatchedForTurn[index];
500
+ if (!dispatchedChild) {
501
+ throw new Error("missing dispatched child");
502
+ }
503
+ if (firstFailureIndex !== undefined) {
504
+ if (dispatchedChild.closed) {
505
+ dispatchResults.push({
506
+ index,
507
+ result: {
508
+ nextInput: "",
509
+ taggedText: `[sub-run ${childRunId}]: skipped because the parent run aborted`,
510
+ completedAtMs: Date.now()
511
+ }
512
+ });
513
+ return;
514
+ }
515
+ const partialCost = emptyCost();
516
+ const partialTrace = buildPartialTrace({
517
+ childRunId,
518
+ events: [],
519
+ startedAtMs: Date.now(),
520
+ protocol: delegate.protocol,
521
+ tier: options.tier,
522
+ modelProviderId: options.model.id,
523
+ agents: options.agents,
524
+ intent: delegate.intent,
525
+ temperature: options.temperature,
526
+ ...(options.seed !== undefined ? { seed: options.seed } : {})
527
+ });
528
+ const failedEvent: SubRunFailedEvent = {
529
+ type: "sub-run-failed",
530
+ runId,
531
+ at: new Date().toISOString(),
532
+ childRunId,
533
+ parentRunId: runId,
534
+ parentDecisionId,
535
+ parentDecisionArrayIndex: index,
536
+ error: {
537
+ code: "aborted",
538
+ message: "Sibling delegate failed; queued delegate never started.",
539
+ detail: {
540
+ reason: "sibling-failed"
541
+ }
542
+ },
543
+ partialTrace,
544
+ partialCost
545
+ };
546
+ emit(failedEvent);
547
+ recordProtocolDecision(failedEvent);
548
+ dispatchedChild.closed = true;
549
+ dispatchResults.push({
550
+ index,
551
+ result: {
552
+ nextInput: "",
553
+ taggedText: `[sub-run ${childRunId}]: skipped because a sibling delegate failed`,
554
+ completedAtMs: Date.now()
555
+ }
556
+ });
557
+ return;
558
+ }
559
+ const result = await dispatchDelegate({
560
+ decision: delegate,
561
+ childRunId,
562
+ parentDecisionId,
563
+ parentDecisionArrayIndex: index,
564
+ parentDepth,
565
+ parentRunId: runId,
566
+ options,
567
+ transcript,
568
+ emit,
569
+ recordProtocolDecision,
570
+ recordSubRunCost: (cost: CostSummary): void => {
571
+ totalCost = addCost(totalCost, cost);
572
+ },
573
+ dispatchedChild
574
+ });
575
+ dispatchResults.push({ index, result });
576
+ } catch (error) {
577
+ firstFailureIndex ??= index;
578
+ const dispatchedChild = dispatchedForTurn[index];
579
+ const failure = dispatchedChild?.failure;
580
+ if (
581
+ delegates.length === 1 &&
582
+ (options.onChildFailure === "abort" || failure === undefined || isDelegateValidationError(error))
583
+ ) {
584
+ throw error;
585
+ }
586
+ const failureMessage = error instanceof Error ? error.message : String(error);
587
+ let taggedText = `[sub-run ${childRunId} failed]: ${failureMessage}`;
588
+ if (failure) {
589
+ const error = failure.error;
590
+ taggedText = `[sub-run ${childRunId} failed | code=${error.code} | spent=$${failure.partialCost.usd.toFixed(3)}]: ${error.message}`;
591
+ }
592
+ dispatchResults.push({
593
+ index,
594
+ result: {
595
+ nextInput: "",
596
+ taggedText,
597
+ completedAtMs: Date.now()
598
+ }
599
+ });
600
+ } finally {
601
+ semaphore.release();
602
+ }
603
+ });
604
+ const settled = await Promise.allSettled(tasks);
605
+ const firstRejected = settled.find((result) => result.status === "rejected");
606
+ if (
607
+ firstRejected?.status === "rejected" &&
608
+ delegates.length === 1 &&
609
+ (options.onChildFailure === "abort" || dispatchResults.length === 0)
610
+ ) {
611
+ throw firstRejected.reason;
612
+ }
613
+
614
+ dispatchResults.sort((a, b) => a.result.completedAtMs - b.result.completedAtMs);
615
+ const taggedResults = dispatchResults.map((entry) => entry.result.taggedText).join("\n\n");
616
+ const currentWaveFailures = dispatchedForTurn
617
+ .map((child) => child.failure)
618
+ .filter((failure): failure is DispatchWaveFailure => failure !== undefined);
619
+ if (options.onChildFailure === "abort" && currentWaveFailures.length > 0) {
620
+ triggeringFailureForAbortMode ??= currentWaveFailures[0];
621
+ break;
622
+ }
623
+ const failuresSection = buildFailuresSection(currentWaveFailures);
624
+ const coordinatorAgent = options.agents[0] ?? { id: "coordinator", role: "coordinator" };
625
+ const baseInput = buildCoordinatorPlanInput(options.intent, coordinatorAgent);
626
+ dispatchInput = [
627
+ baseInput,
628
+ taggedResults,
629
+ failuresSection,
630
+ "Using the sub-run results above, decide the next step (participate or delegate)."
631
+ ].filter((section): section is string => Boolean(section)).join("\n\n");
632
+ dispatchCount += delegates.length;
633
+ }
146
634
  stopIfNeeded();
147
635
  }
148
636
 
@@ -208,7 +696,7 @@ export async function runCoordinator(options: CoordinatorRunOptions): Promise<Ru
208
696
  }
209
697
 
210
698
  if (!stopIfNeeded()) {
211
- totalCost = await runCoordinatorTurn({
699
+ const synthesisOutcome = await runCoordinatorTurn({
212
700
  agent: coordinator,
213
701
  coordinator,
214
702
  input: buildFinalSynthesisInput(options.intent, transcript, coordinator),
@@ -226,6 +714,20 @@ export async function runCoordinator(options: CoordinatorRunOptions): Promise<Ru
226
714
  emit,
227
715
  recordProtocolDecision
228
716
  });
717
+ totalCost = synthesisOutcome.totalCost;
718
+ // Phase 1: final-synthesis turn cannot delegate.
719
+ if (Array.isArray(synthesisOutcome.decision) || synthesisOutcome.decision?.type === "delegate") {
720
+ throw new DogpileError({
721
+ code: "invalid-configuration",
722
+ message: "Coordinator final-synthesis turn cannot emit a delegate decision in Phase 1",
723
+ retryable: false,
724
+ detail: {
725
+ kind: "delegate-validation",
726
+ path: "decision",
727
+ phase: "final-synthesis"
728
+ }
729
+ });
730
+ }
229
731
  stopIfNeeded();
230
732
  }
231
733
  }
@@ -282,6 +784,7 @@ export async function runCoordinator(options: CoordinatorRunOptions): Promise<Ru
282
784
  cost: totalCost,
283
785
  transcript: createTranscriptLink(transcript)
284
786
  }),
787
+ ...(triggeringFailureForAbortMode !== undefined ? { triggeringFailureForAbortMode } : {}),
285
788
  events,
286
789
  transcript
287
790
  },
@@ -357,6 +860,11 @@ export async function runCoordinator(options: CoordinatorRunOptions): Promise<Ru
357
860
  }
358
861
  }
359
862
 
863
/**
 * Report whether `error` is a `DogpileError` with code
 * `"invalid-configuration"` whose `detail.kind` is `"delegate-validation"`.
 *
 * @param error - Any caught value.
 * @returns `true` only when all three conditions hold.
 */
function isDelegateValidationError(error: unknown): boolean {
  if (!DogpileError.isInstance(error)) {
    return false;
  }
  if (error.code !== "invalid-configuration") {
    return false;
  }
  return error.detail?.["kind"] === "delegate-validation";
}
867
+
360
868
  interface CoordinatorTurnOptions {
361
869
  readonly agent: AgentSpec;
362
870
  readonly coordinator: AgentSpec;
@@ -379,7 +887,12 @@ interface CoordinatorTurnOptions {
379
887
  ) => void;
380
888
  }
381
889
 
382
- async function runCoordinatorTurn(turn: CoordinatorTurnOptions): Promise<CostSummary> {
890
+ interface CoordinatorTurnResult {
891
+ readonly totalCost: CostSummary;
892
+ readonly decision: ReturnType<typeof parseAgentDecision>;
893
+ }
894
+
895
+ async function runCoordinatorTurn(turn: CoordinatorTurnOptions): Promise<CoordinatorTurnResult> {
383
896
  throwIfAborted(turn.options.signal, turn.options.model.id);
384
897
 
385
898
  const request: ModelRequest = {
@@ -429,7 +942,11 @@ async function runCoordinatorTurn(turn: CoordinatorTurnOptions): Promise<CostSum
429
942
  turn.providerCalls.push(call);
430
943
  }
431
944
  });
432
- const decision = parseAgentDecision(response.text);
945
+ const decision = parseAgentDecision(response.text, {
946
+ parentProviderId: turn.options.model.id,
947
+ currentDepth: turn.options.currentDepth ?? 0,
948
+ maxDepth: turn.options.effectiveMaxDepth ?? Number.POSITIVE_INFINITY
949
+ });
433
950
  const totalCost = addCost(turn.totalCost, responseCost(response));
434
951
  const toolCalls = await executeModelResponseToolRequests({
435
952
  response,
@@ -470,7 +987,7 @@ async function runCoordinatorTurn(turn: CoordinatorTurnOptions): Promise<CostSum
470
987
  transcriptEntryCount: turn.transcript.length
471
988
  });
472
989
 
473
- return totalCost;
990
+ return { totalCost, decision };
474
991
  }
475
992
 
476
993
  interface CoordinatorWorkerTurnOptions {
@@ -552,7 +1069,23 @@ async function runCoordinatorWorkerTurn(turn: CoordinatorWorkerTurnOptions): Pro
552
1069
  turn.providerCallSlots[turn.providerCallIndex] = call;
553
1070
  }
554
1071
  });
555
- const decision = parseAgentDecision(response.text);
1072
+ const decision = parseAgentDecision(response.text, {
1073
+ parentProviderId: turn.options.model.id,
1074
+ currentDepth: turn.options.currentDepth ?? 0,
1075
+ maxDepth: turn.options.effectiveMaxDepth ?? Number.POSITIVE_INFINITY
1076
+ });
1077
+ if (Array.isArray(decision) || decision?.type === "delegate") {
1078
+ throw new DogpileError({
1079
+ code: "invalid-configuration",
1080
+ message: "Workers cannot emit delegate decisions in Phase 1",
1081
+ retryable: false,
1082
+ detail: {
1083
+ kind: "delegate-validation",
1084
+ path: "decision",
1085
+ phase: "worker"
1086
+ }
1087
+ });
1088
+ }
556
1089
  const toolCalls = await executeModelResponseToolRequests({
557
1090
  response,
558
1091
  executor: turn.toolExecutor,
@@ -588,6 +1121,39 @@ function buildCoordinatorPlanInput(intent: string, coordinator: AgentSpec): stri
588
1121
  return `Mission: ${intent}\nCoordinator ${coordinator.id}: assign the work, name the plan, and provide the first contribution.`;
589
1122
  }
590
1123
 
1124
/**
 * Render the failures of the current dispatch wave as a markdown section
 * containing a fenced JSON block, for inclusion in the next coordinator
 * prompt.
 *
 * Failure messages are arbitrary text. A backtick run inside one would close
 * the code fence early and let the rest of the message read as prompt text,
 * so every backtick in the serialized JSON is written as the JSON escape
 * `\u0060`. The payload still parses to exactly the same values, and the
 * output is unchanged whenever no backticks are present.
 *
 * @param failures - Failures collected for the wave.
 * @returns The section text, or `null` when there are no failures.
 */
function buildFailuresSection(failures: readonly DispatchWaveFailure[]): string | null {
  if (failures.length === 0) {
    return null;
  }
  // Backticks can only occur inside JSON string literals here, where
  // `\u0060` is a valid, equivalent spelling.
  const payload = JSON.stringify(failures, null, 2).replace(/`/g, "\\u0060");
  return [
    "## Sub-run failures since last decision",
    "",
    "```json",
    payload,
    "```"
  ].join("\n");
}
1136
+
1137
/**
 * Project a `sub-run-failed` event into the compact failure record kept for
 * a dispatch wave.
 *
 * Events whose `error.detail.reason` is `"sibling-failed"` or
 * `"parent-aborted"` produce no record. A `reason` is only carried over when
 * it is a string.
 *
 * @param intent - Intent text of the delegate that failed.
 * @param event - The failure event to project.
 * @returns The failure record, or `undefined` for the two excluded reasons.
 */
function dispatchWaveFailureFromEvent(
  intent: string,
  event: SubRunFailedEvent
): DispatchWaveFailure | undefined {
  const { childRunId, error: eventError, partialCost } = event;

  const rawReason = eventError.detail?.["reason"];
  const reason = typeof rawReason === "string" ? rawReason : undefined;

  switch (reason) {
    case "sibling-failed":
    case "parent-aborted":
      return undefined;
    default:
      break;
  }

  // `detail` is only present when a string reason exists; key order is
  // code, message, detail.
  const error: DispatchWaveFailure["error"] =
    reason === undefined
      ? { code: eventError.code, message: eventError.message }
      : { code: eventError.code, message: eventError.message, detail: { reason } };

  return {
    childRunId,
    intent,
    error,
    partialCost: { usd: partialCost.usd }
  };
}
1156
+
591
1157
  function buildWorkerInput(
592
1158
  intent: string,
593
1159
  transcript: readonly TranscriptEntry[],
@@ -619,19 +1185,583 @@ function responseCost(response: ModelResponse): CostSummary {
619
1185
  };
620
1186
  }
621
1187
 
622
- function createRunId(): string {
623
- const random = globalThis.crypto?.randomUUID?.();
624
- return random ?? `run-${Date.now().toString(36)}`;
1188
/**
 * Everything `dispatchDelegate` needs to run one delegate decision as a child
 * sub-run and report it back into the parent run.
 */
interface DispatchDelegateOptions {
  /** The delegate decision being dispatched. */
  readonly decision: DelegateAgentDecision;
  /** Optional pre-assigned child run id; a new id is created when omitted. */
  readonly childRunId?: string;
  /** Identifier of the parent decision, copied onto every sub-run event. */
  readonly parentDecisionId: string;
  /** Array index of this decision within the parent decision, copied onto sub-run events. */
  readonly parentDecisionArrayIndex: number;
  /** Depth of the parent run; the child is started at `parentDepth + 1`. */
  readonly parentDepth: number;
  /** Run id of the parent; used as `runId` / `parentRunId` on sub-run events. */
  readonly parentRunId: string;
  /** The parent coordinator's run options (model, agents, signal, deadlines, ...). */
  readonly options: CoordinatorRunOptions;
  /** Parent transcript; a synthetic `delegate-result` entry is pushed on success. */
  readonly transcript: TranscriptEntry[];
  /** Emits a sub-run lifecycle event on the parent run. */
  readonly emit: (event: RunEvent) => void;
  /** Records a sub-run lifecycle event as a protocol decision of the parent. */
  readonly recordProtocolDecision: (
    event: RunEvent,
    decisionOptions?: { readonly transcriptEntryCount?: number }
  ) => void;
  /**
   * BUDGET-03 / D-01 seam: callback that folds the child's cost
   * (`subResult.cost` on success, `partialCost` on failure) into the parent's
   * `totalCost` accumulator. It is called BEFORE the `sub-run-completed` /
   * `sub-run-failed` event is emitted so that the existing "last cost-bearing
   * event === final.cost" invariant keeps holding.
   */
  readonly recordSubRunCost: (cost: CostSummary) => void;
  /** Mutable per-child bookkeeping record updated while the child runs. */
  readonly dispatchedChild: DispatchedChild;
}
626
1212
 
627
- function nowMs(): number {
628
- return globalThis.performance?.now() ?? Date.now();
1213
/** Outcome of a successfully completed delegate dispatch. */
interface DispatchDelegateResult {
  /** Prompt for the next coordinator turn, with the tagged sub-run result appended. */
  readonly nextInput: string;
  /** The tagged sub-run result block on its own. */
  readonly taggedText: string;
  /** `Date.now()` timestamp taken when the dispatch result was produced. */
  readonly completedAtMs: number;
}
630
1218
 
631
- function elapsedMs(startedAtMs: number): number {
632
- return Math.max(0, nowMs() - startedAtMs);
1219
/**
 * Bookkeeping record for one dispatched child sub-run. The identity fields
 * are fixed; the lifecycle fields are mutated while the child is dispatched.
 */
interface DispatchedChild {
  /** Run id of the child. */
  readonly childRunId: string;
  /** The delegate decision this child was created for. */
  readonly decision: DelegateAgentDecision;
  /** Identifier of the parent decision that produced this child. */
  readonly parentDecisionId: string;
  /** Array index of the decision within the parent decision. */
  readonly parentDecisionArrayIndex: number;
  /** Depth of the parent run. */
  readonly parentDepth: number;
  /** Per-child abort controller; its signal is handed to the child engine. */
  readonly controller: AbortController;
  /** Detaches the parent-signal abort listener, when one was registered. */
  removeParentListener: (() => void) | undefined;
  /** Buffer of every event the child emitted; source of `partialTrace` on failure. */
  readonly childEvents: RunEvent[];
  /** Set to `true` by the dispatcher once `sub-run-started` has been emitted. */
  started: boolean;
  /** Once `true`, child events are still buffered but no longer forwarded. */
  closed: boolean;
  /** `Date.now()` timestamp taken when the dispatcher began starting the child. */
  startedAtMs: number;
  /** Timeout resolved for the child, or `undefined` when none applies. */
  childTimeoutMs: number | undefined;
  /** Failure summary recorded by the dispatcher when the child run fails. */
  failure: DispatchWaveFailure | undefined;
  /** STREAM-03 hook (Phase 4). Reserved; do not use. */
  readonly streamHandle?: never;
}
634
1236
 
635
- function providerCallIdFor(runId: string, oneBasedIndex: number): string {
636
- return `${runId}:provider-call:${oneBasedIndex}`;
1237
/**
 * Dispatch a single delegate decision as a recursive sub-run.
 *
 * D-11: child reuses the parent provider object verbatim.
 * D-16: `recursive: true` is set on `sub-run-started` when the delegated
 *       protocol is `coordinator`.
 * D-17: tagged result text appended to the next coordinator prompt.
 * D-18: synthetic transcript entry pushed for replay/provenance.
 *
 * On a thrown error from the child engine, `partialTrace` is built from the
 * locally tee'd `childEvents` buffer — `runProtocol`'s error contract is
 * unchanged.
 *
 * @param input - Decision, parent identity, parent options and the callbacks
 *   used to report events and cost back into the parent run.
 * @returns The next coordinator prompt, the tagged result text and the
 *   completion timestamp.
 * @throws DogpileError `aborted` (`detail.reason: "timeout"`) when the parent
 *   deadline has already elapsed; `invalid-configuration` when `runProtocol`
 *   is missing; the child's (enriched) `DogpileError` when the child fails; an
 *   `invalid-configuration` wrapper — carrying the original error as `cause` —
 *   when the child fails with a non-Dogpile error; the parent's abort error
 *   when the parent signal is aborted by the time the child completes.
 */
async function dispatchDelegate(input: DispatchDelegateOptions): Promise<DispatchDelegateResult> {
  const { decision, options } = input;

  // Dispatcher-time depth gate (D-14). Same error shape as the parser; this
  // is the TOCTOU defense for any state mutation between parse and dispatch.
  // Fires BEFORE sub-run-started is emitted so failed dispatches do not show
  // up in the trace as half-started sub-runs.
  if (options.effectiveMaxDepth !== undefined) {
    assertDepthWithinLimit(input.parentDepth, options.effectiveMaxDepth);
  }

  const childRunId = input.childRunId ?? createRunId();
  const recursive = decision.protocol === "coordinator";
  const decisionTimeoutMs = decision.budget?.timeoutMs;
  const parentDeadlineMs = options.parentDeadlineMs;

  // BUDGET-02 / D-12: deadline-based remaining-time math. Children inherit
  // `parentDeadlineMs - now()`, not a static `parent.budget.timeoutMs`. If the
  // parent's deadline has already elapsed, throw `code: "aborted"` with
  // `detail.reason: "timeout"` BEFORE `sub-run-started` is emitted.
  const remainingMs =
    parentDeadlineMs !== undefined ? Math.max(0, parentDeadlineMs - Date.now()) : undefined;

  if (parentDeadlineMs !== undefined && remainingMs === 0) {
    throw new DogpileError({
      code: "aborted",
      message: "Parent deadline elapsed before sub-run dispatch.",
      retryable: false,
      providerId: options.model.id,
      detail: { reason: "timeout" }
    });
  }

  // Resolve child timeout with precedence (D-12 / D-14):
  //   decision.budget.timeoutMs > parent's remaining > defaultSubRunTimeoutMs > undefined.
  // When the decision-level timeout exceeds the parent's remaining, CLAMP
  // (no longer throw) and emit a `sub-run-budget-clamped` event below.
  let childTimeoutMs: number | undefined;
  let clampedFrom: number | undefined;
  if (remainingMs !== undefined) {
    if (decisionTimeoutMs !== undefined) {
      if (decisionTimeoutMs > remainingMs) {
        clampedFrom = decisionTimeoutMs;
        childTimeoutMs = remainingMs;
      } else {
        childTimeoutMs = decisionTimeoutMs;
      }
    } else {
      childTimeoutMs = remainingMs;
    }
  } else if (decisionTimeoutMs !== undefined) {
    childTimeoutMs = decisionTimeoutMs;
  } else if (options.defaultSubRunTimeoutMs !== undefined) {
    childTimeoutMs = options.defaultSubRunTimeoutMs;
  }

  if (!options.runProtocol) {
    throw new DogpileError({
      code: "invalid-configuration",
      message:
        "Coordinator delegate dispatch requires the engine `runProtocol` callback. " +
        "Use `Dogpile.run` / `createEngine` rather than calling `runCoordinator` directly when delegate is in play.",
      retryable: false,
      detail: {
        kind: "delegate-validation",
        path: "runProtocol"
      }
    });
  }

  // JSON form of the decision; used both for `sub-run-failed.error.detail`
  // and for the synthetic transcript entry. Only one of the two is ever built
  // per dispatch, so a small factory keeps the two shapes from drifting.
  const decisionAsJson = (): JsonObject => ({
    type: "delegate",
    protocol: decision.protocol,
    intent: decision.intent,
    ...(decision.model !== undefined ? { model: decision.model } : {}),
    ...(decision.budget !== undefined ? { budget: decision.budget as unknown as JsonValue } : {})
  });

  // Buffered tee for partialTrace capture — see Plan 03 step 8.
  const childEvents = input.dispatchedChild.childEvents;
  const parentEmit = input.emit;
  const teedEmit = (event: RunEvent): void => {
    childEvents.push(event);
    // Once the child is closed, keep buffering but stop forwarding upstream.
    if (input.dispatchedChild.closed) {
      return;
    }
    if (options.streamEvents && options.emit) {
      const inbound = (event as { readonly parentRunIds?: readonly string[] }).parentRunIds;
      options.emit({
        ...event,
        parentRunIds: [input.parentRunId, ...(inbound ?? [])]
      } as RunEvent);
    }
  };
  const childStartedAt = Date.now();
  input.dispatchedChild.startedAtMs = childStartedAt;

  // BUDGET-02 / D-12: emit clamp event BEFORE sub-run-started so the trace
  // records "this child's requested timeout was reduced to fit parent's
  // remaining deadline." Skipped on the happy path (no clamp, no event).
  if (clampedFrom !== undefined && childTimeoutMs !== undefined) {
    const clampEvent: SubRunBudgetClampedEvent = {
      type: "sub-run-budget-clamped",
      runId: input.parentRunId,
      at: new Date().toISOString(),
      childRunId,
      parentRunId: input.parentRunId,
      parentDecisionId: input.parentDecisionId,
      requestedTimeoutMs: clampedFrom,
      clampedTimeoutMs: childTimeoutMs,
      reason: "exceeded-parent-remaining"
    };
    input.emit(clampEvent);
    input.recordProtocolDecision(clampEvent);
  }

  const startEvent: RunEvent = {
    type: "sub-run-started",
    runId: input.parentRunId,
    at: new Date().toISOString(),
    childRunId,
    parentRunId: input.parentRunId,
    parentDecisionId: input.parentDecisionId,
    parentDecisionArrayIndex: input.parentDecisionArrayIndex,
    protocol: decision.protocol,
    intent: decision.intent,
    depth: input.parentDepth + 1,
    ...(recursive ? { recursive: true } : {})
  };
  parentEmit(startEvent);
  input.recordProtocolDecision(startEvent);

  // BUDGET-01 / D-07: derive a per-child AbortController so child engines see
  // their own signal. Listener forwards parent.signal.reason verbatim, so
  // detail.reason classification (parent-aborted vs timeout) is preserved.
  // Phase 4 STREAM-03 hook: per-child cancel handle attaches here.
  const parentSignal = options.signal;
  let removeParentAbortListener: (() => void) | undefined;
  if (parentSignal !== undefined) {
    if (parentSignal.aborted) {
      input.dispatchedChild.controller.abort(parentSignal.reason);
    } else {
      const handler = (): void => {
        input.dispatchedChild.controller.abort(parentSignal.reason);
      };
      parentSignal.addEventListener("abort", handler, { once: true });
      removeParentAbortListener = (): void => {
        parentSignal.removeEventListener("abort", handler);
      };
    }
  }
  input.dispatchedChild.removeParentListener = removeParentAbortListener;
  input.dispatchedChild.started = true;
  input.dispatchedChild.childTimeoutMs = childTimeoutMs;

  // A local deadline timer is armed only when there is a child timeout and no
  // parent deadline is being tracked.
  const childDeadlineReason =
    childTimeoutMs !== undefined && parentDeadlineMs === undefined
      ? createEngineDeadlineTimeoutError(options.model.id, childTimeoutMs)
      : undefined;
  const childDeadlineTimer =
    childDeadlineReason !== undefined
      ? setTimeout(() => {
          input.dispatchedChild.controller.abort(childDeadlineReason);
        }, childTimeoutMs)
      : undefined;

  // Single place that releases the timer and the parent-signal listener; it
  // runs first on both the failure and the success path.
  const releaseChildResources = (): void => {
    if (childDeadlineTimer !== undefined) {
      clearTimeout(childDeadlineTimer);
    }
    removeParentAbortListener?.();
  };

  const childOptions = {
    intent: decision.intent,
    protocol: decision.protocol,
    tier: options.tier,
    model: options.model, // D-11: same provider instance verbatim
    agents: options.agents,
    tools: options.tools,
    temperature: options.temperature,
    ...(childTimeoutMs !== undefined ? { budget: { timeoutMs: childTimeoutMs } } : {}),
    signal: input.dispatchedChild.controller.signal,
    emit: teedEmit,
    ...(options.streamEvents !== undefined ? { streamEvents: options.streamEvents } : {}),
    currentDepth: input.parentDepth + 1,
    ...(options.effectiveMaxDepth !== undefined ? { effectiveMaxDepth: options.effectiveMaxDepth } : {}),
    ...(options.effectiveMaxConcurrentChildren !== undefined
      ? { effectiveMaxConcurrentChildren: options.effectiveMaxConcurrentChildren }
      : {}),
    ...(options.onChildFailure !== undefined ? { onChildFailure: options.onChildFailure } : {}),
    // BUDGET-02 / D-12: forward the ROOT deadline so depth-N grandchildren
    // see the same `parentDeadlineMs` rather than a fresh per-level snapshot.
    ...(parentDeadlineMs !== undefined ? { parentDeadlineMs } : {}),
    ...(options.defaultSubRunTimeoutMs !== undefined
      ? { defaultSubRunTimeoutMs: options.defaultSubRunTimeoutMs }
      : {})
  };

  let subResult: RunResult;
  try {
    subResult = await options.runProtocol(childOptions);
  } catch (error) {
    releaseChildResources();

    // The child was already closed before it threw: surface the (possibly
    // enriched) error without emitting a second terminal event.
    if (input.dispatchedChild.closed) {
      const enrichedError = enrichAbortErrorWithParentReason(error, parentSignal);
      if (DogpileError.isInstance(enrichedError)) {
        throw enrichedError;
      }
      throw error;
    }

    const failedDecision = decisionAsJson();

    const partialTrace: Trace = buildPartialTrace({
      childRunId,
      events: childEvents,
      startedAtMs: childStartedAt,
      protocol: decision.protocol,
      tier: options.tier,
      modelProviderId: options.model.id,
      agents: options.agents,
      intent: decision.intent,
      temperature: options.temperature,
      ...(childTimeoutMs !== undefined ? { childTimeoutMs } : {}),
      ...(options.seed !== undefined ? { seed: options.seed } : {})
    });

    // BUDGET-01 / D-08: when the child aborted because the parent.signal
    // aborted, lock detail.reason on the surfaced error. Upstream engine
    // wrapping (e.g., createStreamCancellationError) attaches its own
    // detail.status; we add detail.reason so consumers can discriminate
    // parent-aborted vs timeout regardless of which engine path produced the
    // abort error.
    const enrichedError = enrichProviderTimeoutSource(
      enrichAbortErrorWithParentReason(error, parentSignal),
      {
        ...(decisionTimeoutMs !== undefined ? { decisionTimeoutMs } : {}),
        ...(options.defaultSubRunTimeoutMs !== undefined
          ? { engineDefaultTimeoutMs: options.defaultSubRunTimeoutMs }
          : {})
      }
    );
    if (DogpileError.isInstance(enrichedError)) {
      options.failureInstancesByChildRunId?.set(childRunId, enrichedError);
    }
    const errorPayload = errorPayloadFromUnknown(enrichedError, failedDecision);
    // BUDGET-03 / D-02: capture real provider spend before the throw and
    // roll it into the parent's totalCost BEFORE emitting sub-run-failed.
    const partialCost = lastCostBearingEventCost(childEvents) ?? emptyCost();
    input.recordSubRunCost(partialCost);
    const failEvent: SubRunFailedEvent = {
      type: "sub-run-failed",
      runId: input.parentRunId,
      at: new Date().toISOString(),
      childRunId,
      parentRunId: input.parentRunId,
      parentDecisionId: input.parentDecisionId,
      parentDecisionArrayIndex: input.parentDecisionArrayIndex,
      error: errorPayload,
      partialTrace,
      partialCost
    };
    parentEmit(failEvent);
    input.recordProtocolDecision(failEvent);
    input.dispatchedChild.closed = true;
    input.dispatchedChild.failure = dispatchWaveFailureFromEvent(decision.intent, failEvent);

    // Re-throw a DogpileError so the parent run terminates with a typed error.
    if (DogpileError.isInstance(enrichedError)) {
      throw enrichedError;
    }
    throw new DogpileError({
      code: "invalid-configuration",
      message: error instanceof Error ? error.message : String(error),
      retryable: false,
      detail: {
        kind: "delegate-validation",
        path: "decision",
        reason: "child-run-failed"
      },
      // Keep the original failure (and its stack) reachable from the wrapper.
      cause: error
    });
  }

  releaseChildResources();

  // BUDGET-03 / D-01: roll child's full cost into the parent's totalCost
  // BEFORE emitting sub-run-completed. The next agent-turn / final event will
  // read totalCost from the closure scope, preserving the existing
  // "last cost-bearing event === final.cost" invariant.
  input.recordSubRunCost(subResult.cost);

  const completedEvent: RunEvent = {
    type: "sub-run-completed",
    runId: input.parentRunId,
    at: new Date().toISOString(),
    childRunId,
    parentRunId: input.parentRunId,
    parentDecisionId: input.parentDecisionId,
    parentDecisionArrayIndex: input.parentDecisionArrayIndex,
    subResult
  };
  parentEmit(completedEvent);
  input.recordProtocolDecision(completedEvent);
  input.dispatchedChild.closed = true;

  // BUDGET-01 / D-10: parent.signal aborted AFTER the child completed but
  // before we advance to the next coordinator turn. Emit a marker event so
  // streaming subscribers see "parent gave up after sub-run" provenance,
  // then re-throw the parent's abort reason. Non-streaming run() rejects with
  // the thrown error and does NOT preserve the marker — engine.ts does not
  // attach the parent events array to the rejected error (verified at
  // engine.ts:230-239). Streaming-subscriber observability is the contract.
  if (parentSignal?.aborted) {
    const abortMarker: SubRunParentAbortedEvent = {
      type: "sub-run-parent-aborted",
      runId: input.parentRunId,
      at: new Date().toISOString(),
      childRunId,
      parentRunId: input.parentRunId,
      reason: "parent-aborted"
    };
    parentEmit(abortMarker);
    input.recordProtocolDecision(abortMarker);
    throw enrichAbortErrorWithParentReason(
      createAbortErrorFromSignal(parentSignal, options.model.id),
      parentSignal
    );
  }

  // D-18 synthetic transcript entry.
  const taggedText = renderSubRunResult(childRunId, subResult);
  input.transcript.push({
    agentId: `sub-run:${childRunId}`,
    role: "delegate-result",
    input: JSON.stringify(decisionAsJson()),
    output: taggedText
  });

  // Build the next coordinator prompt by appending the D-17 tagged block.
  const coordinatorAgent = options.agents[0];
  const baseInput = buildCoordinatorPlanInput(input.options.intent, coordinatorAgent ?? {
    id: "coordinator",
    role: "coordinator"
  });
  return {
    nextInput: `${baseInput}\n\n${taggedText}\n\nUsing the sub-run result above, decide the next step (participate or delegate).`,
    taggedText,
    completedAtMs: Date.now()
  };
}
1603
+
1604
/**
 * D-17 prompt-injection helper. Renders a child `RunResult` as the canonical
 * tagged-result block injected into the parent coordinator's next prompt.
 *
 * Format:
 *   `[sub-run <childRunId>]: <output>`
 *   `[sub-run <childRunId> stats]: turns=<N> costUsd=<X> durationMs=<Y>`
 *
 * The stats line is a soft contract — field names stable, ordering stable.
 *
 * `durationMs` is the gap between the `at` timestamps of the first and last
 * trace events. It is reported as `0` when either timestamp is missing, when
 * either timestamp cannot be parsed, or when the gap would be negative, so
 * the prompt never carries `durationMs=NaN`.
 *
 * @param childRunId - Run id of the child, embedded in both tag lines.
 * @param subResult - The completed child run result.
 * @returns The two-line tagged block.
 */
function renderSubRunResult(childRunId: string, subResult: RunResult): string {
  const turns = subResult.transcript.length;
  const costUsd = subResult.cost.usd ?? 0;
  const startedAt = subResult.trace.events[0]?.at;
  const endedAt = subResult.trace.events.at(-1)?.at;
  // `Date.parse` yields NaN for unparseable input, and `Math.max(0, NaN)` is
  // NaN, so the finiteness check must come before the clamp.
  const elapsedMs = startedAt && endedAt ? Date.parse(endedAt) - Date.parse(startedAt) : 0;
  const durationMs = Number.isFinite(elapsedMs) ? Math.max(0, elapsedMs) : 0;
  return [
    `[sub-run ${childRunId}]: ${subResult.output}`,
    `[sub-run ${childRunId} stats]: turns=${turns} costUsd=${costUsd} durationMs=${durationMs}`
  ].join("\n");
}
1628
+
1629
/**
 * Assemble the JSON-serializable {@link Trace} attached to
 * `sub-run-failed.partialTrace`, using the events buffered from the child's
 * emits. Because the trace is built locally, `runProtocol`'s error contract
 * stays unchanged — Plan 03 step 8.
 *
 * The resulting trace carries the buffered events verbatim; its protocol
 * decisions, provider calls and transcript are empty, and its final output is
 * an empty-string placeholder with zero cost.
 *
 * @param input - Child identity, buffered events and the run settings the
 *   child was started with.
 * @returns The partial trace for the failed child.
 */
function buildPartialTrace(input: {
  readonly childRunId: string;
  readonly events: readonly RunEvent[];
  readonly startedAtMs: number;
  readonly protocol: ProtocolSelection;
  readonly tier: Tier;
  readonly modelProviderId: string;
  readonly agents: readonly AgentSpec[];
  readonly intent: string;
  readonly temperature: number;
  readonly childTimeoutMs?: number;
  readonly seed?: string | number;
}): Trace {
  const { protocol, tier, modelProviderId, agents, events } = input;

  // A protocol selection is either a bare name or a config object with `kind`.
  const protocolName = typeof protocol === "string" ? protocol : protocol.kind;
  const protocolConfig =
    typeof protocol === "string"
      ? ({ kind: protocol } as unknown as Parameters<typeof createReplayTraceRunInputs>[0]["protocol"])
      : protocol;

  const inputs = createReplayTraceRunInputs({
    intent: input.intent,
    protocol: protocolConfig,
    tier,
    modelProviderId,
    agents,
    temperature: input.temperature
  });
  const budget = createReplayTraceBudget({
    tier,
    ...(input.childTimeoutMs !== undefined ? { caps: { timeoutMs: input.childTimeoutMs } } : {})
  });
  const budgetStateChanges = createReplayTraceBudgetStateChanges(events);
  const seed = createReplayTraceSeed(input.seed);

  return {
    schemaVersion: "1.0",
    runId: input.childRunId,
    protocol: protocolName,
    tier,
    modelProviderId,
    agentsUsed: agents,
    inputs,
    budget,
    budgetStateChanges,
    seed,
    protocolDecisions: [],
    providerCalls: [],
    finalOutput: {
      kind: "replay-trace-final-output",
      output: "",
      cost: emptyCost(),
      completedAt: new Date().toISOString(),
      transcript: createTranscriptLink([])
    },
    events,
    transcript: []
  };
}
1686
+
1687
/**
 * BUDGET-01 / D-08: stamp the `detail.reason` discriminator onto a
 * `code: "aborted"` error raised by a child sub-run whose parent signal has
 * aborted. Existing detail keys (e.g. `detail.status: "cancelled"` attached by
 * `createStreamCancellationError`) are carried over.
 *
 * The input is returned untouched when:
 *   - the parent signal is missing or has not aborted,
 *   - the error is not a DogpileError with `code: "aborted"`,
 *   - the error already carries a `detail.reason`.
 *
 * @param error - The value thrown by the child run.
 * @param parentSignal - The parent's abort signal, if any.
 * @returns Either the original value or a new DogpileError with `detail.reason`.
 */
function enrichAbortErrorWithParentReason(error: unknown, parentSignal: AbortSignal | undefined): unknown {
  if (!parentSignal?.aborted) {
    return error;
  }
  if (!(DogpileError.isInstance(error) && error.code === "aborted")) {
    return error;
  }

  const priorDetail = error.detail ?? {};
  if (priorDetail["reason"] !== undefined) {
    // An upstream classification wins; never overwrite it.
    return error;
  }

  const reason = classifyAbortReason(parentSignal.reason);
  const providerPart = error.providerId === undefined ? {} : { providerId: error.providerId };
  const causePart = error.cause === undefined ? {} : { cause: error.cause };
  return new DogpileError({
    code: "aborted",
    message: error.message,
    retryable: error.retryable ?? false,
    ...providerPart,
    detail: { ...priorDetail, reason },
    ...causePart
  });
}
1719
+
1720
/**
 * Attach a `detail.source` classification to a `provider-timeout`
 * DogpileError raised by a child sub-run.
 *
 * The input is returned untouched when it is not a DogpileError with
 * `code: "provider-timeout"`, or when it already carries a `detail.source`.
 *
 * @param error - The value thrown by the child run.
 * @param context - Timeout values passed through to `classifyChildTimeoutSource`.
 * @returns Either the original value or a new DogpileError with `detail.source`.
 */
function enrichProviderTimeoutSource(
  error: unknown,
  context: {
    readonly decisionTimeoutMs?: number;
    readonly engineDefaultTimeoutMs?: number;
  }
): unknown {
  if (!(DogpileError.isInstance(error) && error.code === "provider-timeout")) {
    return error;
  }

  const priorDetail = error.detail ?? {};
  if (priorDetail["source"] !== undefined) {
    // An existing classification is kept as-is.
    return error;
  }

  const source = classifyChildTimeoutSource(error, { ...context, isProviderError: true });
  const providerPart = error.providerId === undefined ? {} : { providerId: error.providerId };
  const causePart = error.cause === undefined ? {} : { cause: error.cause };
  return new DogpileError({
    code: "provider-timeout",
    message: error.message,
    retryable: error.retryable ?? true,
    ...providerPart,
    detail: { ...priorDetail, source },
    ...causePart
  });
}
1747
+
1748
/**
 * Build the `error` payload of a `sub-run-failed` event from whatever the
 * child run threw.
 *
 * A DogpileError contributes its `code`, `message`, optional `providerId` and
 * its detail, with `failedDecision` added to that detail. Any other value is
 * reported as `invalid-configuration` with its message (or string form) and a
 * detail holding only `failedDecision`.
 *
 * @param error - The value thrown by the child run.
 * @param failedDecision - JSON form of the delegate decision that failed.
 * @returns The serializable error payload.
 */
function errorPayloadFromUnknown(error: unknown, failedDecision: JsonObject): SubRunFailedEvent["error"] {
  if (!DogpileError.isInstance(error)) {
    return {
      code: "invalid-configuration",
      message: error instanceof Error ? error.message : String(error),
      detail: { failedDecision }
    };
  }

  const detail: JsonObject = { ...(error.detail ?? {}), failedDecision };
  const providerPart = error.providerId === undefined ? {} : { providerId: error.providerId };
  return {
    code: error.code,
    message: error.message,
    ...providerPart,
    detail
  };
}