@dogpile/sdk 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/CHANGELOG.md +201 -0
  2. package/README.md +1 -0
  3. package/dist/browser/index.js +2328 -237
  4. package/dist/browser/index.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +1 -0
  8. package/dist/index.js.map +1 -1
  9. package/dist/providers/openai-compatible.d.ts +11 -0
  10. package/dist/providers/openai-compatible.d.ts.map +1 -1
  11. package/dist/providers/openai-compatible.js +88 -2
  12. package/dist/providers/openai-compatible.js.map +1 -1
  13. package/dist/runtime/audit.d.ts +42 -0
  14. package/dist/runtime/audit.d.ts.map +1 -0
  15. package/dist/runtime/audit.js +73 -0
  16. package/dist/runtime/audit.js.map +1 -0
  17. package/dist/runtime/broadcast.d.ts.map +1 -1
  18. package/dist/runtime/broadcast.js +39 -36
  19. package/dist/runtime/broadcast.js.map +1 -1
  20. package/dist/runtime/cancellation.d.ts +26 -0
  21. package/dist/runtime/cancellation.d.ts.map +1 -1
  22. package/dist/runtime/cancellation.js +38 -1
  23. package/dist/runtime/cancellation.js.map +1 -1
  24. package/dist/runtime/coordinator.d.ts +79 -1
  25. package/dist/runtime/coordinator.d.ts.map +1 -1
  26. package/dist/runtime/coordinator.js +979 -61
  27. package/dist/runtime/coordinator.js.map +1 -1
  28. package/dist/runtime/decisions.d.ts +25 -3
  29. package/dist/runtime/decisions.d.ts.map +1 -1
  30. package/dist/runtime/decisions.js +241 -3
  31. package/dist/runtime/decisions.js.map +1 -1
  32. package/dist/runtime/defaults.d.ts +37 -1
  33. package/dist/runtime/defaults.d.ts.map +1 -1
  34. package/dist/runtime/defaults.js +359 -4
  35. package/dist/runtime/defaults.js.map +1 -1
  36. package/dist/runtime/engine.d.ts +17 -4
  37. package/dist/runtime/engine.d.ts.map +1 -1
  38. package/dist/runtime/engine.js +770 -35
  39. package/dist/runtime/engine.js.map +1 -1
  40. package/dist/runtime/health.d.ts +51 -0
  41. package/dist/runtime/health.d.ts.map +1 -0
  42. package/dist/runtime/health.js +85 -0
  43. package/dist/runtime/health.js.map +1 -0
  44. package/dist/runtime/introspection.d.ts +96 -0
  45. package/dist/runtime/introspection.d.ts.map +1 -0
  46. package/dist/runtime/introspection.js +31 -0
  47. package/dist/runtime/introspection.js.map +1 -0
  48. package/dist/runtime/metrics.d.ts +44 -0
  49. package/dist/runtime/metrics.d.ts.map +1 -0
  50. package/dist/runtime/metrics.js +12 -0
  51. package/dist/runtime/metrics.js.map +1 -0
  52. package/dist/runtime/model.d.ts.map +1 -1
  53. package/dist/runtime/model.js +34 -7
  54. package/dist/runtime/model.js.map +1 -1
  55. package/dist/runtime/provenance.d.ts +25 -0
  56. package/dist/runtime/provenance.d.ts.map +1 -0
  57. package/dist/runtime/provenance.js +13 -0
  58. package/dist/runtime/provenance.js.map +1 -0
  59. package/dist/runtime/sequential.d.ts.map +1 -1
  60. package/dist/runtime/sequential.js +47 -37
  61. package/dist/runtime/sequential.js.map +1 -1
  62. package/dist/runtime/shared.d.ts.map +1 -1
  63. package/dist/runtime/shared.js +39 -36
  64. package/dist/runtime/shared.js.map +1 -1
  65. package/dist/runtime/tracing.d.ts +31 -0
  66. package/dist/runtime/tracing.d.ts.map +1 -0
  67. package/dist/runtime/tracing.js +18 -0
  68. package/dist/runtime/tracing.js.map +1 -0
  69. package/dist/runtime/validation.d.ts +10 -0
  70. package/dist/runtime/validation.d.ts.map +1 -1
  71. package/dist/runtime/validation.js +73 -0
  72. package/dist/runtime/validation.js.map +1 -1
  73. package/dist/types/events.d.ts +339 -12
  74. package/dist/types/events.d.ts.map +1 -1
  75. package/dist/types/replay.d.ts +7 -1
  76. package/dist/types/replay.d.ts.map +1 -1
  77. package/dist/types.d.ts +255 -6
  78. package/dist/types.d.ts.map +1 -1
  79. package/dist/types.js.map +1 -1
  80. package/package.json +39 -1
  81. package/src/index.ts +15 -0
  82. package/src/providers/openai-compatible.ts +83 -3
  83. package/src/runtime/audit.ts +121 -0
  84. package/src/runtime/broadcast.ts +40 -37
  85. package/src/runtime/cancellation.ts +59 -1
  86. package/src/runtime/coordinator.ts +1221 -61
  87. package/src/runtime/decisions.ts +307 -4
  88. package/src/runtime/defaults.ts +389 -4
  89. package/src/runtime/engine.ts +1004 -35
  90. package/src/runtime/health.ts +136 -0
  91. package/src/runtime/introspection.ts +122 -0
  92. package/src/runtime/metrics.ts +45 -0
  93. package/src/runtime/model.ts +38 -6
  94. package/src/runtime/provenance.ts +43 -0
  95. package/src/runtime/sequential.ts +49 -38
  96. package/src/runtime/shared.ts +40 -37
  97. package/src/runtime/tracing.ts +35 -0
  98. package/src/runtime/validation.ts +81 -0
  99. package/src/types/events.ts +369 -12
  100. package/src/types/replay.ts +14 -1
  101. package/src/types.ts +279 -4
@@ -1,12 +1,16 @@
1
1
  import { DogpileError } from "../types.js";
2
2
  import { runBroadcast } from "./broadcast.js";
3
3
  import { runCoordinator } from "./coordinator.js";
4
- import { createReplayTraceFinalOutput, createReplayTraceBudgetStateChanges, canonicalizeRunResult, canonicalizeSerializable, createRunAccounting, createRunEventLog, createRunMetadata, createRunUsage, defaultAgents, normalizeProtocol, orderAgentsForTemperature, tierTemperature } from "./defaults.js";
4
+ import { addCost, createReplayTraceFinalOutput, createReplayTraceBudgetStateChanges, canonicalizeRunResult, canonicalizeSerializable, createRunAccounting, createRunEventLog, createRunMetadata, createRunUsage, defaultAgents, emptyCost, normalizeProtocol, orderAgentsForTemperature, recomputeAccountingFromTrace, resolveOnChildFailure, tierTemperature } from "./defaults.js";
5
+ import { computeHealth, DEFAULT_HEALTH_THRESHOLDS } from "./health.js";
5
6
  import { runSequential } from "./sequential.js";
6
7
  import { runShared } from "./shared.js";
7
- import { createAbortErrorFromSignal, createTimeoutError } from "./cancellation.js";
8
+ import { classifyChildTimeoutSource, createAbortErrorFromSignal, createEngineDeadlineTimeoutError, createTimeoutError } from "./cancellation.js";
8
9
  import { budget as budgetCondition } from "./termination.js";
9
- import { validateDogpileOptions, validateEngineOptions, validateMissionIntent } from "./validation.js";
10
+ import { validateDogpileOptions, validateEngineOptions, validateMissionIntent, validateProviderLocality, validateRunCallOptions } from "./validation.js";
11
+ import { DOGPILE_SPAN_NAMES } from "./tracing.js";
12
+ const DEFAULT_MAX_DEPTH = 4;
13
+ const DEFAULT_MAX_CONCURRENT_CHILDREN = 4;
10
14
  const defaultHighLevelProtocol = "sequential";
11
15
  const defaultHighLevelTier = "balanced";
12
16
  /**
@@ -27,9 +31,20 @@ export function createEngine(options) {
27
31
  const temperature = options.temperature ?? tierTemperature(options.tier);
28
32
  const agents = orderAgentsForTemperature(options.agents ?? defaultAgents(), temperature, options.seed);
29
33
  const terminate = options.terminate ?? (options.budget ? conditionFromBudget(options.budget) : undefined);
34
+ const engineMaxDepth = options.maxDepth ?? DEFAULT_MAX_DEPTH;
35
+ const engineMaxConcurrentChildren = options.maxConcurrentChildren ?? DEFAULT_MAX_CONCURRENT_CHILDREN;
36
+ const engineOnChildFailure = options.onChildFailure;
30
37
  return {
31
- run(intent) {
38
+ run(intent, runOptions) {
32
39
  validateMissionIntent(intent);
40
+ validateRunCallOptions(runOptions);
41
+ validateProviderLocality(options.model, "model");
42
+ const effectiveMaxDepth = Math.min(engineMaxDepth, runOptions?.maxDepth ?? Number.POSITIVE_INFINITY);
43
+ assertRunDoesNotRaiseEngineMax("maxConcurrentChildren", runOptions?.maxConcurrentChildren, engineMaxConcurrentChildren);
44
+ const effectiveMaxConcurrentChildren = Math.min(engineMaxConcurrentChildren, runOptions?.maxConcurrentChildren ?? Number.POSITIVE_INFINITY);
45
+ const onChildFailure = resolveOnChildFailure(runOptions?.onChildFailure, engineOnChildFailure);
46
+ const startedAtMs = Date.now();
47
+ const parentDeadlineMs = options.budget?.timeoutMs !== undefined ? startedAtMs + options.budget.timeoutMs : undefined;
33
48
  return runNonStreamingProtocol({
34
49
  intent,
35
50
  protocol,
@@ -43,11 +58,28 @@ export function createEngine(options) {
43
58
  ...(options.signal !== undefined ? { signal: options.signal } : {}),
44
59
  ...(terminate ? { terminate } : {}),
45
60
  ...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
46
- ...(options.evaluate ? { evaluate: options.evaluate } : {})
61
+ ...(options.evaluate ? { evaluate: options.evaluate } : {}),
62
+ ...(options.tracer ? { tracer: options.tracer } : {}),
63
+ ...(options.metricsHook ? { metricsHook: options.metricsHook } : {}),
64
+ ...(options.logger ? { logger: options.logger } : {}),
65
+ currentDepth: 0,
66
+ effectiveMaxDepth,
67
+ effectiveMaxConcurrentChildren,
68
+ onChildFailure,
69
+ ...(parentDeadlineMs !== undefined ? { parentDeadlineMs } : {}),
70
+ ...(options.defaultSubRunTimeoutMs !== undefined
71
+ ? { defaultSubRunTimeoutMs: options.defaultSubRunTimeoutMs }
72
+ : {})
47
73
  });
48
74
  },
49
- stream(intent) {
75
+ stream(intent, runOptions) {
50
76
  validateMissionIntent(intent);
77
+ validateRunCallOptions(runOptions);
78
+ validateProviderLocality(options.model, "model");
79
+ const effectiveMaxDepth = Math.min(engineMaxDepth, runOptions?.maxDepth ?? Number.POSITIVE_INFINITY);
80
+ assertRunDoesNotRaiseEngineMax("maxConcurrentChildren", runOptions?.maxConcurrentChildren, engineMaxConcurrentChildren);
81
+ const effectiveMaxConcurrentChildren = Math.min(engineMaxConcurrentChildren, runOptions?.maxConcurrentChildren ?? Number.POSITIVE_INFINITY);
82
+ const onChildFailure = resolveOnChildFailure(runOptions?.onChildFailure, engineOnChildFailure);
51
83
  const pendingEvents = [];
52
84
  const pendingResolvers = [];
53
85
  const emittedEvents = [];
@@ -61,7 +93,10 @@ export function createEngine(options) {
61
93
  const abortRace = createAbortRace(abortController.signal, options.model.id);
62
94
  let complete = false;
63
95
  let lastRunId = "";
96
+ let rootRunId;
64
97
  let pendingFinalEvent;
98
+ let activeAbortDrain;
99
+ const failureInstancesByChildRunId = new Map();
65
100
  let status = "running";
66
101
  let resolveResult;
67
102
  let rejectResult;
@@ -113,6 +148,8 @@ export function createEngine(options) {
113
148
  return;
114
149
  }
115
150
  try {
151
+ const streamStartedAtMs = Date.now();
152
+ const streamParentDeadlineMs = options.budget?.timeoutMs !== undefined ? streamStartedAtMs + options.budget.timeoutMs : undefined;
116
153
  const baseResult = await abortRace.run(runProtocol({
117
154
  intent,
118
155
  protocol,
@@ -125,21 +162,45 @@ export function createEngine(options) {
125
162
  ...(options.seed !== undefined ? { seed: options.seed } : {}),
126
163
  signal: abortController.signal,
127
164
  ...(terminate ? { terminate } : {}),
165
+ currentDepth: 0,
166
+ effectiveMaxDepth,
167
+ effectiveMaxConcurrentChildren,
168
+ onChildFailure,
169
+ ...(streamParentDeadlineMs !== undefined ? { parentDeadlineMs: streamParentDeadlineMs } : {}),
170
+ ...(options.defaultSubRunTimeoutMs !== undefined
171
+ ? { defaultSubRunTimeoutMs: options.defaultSubRunTimeoutMs }
172
+ : {}),
173
+ ...(options.tracer ? { tracer: options.tracer } : {}),
174
+ ...(options.metricsHook ? { metricsHook: options.metricsHook } : {}),
175
+ ...(options.logger ? { logger: options.logger } : {}),
176
+ streamEvents: true,
128
177
  emit(event) {
129
178
  if (status !== "running") {
130
179
  return;
131
180
  }
181
+ const parentRunIds = event.parentRunIds;
182
+ if (rootRunId === undefined && parentRunIds === undefined) {
183
+ rootRunId = event.runId;
184
+ }
132
185
  lastRunId = event.runId;
133
- if (event.type === "final") {
186
+ if (event.type === "final" && event.runId === rootRunId) {
134
187
  pendingFinalEvent = event;
135
188
  return;
136
189
  }
137
190
  publish(event);
138
- }
191
+ },
192
+ registerAbortDrain(drain) {
193
+ activeAbortDrain = drain;
194
+ },
195
+ failureInstancesByChildRunId
139
196
  }));
140
197
  if (status !== "running") {
141
198
  return;
142
199
  }
200
+ const terminalThrow = resolveRuntimeTerminalThrow(baseResult.trace, failureInstancesByChildRunId);
201
+ if (terminalThrow) {
202
+ throw terminalThrow;
203
+ }
143
204
  const finalizedResult = await abortRace.run(applyRunEvaluation(baseResult, options.evaluate));
144
205
  if (status !== "running") {
145
206
  return;
@@ -161,6 +222,10 @@ export function createEngine(options) {
161
222
  }
162
223
  const runtimeError = timeoutLifecycle.translateError(error);
163
224
  status = isCancellationError(runtimeError) ? "cancelled" : "failed";
225
+ if (shouldPublishAborted(runtimeError)) {
226
+ activeAbortDrain?.(runtimeError);
227
+ publish(createStreamAbortedEvent(runtimeError, lastRunId));
228
+ }
164
229
  publish(createStreamErrorEvent(runtimeError, lastRunId));
165
230
  closeStream();
166
231
  rejectResult(runtimeError);
@@ -171,9 +236,11 @@ export function createEngine(options) {
171
236
  return;
172
237
  }
173
238
  const error = createStreamCancellationError(options.model.id, cause);
174
- status = "cancelled";
175
239
  abortController.abort(error);
240
+ activeAbortDrain?.(error);
241
+ publish(createStreamAbortedEvent(error, lastRunId));
176
242
  publish(createStreamErrorEvent(error, lastRunId));
243
+ status = "cancelled";
177
244
  closeStream();
178
245
  rejectResult(error);
179
246
  }
@@ -182,6 +249,7 @@ export function createEngine(options) {
182
249
  return;
183
250
  }
184
251
  complete = true;
252
+ failureInstancesByChildRunId.clear();
185
253
  removeCallerAbortListener();
186
254
  timeoutLifecycle.cleanup();
187
255
  abortRace.cleanup();
@@ -242,7 +310,8 @@ function createNonStreamingAbortLifecycle(options) {
242
310
  const timeoutLifecycle = createTimeoutAbortLifecycle({
243
311
  abortController,
244
312
  timeoutMs: options.timeoutMs,
245
- providerId: options.providerId
313
+ providerId: options.providerId,
314
+ timeoutErrorSource: options.timeoutErrorSource ?? "runtime"
246
315
  });
247
316
  const abortRace = createAbortRace(abortController.signal, options.providerId);
248
317
  const removeCallerAbortListener = wireCallerAbortSignal(options.callerSignal, abortController, () => {
@@ -272,7 +341,13 @@ function createTimeoutAbortLifecycle(options) {
272
341
  cleanup() { }
273
342
  };
274
343
  }
275
- const timeoutError = createTimeoutError(options.providerId, options.timeoutMs);
344
+ const timeoutSource = classifyChildTimeoutSource(undefined, {
345
+ ...(options.timeoutErrorSource === "engine" ? { engineDefaultTimeoutMs: options.timeoutMs } : {}),
346
+ isProviderError: false
347
+ });
348
+ const timeoutError = options.timeoutErrorSource === "engine" && timeoutSource === "engine"
349
+ ? createEngineDeadlineTimeoutError(options.providerId, options.timeoutMs)
350
+ : createTimeoutError(options.providerId, options.timeoutMs);
276
351
  const timeoutId = setTimeout(() => {
277
352
  options.abortController.abort(timeoutError);
278
353
  }, options.timeoutMs);
@@ -354,6 +429,25 @@ function timeoutMsFromTermination(condition) {
354
429
  function readAbortSignalReason(signal) {
355
430
  return signal?.aborted ? signal.reason : undefined;
356
431
  }
432
+ function createStreamAbortedEvent(error, runId) {
433
+ return {
434
+ type: "aborted",
435
+ runId,
436
+ at: new Date().toISOString(),
437
+ reason: streamAbortedReason(error)
438
+ };
439
+ }
440
+ function shouldPublishAborted(error) {
441
+ return DogpileError.isInstance(error) && (error.code === "aborted" || error.code === "timeout");
442
+ }
443
+ function streamAbortedReason(error) {
444
+ if (DogpileError.isInstance(error)) {
445
+ if (error.code === "timeout" || error.detail?.["reason"] === "timeout") {
446
+ return "timeout";
447
+ }
448
+ }
449
+ return "parent-aborted";
450
+ }
357
451
  function createStreamErrorEvent(error, runId) {
358
452
  if (DogpileError.isInstance(error)) {
359
453
  return {
@@ -399,11 +493,398 @@ function dogpileErrorStreamDetail(error) {
399
493
  }
400
494
  return detail;
401
495
  }
496
+ function openRunTracing(options) {
497
+ if (!options.tracer) {
498
+ return undefined;
499
+ }
500
+ const runSpan = options.tracer.startSpan(DOGPILE_SPAN_NAMES.RUN, {
501
+ ...(options.parentSpan ? { parent: options.parentSpan } : {}),
502
+ attributes: {
503
+ "dogpile.run.protocol": options.protocolKind,
504
+ "dogpile.run.tier": String(options.tier),
505
+ "dogpile.run.intent": options.intent.slice(0, 200)
506
+ }
507
+ });
508
+ return {
509
+ tracer: options.tracer,
510
+ runSpan,
511
+ subRunSpans: new Map(),
512
+ agentTurnSpans: new Map(),
513
+ modelCallSpans: new Map(),
514
+ pendingModelRequests: new Map(),
515
+ agentTurnCounters: new Map(),
516
+ turnAccumByAgent: new Map(),
517
+ agentIds: new Set(),
518
+ turnCount: 0,
519
+ lastCost: emptyCost()
520
+ };
521
+ }
522
+ function openRunMetrics(options) {
523
+ if (!options.metricsHook) {
524
+ return undefined;
525
+ }
526
+ return {
527
+ metricsHook: options.metricsHook,
528
+ logger: options.logger,
529
+ startedAtMs: Date.now(),
530
+ subRunStartTimes: new Map(),
531
+ totalCost: emptyCost(),
532
+ nestedCost: emptyCost(),
533
+ turns: 0
534
+ };
535
+ }
536
+ function routeMetricsError(err, logger) {
537
+ const msg = err instanceof Error ? err.message : String(err);
538
+ try {
539
+ if (logger !== undefined) {
540
+ logger.error("dogpile:metricsHook threw", { error: msg });
541
+ }
542
+ else {
543
+ console.error("dogpile:metricsHook threw", { error: msg });
544
+ }
545
+ }
546
+ catch {
547
+ // A logger that throws from error() cannot be helped.
548
+ }
549
+ }
550
+ function fireHook(callback, snapshot, logger) {
551
+ if (!callback) {
552
+ return;
553
+ }
554
+ try {
555
+ const result = callback(snapshot);
556
+ if (result && typeof result.catch === "function") {
557
+ result.catch((err) => {
558
+ routeMetricsError(err, logger);
559
+ });
560
+ }
561
+ }
562
+ catch (err) {
563
+ routeMetricsError(err, logger);
564
+ }
565
+ }
566
+ function buildRunSnapshot(result, startedAtMs) {
567
+ const nestedCosts = nestedSubRunCosts(result);
568
+ const budgetStopEvent = result.trace.events.find((event) => event.type === "budget-stop");
569
+ const outcome = budgetStopEvent !== undefined ? "budget-stopped" : "completed";
570
+ const totalInputTokens = result.cost.inputTokens;
571
+ const totalOutputTokens = result.cost.outputTokens;
572
+ const totalCostUsd = result.cost.usd;
573
+ const ownInputTokens = totalInputTokens - nestedCosts.reduce((sum, cost) => sum + cost.inputTokens, 0);
574
+ const ownOutputTokens = totalOutputTokens - nestedCosts.reduce((sum, cost) => sum + cost.outputTokens, 0);
575
+ const ownCostUsd = totalCostUsd - nestedCosts.reduce((sum, cost) => sum + cost.usd, 0);
576
+ const turns = result.trace.events.filter((event) => event.type === "agent-turn").length;
577
+ return {
578
+ outcome,
579
+ inputTokens: ownInputTokens,
580
+ outputTokens: ownOutputTokens,
581
+ costUsd: ownCostUsd,
582
+ totalInputTokens,
583
+ totalOutputTokens,
584
+ totalCostUsd,
585
+ turns,
586
+ durationMs: Date.now() - startedAtMs
587
+ };
588
+ }
589
+ function buildSubRunSnapshot(subResult, durationMs) {
590
+ const nestedCosts = nestedSubRunCosts(subResult);
591
+ const budgetStopEvent = subResult.trace.events.find((event) => event.type === "budget-stop");
592
+ const outcome = budgetStopEvent !== undefined ? "budget-stopped" : "completed";
593
+ const totalInputTokens = subResult.cost.inputTokens;
594
+ const totalOutputTokens = subResult.cost.outputTokens;
595
+ const totalCostUsd = subResult.cost.usd;
596
+ const ownInputTokens = totalInputTokens - nestedCosts.reduce((sum, cost) => sum + cost.inputTokens, 0);
597
+ const ownOutputTokens = totalOutputTokens - nestedCosts.reduce((sum, cost) => sum + cost.outputTokens, 0);
598
+ const ownCostUsd = totalCostUsd - nestedCosts.reduce((sum, cost) => sum + cost.usd, 0);
599
+ const turns = subResult.trace.events.filter((event) => event.type === "agent-turn").length;
600
+ return {
601
+ outcome,
602
+ inputTokens: ownInputTokens,
603
+ outputTokens: ownOutputTokens,
604
+ costUsd: ownCostUsd,
605
+ totalInputTokens,
606
+ totalOutputTokens,
607
+ totalCostUsd,
608
+ turns,
609
+ durationMs
610
+ };
611
+ }
612
+ function nestedSubRunCosts(result) {
613
+ return result.trace.events.flatMap((event) => {
614
+ if (event.type === "sub-run-completed") {
615
+ return [event.subResult.cost];
616
+ }
617
+ if (event.type === "sub-run-failed") {
618
+ return [event.partialCost];
619
+ }
620
+ return [];
621
+ });
622
+ }
623
+ function subtractCost(total, nested) {
624
+ return {
625
+ usd: total.usd - nested.usd,
626
+ inputTokens: total.inputTokens - nested.inputTokens,
627
+ outputTokens: total.outputTokens - nested.outputTokens,
628
+ totalTokens: total.totalTokens - nested.totalTokens
629
+ };
630
+ }
631
+ function handleMetricsEvent(state, event) {
632
+ const parentRunIds = event.parentRunIds;
633
+ if (parentRunIds !== undefined) {
634
+ return;
635
+ }
636
+ switch (event.type) {
637
+ case "agent-turn": {
638
+ state.totalCost = event.cost;
639
+ state.turns += 1;
640
+ break;
641
+ }
642
+ case "broadcast":
643
+ case "budget-stop":
644
+ case "final": {
645
+ state.totalCost = event.cost;
646
+ break;
647
+ }
648
+ case "sub-run-started": {
649
+ state.subRunStartTimes.set(event.childRunId, Date.now());
650
+ break;
651
+ }
652
+ case "sub-run-completed": {
653
+ state.totalCost = addCost(state.totalCost, event.subResult.cost);
654
+ state.nestedCost = addCost(state.nestedCost, event.subResult.cost);
655
+ const startMs = state.subRunStartTimes.get(event.childRunId);
656
+ const durationMs = startMs !== undefined ? Date.now() - startMs : 0;
657
+ state.subRunStartTimes.delete(event.childRunId);
658
+ const snapshot = buildSubRunSnapshot(event.subResult, durationMs);
659
+ fireHook(state.metricsHook.onSubRunComplete, snapshot, state.logger);
660
+ break;
661
+ }
662
+ case "sub-run-failed": {
663
+ state.totalCost = addCost(state.totalCost, event.partialCost);
664
+ state.nestedCost = addCost(state.nestedCost, event.partialCost);
665
+ state.subRunStartTimes.delete(event.childRunId);
666
+ break;
667
+ }
668
+ default:
669
+ break;
670
+ }
671
+ }
672
+ function closeRunMetrics(state, result) {
673
+ if (result !== undefined) {
674
+ const snapshot = buildRunSnapshot(result, state.startedAtMs);
675
+ fireHook(state.metricsHook.onRunComplete, snapshot, state.logger);
676
+ return;
677
+ }
678
+ const ownCost = subtractCost(state.totalCost, state.nestedCost);
679
+ const snapshot = {
680
+ outcome: "aborted",
681
+ inputTokens: ownCost.inputTokens,
682
+ outputTokens: ownCost.outputTokens,
683
+ costUsd: ownCost.usd,
684
+ totalInputTokens: state.totalCost.inputTokens,
685
+ totalOutputTokens: state.totalCost.outputTokens,
686
+ totalCostUsd: state.totalCost.usd,
687
+ turns: state.turns,
688
+ durationMs: Date.now() - state.startedAtMs
689
+ };
690
+ fireHook(state.metricsHook.onRunComplete, snapshot, state.logger);
691
+ }
692
+ function handleTracingEvent(state, event) {
693
+ const parentRunIds = event.parentRunIds;
694
+ if (parentRunIds !== undefined) {
695
+ return;
696
+ }
697
+ if (state.runId === undefined) {
698
+ state.runId = event.runId;
699
+ state.runSpan.setAttribute("dogpile.run.id", event.runId);
700
+ }
701
+ switch (event.type) {
702
+ case "model-request": {
703
+ state.pendingModelRequests.set(event.callId, event);
704
+ state.agentIds.add(event.agentId);
705
+ if (!state.agentTurnSpans.has(event.agentId)) {
706
+ const turnNumber = (state.agentTurnCounters.get(event.agentId) ?? 0) + 1;
707
+ state.agentTurnCounters.set(event.agentId, turnNumber);
708
+ const turnParent = state.subRunSpans.get(event.runId) ?? state.runSpan;
709
+ const turnSpan = state.tracer.startSpan(DOGPILE_SPAN_NAMES.AGENT_TURN, {
710
+ parent: turnParent,
711
+ attributes: {
712
+ "dogpile.agent.id": event.agentId,
713
+ "dogpile.agent.role": event.role,
714
+ "dogpile.turn.number": turnNumber,
715
+ "dogpile.model.id": event.modelId
716
+ }
717
+ });
718
+ state.agentTurnSpans.set(event.agentId, turnSpan);
719
+ }
720
+ const callParent = state.agentTurnSpans.get(event.agentId) ??
721
+ state.subRunSpans.get(event.runId) ??
722
+ state.runSpan;
723
+ const callSpan = state.tracer.startSpan(DOGPILE_SPAN_NAMES.MODEL_CALL, {
724
+ parent: callParent,
725
+ attributes: {
726
+ "dogpile.model.id": event.modelId,
727
+ "dogpile.call.id": event.callId,
728
+ "dogpile.provider.id": event.providerId
729
+ }
730
+ });
731
+ state.modelCallSpans.set(event.callId, callSpan);
732
+ break;
733
+ }
734
+ case "model-response": {
735
+ const span = state.modelCallSpans.get(event.callId);
736
+ if (span) {
737
+ const inputTokens = event.response.usage?.inputTokens ?? 0;
738
+ const outputTokens = event.response.usage?.outputTokens ?? 0;
739
+ const responseCost = {
740
+ usd: event.response.costUsd ?? 0,
741
+ inputTokens,
742
+ outputTokens,
743
+ totalTokens: event.response.usage?.totalTokens ?? inputTokens + outputTokens
744
+ };
745
+ span.setAttribute("dogpile.model.input_tokens", inputTokens);
746
+ span.setAttribute("dogpile.model.output_tokens", outputTokens);
747
+ if (event.response.costUsd !== undefined) {
748
+ span.setAttribute("dogpile.model.cost_usd", event.response.costUsd);
749
+ }
750
+ span.setStatus("ok");
751
+ span.end();
752
+ state.modelCallSpans.delete(event.callId);
753
+ const accum = state.turnAccumByAgent.get(event.agentId) ?? {
754
+ inputTokens: 0,
755
+ outputTokens: 0,
756
+ costUsd: 0
757
+ };
758
+ accum.inputTokens += inputTokens;
759
+ accum.outputTokens += outputTokens;
760
+ accum.costUsd += responseCost.usd;
761
+ state.turnAccumByAgent.set(event.agentId, accum);
762
+ state.lastCost = addCost(state.lastCost, responseCost);
763
+ }
764
+ state.pendingModelRequests.delete(event.callId);
765
+ break;
766
+ }
767
+ case "agent-turn": {
768
+ state.agentIds.add(event.agentId);
769
+ state.turnCount += 1;
770
+ state.lastCost = event.cost;
771
+ const turnSpan = state.agentTurnSpans.get(event.agentId);
772
+ if (turnSpan) {
773
+ turnSpan.setAttribute("dogpile.agent.role", event.role);
774
+ const accum = state.turnAccumByAgent.get(event.agentId);
775
+ turnSpan.setAttribute("dogpile.turn.cost_usd", accum?.costUsd ?? 0);
776
+ turnSpan.setAttribute("dogpile.turn.input_tokens", accum?.inputTokens ?? 0);
777
+ turnSpan.setAttribute("dogpile.turn.output_tokens", accum?.outputTokens ?? 0);
778
+ turnSpan.setStatus("ok");
779
+ turnSpan.end();
780
+ state.agentTurnSpans.delete(event.agentId);
781
+ }
782
+ state.turnAccumByAgent.delete(event.agentId);
783
+ break;
784
+ }
785
+ case "broadcast":
786
+ case "budget-stop":
787
+ case "final": {
788
+ state.lastCost = event.cost;
789
+ break;
790
+ }
791
+ case "sub-run-started": {
792
+ const span = state.tracer.startSpan(DOGPILE_SPAN_NAMES.SUB_RUN, {
793
+ parent: state.runSpan,
794
+ attributes: {
795
+ "dogpile.sub_run.child_run_id": event.childRunId,
796
+ "dogpile.sub_run.parent_run_id": event.parentRunId,
797
+ "dogpile.sub_run.depth": event.depth
798
+ }
799
+ });
800
+ state.subRunSpans.set(event.childRunId, span);
801
+ break;
802
+ }
803
+ case "sub-run-completed": {
804
+ const span = state.subRunSpans.get(event.childRunId);
805
+ if (span) {
806
+ span.setStatus("ok");
807
+ span.end();
808
+ state.subRunSpans.delete(event.childRunId);
809
+ }
810
+ break;
811
+ }
812
+ case "sub-run-failed": {
813
+ const span = state.subRunSpans.get(event.childRunId);
814
+ if (span) {
815
+ span.setStatus("error", event.error.message);
816
+ span.end();
817
+ state.subRunSpans.delete(event.childRunId);
818
+ }
819
+ break;
820
+ }
821
+ default:
822
+ break;
823
+ }
824
+ }
825
+ function closeRunTracing(state, result, error) {
826
+ if (error !== undefined) {
827
+ if (state.runId !== undefined) {
828
+ state.runSpan.setAttribute("dogpile.run.id", state.runId);
829
+ }
830
+ state.runSpan.setAttribute("dogpile.run.agent_count", state.agentIds.size);
831
+ state.runSpan.setAttribute("dogpile.run.turn_count", state.turnCount);
832
+ state.runSpan.setAttribute("dogpile.run.cost_usd", state.lastCost.usd);
833
+ state.runSpan.setAttribute("dogpile.run.input_tokens", state.lastCost.inputTokens);
834
+ state.runSpan.setAttribute("dogpile.run.output_tokens", state.lastCost.outputTokens);
835
+ state.runSpan.setAttribute("dogpile.run.outcome", "aborted");
836
+ state.runSpan.setStatus("error", error instanceof Error ? error.message : String(error));
837
+ closeOpenTracingSpans(state);
838
+ state.runSpan.end();
839
+ return;
840
+ }
841
+ if (result === undefined) {
842
+ closeOpenTracingSpans(state);
843
+ state.runSpan.end();
844
+ return;
845
+ }
846
+ const budgetStopEvent = result.trace.events.find((event) => event.type === "budget-stop");
847
+ const terminationReason = budgetStopEvent?.reason;
848
+ const outcome = terminationReason !== undefined ? "budget-stopped" : "completed";
849
+ state.runSpan.setAttribute("dogpile.run.id", result.trace.runId);
850
+ state.runSpan.setAttribute("dogpile.run.agent_count", result.trace.agentsUsed.length);
851
+ state.runSpan.setAttribute("dogpile.run.turn_count", result.trace.events.filter((event) => event.type === "agent-turn").length);
852
+ state.runSpan.setAttribute("dogpile.run.cost_usd", result.cost.usd);
853
+ state.runSpan.setAttribute("dogpile.run.input_tokens", result.cost.inputTokens);
854
+ state.runSpan.setAttribute("dogpile.run.output_tokens", result.cost.outputTokens);
855
+ state.runSpan.setAttribute("dogpile.run.outcome", outcome);
856
+ if (terminationReason !== undefined) {
857
+ state.runSpan.setAttribute("dogpile.run.termination_reason", terminationReason);
858
+ }
859
+ state.runSpan.setStatus("ok");
860
+ closeOpenTracingSpans(state);
861
+ state.runSpan.end();
862
+ }
863
+ function closeOpenTracingSpans(state) {
864
+ for (const span of state.modelCallSpans.values()) {
865
+ span.end();
866
+ }
867
+ state.modelCallSpans.clear();
868
+ for (const span of state.agentTurnSpans.values()) {
869
+ span.end();
870
+ }
871
+ state.agentTurnSpans.clear();
872
+ for (const span of state.subRunSpans.values()) {
873
+ span.end();
874
+ }
875
+ state.subRunSpans.clear();
876
+ }
402
877
  async function runNonStreamingProtocol(options) {
878
+ const failureInstancesByChildRunId = new Map();
403
879
  const abortLifecycle = createNonStreamingAbortLifecycle({
404
880
  callerSignal: options.signal,
405
881
  timeoutMs: runtimeTimeoutMs(options),
406
- providerId: options.model.id
882
+ providerId: options.model.id,
883
+ timeoutErrorSource: options.currentDepth !== undefined &&
884
+ options.currentDepth > 0 &&
885
+ options.parentDeadlineMs === undefined
886
+ ? "engine"
887
+ : "runtime"
407
888
  });
408
889
  try {
409
890
  const emittedEvents = [];
@@ -412,7 +893,8 @@ async function runNonStreamingProtocol(options) {
412
893
  ...(abortLifecycle.signal !== undefined ? { signal: abortLifecycle.signal } : {}),
413
894
  emit(event) {
414
895
  emittedEvents.push(event);
415
- }
896
+ },
897
+ failureInstancesByChildRunId
416
898
  }));
417
899
  const events = emittedEvents.length > 0 ? emittedEvents : result.trace.events;
418
900
  const trace = {
@@ -431,14 +913,20 @@ async function runNonStreamingProtocol(options) {
431
913
  events
432
914
  }),
433
915
  eventLog: createRunEventLog(trace.runId, trace.protocol, events),
434
- trace
916
+ trace,
917
+ health: computeHealth(trace, DEFAULT_HEALTH_THRESHOLDS)
435
918
  };
919
+ const terminalThrow = resolveRuntimeTerminalThrow(runResult.trace, failureInstancesByChildRunId);
920
+ if (terminalThrow) {
921
+ throw terminalThrow;
922
+ }
436
923
  return canonicalizeRunResult(await abortLifecycle.run(applyRunEvaluation(runResult, options.evaluate)));
437
924
  }
438
925
  catch (error) {
439
926
  throw abortLifecycle.translateError(error);
440
927
  }
441
928
  finally {
929
+ failureInstancesByChildRunId.clear();
442
930
  abortLifecycle.cleanup();
443
931
  }
444
932
  }
@@ -472,7 +960,56 @@ function finalEventWithEvaluation(event, evaluation) {
472
960
  evaluation
473
961
  };
474
962
  }
475
- function runProtocol(options) {
963
/**
 * Run a protocol with optional tracing and metrics wrapped around it.
 *
 * Opens run tracing and run metrics from the supplied options, forwards every
 * emitted event to the tracing handler, the metrics handler, and the caller's
 * `emit` callback (in that order), then delegates to `runProtocolInner`.
 *
 * @param options - Protocol run options. `tracer`, `parentSpan`,
 *   `metricsHook`, `logger`, and `emit` are all optional.
 * @returns The result produced by `runProtocolInner`.
 * @throws Re-throws whatever `runProtocolInner` (or a close step on the
 *   success path) throws, after closing tracing with the error.
 */
async function runProtocol(options) {
    // Only forward tracer/parentSpan when they were actually supplied.
    const tracingInput = {};
    if (options.tracer) {
        tracingInput.tracer = options.tracer;
    }
    if (options.parentSpan) {
        tracingInput.parentSpan = options.parentSpan;
    }
    tracingInput.intent = options.intent;
    tracingInput.protocolKind = options.protocol.kind;
    tracingInput.tier = options.tier;
    const tracing = openRunTracing(tracingInput);

    const metricsInput = {};
    if (options.metricsHook) {
        metricsInput.metricsHook = options.metricsHook;
    }
    if (options.logger) {
        metricsInput.logger = options.logger;
    }
    const metrics = openRunMetrics(metricsInput);

    // Leave emit undefined when nobody is listening so the protocol can skip it.
    let emitForProtocol;
    if (tracing || metrics || options.emit) {
        emitForProtocol = (event) => {
            if (tracing) {
                handleTracingEvent(tracing, event);
            }
            if (metrics) {
                handleMetricsEvent(metrics, event);
            }
            options.emit?.(event);
        };
    }

    // With tracing on, expose the sub-run span registry to the inner protocol.
    const protocolOptions = tracing
        ? { ...options, subRunSpansByChildId: tracing.subRunSpans }
        : options;

    // Metrics are closed only for runs whose depth is 0 or unspecified.
    const closesMetrics = () => options.currentDepth === 0 || options.currentDepth === undefined;

    try {
        const result = await runProtocolInner(protocolOptions, emitForProtocol);
        if (tracing) {
            closeRunTracing(tracing, result);
        }
        if (metrics && closesMetrics()) {
            closeRunMetrics(metrics, result);
        }
        return result;
    }
    catch (error) {
        if (tracing) {
            closeRunTracing(tracing, undefined, error);
        }
        if (metrics && closesMetrics()) {
            closeRunMetrics(metrics, undefined);
        }
        throw error;
    }
}
1012
+ function runProtocolInner(options, emitForProtocol) {
476
1013
  switch (options.protocol.kind) {
477
1014
  case "sequential":
478
1015
  return runSequential({
@@ -488,7 +1025,7 @@ function runProtocol(options) {
488
1025
  ...(options.signal !== undefined ? { signal: options.signal } : {}),
489
1026
  ...(options.terminate ? { terminate: options.terminate } : {}),
490
1027
  ...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
491
- ...(options.emit ? { emit: options.emit } : {})
1028
+ ...(emitForProtocol ? { emit: emitForProtocol } : {})
492
1029
  });
493
1030
  case "broadcast":
494
1031
  return runBroadcast({
@@ -504,7 +1041,7 @@ function runProtocol(options) {
504
1041
  ...(options.signal !== undefined ? { signal: options.signal } : {}),
505
1042
  ...(options.terminate ? { terminate: options.terminate } : {}),
506
1043
  ...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
507
- ...(options.emit ? { emit: options.emit } : {})
1044
+ ...(emitForProtocol ? { emit: emitForProtocol } : {})
508
1045
  });
509
1046
  case "coordinator":
510
1047
  return runCoordinator({
@@ -520,7 +1057,31 @@ function runProtocol(options) {
520
1057
  ...(options.signal !== undefined ? { signal: options.signal } : {}),
521
1058
  ...(options.terminate ? { terminate: options.terminate } : {}),
522
1059
  ...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
523
- ...(options.emit ? { emit: options.emit } : {})
1060
+ ...(emitForProtocol ? { emit: emitForProtocol } : {}),
1061
+ ...(options.streamEvents !== undefined ? { streamEvents: options.streamEvents } : {}),
1062
+ currentDepth: options.currentDepth ?? 0,
1063
+ effectiveMaxDepth: options.effectiveMaxDepth ?? Infinity,
1064
+ effectiveMaxConcurrentChildren: options.effectiveMaxConcurrentChildren ?? DEFAULT_MAX_CONCURRENT_CHILDREN,
1065
+ onChildFailure: options.onChildFailure ?? "continue",
1066
+ ...(options.parentDeadlineMs !== undefined ? { parentDeadlineMs: options.parentDeadlineMs } : {}),
1067
+ ...(options.defaultSubRunTimeoutMs !== undefined
1068
+ ? { defaultSubRunTimeoutMs: options.defaultSubRunTimeoutMs }
1069
+ : {}),
1070
+ ...(options.registerAbortDrain !== undefined ? { registerAbortDrain: options.registerAbortDrain } : {}),
1071
+ ...(options.failureInstancesByChildRunId !== undefined
1072
+ ? { failureInstancesByChildRunId: options.failureInstancesByChildRunId }
1073
+ : {}),
1074
+ runProtocol: (childInput) => {
1075
+ const { runId: childRunId, ...childProtocolInput } = childInput;
1076
+ const childParent = options.subRunSpansByChildId?.get(childRunId) ?? options.parentSpan;
1077
+ return runProtocol({
1078
+ ...childProtocolInput,
1079
+ protocol: normalizeProtocol(childProtocolInput.protocol),
1080
+ ...(options.tracer ? { tracer: options.tracer } : {}),
1081
+ ...(childParent ? { parentSpan: childParent } : {}),
1082
+ ...(options.logger ? { logger: options.logger } : {})
1083
+ });
1084
+ }
524
1085
  });
525
1086
  case "shared":
526
1087
  return runShared({
@@ -536,7 +1097,7 @@ function runProtocol(options) {
536
1097
  ...(options.signal !== undefined ? { signal: options.signal } : {}),
537
1098
  ...(options.terminate ? { terminate: options.terminate } : {}),
538
1099
  ...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
539
- ...(options.emit ? { emit: options.emit } : {})
1100
+ ...(emitForProtocol ? { emit: emitForProtocol } : {})
540
1101
  });
541
1102
  }
542
1103
  }
@@ -584,13 +1145,29 @@ export function stream(options) {
584
1145
  * the ergonomic {@link RunResult} wrapper from the JSON-serializable
585
1146
  * {@link Trace} returned by a previous `run()`, `stream()`, or
586
1147
  * `Dogpile.pile()` call.
1148
+ *
1149
+ * Tracing and metrics: replay is intentionally tracing-free and metrics-free.
1150
+ * Even when an engine instance has been configured with a `tracer` or
1151
+ * `metricsHook` on its `EngineOptions`, calling this function emits no spans
1152
+ * or callbacks — replaying historical events with current timestamps would
1153
+ * confuse observability backends. See `docs/developer-usage.md`.
587
1154
  */
1155
+ // Tracing/metrics-free: replay never uses EngineOptions tracer or metricsHook.
588
1156
  export function replay(trace) {
589
1157
  const cost = trace.finalOutput.cost;
590
1158
  const lastEvent = trace.events.at(-1);
1159
+ // D-08 / D-10: rebuild accounting recursively from the saved trace and
1160
+ // verify every embedded sub-run's recorded accounting matches what the
1161
+ // child trace recomputes. Mismatches throw `invalid-configuration` with
1162
+ // `detail.reason: "trace-accounting-mismatch"`. No provider invocation.
1163
+ const accounting = recomputeAccountingFromTrace(trace);
1164
+ const replayThrow = resolveReplayTerminalThrow(trace);
1165
+ if (replayThrow) {
1166
+ throw replayThrow;
1167
+ }
591
1168
  const baseResult = {
592
1169
  output: trace.finalOutput.output,
593
- eventLog: createRunEventLog(trace.runId, trace.protocol, trace.events),
1170
+ eventLog: createRunEventLog(trace.runId, trace.protocol, synthesizeProviderEvents(trace, trace.providerCalls)),
594
1171
  trace,
595
1172
  transcript: trace.transcript,
596
1173
  usage: createRunUsage(cost),
@@ -602,14 +1179,9 @@ export function replay(trace) {
602
1179
  agentsUsed: trace.agentsUsed,
603
1180
  events: trace.events
604
1181
  }),
605
- accounting: createRunAccounting({
606
- tier: trace.tier,
607
- ...(trace.budget.caps ? { budget: trace.budget.caps } : {}),
608
- ...(trace.budget.termination ? { termination: trace.budget.termination } : {}),
609
- cost,
610
- events: trace.events
611
- }),
612
- cost
1182
+ accounting,
1183
+ cost,
1184
+ health: computeHealth(trace, DEFAULT_HEALTH_THRESHOLDS)
613
1185
  };
614
1186
  if (lastEvent?.type !== "final") {
615
1187
  return baseResult;
@@ -620,17 +1192,143 @@ export function replay(trace) {
620
1192
  ...(lastEvent.evaluation !== undefined ? { evaluation: lastEvent.evaluation } : {})
621
1193
  };
622
1194
  }
1195
/**
 * Produce an event sequence that includes model request/response provenance.
 *
 * When the trace already contains at least one `model-request` or
 * `model-response` event, `trace.events` is returned as-is (same array).
 * Otherwise a new array is built in which each `agent-turn` event is preceded
 * by a synthesized `model-request` / `model-response` pair taken from
 * `providerCalls`. Pairing is positional: the n-th `agent-turn` uses
 * `providerCalls[n]`; turns without a matching call get no synthesized events.
 *
 * @param trace - Saved trace; `runId` and `events` are read.
 * @param providerCalls - Recorded provider calls. A missing value
 *   (`undefined`/`null`) is treated as "no recorded calls".
 * @returns The event list to expose for replay.
 */
function synthesizeProviderEvents(trace, providerCalls) {
    const hasLiveProvenance = trace.events.some((event) => event.type === "model-request" || event.type === "model-response");
    if (hasLiveProvenance) {
        return trace.events;
    }
    // Past this point no event is a model-request/model-response, so the
    // events can be walked directly without re-filtering them.
    const calls = providerCalls ?? [];
    const result = [];
    let turnCount = 0;
    for (const event of trace.events) {
        if (event.type === "agent-turn") {
            const call = calls[turnCount];
            turnCount += 1;
            if (call !== undefined) {
                // Fall back to the provider id when no usable model id was recorded.
                const modelId = typeof call.modelId === "string" && call.modelId.length > 0 ? call.modelId : call.providerId;
                result.push({
                    type: "model-request",
                    runId: trace.runId,
                    callId: call.callId,
                    providerId: call.providerId,
                    modelId,
                    startedAt: call.startedAt,
                    agentId: call.agentId,
                    role: call.role,
                    request: call.request
                });
                result.push({
                    type: "model-response",
                    runId: trace.runId,
                    callId: call.callId,
                    providerId: call.providerId,
                    modelId,
                    startedAt: call.startedAt,
                    completedAt: call.completedAt,
                    agentId: call.agentId,
                    role: call.role,
                    response: call.response
                });
            }
        }
        result.push(event);
    }
    return result;
}
1238
/**
 * Decide which live error instance, if any, a finished run should throw.
 *
 * - When the trace records `triggeringFailureForAbortMode`, the instance
 *   registered for that child run id is returned (or `null` if none).
 * - Otherwise an error is returned only when the last event is a `final`
 *   event carrying a `termination`, a sub-run failure with a registered
 *   instance exists, and no `final-synthesis` decision follows that failure.
 *
 * @param trace - Trace of the run that just finished.
 * @param failureInstancesByChildRunId - Map from child run id to the error
 *   instance captured for that child.
 * @returns The error to throw, or `null` when the run should return normally.
 */
function resolveRuntimeTerminalThrow(trace, failureInstancesByChildRunId) {
    const abortTrigger = trace.triggeringFailureForAbortMode;
    if (abortTrigger !== undefined) {
        return failureInstancesByChildRunId.get(abortTrigger.childRunId) ?? null;
    }
    const terminalEvent = trace.events.at(-1);
    const endedByTermination = terminalEvent?.type === "final" && terminalEvent.termination !== undefined;
    if (!endedByTermination) {
        return null;
    }
    const failure = findLastRealFailure(trace.events, failureInstancesByChildRunId);
    if (failure === null || hasFinalSynthesisAfterEvent(trace, failure.eventIndex)) {
        return null;
    }
    return failure.error;
}
1255
/**
 * Scan events from newest to oldest for a `sub-run-failed` event whose child
 * run id has a registered error instance.
 *
 * @param events - Event list to scan.
 * @param failureInstancesByChildRunId - Map from child run id to the error
 *   instance captured for that child.
 * @returns `{ error, eventIndex }` for the latest matching event, or `null`
 *   when no `sub-run-failed` event has a registered instance.
 */
function findLastRealFailure(events, failureInstancesByChildRunId) {
    let cursor = events.length;
    while (cursor > 0) {
        cursor -= 1;
        const candidate = events[cursor];
        if (candidate?.type === "sub-run-failed") {
            const error = failureInstancesByChildRunId.get(candidate.childRunId);
            // Failures without a registered instance are skipped.
            if (error) {
                return { error, eventIndex: cursor };
            }
        }
    }
    return null;
}
1268
/**
 * Decide which error, if any, replaying a saved trace should throw.
 *
 * Errors are rebuilt from the serialized payloads stored in the trace:
 * - When the trace records `triggeringFailureForAbortMode`, its `error`
 *   payload is rehydrated and returned.
 * - Otherwise an error is returned only when the last event is a `final`
 *   event carrying a `termination`, a reconstructable sub-run failure exists,
 *   and no `final-synthesis` decision follows that failure.
 *
 * @param trace - Saved trace being replayed.
 * @returns The error to throw, or `null` when replay should return normally.
 */
function resolveReplayTerminalThrow(trace) {
    const abortTrigger = trace.triggeringFailureForAbortMode;
    if (abortTrigger !== undefined) {
        return dogpileErrorFromSerializedPayload(abortTrigger.error);
    }
    const terminalEvent = trace.events.at(-1);
    const endedByTermination = terminalEvent?.type === "final" && terminalEvent.termination !== undefined;
    if (!endedByTermination) {
        return null;
    }
    const failure = reconstructLastRealFailure(trace.events);
    if (failure === null || hasFinalSynthesisAfterEvent(trace, failure.eventIndex)) {
        return null;
    }
    return failure.error;
}
1285
/**
 * Scan events from newest to oldest for a `sub-run-failed` event that is not
 * a synthetic failure, and rebuild its error from the serialized payload.
 *
 * @param events - Event list of a saved trace.
 * @returns `{ error, eventIndex }` for the latest non-synthetic failure, or
 *   `null` when there is none.
 */
function reconstructLastRealFailure(events) {
    let cursor = events.length;
    while (cursor > 0) {
        cursor -= 1;
        const candidate = events[cursor];
        // The synthetic check only runs for actual sub-run-failed events.
        const isRealFailure = candidate?.type === "sub-run-failed" && !isSyntheticSubRunFailure(candidate);
        if (isRealFailure) {
            return {
                error: dogpileErrorFromSerializedPayload(candidate.error),
                eventIndex: cursor
            };
        }
    }
    return null;
}
1295
/**
 * Report whether the trace records a `final-synthesis` protocol decision whose
 * `eventIndex` is strictly greater than the given event index.
 *
 * @param trace - Trace whose `protocolDecisions` are inspected.
 * @param eventIndex - Index of the event to compare against.
 * @returns `true` when such a decision exists, otherwise `false`.
 */
function hasFinalSynthesisAfterEvent(trace, eventIndex) {
    const isLaterSynthesis = ({ phase, eventIndex: decidedAt }) => phase === "final-synthesis" && decidedAt > eventIndex;
    return trace.protocolDecisions.some(isLaterSynthesis);
}
1300
/**
 * Report whether a sub-run failure event carries one of the two reasons this
 * module treats as synthetic: `"sibling-failed"` or `"parent-aborted"`.
 *
 * @param event - Event whose `error.detail.reason` is inspected; `detail`
 *   may be absent.
 * @returns `true` for the two synthetic reasons, otherwise `false`.
 */
function isSyntheticSubRunFailure(event) {
    const { detail } = event.error;
    switch (detail?.["reason"]) {
        case "sibling-failed":
        case "parent-aborted":
            return true;
        default:
            return false;
    }
}
1304
/**
 * Rebuild a `DogpileError` from a serialized error payload.
 *
 * `code` and `message` are always forwarded; `providerId` and `detail` are
 * forwarded only when they are not `undefined`.
 *
 * @param input - Serialized error payload.
 * @returns A new `DogpileError` constructed from the payload.
 */
function dogpileErrorFromSerializedPayload(input) {
    const { code, message, providerId, detail } = input;
    const init = { code, message };
    if (providerId !== undefined) {
        init.providerId = providerId;
    }
    if (detail !== undefined) {
        init.detail = detail;
    }
    return new DogpileError(init);
}
623
1312
  /**
624
1313
  * Replay a saved completed trace as a stream without invoking a model provider.
625
1314
  *
626
1315
  * @remarks
627
- * This is the streaming counterpart to {@link replay}. It yields the exact
628
- * saved {@link Trace.events} in order and resolves {@link StreamHandle.result}
629
- * to the rehydrated {@link RunResult}. Since all data comes from the trace,
630
- * replay remains storage-free and provider-free.
1316
+ * This is the streaming counterpart to {@link replay}. It yields the same
1317
+ * event sequence exposed by the replayed result event log, including legacy
1318
+ * provenance synthesis when a saved trace predates model request/response
1319
+ * events. Since all data comes from the trace, replay remains storage-free and
1320
+ * provider-free.
1321
+ *
1322
+ * Tracing and metrics: replayStream is intentionally tracing-free and
1323
+ * metrics-free. Even when an engine instance has been configured with a
1324
+ * `tracer` or `metricsHook` on its `EngineOptions`, calling this function
1325
+ * emits no spans or callbacks — replaying historical events with current
1326
+ * timestamps would confuse observability backends. See `docs/developer-usage.md`.
631
1327
  */
1328
+ // Tracing/metrics-free: replayStream never uses EngineOptions tracer or metricsHook.
632
1329
  export function replayStream(trace) {
633
1330
  const result = Promise.resolve(replay(trace));
1331
+ const replayEvents = replayStreamEvents(trace);
634
1332
  return {
635
1333
  get status() {
636
1334
  return "completed";
@@ -640,7 +1338,7 @@ export function replayStream(trace) {
640
1338
  // Replay streams are already completed snapshots, so cancellation is a no-op.
641
1339
  },
642
1340
  subscribe(subscriber) {
643
- for (const event of trace.events) {
1341
+ for (const event of replayEvents) {
644
1342
  subscriber(event);
645
1343
  }
646
1344
  return {
@@ -653,7 +1351,7 @@ export function replayStream(trace) {
653
1351
  let index = 0;
654
1352
  return {
655
1353
  next() {
656
- const event = trace.events[index];
1354
+ const event = replayEvents[index];
657
1355
  if (event) {
658
1356
  index += 1;
659
1357
  return Promise.resolve({ done: false, value: event });
@@ -664,6 +1362,26 @@ export function replayStream(trace) {
664
1362
  }
665
1363
  };
666
1364
  }
1365
/**
 * Flatten a saved trace into the event sequence yielded by a replay stream.
 *
 * Events come from `synthesizeProviderEvents`. For every `sub-run-completed`
 * event, the embedded child trace (`event.subResult.trace`) is expanded
 * recursively and its events are placed immediately before the completion
 * event. Each emitted event is passed through `wrapReplayStreamEvent` with
 * the ancestry of the trace it belongs to.
 *
 * @param trace - Trace to flatten.
 * @param parentRunIds - Run ids of the enclosing traces, outermost first.
 *   Defaults to an empty list for the root trace.
 * @returns A new array of events in replay order.
 */
function replayStreamEvents(trace, parentRunIds = []) {
    const flattened = [];
    // Depth-first walk appending into one shared list.
    const visit = (currentTrace, ancestry) => {
        for (const event of synthesizeProviderEvents(currentTrace, currentTrace.providerCalls)) {
            if (event.type === "sub-run-completed") {
                visit(event.subResult.trace, [...ancestry, currentTrace.runId]);
            }
            flattened.push(wrapReplayStreamEvent(event, ancestry));
        }
    };
    visit(trace, parentRunIds);
    return flattened;
}
1375
/**
 * Attach ancestry to an event that belongs to a nested trace.
 *
 * With an empty `parentRunIds` the event is returned untouched (same object).
 * Otherwise a shallow copy is returned whose `parentRunIds` is the given
 * ancestry followed by any `parentRunIds` the event already carried.
 *
 * @param event - Event to wrap.
 * @param parentRunIds - Run ids of the enclosing traces, outermost first.
 * @returns The original event or a copy carrying the merged `parentRunIds`.
 */
function wrapReplayStreamEvent(event, parentRunIds) {
    const isRootLevel = parentRunIds.length === 0;
    if (isRootLevel) {
        return event;
    }
    const ownAncestry = event.parentRunIds ?? [];
    const mergedAncestry = [...parentRunIds, ...ownAncestry];
    return { ...event, parentRunIds: mergedAncestry };
}
667
1385
  function wireCallerAbortSignal(callerSignal, abortController, cancelRun) {
668
1386
  if (!callerSignal) {
669
1387
  return () => { };
@@ -690,7 +1408,8 @@ function createStreamCancellationError(providerId, cause) {
690
1408
  providerId,
691
1409
  ...(cause !== undefined ? { cause } : {}),
692
1410
  detail: {
693
- status: "cancelled"
1411
+ status: "cancelled",
1412
+ reason: "parent-aborted"
694
1413
  }
695
1414
  });
696
1415
  }
@@ -707,6 +1426,22 @@ function withHighLevelDefaults(options) {
707
1426
  tier: options.tier ?? defaultHighLevelTier
708
1427
  };
709
1428
  }
1429
/**
 * Validate that a per-run limit does not exceed the engine-level ceiling.
 *
 * Returns silently when `runValue` is `undefined` or satisfies
 * `runValue <= engineValue`.
 *
 * @param path - Option path reported in the error message and detail.
 * @param runValue - Per-run value, or `undefined` when not supplied.
 * @param engineValue - Engine-level ceiling.
 * @throws DogpileError with code `"invalid-configuration"` (non-retryable)
 *   for any other `runValue`.
 */
function assertRunDoesNotRaiseEngineMax(path, runValue, engineValue) {
    // Keep the `<=` comparison so values that fail it (e.g. NaN) are rejected.
    const withinCeiling = runValue === undefined || runValue <= engineValue;
    if (withinCeiling) {
        return;
    }
    const detail = {
        kind: "configuration-validation",
        path,
        expected: `integer <= ${engineValue}`,
        actual: runValue
    };
    throw new DogpileError({
        code: "invalid-configuration",
        message: `${path} cannot raise the engine ceiling (${engineValue}).`,
        retryable: false,
        detail
    });
}
710
1445
  /**
711
1446
  * Branded high-level SDK namespace.
712
1447
  *