@dogpile/sdk 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +65 -0
- package/dist/browser/index.js +726 -176
- package/dist/browser/index.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/providers/openai-compatible.d.ts.map +1 -1
- package/dist/providers/openai-compatible.js +1 -0
- package/dist/providers/openai-compatible.js.map +1 -1
- package/dist/runtime/audit.d.ts +42 -0
- package/dist/runtime/audit.d.ts.map +1 -0
- package/dist/runtime/audit.js +73 -0
- package/dist/runtime/audit.js.map +1 -0
- package/dist/runtime/broadcast.d.ts.map +1 -1
- package/dist/runtime/broadcast.js +39 -36
- package/dist/runtime/broadcast.js.map +1 -1
- package/dist/runtime/coordinator.d.ts +5 -0
- package/dist/runtime/coordinator.d.ts.map +1 -1
- package/dist/runtime/coordinator.js +50 -39
- package/dist/runtime/coordinator.js.map +1 -1
- package/dist/runtime/defaults.d.ts.map +1 -1
- package/dist/runtime/defaults.js +12 -4
- package/dist/runtime/defaults.js.map +1 -1
- package/dist/runtime/engine.d.ts +17 -4
- package/dist/runtime/engine.d.ts.map +1 -1
- package/dist/runtime/engine.js +523 -18
- package/dist/runtime/engine.js.map +1 -1
- package/dist/runtime/health.d.ts +51 -0
- package/dist/runtime/health.d.ts.map +1 -0
- package/dist/runtime/health.js +85 -0
- package/dist/runtime/health.js.map +1 -0
- package/dist/runtime/introspection.d.ts +96 -0
- package/dist/runtime/introspection.d.ts.map +1 -0
- package/dist/runtime/introspection.js +31 -0
- package/dist/runtime/introspection.js.map +1 -0
- package/dist/runtime/metrics.d.ts +44 -0
- package/dist/runtime/metrics.d.ts.map +1 -0
- package/dist/runtime/metrics.js +12 -0
- package/dist/runtime/metrics.js.map +1 -0
- package/dist/runtime/model.d.ts.map +1 -1
- package/dist/runtime/model.js +34 -7
- package/dist/runtime/model.js.map +1 -1
- package/dist/runtime/provenance.d.ts +25 -0
- package/dist/runtime/provenance.d.ts.map +1 -0
- package/dist/runtime/provenance.js +13 -0
- package/dist/runtime/provenance.js.map +1 -0
- package/dist/runtime/sequential.d.ts.map +1 -1
- package/dist/runtime/sequential.js +39 -36
- package/dist/runtime/sequential.js.map +1 -1
- package/dist/runtime/shared.d.ts.map +1 -1
- package/dist/runtime/shared.js +39 -36
- package/dist/runtime/shared.js.map +1 -1
- package/dist/runtime/tracing.d.ts +31 -0
- package/dist/runtime/tracing.d.ts.map +1 -0
- package/dist/runtime/tracing.js +18 -0
- package/dist/runtime/tracing.js.map +1 -0
- package/dist/types/events.d.ts +10 -4
- package/dist/types/events.d.ts.map +1 -1
- package/dist/types/replay.d.ts +2 -0
- package/dist/types/replay.d.ts.map +1 -1
- package/dist/types.d.ts +124 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +39 -1
- package/src/index.ts +5 -0
- package/src/providers/openai-compatible.ts +1 -0
- package/src/runtime/audit.ts +121 -0
- package/src/runtime/broadcast.ts +40 -37
- package/src/runtime/coordinator.ts +54 -39
- package/src/runtime/defaults.ts +13 -4
- package/src/runtime/engine.ts +648 -18
- package/src/runtime/health.ts +136 -0
- package/src/runtime/introspection.ts +122 -0
- package/src/runtime/metrics.ts +45 -0
- package/src/runtime/model.ts +38 -6
- package/src/runtime/provenance.ts +43 -0
- package/src/runtime/sequential.ts +40 -37
- package/src/runtime/shared.ts +40 -37
- package/src/runtime/tracing.ts +35 -0
- package/src/types/events.ts +10 -4
- package/src/types/replay.ts +2 -0
- package/src/types.ts +132 -1
package/dist/runtime/engine.js
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
import { DogpileError } from "../types.js";
|
|
2
2
|
import { runBroadcast } from "./broadcast.js";
|
|
3
3
|
import { runCoordinator } from "./coordinator.js";
|
|
4
|
-
import { createReplayTraceFinalOutput, createReplayTraceBudgetStateChanges, canonicalizeRunResult, canonicalizeSerializable, createRunAccounting, createRunEventLog, createRunMetadata, createRunUsage, defaultAgents, normalizeProtocol, orderAgentsForTemperature, recomputeAccountingFromTrace, resolveOnChildFailure, tierTemperature } from "./defaults.js";
|
|
4
|
+
import { addCost, createReplayTraceFinalOutput, createReplayTraceBudgetStateChanges, canonicalizeRunResult, canonicalizeSerializable, createRunAccounting, createRunEventLog, createRunMetadata, createRunUsage, defaultAgents, emptyCost, normalizeProtocol, orderAgentsForTemperature, recomputeAccountingFromTrace, resolveOnChildFailure, tierTemperature } from "./defaults.js";
|
|
5
|
+
import { computeHealth, DEFAULT_HEALTH_THRESHOLDS } from "./health.js";
|
|
5
6
|
import { runSequential } from "./sequential.js";
|
|
6
7
|
import { runShared } from "./shared.js";
|
|
7
8
|
import { classifyChildTimeoutSource, createAbortErrorFromSignal, createEngineDeadlineTimeoutError, createTimeoutError } from "./cancellation.js";
|
|
8
9
|
import { budget as budgetCondition } from "./termination.js";
|
|
9
10
|
import { validateDogpileOptions, validateEngineOptions, validateMissionIntent, validateProviderLocality, validateRunCallOptions } from "./validation.js";
|
|
11
|
+
import { DOGPILE_SPAN_NAMES } from "./tracing.js";
|
|
10
12
|
const DEFAULT_MAX_DEPTH = 4;
|
|
11
13
|
const DEFAULT_MAX_CONCURRENT_CHILDREN = 4;
|
|
12
14
|
const defaultHighLevelProtocol = "sequential";
|
|
@@ -57,6 +59,9 @@ export function createEngine(options) {
|
|
|
57
59
|
...(terminate ? { terminate } : {}),
|
|
58
60
|
...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
|
|
59
61
|
...(options.evaluate ? { evaluate: options.evaluate } : {}),
|
|
62
|
+
...(options.tracer ? { tracer: options.tracer } : {}),
|
|
63
|
+
...(options.metricsHook ? { metricsHook: options.metricsHook } : {}),
|
|
64
|
+
...(options.logger ? { logger: options.logger } : {}),
|
|
60
65
|
currentDepth: 0,
|
|
61
66
|
effectiveMaxDepth,
|
|
62
67
|
effectiveMaxConcurrentChildren,
|
|
@@ -165,6 +170,9 @@ export function createEngine(options) {
|
|
|
165
170
|
...(options.defaultSubRunTimeoutMs !== undefined
|
|
166
171
|
? { defaultSubRunTimeoutMs: options.defaultSubRunTimeoutMs }
|
|
167
172
|
: {}),
|
|
173
|
+
...(options.tracer ? { tracer: options.tracer } : {}),
|
|
174
|
+
...(options.metricsHook ? { metricsHook: options.metricsHook } : {}),
|
|
175
|
+
...(options.logger ? { logger: options.logger } : {}),
|
|
168
176
|
streamEvents: true,
|
|
169
177
|
emit(event) {
|
|
170
178
|
if (status !== "running") {
|
|
@@ -485,6 +493,387 @@ function dogpileErrorStreamDetail(error) {
|
|
|
485
493
|
}
|
|
486
494
|
return detail;
|
|
487
495
|
}
|
|
496
|
+
/**
 * Start the root span for a run and create the mutable bookkeeping state that
 * the tracing event handler and the tracing close routine operate on.
 *
 * @param options Reads `tracer`, `parentSpan`, `protocolKind`, `tier` and `intent`.
 * @returns The tracing state, or `undefined` when no tracer was supplied.
 */
function openRunTracing(options) {
    const { tracer, parentSpan } = options;
    if (!tracer) {
        return undefined;
    }
    const runSpan = tracer.startSpan(DOGPILE_SPAN_NAMES.RUN, {
        // Only attach a parent when one was handed in.
        ...(parentSpan ? { parent: parentSpan } : {}),
        attributes: {
            "dogpile.run.protocol": options.protocolKind,
            "dogpile.run.tier": String(options.tier),
            // The intent attribute is capped at 200 characters.
            "dogpile.run.intent": options.intent.slice(0, 200)
        }
    });
    return {
        tracer,
        runSpan,
        // Open spans, keyed by child run id / agent id / call id respectively.
        subRunSpans: new Map(),
        agentTurnSpans: new Map(),
        modelCallSpans: new Map(),
        pendingModelRequests: new Map(),
        // Per-agent turn numbering and per-turn usage accumulation.
        agentTurnCounters: new Map(),
        turnAccumByAgent: new Map(),
        // Run-level summary values used when the run span is closed on error.
        agentIds: new Set(),
        turnCount: 0,
        lastCost: emptyCost()
    };
}
|
|
522
|
+
/**
 * Create the metrics bookkeeping state for a run.
 *
 * @param options Reads `metricsHook` and `logger`.
 * @returns The metrics state, or `undefined` when no metrics hook was supplied.
 */
function openRunMetrics(options) {
    const { metricsHook, logger } = options;
    if (!metricsHook) {
        return undefined;
    }
    return {
        metricsHook,
        logger,
        // Wall-clock start, used later to compute the run duration.
        startedAtMs: Date.now(),
        // Start times of in-flight sub-runs, keyed by child run id.
        subRunStartTimes: new Map(),
        totalCost: emptyCost(),
        nestedCost: emptyCost(),
        turns: 0
    };
}
|
|
536
|
+
/**
 * Report a failure raised by a metrics hook callback.
 *
 * The message goes to `logger.error` when a logger was provided and to
 * `console.error` otherwise. Anything thrown while reporting is dropped.
 *
 * @param err    The value thrown (or rejected with) by the hook.
 * @param logger Optional logger exposing an `error(message, details)` method.
 */
function routeMetricsError(err, logger) {
    const details = { error: err instanceof Error ? err.message : String(err) };
    const sink = logger !== undefined ? logger : console;
    try {
        sink.error("dogpile:metricsHook threw", details);
    }
    catch {
        // A logger that throws from error() cannot be helped.
    }
}
|
|
550
|
+
/**
 * Invoke an optional metrics callback without letting its failures escape.
 *
 * Synchronous throws are routed to `routeMetricsError`. When the callback
 * returns something with a `catch` method (e.g. a promise), a rejection
 * handler is attached that routes the rejection the same way. The returned
 * value is never awaited.
 *
 * @param callback Hook function to call; nothing happens when it is falsy.
 * @param snapshot Value passed to the callback as its only argument.
 * @param logger   Optional logger forwarded to `routeMetricsError`.
 */
function fireHook(callback, snapshot, logger) {
    if (!callback) {
        return;
    }
    const reportFailure = (err) => {
        routeMetricsError(err, logger);
    };
    try {
        const returned = callback(snapshot);
        if (typeof returned?.catch === "function") {
            returned.catch(reportFailure);
        }
    }
    catch (err) {
        reportFailure(err);
    }
}
|
|
566
|
+
/**
 * Build the metrics snapshot for a finished top-level run.
 *
 * @param result      Run result; `result.cost` and `result.trace.events` are read.
 * @param startedAtMs `Date.now()` value captured when the run started.
 * @returns Snapshot with own/total token and cost figures, the turn count,
 *          the outcome, and the elapsed wall-clock time in milliseconds.
 */
function buildRunSnapshot(result, startedAtMs) {
    return {
        ...snapshotFieldsFromResult(result),
        durationMs: Date.now() - startedAtMs
    };
}
/**
 * Build the metrics snapshot for a completed sub-run.
 *
 * @param subResult  Sub-run result; same shape as read by `buildRunSnapshot`.
 * @param durationMs Duration measured by the caller.
 * @returns Snapshot with the same fields as `buildRunSnapshot`.
 */
function buildSubRunSnapshot(subResult, durationMs) {
    return {
        ...snapshotFieldsFromResult(subResult),
        durationMs
    };
}
/**
 * Collect the cost record of every sub-run event found directly in the
 * result's trace: the child's result cost for `sub-run-completed`, the
 * partial cost for `sub-run-failed`.
 *
 * @param result Run result whose `trace.events` is scanned.
 * @returns Cost records in trace order.
 */
function nestedSubRunCosts(result) {
    return result.trace.events.flatMap((event) => {
        if (event.type === "sub-run-completed") {
            return [event.subResult.cost];
        }
        if (event.type === "sub-run-failed") {
            return [event.partialCost];
        }
        return [];
    });
}
/**
 * Compute every snapshot field that depends only on the result itself
 * (everything except `durationMs`). Shared by both snapshot builders so the
 * "own = total - nested" arithmetic lives in one place.
 *
 * @param result Run or sub-run result.
 * @returns Snapshot fields in the order outcome, own figures, totals, turns.
 */
function snapshotFieldsFromResult(result) {
    // Sum nested sub-run costs in a single pass (same left-to-right order as
    // summing each field separately, so floating-point results are unchanged).
    const nested = { inputTokens: 0, outputTokens: 0, usd: 0 };
    for (const cost of nestedSubRunCosts(result)) {
        nested.inputTokens += cost.inputTokens;
        nested.outputTokens += cost.outputTokens;
        nested.usd += cost.usd;
    }
    const events = result.trace.events;
    const budgetStopped = events.some((event) => event.type === "budget-stop");
    const totalInputTokens = result.cost.inputTokens;
    const totalOutputTokens = result.cost.outputTokens;
    const totalCostUsd = result.cost.usd;
    return {
        outcome: budgetStopped ? "budget-stopped" : "completed",
        // "Own" figures exclude whatever nested sub-runs reported.
        inputTokens: totalInputTokens - nested.inputTokens,
        outputTokens: totalOutputTokens - nested.outputTokens,
        costUsd: totalCostUsd - nested.usd,
        totalInputTokens,
        totalOutputTokens,
        totalCostUsd,
        turns: events.filter((event) => event.type === "agent-turn").length
    };
}
|
|
623
|
+
/**
 * Field-wise difference of two cost records.
 *
 * @param total  Cost record to subtract from.
 * @param nested Cost record to subtract.
 * @returns New record with `usd`, `inputTokens`, `outputTokens` and
 *          `totalTokens` each set to `total[field] - nested[field]`.
 */
function subtractCost(total, nested) {
    const difference = (field) => total[field] - nested[field];
    return {
        usd: difference("usd"),
        inputTokens: difference("inputTokens"),
        outputTokens: difference("outputTokens"),
        totalTokens: difference("totalTokens")
    };
}
|
|
631
|
+
/**
 * Fold one run event into the metrics state.
 *
 * Events that carry `parentRunIds` are ignored. Cost-bearing events replace
 * `state.totalCost`, sub-run events add to both `totalCost` and `nestedCost`,
 * and a completed sub-run additionally fires `metricsHook.onSubRunComplete`.
 *
 * @param state Metrics state created by `openRunMetrics`.
 * @param event Run event; dispatch is on `event.type`.
 */
function handleMetricsEvent(state, event) {
    if (event.parentRunIds !== undefined) {
        return;
    }
    const kind = event.type;
    if (kind === "agent-turn" || kind === "broadcast" || kind === "budget-stop" || kind === "final") {
        // These events carry a cost that replaces the tracked total.
        state.totalCost = event.cost;
        if (kind === "agent-turn") {
            state.turns += 1;
        }
        return;
    }
    if (kind === "sub-run-started") {
        state.subRunStartTimes.set(event.childRunId, Date.now());
        return;
    }
    if (kind === "sub-run-completed") {
        const childCost = event.subResult.cost;
        state.totalCost = addCost(state.totalCost, childCost);
        state.nestedCost = addCost(state.nestedCost, childCost);
        // Duration falls back to 0 when no start event was seen for this child.
        const startedAt = state.subRunStartTimes.get(event.childRunId);
        const elapsedMs = startedAt === undefined ? 0 : Date.now() - startedAt;
        state.subRunStartTimes.delete(event.childRunId);
        const snapshot = buildSubRunSnapshot(event.subResult, elapsedMs);
        fireHook(state.metricsHook.onSubRunComplete, snapshot, state.logger);
        return;
    }
    if (kind === "sub-run-failed") {
        // Failed children contribute their partial cost; no hook is fired.
        state.totalCost = addCost(state.totalCost, event.partialCost);
        state.nestedCost = addCost(state.nestedCost, event.partialCost);
        state.subRunStartTimes.delete(event.childRunId);
    }
}
|
|
672
|
+
/**
 * Fire `metricsHook.onRunComplete` with the final snapshot for the run.
 *
 * With a result, the snapshot is derived from the result by
 * `buildRunSnapshot`. Without one, an "aborted" snapshot is assembled from
 * the costs and turn count accumulated in the metrics state.
 *
 * @param state  Metrics state created by `openRunMetrics`.
 * @param result Run result, or `undefined` when the run did not produce one.
 */
function closeRunMetrics(state, result) {
    let snapshot;
    if (result !== undefined) {
        snapshot = buildRunSnapshot(result, state.startedAtMs);
    }
    else {
        // Own cost = everything observed minus what nested sub-runs reported.
        const own = subtractCost(state.totalCost, state.nestedCost);
        snapshot = {
            outcome: "aborted",
            inputTokens: own.inputTokens,
            outputTokens: own.outputTokens,
            costUsd: own.usd,
            totalInputTokens: state.totalCost.inputTokens,
            totalOutputTokens: state.totalCost.outputTokens,
            totalCostUsd: state.totalCost.usd,
            turns: state.turns,
            durationMs: Date.now() - state.startedAtMs
        };
    }
    fireHook(state.metricsHook.onRunComplete, snapshot, state.logger);
}
|
|
692
|
+
/**
 * Translate one run event into tracing-span operations.
 *
 * Events that carry `parentRunIds` are ignored. The first handled event also
 * stamps its `runId` onto the run span. After that:
 *  - `model-request` opens a model-call span (and an agent-turn span when the
 *    agent has none open);
 *  - `model-response` closes the model-call span and accumulates usage;
 *  - `agent-turn` closes the agent's turn span with the accumulated usage;
 *  - `broadcast` / `budget-stop` / `final` record the latest cost;
 *  - `sub-run-*` events open and close sub-run spans.
 *
 * @param state Tracing state created by `openRunTracing`.
 * @param event Run event; dispatch is on `event.type`.
 */
function handleTracingEvent(state, event) {
    if (event.parentRunIds !== undefined) {
        return;
    }
    if (state.runId === undefined) {
        state.runId = event.runId;
        state.runSpan.setAttribute("dogpile.run.id", event.runId);
    }
    switch (event.type) {
        case "model-request": {
            const { agentId, callId } = event;
            state.pendingModelRequests.set(callId, event);
            state.agentIds.add(agentId);
            if (!state.agentTurnSpans.has(agentId)) {
                // First request since the agent's last turn closed: open a new turn span.
                const nextTurn = (state.agentTurnCounters.get(agentId) ?? 0) + 1;
                state.agentTurnCounters.set(agentId, nextTurn);
                const enclosingSpan = state.subRunSpans.get(event.runId) ?? state.runSpan;
                const openedTurn = state.tracer.startSpan(DOGPILE_SPAN_NAMES.AGENT_TURN, {
                    parent: enclosingSpan,
                    attributes: {
                        "dogpile.agent.id": agentId,
                        "dogpile.agent.role": event.role,
                        "dogpile.turn.number": nextTurn,
                        "dogpile.model.id": event.modelId
                    }
                });
                state.agentTurnSpans.set(agentId, openedTurn);
            }
            // Parent preference: the agent's turn span, then a sub-run span, then the run span.
            const parentForCall = state.agentTurnSpans.get(agentId) ??
                state.subRunSpans.get(event.runId) ??
                state.runSpan;
            const openedCall = state.tracer.startSpan(DOGPILE_SPAN_NAMES.MODEL_CALL, {
                parent: parentForCall,
                attributes: {
                    "dogpile.model.id": event.modelId,
                    "dogpile.call.id": callId,
                    "dogpile.provider.id": event.providerId
                }
            });
            state.modelCallSpans.set(callId, openedCall);
            break;
        }
        case "model-response": {
            const callSpan = state.modelCallSpans.get(event.callId);
            if (callSpan) {
                const usage = event.response.usage;
                const reportedUsd = event.response.costUsd;
                const inputTokens = usage?.inputTokens ?? 0;
                const outputTokens = usage?.outputTokens ?? 0;
                const responseCost = {
                    usd: reportedUsd ?? 0,
                    inputTokens,
                    outputTokens,
                    totalTokens: usage?.totalTokens ?? inputTokens + outputTokens
                };
                callSpan.setAttribute("dogpile.model.input_tokens", inputTokens);
                callSpan.setAttribute("dogpile.model.output_tokens", outputTokens);
                // The cost attribute is written only when the response reported one.
                if (reportedUsd !== undefined) {
                    callSpan.setAttribute("dogpile.model.cost_usd", reportedUsd);
                }
                callSpan.setStatus("ok");
                callSpan.end();
                state.modelCallSpans.delete(event.callId);
                // Roll this call's usage into the agent's running turn totals.
                const turnTotals = state.turnAccumByAgent.get(event.agentId) ?? {
                    inputTokens: 0,
                    outputTokens: 0,
                    costUsd: 0
                };
                turnTotals.inputTokens += inputTokens;
                turnTotals.outputTokens += outputTokens;
                turnTotals.costUsd += responseCost.usd;
                state.turnAccumByAgent.set(event.agentId, turnTotals);
                state.lastCost = addCost(state.lastCost, responseCost);
            }
            state.pendingModelRequests.delete(event.callId);
            break;
        }
        case "agent-turn": {
            const { agentId } = event;
            state.agentIds.add(agentId);
            state.turnCount += 1;
            state.lastCost = event.cost;
            const openTurn = state.agentTurnSpans.get(agentId);
            if (openTurn) {
                const turnTotals = state.turnAccumByAgent.get(agentId);
                openTurn.setAttribute("dogpile.agent.role", event.role);
                openTurn.setAttribute("dogpile.turn.cost_usd", turnTotals?.costUsd ?? 0);
                openTurn.setAttribute("dogpile.turn.input_tokens", turnTotals?.inputTokens ?? 0);
                openTurn.setAttribute("dogpile.turn.output_tokens", turnTotals?.outputTokens ?? 0);
                openTurn.setStatus("ok");
                openTurn.end();
                state.agentTurnSpans.delete(agentId);
            }
            state.turnAccumByAgent.delete(agentId);
            break;
        }
        case "broadcast":
        case "budget-stop":
        case "final": {
            state.lastCost = event.cost;
            break;
        }
        case "sub-run-started": {
            const openedSubRun = state.tracer.startSpan(DOGPILE_SPAN_NAMES.SUB_RUN, {
                parent: state.runSpan,
                attributes: {
                    "dogpile.sub_run.child_run_id": event.childRunId,
                    "dogpile.sub_run.parent_run_id": event.parentRunId,
                    "dogpile.sub_run.depth": event.depth
                }
            });
            state.subRunSpans.set(event.childRunId, openedSubRun);
            break;
        }
        case "sub-run-completed":
        case "sub-run-failed": {
            const subRunSpan = state.subRunSpans.get(event.childRunId);
            if (subRunSpan) {
                if (event.type === "sub-run-failed") {
                    subRunSpan.setStatus("error", event.error.message);
                }
                else {
                    subRunSpan.setStatus("ok");
                }
                subRunSpan.end();
                state.subRunSpans.delete(event.childRunId);
            }
            break;
        }
        default:
            break;
    }
}
|
|
825
|
+
/**
 * Finish the run span and end any spans that are still open.
 *
 * Three cases, checked in this order:
 *  1. `error` given: summary attributes come from the tracing state, outcome is
 *     "aborted", and the run span status is "error" with the error's message.
 *  2. no `result`: spans are simply ended, with no summary attributes.
 *  3. `result` given: summary attributes come from the result, outcome is
 *     "budget-stopped" when a budget-stop event carries a reason, otherwise
 *     "completed", and the run span status is "ok".
 *
 * @param state  Tracing state created by `openRunTracing`.
 * @param result Run result, when the run produced one.
 * @param error  The failure that ended the run, when there was one.
 */
function closeRunTracing(state, result, error) {
    const rootSpan = state.runSpan;
    const endEverything = () => {
        closeOpenTracingSpans(state);
        rootSpan.end();
    };
    if (error !== undefined) {
        const tally = state.lastCost;
        if (state.runId !== undefined) {
            rootSpan.setAttribute("dogpile.run.id", state.runId);
        }
        rootSpan.setAttribute("dogpile.run.agent_count", state.agentIds.size);
        rootSpan.setAttribute("dogpile.run.turn_count", state.turnCount);
        rootSpan.setAttribute("dogpile.run.cost_usd", tally.usd);
        rootSpan.setAttribute("dogpile.run.input_tokens", tally.inputTokens);
        rootSpan.setAttribute("dogpile.run.output_tokens", tally.outputTokens);
        rootSpan.setAttribute("dogpile.run.outcome", "aborted");
        rootSpan.setStatus("error", error instanceof Error ? error.message : String(error));
        endEverything();
        return;
    }
    if (result === undefined) {
        endEverything();
        return;
    }
    const { trace } = result;
    // A budget-stop event's reason doubles as the termination reason.
    const stopReason = trace.events.find((event) => event.type === "budget-stop")?.reason;
    const stoppedByBudget = stopReason !== undefined;
    rootSpan.setAttribute("dogpile.run.id", trace.runId);
    rootSpan.setAttribute("dogpile.run.agent_count", trace.agentsUsed.length);
    rootSpan.setAttribute("dogpile.run.turn_count", trace.events.filter((event) => event.type === "agent-turn").length);
    rootSpan.setAttribute("dogpile.run.cost_usd", result.cost.usd);
    rootSpan.setAttribute("dogpile.run.input_tokens", result.cost.inputTokens);
    rootSpan.setAttribute("dogpile.run.output_tokens", result.cost.outputTokens);
    rootSpan.setAttribute("dogpile.run.outcome", stoppedByBudget ? "budget-stopped" : "completed");
    if (stoppedByBudget) {
        rootSpan.setAttribute("dogpile.run.termination_reason", stopReason);
    }
    rootSpan.setStatus("ok");
    endEverything();
}
|
|
863
|
+
/**
 * End every span still registered in the tracing state and empty the
 * registries. Order: model-call spans, then agent-turn spans, then sub-run
 * spans. No status is set on the spans ended here.
 *
 * @param state Tracing state holding the `modelCallSpans`, `agentTurnSpans`
 *              and `subRunSpans` maps.
 */
function closeOpenTracingSpans(state) {
    const registries = [state.modelCallSpans, state.agentTurnSpans, state.subRunSpans];
    for (const registry of registries) {
        for (const openSpan of registry.values()) {
            openSpan.end();
        }
        registry.clear();
    }
}
|
|
488
877
|
async function runNonStreamingProtocol(options) {
|
|
489
878
|
const failureInstancesByChildRunId = new Map();
|
|
490
879
|
const abortLifecycle = createNonStreamingAbortLifecycle({
|
|
@@ -524,7 +913,8 @@ async function runNonStreamingProtocol(options) {
|
|
|
524
913
|
events
|
|
525
914
|
}),
|
|
526
915
|
eventLog: createRunEventLog(trace.runId, trace.protocol, events),
|
|
527
|
-
trace
|
|
916
|
+
trace,
|
|
917
|
+
health: computeHealth(trace, DEFAULT_HEALTH_THRESHOLDS)
|
|
528
918
|
};
|
|
529
919
|
const terminalThrow = resolveRuntimeTerminalThrow(runResult.trace, failureInstancesByChildRunId);
|
|
530
920
|
if (terminalThrow) {
|
|
@@ -570,7 +960,56 @@ function finalEventWithEvaluation(event, evaluation) {
|
|
|
570
960
|
evaluation
|
|
571
961
|
};
|
|
572
962
|
}
|
|
573
|
-
function runProtocol(options) {
|
|
963
|
+
/**
 * Run a protocol with optional tracing and metrics wrapped around it.
 *
 * Every event the protocol emits is fed to the tracing handler, then the
 * metrics handler, then the caller's own `emit` (when present). When the
 * protocol settles, tracing is closed and — only when `currentDepth` is 0 or
 * unset — the run-level metrics hook is fired.
 *
 * Finalization runs outside the `try` that guards the protocol itself. A
 * tracer that throws while closing a completed run therefore cannot send
 * control into the failure path (which would close tracing a second time and
 * report the run as "aborted"). The metrics hook is fired from `finally` so
 * it sees the true outcome even when closing tracing throws.
 *
 * @param options Protocol options; `tracer`, `parentSpan`, `metricsHook`,
 *                `logger`, `emit` and `currentDepth` are read here, the rest
 *                is passed through to `runProtocolInner`.
 * @returns The protocol's result.
 * @throws Whatever the protocol throws. An exception thrown by the tracer
 *         while closing spans also propagates.
 */
async function runProtocol(options) {
    const tracing = openRunTracing({
        ...(options.tracer ? { tracer: options.tracer } : {}),
        ...(options.parentSpan ? { parentSpan: options.parentSpan } : {}),
        intent: options.intent,
        protocolKind: options.protocol.kind,
        tier: options.tier
    });
    const metrics = openRunMetrics({
        ...(options.metricsHook ? { metricsHook: options.metricsHook } : {}),
        ...(options.logger ? { logger: options.logger } : {})
    });
    // Run-level metrics are reported only when currentDepth is 0 or unset.
    const reportsRunMetrics = Boolean(metrics) && (options.currentDepth === 0 || options.currentDepth === undefined);
    const emitForProtocol = tracing || metrics || options.emit
        ? (event) => {
            if (tracing) {
                handleTracingEvent(tracing, event);
            }
            if (metrics) {
                handleMetricsEvent(metrics, event);
            }
            options.emit?.(event);
        }
        : undefined;
    // Expose the sub-run span registry so child runs can be parented correctly.
    const protocolOptions = tracing
        ? {
            ...options,
            subRunSpansByChildId: tracing.subRunSpans
        }
        : options;
    let result;
    try {
        result = await runProtocolInner(protocolOptions, emitForProtocol);
    }
    catch (error) {
        try {
            if (tracing) {
                closeRunTracing(tracing, undefined, error);
            }
        }
        finally {
            if (reportsRunMetrics) {
                closeRunMetrics(metrics, undefined);
            }
        }
        throw error;
    }
    try {
        if (tracing) {
            closeRunTracing(tracing, result);
        }
    }
    finally {
        if (reportsRunMetrics) {
            closeRunMetrics(metrics, result);
        }
    }
    return result;
}
|
|
1012
|
+
function runProtocolInner(options, emitForProtocol) {
|
|
574
1013
|
switch (options.protocol.kind) {
|
|
575
1014
|
case "sequential":
|
|
576
1015
|
return runSequential({
|
|
@@ -586,7 +1025,7 @@ function runProtocol(options) {
|
|
|
586
1025
|
...(options.signal !== undefined ? { signal: options.signal } : {}),
|
|
587
1026
|
...(options.terminate ? { terminate: options.terminate } : {}),
|
|
588
1027
|
...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
|
|
589
|
-
...(
|
|
1028
|
+
...(emitForProtocol ? { emit: emitForProtocol } : {})
|
|
590
1029
|
});
|
|
591
1030
|
case "broadcast":
|
|
592
1031
|
return runBroadcast({
|
|
@@ -602,7 +1041,7 @@ function runProtocol(options) {
|
|
|
602
1041
|
...(options.signal !== undefined ? { signal: options.signal } : {}),
|
|
603
1042
|
...(options.terminate ? { terminate: options.terminate } : {}),
|
|
604
1043
|
...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
|
|
605
|
-
...(
|
|
1044
|
+
...(emitForProtocol ? { emit: emitForProtocol } : {})
|
|
606
1045
|
});
|
|
607
1046
|
case "coordinator":
|
|
608
1047
|
return runCoordinator({
|
|
@@ -618,7 +1057,7 @@ function runProtocol(options) {
|
|
|
618
1057
|
...(options.signal !== undefined ? { signal: options.signal } : {}),
|
|
619
1058
|
...(options.terminate ? { terminate: options.terminate } : {}),
|
|
620
1059
|
...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
|
|
621
|
-
...(
|
|
1060
|
+
...(emitForProtocol ? { emit: emitForProtocol } : {}),
|
|
622
1061
|
...(options.streamEvents !== undefined ? { streamEvents: options.streamEvents } : {}),
|
|
623
1062
|
currentDepth: options.currentDepth ?? 0,
|
|
624
1063
|
effectiveMaxDepth: options.effectiveMaxDepth ?? Infinity,
|
|
@@ -632,10 +1071,17 @@ function runProtocol(options) {
|
|
|
632
1071
|
...(options.failureInstancesByChildRunId !== undefined
|
|
633
1072
|
? { failureInstancesByChildRunId: options.failureInstancesByChildRunId }
|
|
634
1073
|
: {}),
|
|
635
|
-
runProtocol: (childInput) =>
|
|
636
|
-
...childInput
|
|
637
|
-
|
|
638
|
-
|
|
1074
|
+
runProtocol: (childInput) => {
|
|
1075
|
+
const { runId: childRunId, ...childProtocolInput } = childInput;
|
|
1076
|
+
const childParent = options.subRunSpansByChildId?.get(childRunId) ?? options.parentSpan;
|
|
1077
|
+
return runProtocol({
|
|
1078
|
+
...childProtocolInput,
|
|
1079
|
+
protocol: normalizeProtocol(childProtocolInput.protocol),
|
|
1080
|
+
...(options.tracer ? { tracer: options.tracer } : {}),
|
|
1081
|
+
...(childParent ? { parentSpan: childParent } : {}),
|
|
1082
|
+
...(options.logger ? { logger: options.logger } : {})
|
|
1083
|
+
});
|
|
1084
|
+
}
|
|
639
1085
|
});
|
|
640
1086
|
case "shared":
|
|
641
1087
|
return runShared({
|
|
@@ -651,7 +1097,7 @@ function runProtocol(options) {
|
|
|
651
1097
|
...(options.signal !== undefined ? { signal: options.signal } : {}),
|
|
652
1098
|
...(options.terminate ? { terminate: options.terminate } : {}),
|
|
653
1099
|
...(options.wrapUpHint ? { wrapUpHint: options.wrapUpHint } : {}),
|
|
654
|
-
...(
|
|
1100
|
+
...(emitForProtocol ? { emit: emitForProtocol } : {})
|
|
655
1101
|
});
|
|
656
1102
|
}
|
|
657
1103
|
}
|
|
@@ -699,7 +1145,14 @@ export function stream(options) {
|
|
|
699
1145
|
* the ergonomic {@link RunResult} wrapper from the JSON-serializable
|
|
700
1146
|
* {@link Trace} returned by a previous `run()`, `stream()`, or
|
|
701
1147
|
* `Dogpile.pile()` call.
|
|
1148
|
+
*
|
|
1149
|
+
* Tracing and metrics: replay is intentionally tracing-free and metrics-free.
|
|
1150
|
+
* Even when an engine instance has been configured with a `tracer` or
|
|
1151
|
+
* `metricsHook` on its `EngineOptions`, calling this function emits no spans
|
|
1152
|
+
* or callbacks — replaying historical events with current timestamps would
|
|
1153
|
+
* confuse observability backends. See `docs/developer-usage.md`.
|
|
702
1154
|
*/
|
|
1155
|
+
// Tracing/metrics-free: replay never uses EngineOptions tracer or metricsHook.
|
|
703
1156
|
export function replay(trace) {
|
|
704
1157
|
const cost = trace.finalOutput.cost;
|
|
705
1158
|
const lastEvent = trace.events.at(-1);
|
|
@@ -714,7 +1167,7 @@ export function replay(trace) {
|
|
|
714
1167
|
}
|
|
715
1168
|
const baseResult = {
|
|
716
1169
|
output: trace.finalOutput.output,
|
|
717
|
-
eventLog: createRunEventLog(trace.runId, trace.protocol, trace.
|
|
1170
|
+
eventLog: createRunEventLog(trace.runId, trace.protocol, synthesizeProviderEvents(trace, trace.providerCalls)),
|
|
718
1171
|
trace,
|
|
719
1172
|
transcript: trace.transcript,
|
|
720
1173
|
usage: createRunUsage(cost),
|
|
@@ -727,7 +1180,8 @@ export function replay(trace) {
|
|
|
727
1180
|
events: trace.events
|
|
728
1181
|
}),
|
|
729
1182
|
accounting,
|
|
730
|
-
cost
|
|
1183
|
+
cost,
|
|
1184
|
+
health: computeHealth(trace, DEFAULT_HEALTH_THRESHOLDS)
|
|
731
1185
|
};
|
|
732
1186
|
if (lastEvent?.type !== "final") {
|
|
733
1187
|
return baseResult;
|
|
@@ -738,6 +1192,49 @@ export function replay(trace) {
|
|
|
738
1192
|
...(lastEvent.evaluation !== undefined ? { evaluation: lastEvent.evaluation } : {})
|
|
739
1193
|
};
|
|
740
1194
|
}
|
|
1195
|
+
/**
 * Produce the event sequence used for replay, adding model-request /
 * model-response events when the trace has none.
 *
 * When the trace already contains any `model-request` or `model-response`
 * event, `trace.events` is returned as-is (same array). Otherwise a new array
 * is built in which each `agent-turn` event is preceded by a synthesized
 * request/response pair taken from `providerCalls`.
 *
 * Pairing is positional: the N-th `agent-turn` uses `providerCalls[N]`.
 * NOTE(review): this presumes one recorded provider call per turn, in turn
 * order — confirm against how `providerCalls` is recorded.
 *
 * @param trace         Saved trace; `runId` and `events` are read.
 * @param providerCalls Recorded provider calls. A missing value (`undefined`
 *                      or `null`) is treated as "no recorded calls" instead of
 *                      raising a TypeError.
 * @returns The event list to replay.
 */
function synthesizeProviderEvents(trace, providerCalls) {
    const hasLiveProvenance = trace.events.some((event) => event.type === "model-request" || event.type === "model-response");
    if (hasLiveProvenance) {
        return trace.events;
    }
    // Past this point the trace holds no provenance events, so no filtering is needed.
    const calls = providerCalls ?? [];
    const result = [];
    let turnCount = 0;
    for (const event of trace.events) {
        if (event.type === "agent-turn") {
            const call = calls[turnCount];
            turnCount += 1;
            if (call !== undefined) {
                // Fall back to the provider id when the call has no usable model id.
                const modelId = typeof call.modelId === "string" && call.modelId.length > 0 ? call.modelId : call.providerId;
                result.push({
                    type: "model-request",
                    runId: trace.runId,
                    callId: call.callId,
                    providerId: call.providerId,
                    modelId,
                    startedAt: call.startedAt,
                    agentId: call.agentId,
                    role: call.role,
                    request: call.request
                });
                result.push({
                    type: "model-response",
                    runId: trace.runId,
                    callId: call.callId,
                    providerId: call.providerId,
                    modelId,
                    startedAt: call.startedAt,
                    completedAt: call.completedAt,
                    agentId: call.agentId,
                    role: call.role,
                    response: call.response
                });
            }
        }
        result.push(event);
    }
    return result;
}
|
|
741
1238
|
function resolveRuntimeTerminalThrow(trace, failureInstancesByChildRunId) {
|
|
742
1239
|
if (trace.triggeringFailureForAbortMode !== undefined) {
|
|
743
1240
|
return failureInstancesByChildRunId.get(trace.triggeringFailureForAbortMode.childRunId) ?? null;
|
|
@@ -816,11 +1313,19 @@ function dogpileErrorFromSerializedPayload(input) {
|
|
|
816
1313
|
* Replay a saved completed trace as a stream without invoking a model provider.
|
|
817
1314
|
*
|
|
818
1315
|
* @remarks
|
|
819
|
-
* This is the streaming counterpart to {@link replay}. It yields the
|
|
820
|
-
*
|
|
821
|
-
*
|
|
822
|
-
* replay remains storage-free and
|
|
1316
|
+
* This is the streaming counterpart to {@link replay}. It yields the same
|
|
1317
|
+
* event sequence exposed by the replayed result event log, including legacy
|
|
1318
|
+
* provenance synthesis when a saved trace predates model request/response
|
|
1319
|
+
* events. Since all data comes from the trace, replay remains storage-free and
|
|
1320
|
+
* provider-free.
|
|
1321
|
+
*
|
|
1322
|
+
* Tracing and metrics: replayStream is intentionally tracing-free and
|
|
1323
|
+
* metrics-free. Even when an engine instance has been configured with a
|
|
1324
|
+
* `tracer` or `metricsHook` on its `EngineOptions`, calling this function
|
|
1325
|
+
* emits no spans or callbacks — replaying historical events with current
|
|
1326
|
+
* timestamps would confuse observability backends. See `docs/developer-usage.md`.
|
|
823
1327
|
*/
|
|
1328
|
+
// Tracing/metrics-free: replayStream never uses EngineOptions tracer or metricsHook.
|
|
824
1329
|
export function replayStream(trace) {
|
|
825
1330
|
const result = Promise.resolve(replay(trace));
|
|
826
1331
|
const replayEvents = replayStreamEvents(trace);
|
|
@@ -859,7 +1364,7 @@ export function replayStream(trace) {
|
|
|
859
1364
|
}
|
|
860
1365
|
function replayStreamEvents(trace, parentRunIds = []) {
|
|
861
1366
|
const events = [];
|
|
862
|
-
for (const event of trace.
|
|
1367
|
+
for (const event of synthesizeProviderEvents(trace, trace.providerCalls)) {
|
|
863
1368
|
if (event.type === "sub-run-completed") {
|
|
864
1369
|
events.push(...replayStreamEvents(event.subResult.trace, [...parentRunIds, trace.runId]));
|
|
865
1370
|
}
|