@dogpile/sdk 0.3.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +201 -0
- package/README.md +1 -0
- package/dist/browser/index.js +2328 -237
- package/dist/browser/index.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/providers/openai-compatible.d.ts +11 -0
- package/dist/providers/openai-compatible.d.ts.map +1 -1
- package/dist/providers/openai-compatible.js +88 -2
- package/dist/providers/openai-compatible.js.map +1 -1
- package/dist/runtime/audit.d.ts +42 -0
- package/dist/runtime/audit.d.ts.map +1 -0
- package/dist/runtime/audit.js +73 -0
- package/dist/runtime/audit.js.map +1 -0
- package/dist/runtime/broadcast.d.ts.map +1 -1
- package/dist/runtime/broadcast.js +39 -36
- package/dist/runtime/broadcast.js.map +1 -1
- package/dist/runtime/cancellation.d.ts +26 -0
- package/dist/runtime/cancellation.d.ts.map +1 -1
- package/dist/runtime/cancellation.js +38 -1
- package/dist/runtime/cancellation.js.map +1 -1
- package/dist/runtime/coordinator.d.ts +79 -1
- package/dist/runtime/coordinator.d.ts.map +1 -1
- package/dist/runtime/coordinator.js +979 -61
- package/dist/runtime/coordinator.js.map +1 -1
- package/dist/runtime/decisions.d.ts +25 -3
- package/dist/runtime/decisions.d.ts.map +1 -1
- package/dist/runtime/decisions.js +241 -3
- package/dist/runtime/decisions.js.map +1 -1
- package/dist/runtime/defaults.d.ts +37 -1
- package/dist/runtime/defaults.d.ts.map +1 -1
- package/dist/runtime/defaults.js +359 -4
- package/dist/runtime/defaults.js.map +1 -1
- package/dist/runtime/engine.d.ts +17 -4
- package/dist/runtime/engine.d.ts.map +1 -1
- package/dist/runtime/engine.js +770 -35
- package/dist/runtime/engine.js.map +1 -1
- package/dist/runtime/health.d.ts +51 -0
- package/dist/runtime/health.d.ts.map +1 -0
- package/dist/runtime/health.js +85 -0
- package/dist/runtime/health.js.map +1 -0
- package/dist/runtime/introspection.d.ts +96 -0
- package/dist/runtime/introspection.d.ts.map +1 -0
- package/dist/runtime/introspection.js +31 -0
- package/dist/runtime/introspection.js.map +1 -0
- package/dist/runtime/metrics.d.ts +44 -0
- package/dist/runtime/metrics.d.ts.map +1 -0
- package/dist/runtime/metrics.js +12 -0
- package/dist/runtime/metrics.js.map +1 -0
- package/dist/runtime/model.d.ts.map +1 -1
- package/dist/runtime/model.js +34 -7
- package/dist/runtime/model.js.map +1 -1
- package/dist/runtime/provenance.d.ts +25 -0
- package/dist/runtime/provenance.d.ts.map +1 -0
- package/dist/runtime/provenance.js +13 -0
- package/dist/runtime/provenance.js.map +1 -0
- package/dist/runtime/sequential.d.ts.map +1 -1
- package/dist/runtime/sequential.js +47 -37
- package/dist/runtime/sequential.js.map +1 -1
- package/dist/runtime/shared.d.ts.map +1 -1
- package/dist/runtime/shared.js +39 -36
- package/dist/runtime/shared.js.map +1 -1
- package/dist/runtime/tracing.d.ts +31 -0
- package/dist/runtime/tracing.d.ts.map +1 -0
- package/dist/runtime/tracing.js +18 -0
- package/dist/runtime/tracing.js.map +1 -0
- package/dist/runtime/validation.d.ts +10 -0
- package/dist/runtime/validation.d.ts.map +1 -1
- package/dist/runtime/validation.js +73 -0
- package/dist/runtime/validation.js.map +1 -1
- package/dist/types/events.d.ts +339 -12
- package/dist/types/events.d.ts.map +1 -1
- package/dist/types/replay.d.ts +7 -1
- package/dist/types/replay.d.ts.map +1 -1
- package/dist/types.d.ts +255 -6
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +39 -1
- package/src/index.ts +15 -0
- package/src/providers/openai-compatible.ts +83 -3
- package/src/runtime/audit.ts +121 -0
- package/src/runtime/broadcast.ts +40 -37
- package/src/runtime/cancellation.ts +59 -1
- package/src/runtime/coordinator.ts +1221 -61
- package/src/runtime/decisions.ts +307 -4
- package/src/runtime/defaults.ts +389 -4
- package/src/runtime/engine.ts +1004 -35
- package/src/runtime/health.ts +136 -0
- package/src/runtime/introspection.ts +122 -0
- package/src/runtime/metrics.ts +45 -0
- package/src/runtime/model.ts +38 -6
- package/src/runtime/provenance.ts +43 -0
- package/src/runtime/sequential.ts +49 -38
- package/src/runtime/shared.ts +40 -37
- package/src/runtime/tracing.ts +35 -0
- package/src/runtime/validation.ts +81 -0
- package/src/types/events.ts +369 -12
- package/src/types/replay.ts +14 -1
- package/src/types.ts +279 -4
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Health diagnostics computation for completed run traces.
|
|
3
|
+
*
|
|
4
|
+
* @module
|
|
5
|
+
*/
|
|
6
|
+
import type { HealthAnomaly, RunHealthSummary, Trace } from "../types.js";
|
|
7
|
+
import type { TurnEvent } from "../types/events.js";
|
|
8
|
+
|
|
9
|
+
// Re-export types so callers who import from this subpath get them directly.
|
|
10
|
+
export type { HealthAnomaly, RunHealthSummary } from "../types.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Thresholds for health anomaly detection.
 *
 * Both fields are optional. When a field is absent, the corresponding
 * threshold-gated anomaly is suppressed entirely. Threshold-free anomalies
 * (`empty-contribution`) always fire when qualifying events are present,
 * regardless of this config.
 *
 * Note: `provider-error-recovered` is in the AnomalyCode union but is never
 * emitted by computeHealth in Phase 7 - no trace signal exists without an
 * event-shape change. See STATE.md: "Phase 6 is the only event-shape change."
 */
export interface HealthThresholds {
  /**
   * Per-agent turn count threshold. If an agent produces strictly more than
   * this many agent-turn events, a "runaway-turns" anomaly is emitted with
   * severity "error". The threshold value in the anomaly record equals this
   * number.
   *
   * Must be a finite, non-negative number; computeHealth throws a RangeError
   * otherwise.
   */
  readonly runawayTurns?: number;
  /**
   * Budget utilization percentage threshold (0-100). If budget utilization
   * (finalCost / maxUsd * 100) is greater than or equal to this value, a
   * "budget-near-miss" anomaly is emitted with severity "warning".
   * Suppressed when no USD cap is configured.
   *
   * Must be a finite number in [0, 100]; computeHealth throws a RangeError
   * otherwise.
   */
  readonly budgetNearMissPct?: number;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Default health thresholds used for `result.health` auto-computation.
 *
 * The object is empty and frozen, so both threshold-gated anomalies
 * (runaway-turns, budget-near-miss) are suppressed by default. Only
 * threshold-free anomalies (empty-contribution) can fire on the
 * auto-compute path.
 */
export const DEFAULT_HEALTH_THRESHOLDS: HealthThresholds = Object.freeze({});
|
|
46
|
+
|
|
47
|
+
/**
 * Compute a health summary from a completed run trace.
 *
 * Pure function - no side effects, no I/O, no storage access. Deterministic:
 * given the same trace and thresholds, always produces the same result.
 *
 * Anomalies are appended in a fixed order: all `runaway-turns` records (one
 * per offending agent, in order of each agent's first turn), then at most one
 * `budget-near-miss`, then one `empty-contribution` per turn whose output is
 * empty or whitespace-only (in event order).
 *
 * @param trace - Completed run trace (from RunResult.trace or a stored trace).
 * @param thresholds - Optional threshold overrides. Defaults to DEFAULT_HEALTH_THRESHOLDS.
 * @returns The anomaly list plus aggregate stats (total turns, distinct agent
 *   count, and budget utilization percentage or `null` when no USD cap is set).
 * @throws RangeError if a provided threshold is not finite, is negative, or
 *   (for `budgetNearMissPct`) exceeds 100.
 */
export function computeHealth(
  trace: Trace,
  thresholds: HealthThresholds = DEFAULT_HEALTH_THRESHOLDS
): RunHealthSummary {
  const { runawayTurns, budgetNearMissPct } = thresholds;
  assertFiniteNonNegativeThreshold(runawayTurns, "runawayTurns");
  assertBudgetNearMissThreshold(budgetNearMissPct);

  const turnEvents = trace.events.filter((event): event is TurnEvent => event.type === "agent-turn");

  // Count turns per agent in a single pass. Map iteration follows insertion
  // order, so agents are visited in order of their first turn.
  const turnsByAgent = new Map<TurnEvent["agentId"], number>();
  for (const event of turnEvents) {
    turnsByAgent.set(event.agentId, (turnsByAgent.get(event.agentId) ?? 0) + 1);
  }

  const maxUsd = trace.budget.caps?.maxUsd;
  const finalCost = trace.finalOutput.cost.usd;

  // null means "no USD cap configured". A zero cap cannot be divided by, so it
  // is reported as 0% when nothing was spent and 100% otherwise.
  let budgetUtilizationPct: number | null = null;
  if (maxUsd !== undefined) {
    if (maxUsd === 0) {
      budgetUtilizationPct = finalCost === 0 ? 0 : 100;
    } else {
      budgetUtilizationPct = (finalCost / maxUsd) * 100;
    }
  }

  const anomalies: HealthAnomaly[] = [];

  if (runawayTurns !== undefined) {
    for (const [agentId, count] of turnsByAgent) {
      if (count > runawayTurns) {
        anomalies.push({
          code: "runaway-turns",
          severity: "error",
          value: count,
          threshold: runawayTurns,
          agentId
        });
      }
    }
  }

  if (
    budgetNearMissPct !== undefined &&
    budgetUtilizationPct !== null &&
    budgetUtilizationPct >= budgetNearMissPct
  ) {
    anomalies.push({
      code: "budget-near-miss",
      severity: "warning",
      value: budgetUtilizationPct,
      threshold: budgetNearMissPct
    });
  }

  for (const event of turnEvents) {
    if (event.output.trim() === "") {
      anomalies.push({
        code: "empty-contribution",
        severity: "error",
        value: 0,
        threshold: 0,
        agentId: event.agentId
      });
    }
  }

  // provider-error-recovered is deferred: no trace signal exists in Phase 7.
  return {
    anomalies,
    stats: {
      totalTurns: turnEvents.length,
      agentCount: turnsByAgent.size,
      budgetUtilizationPct
    }
  };
}

/**
 * Reject a provided threshold that is NaN, infinite, or negative.
 * `undefined` (threshold not configured) is accepted.
 *
 * @throws RangeError naming the offending threshold.
 */
function assertFiniteNonNegativeThreshold(value: number | undefined, name: string): void {
  if (value !== undefined && (!Number.isFinite(value) || value < 0)) {
    throw new RangeError(`${name} must be a finite non-negative number`);
  }
}

/**
 * Validate `budgetNearMissPct`: finite, non-negative, and at most 100.
 * `undefined` (threshold not configured) is accepted.
 *
 * @throws RangeError if the value is outside [0, 100] or not finite.
 */
function assertBudgetNearMissThreshold(value: number | undefined): void {
  assertFiniteNonNegativeThreshold(value, "budgetNearMissPct");

  if (value !== undefined && value > 100) {
    throw new RangeError("budgetNearMissPct must be between 0 and 100");
  }
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Typed event query function for filtering completed trace events.
|
|
3
|
+
*
|
|
4
|
+
* @module
|
|
5
|
+
*/
|
|
6
|
+
import type {
|
|
7
|
+
BroadcastEvent,
|
|
8
|
+
BudgetStopEvent,
|
|
9
|
+
FinalEvent,
|
|
10
|
+
ModelOutputChunkEvent,
|
|
11
|
+
ModelRequestEvent,
|
|
12
|
+
ModelResponseEvent,
|
|
13
|
+
RoleAssignmentEvent,
|
|
14
|
+
RunEvent,
|
|
15
|
+
SubRunBudgetClampedEvent,
|
|
16
|
+
SubRunCompletedEvent,
|
|
17
|
+
SubRunConcurrencyClampedEvent,
|
|
18
|
+
SubRunFailedEvent,
|
|
19
|
+
SubRunParentAbortedEvent,
|
|
20
|
+
SubRunQueuedEvent,
|
|
21
|
+
SubRunStartedEvent,
|
|
22
|
+
ToolCallEvent,
|
|
23
|
+
ToolResultEvent,
|
|
24
|
+
TurnEvent
|
|
25
|
+
} from "../types.js";
|
|
26
|
+
|
|
27
|
+
/**
 * Filter criteria for querying a completed trace event log.
 *
 * All fields are optional. AND semantics: all present fields must match.
 * An empty filter object returns all events. An unmatched filter returns [].
 *
 * `costRange` matches only events with a `cost.usd` field: TurnEvent and
 * BroadcastEvent. Events without a cost field are silently excluded from the
 * results whenever `costRange` is set.
 *
 * `turnRange` uses the global 1-based position of agent-turn events across
 * all agents. Position 1 is the first TurnEvent in the event array regardless
 * of which agent produced it. BroadcastEvent.round is a separate concept and
 * is not matched by turnRange.
 */
export interface EventQueryFilter {
  /** Filter to events with this exact type discriminant. */
  readonly type?: RunEvent["type"];
  /** Filter to events where agentId === this value. Events without agentId are excluded. */
  readonly agentId?: string;
  /**
   * Filter to agent-turn events at the specified global 1-based position range.
   * Only TurnEvents are included in results when this filter is set.
   * Both bounds are inclusive; an omitted bound leaves that side open.
   */
  readonly turnRange?: {
    readonly min?: number;
    readonly max?: number;
  };
  /**
   * Filter to events where cost.usd is within [min, max].
   * Only TurnEvent and BroadcastEvent have cost.usd - all other events are excluded.
   * Both bounds are inclusive; an omitted bound leaves that side open.
   */
  readonly costRange?: {
    readonly min?: number;
    readonly max?: number;
  };
}
|
|
64
|
+
|
|
65
|
+
// One overload per RunEvent discriminant (D-03: hand-written overloads, IDE-reliable)
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "role-assignment" }): RoleAssignmentEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "model-request" }): ModelRequestEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "model-response" }): ModelResponseEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "model-output-chunk" }): ModelOutputChunkEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "tool-call" }): ToolCallEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "tool-result" }): ToolResultEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "agent-turn" }): TurnEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "broadcast" }): BroadcastEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "sub-run-started" }): SubRunStartedEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "sub-run-completed" }): SubRunCompletedEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "sub-run-failed" }): SubRunFailedEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "sub-run-parent-aborted" }): SubRunParentAbortedEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "sub-run-budget-clamped" }): SubRunBudgetClampedEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "sub-run-queued" }): SubRunQueuedEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "sub-run-concurrency-clamped" }): SubRunConcurrencyClampedEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "budget-stop" }): BudgetStopEvent[];
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter & { type: "final" }): FinalEvent[];
// Fallback overload: no type constraint -> returns full RunEvent[]
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter): RunEvent[];
/**
 * Return the events that satisfy every criterion present in `filter`
 * (AND semantics), in their original order.
 *
 * The input array is never mutated; a new array is always returned, even for
 * an empty filter. When `filter.type` is given as a literal, the overloads
 * above narrow the element type of the result.
 *
 * @param events - Event log to query.
 * @param filter - Criteria to apply; absent fields impose no constraint.
 * @returns A new array of the matching events.
 */
// Implementation signature (not visible to callers):
export function queryEvents(events: readonly RunEvent[], filter: EventQueryFilter): RunEvent[] {
  const { type, agentId, turnRange, costRange } = filter;

  // Turn positions are defined against the full, unfiltered log, so the
  // in-range set must be computed before any other criterion is applied.
  const turnsInRange = turnRange !== undefined ? collectTurnsInRange(events, turnRange) : undefined;

  return events.filter((event) => {
    if (type !== undefined && event.type !== type) {
      return false;
    }

    if (agentId !== undefined) {
      const matchesAgent = "agentId" in event && (event as { agentId?: string }).agentId === agentId;
      if (!matchesAgent) {
        return false;
      }
    }

    if (turnsInRange !== undefined && !(event.type === "agent-turn" && turnsInRange.has(event))) {
      return false;
    }

    if (costRange !== undefined) {
      // Only these two event kinds carry cost.usd; everything else is excluded.
      if (event.type !== "agent-turn" && event.type !== "broadcast") {
        return false;
      }

      const usd = event.cost.usd;
      const { min, max } = costRange;
      if (!((min === undefined || usd >= min) && (max === undefined || usd <= max))) {
        return false;
      }
    }

    return true;
  });
}

/**
 * Collect the agent-turn events whose global 1-based position (counted over
 * all agent-turn events in `events`) lies within the inclusive range.
 */
function collectTurnsInRange(
  events: readonly RunEvent[],
  range: NonNullable<EventQueryFilter["turnRange"]>
): Set<RunEvent> {
  const { min, max } = range;
  const selected = new Set<RunEvent>();
  let position = 0;

  for (const event of events) {
    if (event.type !== "agent-turn") {
      continue;
    }

    position += 1;
    if ((min === undefined || position >= min) && (max === undefined || position <= max)) {
      selected.add(event);
    }
  }

  return selected;
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Metrics hook interface for run-completion counters (Phase 10 / METR-01..METR-02).
|
|
3
|
+
*
|
|
4
|
+
* The SDK does not import any metrics backend. Callers provide an object
|
|
5
|
+
* satisfying `MetricsHook` to receive named counters at run and sub-run
|
|
6
|
+
* completion. When absent, zero overhead — no allocations, no branch cost.
|
|
7
|
+
*
|
|
8
|
+
* `replay()` and `replayStream()` ignore `metricsHook` on engine options —
|
|
9
|
+
* counters for historical replays would be misleading.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
/**
 * Counters reported to a `MetricsHook` when a run or sub-run reaches a
 * terminal state. "Direct" fields cover only this run; "total" fields also
 * include the full sub-run subtree.
 */
export interface RunMetricsSnapshot {
  /** Terminal state the run ended in. */
  readonly outcome: "completed" | "budget-stopped" | "aborted";
  /** Direct input tokens for this run, excluding nested sub-runs. */
  readonly inputTokens: number;
  /** Direct output tokens for this run, excluding nested sub-runs. */
  readonly outputTokens: number;
  /** Direct cost in USD for this run, excluding nested sub-runs. */
  readonly costUsd: number;
  /** Total input tokens including the full sub-run subtree (already rolled up). */
  readonly totalInputTokens: number;
  /** Total output tokens including the full sub-run subtree. */
  readonly totalOutputTokens: number;
  /** Total cost in USD including the full sub-run subtree. */
  readonly totalCostUsd: number;
  /** Count of agent-turn events directly in this run (own-only, not nested sub-runs). */
  readonly turns: number;
  /** Wall-clock duration in milliseconds from run start to terminal state. */
  readonly durationMs: number;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Caller-supplied callbacks that receive run-completion counters.
 * Both callbacks are optional and may be synchronous or asynchronous.
 */
export interface MetricsHook {
  /**
   * Called once at every terminal state of the top-level run (completed,
   * budget-stopped, aborted). When the hook is async, the SDK attaches
   * `.catch` and does NOT await - hook latency never delays run completion.
   */
  readonly onRunComplete?: (snapshot: RunMetricsSnapshot) => void | Promise<void>;
  /**
   * Called once for each coordinator-dispatched child run that completes.
   * Fires from the parent run's emit closure on the `sub-run-completed` event.
   * Does NOT fire for failed sub-runs (`sub-run-failed`).
   */
  readonly onSubRunComplete?: (snapshot: RunMetricsSnapshot) => void | Promise<void>;
}
|
package/src/runtime/model.ts
CHANGED
|
@@ -24,14 +24,28 @@ type ModelUsage = NonNullable<ModelResponse["usage"]>;
|
|
|
24
24
|
|
|
25
25
|
export async function generateModelTurn(options: GenerateModelTurnOptions): Promise<ModelResponse> {
|
|
26
26
|
const startedAt = new Date().toISOString();
|
|
27
|
+
const modelId = options.model.modelId ?? options.model.id;
|
|
28
|
+
const traceRequest = requestForTrace(options.request);
|
|
27
29
|
let response: ModelResponse;
|
|
28
30
|
|
|
29
31
|
throwIfAborted(options.request.signal, options.model.id);
|
|
30
32
|
|
|
33
|
+
options.emit({
|
|
34
|
+
type: "model-request",
|
|
35
|
+
runId: options.runId,
|
|
36
|
+
callId: options.callId,
|
|
37
|
+
providerId: options.model.id,
|
|
38
|
+
modelId,
|
|
39
|
+
startedAt,
|
|
40
|
+
agentId: options.agent.id,
|
|
41
|
+
role: options.agent.role,
|
|
42
|
+
request: traceRequest
|
|
43
|
+
});
|
|
44
|
+
|
|
31
45
|
if (!options.model.stream) {
|
|
32
46
|
response = await options.model.generate(options.request);
|
|
33
47
|
throwIfAborted(options.request.signal, options.model.id);
|
|
34
|
-
recordProviderCall(response, startedAt, options);
|
|
48
|
+
recordProviderCall(response, startedAt, modelId, traceRequest, options);
|
|
35
49
|
return response;
|
|
36
50
|
}
|
|
37
51
|
|
|
@@ -86,32 +100,50 @@ export async function generateModelTurn(options: GenerateModelTurnOptions): Prom
|
|
|
86
100
|
...(metadata !== undefined ? { metadata } : {})
|
|
87
101
|
};
|
|
88
102
|
throwIfAborted(options.request.signal, options.model.id);
|
|
89
|
-
recordProviderCall(response, startedAt, options);
|
|
103
|
+
recordProviderCall(response, startedAt, modelId, traceRequest, options);
|
|
90
104
|
return response;
|
|
91
105
|
}
|
|
92
106
|
|
|
93
107
|
function recordProviderCall(
|
|
94
108
|
response: ModelResponse,
|
|
95
109
|
startedAt: string,
|
|
110
|
+
modelId: string,
|
|
111
|
+
request: ModelRequest,
|
|
96
112
|
options: GenerateModelTurnOptions
|
|
97
113
|
): void {
|
|
114
|
+
const completedAt = new Date().toISOString();
|
|
115
|
+
|
|
116
|
+
options.emit({
|
|
117
|
+
type: "model-response",
|
|
118
|
+
runId: options.runId,
|
|
119
|
+
callId: options.callId,
|
|
120
|
+
providerId: options.model.id,
|
|
121
|
+
modelId,
|
|
122
|
+
startedAt,
|
|
123
|
+
completedAt,
|
|
124
|
+
agentId: options.agent.id,
|
|
125
|
+
role: options.agent.role,
|
|
126
|
+
response
|
|
127
|
+
});
|
|
128
|
+
|
|
98
129
|
options.onProviderCall?.({
|
|
99
130
|
kind: "replay-trace-provider-call",
|
|
100
131
|
callId: options.callId,
|
|
101
132
|
providerId: options.model.id,
|
|
133
|
+
modelId,
|
|
102
134
|
startedAt,
|
|
103
|
-
completedAt
|
|
135
|
+
completedAt,
|
|
104
136
|
agentId: options.agent.id,
|
|
105
137
|
role: options.agent.role,
|
|
106
|
-
request
|
|
138
|
+
request,
|
|
107
139
|
response
|
|
108
140
|
});
|
|
109
141
|
}
|
|
110
142
|
|
|
111
143
|
function requestForTrace(request: ModelRequest): ModelRequest {
|
|
112
144
|
return {
|
|
113
|
-
messages: request.messages,
|
|
145
|
+
messages: request.messages.map((message) => ({ ...message })),
|
|
114
146
|
temperature: request.temperature,
|
|
115
|
-
metadata: request.metadata
|
|
147
|
+
metadata: JSON.parse(JSON.stringify(request.metadata)) as ModelRequest["metadata"]
|
|
116
148
|
};
|
|
117
149
|
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import type { ModelRequestEvent, ModelResponseEvent } from "../types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Normalized provenance fields from a completed model-response event.
 * All five fields are present and JSON-serializable.
 */
export interface ProvenanceRecord {
  /** Identifier of the model that served the call. */
  readonly modelId: string;
  /** Identifier of the provider that served the call. */
  readonly providerId: string;
  /** Identifier of the provider call, copied from the event's callId. */
  readonly callId: string;
  /** Start timestamp, copied from the event's startedAt. */
  readonly startedAt: string;
  /** Completion timestamp, copied from the event's completedAt. */
  readonly completedAt: string;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Normalized provenance fields from a model-request event.
 * completedAt is absent because the call has not completed at this point.
 */
export interface PartialProvenanceRecord {
  /** Identifier of the model the request targets. */
  readonly modelId: string;
  /** Identifier of the provider the request targets. */
  readonly providerId: string;
  /** Identifier of the provider call, copied from the event's callId. */
  readonly callId: string;
  /** Start timestamp, copied from the event's startedAt. */
  readonly startedAt: string;
}
|
|
25
|
+
|
|
26
|
+
export function getProvenance(event: ModelResponseEvent): ProvenanceRecord;
export function getProvenance(event: ModelRequestEvent): PartialProvenanceRecord;
/**
 * Extract the normalized provenance fields from a model-request or
 * model-response event.
 *
 * The result is a fresh plain object containing only the provenance fields;
 * every other property of the event is dropped.
 *
 * @param event - A `model-request` or `model-response` event.
 * @returns `modelId`, `providerId`, `callId` and `startedAt`, plus
 *   `completedAt` when the event is a `model-response`.
 */
export function getProvenance(
  event: ModelRequestEvent | ModelResponseEvent
): ProvenanceRecord | PartialProvenanceRecord {
  const { modelId, providerId, callId, startedAt } = event;

  // Only a response has finished, so only it carries a completion timestamp.
  if (event.type === "model-response") {
    return { modelId, providerId, callId, startedAt, completedAt: event.completedAt };
  }

  return { modelId, providerId, callId, startedAt };
}
|
|
@@ -16,6 +16,7 @@ import type {
|
|
|
16
16
|
TerminationCondition,
|
|
17
17
|
TerminationStopRecord,
|
|
18
18
|
Tier,
|
|
19
|
+
Trace,
|
|
19
20
|
TranscriptEntry
|
|
20
21
|
} from "../types.js";
|
|
21
22
|
import { createRunId, elapsedMs, nowMs } from "./ids.js";
|
|
@@ -35,6 +36,7 @@ import {
|
|
|
35
36
|
emptyCost,
|
|
36
37
|
nextProviderCallId
|
|
37
38
|
} from "./defaults.js";
|
|
39
|
+
import { computeHealth, DEFAULT_HEALTH_THRESHOLDS } from "./health.js";
|
|
38
40
|
import { throwIfAborted } from "./cancellation.js";
|
|
39
41
|
import { isParticipatingDecision, parseAgentDecision } from "./decisions.js";
|
|
40
42
|
import { generateModelTurn } from "./model.js";
|
|
@@ -218,7 +220,15 @@ export async function runSequential(options: SequentialRunOptions): Promise<RunR
|
|
|
218
220
|
}
|
|
219
221
|
}
|
|
220
222
|
|
|
221
|
-
|
|
223
|
+
// Preferred: most recent entry with an explicit participating decision.
|
|
224
|
+
// Fallback: most recent entry that has no parsed decision at all (preserves
|
|
225
|
+
// pre-discriminated-union behavior where unparsed turns were treated as
|
|
226
|
+
// participating). Delegate decisions are explicitly non-participating.
|
|
227
|
+
const reversed = [...transcript].reverse();
|
|
228
|
+
const output =
|
|
229
|
+
reversed.find((entry) => isParticipatingDecision(entry.decision))?.output ??
|
|
230
|
+
reversed.find((entry) => entry.decision === undefined)?.output ??
|
|
231
|
+
"";
|
|
222
232
|
throwIfAborted(options.signal, options.model.id);
|
|
223
233
|
const final: RunEvent = {
|
|
224
234
|
type: "final",
|
|
@@ -234,45 +244,46 @@ export async function runSequential(options: SequentialRunOptions): Promise<RunR
|
|
|
234
244
|
transcriptEntryCount: transcript.length
|
|
235
245
|
});
|
|
236
246
|
const finalEvent = events.at(-1);
|
|
247
|
+
const trace: Trace = {
|
|
248
|
+
schemaVersion: "1.0",
|
|
249
|
+
runId,
|
|
250
|
+
protocol: "sequential",
|
|
251
|
+
tier: options.tier,
|
|
252
|
+
modelProviderId: options.model.id,
|
|
253
|
+
agentsUsed: activeAgents,
|
|
254
|
+
inputs: createReplayTraceRunInputs({
|
|
255
|
+
intent: options.intent,
|
|
256
|
+
protocol: options.protocol,
|
|
257
|
+
tier: options.tier,
|
|
258
|
+
modelProviderId: options.model.id,
|
|
259
|
+
agents: activeAgents,
|
|
260
|
+
temperature: options.temperature
|
|
261
|
+
}),
|
|
262
|
+
budget: createReplayTraceBudget({
|
|
263
|
+
tier: options.tier,
|
|
264
|
+
...(options.budget ? { caps: options.budget } : {}),
|
|
265
|
+
...(options.terminate ? { termination: options.terminate } : {})
|
|
266
|
+
}),
|
|
267
|
+
budgetStateChanges: createReplayTraceBudgetStateChanges(events),
|
|
268
|
+
seed: createReplayTraceSeed(options.seed),
|
|
269
|
+
protocolDecisions,
|
|
270
|
+
providerCalls,
|
|
271
|
+
finalOutput: createReplayTraceFinalOutput(output, finalEvent ?? events[0] ?? {
|
|
272
|
+
type: "final",
|
|
273
|
+
runId,
|
|
274
|
+
at: "",
|
|
275
|
+
output,
|
|
276
|
+
cost: totalCost,
|
|
277
|
+
transcript: createTranscriptLink(transcript)
|
|
278
|
+
}),
|
|
279
|
+
events,
|
|
280
|
+
transcript
|
|
281
|
+
};
|
|
237
282
|
|
|
238
283
|
return {
|
|
239
284
|
output,
|
|
240
285
|
eventLog: createRunEventLog(runId, "sequential", events),
|
|
241
|
-
trace
|
|
242
|
-
schemaVersion: "1.0",
|
|
243
|
-
runId,
|
|
244
|
-
protocol: "sequential",
|
|
245
|
-
tier: options.tier,
|
|
246
|
-
modelProviderId: options.model.id,
|
|
247
|
-
agentsUsed: activeAgents,
|
|
248
|
-
inputs: createReplayTraceRunInputs({
|
|
249
|
-
intent: options.intent,
|
|
250
|
-
protocol: options.protocol,
|
|
251
|
-
tier: options.tier,
|
|
252
|
-
modelProviderId: options.model.id,
|
|
253
|
-
agents: activeAgents,
|
|
254
|
-
temperature: options.temperature
|
|
255
|
-
}),
|
|
256
|
-
budget: createReplayTraceBudget({
|
|
257
|
-
tier: options.tier,
|
|
258
|
-
...(options.budget ? { caps: options.budget } : {}),
|
|
259
|
-
...(options.terminate ? { termination: options.terminate } : {})
|
|
260
|
-
}),
|
|
261
|
-
budgetStateChanges: createReplayTraceBudgetStateChanges(events),
|
|
262
|
-
seed: createReplayTraceSeed(options.seed),
|
|
263
|
-
protocolDecisions,
|
|
264
|
-
providerCalls,
|
|
265
|
-
finalOutput: createReplayTraceFinalOutput(output, finalEvent ?? events[0] ?? {
|
|
266
|
-
type: "final",
|
|
267
|
-
runId,
|
|
268
|
-
at: "",
|
|
269
|
-
output,
|
|
270
|
-
cost: totalCost,
|
|
271
|
-
transcript: createTranscriptLink(transcript)
|
|
272
|
-
}),
|
|
273
|
-
events,
|
|
274
|
-
transcript
|
|
275
|
-
},
|
|
286
|
+
trace,
|
|
276
287
|
transcript,
|
|
277
288
|
usage: createRunUsage(totalCost),
|
|
278
289
|
metadata: createRunMetadata({
|
|
@@ -290,7 +301,8 @@ export async function runSequential(options: SequentialRunOptions): Promise<RunR
|
|
|
290
301
|
cost: totalCost,
|
|
291
302
|
events
|
|
292
303
|
}),
|
|
293
|
-
cost: totalCost
|
|
304
|
+
cost: totalCost,
|
|
305
|
+
health: computeHealth(trace, DEFAULT_HEALTH_THRESHOLDS)
|
|
294
306
|
};
|
|
295
307
|
|
|
296
308
|
function stopIfNeeded(): boolean {
|
|
@@ -369,4 +381,3 @@ function responseCost(response: ModelResponse): CostSummary {
|
|
|
369
381
|
totalTokens: response.usage?.totalTokens ?? 0
|
|
370
382
|
};
|
|
371
383
|
}
|
|
372
|
-
|
package/src/runtime/shared.ts
CHANGED
|
@@ -16,6 +16,7 @@ import type {
|
|
|
16
16
|
TerminationCondition,
|
|
17
17
|
TerminationStopRecord,
|
|
18
18
|
Tier,
|
|
19
|
+
Trace,
|
|
19
20
|
TranscriptEntry
|
|
20
21
|
} from "../types.js";
|
|
21
22
|
import { createRunId, elapsedMs, nowMs, providerCallIdFor } from "./ids.js";
|
|
@@ -34,6 +35,7 @@ import {
|
|
|
34
35
|
createTranscriptLink,
|
|
35
36
|
emptyCost
|
|
36
37
|
} from "./defaults.js";
|
|
38
|
+
import { computeHealth, DEFAULT_HEALTH_THRESHOLDS } from "./health.js";
|
|
37
39
|
import { throwIfAborted } from "./cancellation.js";
|
|
38
40
|
import { parseAgentDecision } from "./decisions.js";
|
|
39
41
|
import { generateModelTurn } from "./model.js";
|
|
@@ -242,45 +244,46 @@ export async function runShared(options: SharedRunOptions): Promise<RunResult> {
|
|
|
242
244
|
transcriptEntryCount: transcript.length
|
|
243
245
|
});
|
|
244
246
|
const finalEvent = events.at(-1);
|
|
247
|
+
const trace: Trace = {
|
|
248
|
+
schemaVersion: "1.0",
|
|
249
|
+
runId,
|
|
250
|
+
protocol: "shared",
|
|
251
|
+
tier: options.tier,
|
|
252
|
+
modelProviderId: options.model.id,
|
|
253
|
+
agentsUsed: activeAgents,
|
|
254
|
+
inputs: createReplayTraceRunInputs({
|
|
255
|
+
intent: options.intent,
|
|
256
|
+
protocol: options.protocol,
|
|
257
|
+
tier: options.tier,
|
|
258
|
+
modelProviderId: options.model.id,
|
|
259
|
+
agents: activeAgents,
|
|
260
|
+
temperature: options.temperature
|
|
261
|
+
}),
|
|
262
|
+
budget: createReplayTraceBudget({
|
|
263
|
+
tier: options.tier,
|
|
264
|
+
...(options.budget ? { caps: options.budget } : {}),
|
|
265
|
+
...(options.terminate ? { termination: options.terminate } : {})
|
|
266
|
+
}),
|
|
267
|
+
budgetStateChanges: createReplayTraceBudgetStateChanges(events),
|
|
268
|
+
seed: createReplayTraceSeed(options.seed),
|
|
269
|
+
protocolDecisions,
|
|
270
|
+
providerCalls,
|
|
271
|
+
finalOutput: createReplayTraceFinalOutput(output, finalEvent ?? {
|
|
272
|
+
type: "final",
|
|
273
|
+
runId,
|
|
274
|
+
at: "",
|
|
275
|
+
output,
|
|
276
|
+
cost: totalCost,
|
|
277
|
+
transcript: createTranscriptLink(transcript)
|
|
278
|
+
}),
|
|
279
|
+
events,
|
|
280
|
+
transcript
|
|
281
|
+
};
|
|
245
282
|
|
|
246
283
|
return {
|
|
247
284
|
output,
|
|
248
285
|
eventLog: createRunEventLog(runId, "shared", events),
|
|
249
|
-
trace
|
|
250
|
-
schemaVersion: "1.0",
|
|
251
|
-
runId,
|
|
252
|
-
protocol: "shared",
|
|
253
|
-
tier: options.tier,
|
|
254
|
-
modelProviderId: options.model.id,
|
|
255
|
-
agentsUsed: activeAgents,
|
|
256
|
-
inputs: createReplayTraceRunInputs({
|
|
257
|
-
intent: options.intent,
|
|
258
|
-
protocol: options.protocol,
|
|
259
|
-
tier: options.tier,
|
|
260
|
-
modelProviderId: options.model.id,
|
|
261
|
-
agents: activeAgents,
|
|
262
|
-
temperature: options.temperature
|
|
263
|
-
}),
|
|
264
|
-
budget: createReplayTraceBudget({
|
|
265
|
-
tier: options.tier,
|
|
266
|
-
...(options.budget ? { caps: options.budget } : {}),
|
|
267
|
-
...(options.terminate ? { termination: options.terminate } : {})
|
|
268
|
-
}),
|
|
269
|
-
budgetStateChanges: createReplayTraceBudgetStateChanges(events),
|
|
270
|
-
seed: createReplayTraceSeed(options.seed),
|
|
271
|
-
protocolDecisions,
|
|
272
|
-
providerCalls,
|
|
273
|
-
finalOutput: createReplayTraceFinalOutput(output, finalEvent ?? {
|
|
274
|
-
type: "final",
|
|
275
|
-
runId,
|
|
276
|
-
at: "",
|
|
277
|
-
output,
|
|
278
|
-
cost: totalCost,
|
|
279
|
-
transcript: createTranscriptLink(transcript)
|
|
280
|
-
}),
|
|
281
|
-
events,
|
|
282
|
-
transcript
|
|
283
|
-
},
|
|
286
|
+
trace,
|
|
284
287
|
transcript,
|
|
285
288
|
usage: createRunUsage(totalCost),
|
|
286
289
|
metadata: createRunMetadata({
|
|
@@ -298,7 +301,8 @@ export async function runShared(options: SharedRunOptions): Promise<RunResult> {
|
|
|
298
301
|
cost: totalCost,
|
|
299
302
|
events
|
|
300
303
|
}),
|
|
301
|
-
cost: totalCost
|
|
304
|
+
cost: totalCost,
|
|
305
|
+
health: computeHealth(trace, DEFAULT_HEALTH_THRESHOLDS)
|
|
302
306
|
};
|
|
303
307
|
|
|
304
308
|
function stopIfNeeded(): boolean {
|
|
@@ -375,4 +379,3 @@ function responseCost(response: ModelResponse): CostSummary {
|
|
|
375
379
|
totalTokens: response.usage?.totalTokens ?? 0
|
|
376
380
|
};
|
|
377
381
|
}
|
|
378
|
-
|