@prometheus-ai/agent-core 0.5.4 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/agent-loop.d.ts +7 -0
- package/dist/types/agent.d.ts +41 -13
- package/dist/types/compaction/branch-summarization.d.ts +3 -3
- package/dist/types/compaction/compaction.d.ts +11 -9
- package/dist/types/compaction/messages.d.ts +14 -2
- package/dist/types/compaction/openai.d.ts +18 -3
- package/dist/types/compaction/pruning.d.ts +55 -0
- package/dist/types/compaction/shake.d.ts +3 -1
- package/dist/types/compaction/utils.d.ts +18 -2
- package/dist/types/proxy.d.ts +4 -3
- package/dist/types/telemetry.d.ts +59 -57
- package/dist/types/types.d.ts +60 -16
- package/package.json +6 -4
- package/src/agent-loop.ts +660 -181
- package/src/agent.ts +103 -30
- package/src/compaction/branch-summarization.ts +8 -7
- package/src/compaction/compaction.ts +69 -34
- package/src/compaction/messages.ts +78 -64
- package/src/compaction/openai.ts +88 -74
- package/src/compaction/prompts/branch-summary.md +1 -1
- package/src/compaction/prompts/compaction-summary-context.md +1 -1
- package/src/compaction/prompts/compaction-summary.md +2 -2
- package/src/compaction/prompts/compaction-update-summary.md +3 -3
- package/src/compaction/prompts/file-operations.md +3 -8
- package/src/compaction/prompts/summarization-system.md +1 -1
- package/src/compaction/pruning.ts +240 -8
- package/src/compaction/shake.ts +7 -3
- package/src/compaction/utils.ts +97 -19
- package/src/proxy.ts +13 -7
- package/src/telemetry.ts +126 -113
- package/src/types.ts +65 -16
package/src/telemetry.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* OpenTelemetry instrumentation for the agent loop.
|
|
3
3
|
*
|
|
4
4
|
* Implements the OpenTelemetry GenAI semantic conventions
|
|
5
|
-
* (https://opentelemetry.io/docs/specs/semconv/gen-ai/) plus `
|
|
5
|
+
* (https://opentelemetry.io/docs/specs/semconv/gen-ai/) plus `pi.gen_ai.*`
|
|
6
6
|
* extension attributes for run summaries, dashboard summaries, and cost hints
|
|
7
7
|
* that are useful to downstream observability UIs.
|
|
8
8
|
*
|
|
@@ -50,6 +50,7 @@ import {
|
|
|
50
50
|
} from "@prometheus-ai/ai";
|
|
51
51
|
import { AgentRunCollector, type AgentRunCoverage, type AgentRunSummary, type ToolStatus } from "./run-collector";
|
|
52
52
|
import type { AgentTool } from "./types";
|
|
53
|
+
import { EventLoopKeepalive } from "./utils/yield";
|
|
53
54
|
|
|
54
55
|
/** Default tracer name. Override via {@link AgentTelemetryConfig.tracerName}. */
|
|
55
56
|
export const DEFAULT_TRACER_NAME = "@prometheus-ai/agent-core";
|
|
@@ -123,39 +124,40 @@ export const enum OpenAIAttr {
|
|
|
123
124
|
}
|
|
124
125
|
|
|
125
126
|
/** Project extension attributes. Kept out of the reserved `gen_ai.*` namespace. */
|
|
126
|
-
export const enum
|
|
127
|
-
AgentStepNumber = "
|
|
128
|
-
AgentStepCount = "
|
|
129
|
-
RequestReasoningEffort = "
|
|
130
|
-
RequestToolChoice = "
|
|
131
|
-
RequestAvailableTools = "
|
|
132
|
-
RequestMessages = "
|
|
133
|
-
ResponseText = "
|
|
134
|
-
ResponseToolCalls = "
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
127
|
+
export const enum PiGenAIAttr {
|
|
128
|
+
AgentStepNumber = "pi.gen_ai.agent.step.number",
|
|
129
|
+
AgentStepCount = "pi.gen_ai.agent.step.count",
|
|
130
|
+
RequestReasoningEffort = "pi.gen_ai.request.reasoning.effort",
|
|
131
|
+
RequestToolChoice = "pi.gen_ai.request.tool.choice",
|
|
132
|
+
RequestAvailableTools = "pi.gen_ai.request.available_tools",
|
|
133
|
+
RequestMessages = "pi.gen_ai.request.messages",
|
|
134
|
+
ResponseText = "pi.gen_ai.response.text",
|
|
135
|
+
ResponseToolCalls = "pi.gen_ai.response.tool_calls",
|
|
136
|
+
ResponseUpstreamProvider = "pi.gen_ai.response.upstream_provider",
|
|
137
|
+
UsageTotalTokens = "pi.gen_ai.usage.total_tokens",
|
|
138
|
+
UsageServerSideTools = "pi.gen_ai.usage.server_tool_requests",
|
|
139
|
+
CostEstimatedUsd = "pi.gen_ai.cost.estimated_usd",
|
|
140
|
+
CostInputUsd = "pi.gen_ai.cost.input_usd",
|
|
141
|
+
CostOutputUsd = "pi.gen_ai.cost.output_usd",
|
|
142
|
+
CostUnavailableReason = "pi.gen_ai.cost.unavailable_reason",
|
|
143
|
+
ToolStatus = "pi.gen_ai.tool.status",
|
|
144
|
+
ToolCallIntent = "pi.gen_ai.tool.call.intent",
|
|
145
|
+
HandoffFromAgentName = "pi.gen_ai.handoff.from_agent.name",
|
|
146
|
+
HandoffFromAgentId = "pi.gen_ai.handoff.from_agent.id",
|
|
147
|
+
HandoffToAgentName = "pi.gen_ai.handoff.to_agent.name",
|
|
148
|
+
HandoffToAgentId = "pi.gen_ai.handoff.to_agent.id",
|
|
147
149
|
// Marks chat spans emitted outside the agent loop (compaction, handoff, branch
|
|
148
150
|
// summary, image inspection, …). Lets dashboards split oneshot cost / latency
|
|
149
151
|
// from main-turn cost without overloading the semconv `gen_ai.operation.name`.
|
|
150
|
-
OneshotKind = "
|
|
152
|
+
OneshotKind = "pi.gen_ai.oneshot.kind",
|
|
151
153
|
// Gateway / proxy (LiteLLM, Helicone, Portkey, …) — populated when a known
|
|
152
154
|
// gateway header pattern is detected on the upstream response. The base
|
|
153
155
|
// `gen_ai.provider.name` continues to track the *upstream* provider (e.g.
|
|
154
156
|
// `anthropic`) that the gateway routed to.
|
|
155
|
-
GatewayName = "
|
|
156
|
-
GatewayEndpoint = "
|
|
157
|
-
GatewayCallId = "
|
|
158
|
-
GatewayRoutedTo = "
|
|
157
|
+
GatewayName = "pi.gen_ai.gateway.name",
|
|
158
|
+
GatewayEndpoint = "pi.gen_ai.gateway.endpoint",
|
|
159
|
+
GatewayCallId = "pi.gen_ai.gateway.call_id",
|
|
160
|
+
GatewayRoutedTo = "pi.gen_ai.gateway.routed_to",
|
|
159
161
|
}
|
|
160
162
|
|
|
161
163
|
/** GenAI operation names — values for {@link GenAIAttr.OperationName}. */
|
|
@@ -199,9 +201,9 @@ export interface CostEstimatorContext {
|
|
|
199
201
|
|
|
200
202
|
/**
|
|
201
203
|
* Cost estimator result.
|
|
202
|
-
* { usd: number } — cost is known; emitted as
|
|
204
|
+
* { usd: number } — cost is known; emitted as pi.gen_ai.cost.estimated_usd
|
|
203
205
|
* { unavailable: string } — cost is intentionally unknown; emitted as
|
|
204
|
-
*
|
|
206
|
+
* pi.gen_ai.cost.unavailable_reason
|
|
205
207
|
* undefined — no opinion; nothing emitted
|
|
206
208
|
*/
|
|
207
209
|
export type CostEstimate =
|
|
@@ -248,7 +250,7 @@ export interface ChatUsageEvent {
|
|
|
248
250
|
*
|
|
249
251
|
* Use this to reconcile gateway-issued ids (e.g. `x-litellm-call-id`) with
|
|
250
252
|
* downstream billing / spend dashboards. Known gateway patterns are also
|
|
251
|
-
* auto-stamped on the chat span as `
|
|
253
|
+
* auto-stamped on the chat span as `pi.gen_ai.gateway.*` attributes.
|
|
252
254
|
*/
|
|
253
255
|
readonly headers: Readonly<Record<string, string>> | undefined;
|
|
254
256
|
}
|
|
@@ -626,14 +628,14 @@ export function recordTelemetryWarning(telemetry: AgentTelemetry | undefined, wa
|
|
|
626
628
|
function emitTelemetryWarning(telemetry: AgentTelemetry | undefined, warning: AgentTelemetryWarning): void {
|
|
627
629
|
const hook = telemetry?.config.onTelemetryWarning;
|
|
628
630
|
if (!hook) {
|
|
629
|
-
if (warning.error === undefined) console.warn(`[
|
|
630
|
-
else console.warn(`[
|
|
631
|
+
if (warning.error === undefined) console.warn(`[pi-agent] ${warning.message}`);
|
|
632
|
+
else console.warn(`[pi-agent] ${warning.message}`, warning.error);
|
|
631
633
|
return;
|
|
632
634
|
}
|
|
633
635
|
try {
|
|
634
636
|
hook(warning);
|
|
635
637
|
} catch (err) {
|
|
636
|
-
console.warn("[
|
|
638
|
+
console.warn("[pi-agent] onTelemetryWarning threw; swallowing:", err);
|
|
637
639
|
}
|
|
638
640
|
}
|
|
639
641
|
|
|
@@ -678,7 +680,7 @@ export function startInvokeAgentSpan(telemetry: AgentTelemetry | undefined, mode
|
|
|
678
680
|
/** Stamp the final step count on the `invoke_agent` span. */
|
|
679
681
|
export function applyInvokeAgentFinish(span: Span | undefined, stepCount: number): void {
|
|
680
682
|
if (!span) return;
|
|
681
|
-
span.setAttribute(
|
|
683
|
+
span.setAttribute(PiGenAIAttr.AgentStepCount, stepCount);
|
|
682
684
|
}
|
|
683
685
|
|
|
684
686
|
/**
|
|
@@ -735,7 +737,7 @@ export interface ChatRequestSnapshot {
|
|
|
735
737
|
|
|
736
738
|
function buildChatRequestAttributes(stepNumber: number, request: ChatRequestSnapshot, provider: string): Attributes {
|
|
737
739
|
const attrs: Attributes = {
|
|
738
|
-
[
|
|
740
|
+
[PiGenAIAttr.AgentStepNumber]: stepNumber,
|
|
739
741
|
[GenAIAttr.OutputType]: "text",
|
|
740
742
|
[GenAIAttr.RequestStream]: true,
|
|
741
743
|
};
|
|
@@ -753,11 +755,11 @@ function buildChatRequestAttributes(stepNumber: number, request: ChatRequestSnap
|
|
|
753
755
|
const resolved = resolveServiceTier(request.serviceTier, provider);
|
|
754
756
|
if (resolved) attrs[OpenAIAttr.RequestServiceTier] = resolved;
|
|
755
757
|
}
|
|
756
|
-
if (request.reasoningEffort) attrs[
|
|
758
|
+
if (request.reasoningEffort) attrs[PiGenAIAttr.RequestReasoningEffort] = request.reasoningEffort;
|
|
757
759
|
const toolChoice = serializeToolChoice(request.toolChoice);
|
|
758
|
-
if (toolChoice) attrs[
|
|
760
|
+
if (toolChoice) attrs[PiGenAIAttr.RequestToolChoice] = toolChoice;
|
|
759
761
|
if (request.tools && request.tools.length > 0) {
|
|
760
|
-
attrs[
|
|
762
|
+
attrs[PiGenAIAttr.RequestAvailableTools] = request.tools.map(tool => tool.name);
|
|
761
763
|
}
|
|
762
764
|
return attrs;
|
|
763
765
|
}
|
|
@@ -775,7 +777,7 @@ function serializeToolChoice(toolChoice: ToolChoice | undefined): string | undef
|
|
|
775
777
|
|
|
776
778
|
function applyContentCaptureForRequest(telemetry: AgentTelemetry, span: Span, request: ChatRequestSnapshot): void {
|
|
777
779
|
const requestMessages = serializeRequestMessagesForTelemetry(telemetry, request);
|
|
778
|
-
if (requestMessages) span.setAttribute(
|
|
780
|
+
if (requestMessages) span.setAttribute(PiGenAIAttr.RequestMessages, requestMessages);
|
|
779
781
|
if (telemetry.contentCapture !== "full") return;
|
|
780
782
|
const systemInstructions = serializeFullSystemInstructionsForTelemetry(request);
|
|
781
783
|
if (systemInstructions) span.setAttribute(GenAIAttr.SystemInstructions, systemInstructions);
|
|
@@ -785,9 +787,9 @@ function applyContentCaptureForRequest(telemetry: AgentTelemetry, span: Span, re
|
|
|
785
787
|
|
|
786
788
|
function applyContentCaptureForResponse(telemetry: AgentTelemetry, span: Span, message: AssistantMessage): void {
|
|
787
789
|
const responseText = serializeResponseTextForTelemetry(telemetry, message);
|
|
788
|
-
if (responseText) span.setAttribute(
|
|
790
|
+
if (responseText) span.setAttribute(PiGenAIAttr.ResponseText, responseText);
|
|
789
791
|
const responseToolCalls = serializeResponseToolCallsForTelemetry(telemetry, message);
|
|
790
|
-
if (responseToolCalls) span.setAttribute(
|
|
792
|
+
if (responseToolCalls) span.setAttribute(PiGenAIAttr.ResponseToolCalls, responseToolCalls);
|
|
791
793
|
if (telemetry.contentCapture === "full") {
|
|
792
794
|
const outputMessages = serializeFullOutputMessagesForTelemetry(message);
|
|
793
795
|
if (outputMessages) span.setAttribute(GenAIAttr.OutputMessages, outputMessages);
|
|
@@ -1188,6 +1190,9 @@ export function failChatSpan(
|
|
|
1188
1190
|
function applyChatResponseAttributes(span: Span, message: AssistantMessage): void {
|
|
1189
1191
|
span.setAttribute(GenAIAttr.ResponseModel, message.model);
|
|
1190
1192
|
if (message.responseId) span.setAttribute(GenAIAttr.ResponseId, message.responseId);
|
|
1193
|
+
if (message.upstreamProvider) {
|
|
1194
|
+
span.setAttribute(PiGenAIAttr.ResponseUpstreamProvider, message.upstreamProvider);
|
|
1195
|
+
}
|
|
1191
1196
|
if (message.ttft != null) span.setAttribute(GenAIAttr.ResponseTimeToFirstChunk, message.ttft / 1000);
|
|
1192
1197
|
const finishReason = mapStopReason(message.stopReason);
|
|
1193
1198
|
if (finishReason) span.setAttribute(GenAIAttr.ResponseFinishReasons, [finishReason]);
|
|
@@ -1202,7 +1207,7 @@ function applyUsageAttributes(span: Span, usage: Usage | undefined): void {
|
|
|
1202
1207
|
span.setAttribute(GenAIAttr.UsageInputTokens, inputTokens);
|
|
1203
1208
|
span.setAttribute(GenAIAttr.UsageOutputTokens, outputTokens);
|
|
1204
1209
|
const total = usage.totalTokens ?? inputTokens + outputTokens;
|
|
1205
|
-
span.setAttribute(
|
|
1210
|
+
span.setAttribute(PiGenAIAttr.UsageTotalTokens, total);
|
|
1206
1211
|
if (usage.cacheRead != null) span.setAttribute(GenAIAttr.UsageCacheReadInputTokens, usage.cacheRead);
|
|
1207
1212
|
if (usage.cacheWrite != null) span.setAttribute(GenAIAttr.UsageCacheCreationInputTokens, usage.cacheWrite);
|
|
1208
1213
|
if (usage.reasoningTokens != null) {
|
|
@@ -1210,7 +1215,7 @@ function applyUsageAttributes(span: Span, usage: Usage | undefined): void {
|
|
|
1210
1215
|
}
|
|
1211
1216
|
if (usage.server) {
|
|
1212
1217
|
const sums = (usage.server.webSearch ?? 0) + (usage.server.webFetch ?? 0);
|
|
1213
|
-
if (sums > 0) span.setAttribute(
|
|
1218
|
+
if (sums > 0) span.setAttribute(PiGenAIAttr.UsageServerSideTools, sums);
|
|
1214
1219
|
}
|
|
1215
1220
|
}
|
|
1216
1221
|
|
|
@@ -1276,10 +1281,10 @@ function applyGatewayAttributes(
|
|
|
1276
1281
|
): void {
|
|
1277
1282
|
const gateway = detectGatewayFromHeaders(headers);
|
|
1278
1283
|
if (!gateway) return;
|
|
1279
|
-
span.setAttribute(
|
|
1280
|
-
if (baseUrl) span.setAttribute(
|
|
1281
|
-
if (gateway.callId) span.setAttribute(
|
|
1282
|
-
if (gateway.routedTo) span.setAttribute(
|
|
1284
|
+
span.setAttribute(PiGenAIAttr.GatewayName, gateway.name);
|
|
1285
|
+
if (baseUrl) span.setAttribute(PiGenAIAttr.GatewayEndpoint, baseUrl);
|
|
1286
|
+
if (gateway.callId) span.setAttribute(PiGenAIAttr.GatewayCallId, gateway.callId);
|
|
1287
|
+
if (gateway.routedTo) span.setAttribute(PiGenAIAttr.GatewayRoutedTo, gateway.routedTo);
|
|
1283
1288
|
}
|
|
1284
1289
|
|
|
1285
1290
|
interface AppliedCostEstimate {
|
|
@@ -1340,7 +1345,7 @@ function applyCostEstimateForUsage(
|
|
|
1340
1345
|
}
|
|
1341
1346
|
if (!result) return EMPTY_COST;
|
|
1342
1347
|
if ("unavailable" in result) {
|
|
1343
|
-
span.setAttribute(
|
|
1348
|
+
span.setAttribute(PiGenAIAttr.CostUnavailableReason, result.unavailable);
|
|
1344
1349
|
const cost: AppliedCostEstimate = {
|
|
1345
1350
|
costUsd: undefined,
|
|
1346
1351
|
inputUsd: undefined,
|
|
@@ -1362,9 +1367,9 @@ function applyCostEstimateForUsage(
|
|
|
1362
1367
|
});
|
|
1363
1368
|
return cost;
|
|
1364
1369
|
}
|
|
1365
|
-
span.setAttribute(
|
|
1366
|
-
if (result.inputUsd != null) span.setAttribute(
|
|
1367
|
-
if (result.outputUsd != null) span.setAttribute(
|
|
1370
|
+
span.setAttribute(PiGenAIAttr.CostEstimatedUsd, result.usd);
|
|
1371
|
+
if (result.inputUsd != null) span.setAttribute(PiGenAIAttr.CostInputUsd, result.inputUsd);
|
|
1372
|
+
if (result.outputUsd != null) span.setAttribute(PiGenAIAttr.CostOutputUsd, result.outputUsd);
|
|
1368
1373
|
const cost: AppliedCostEstimate = {
|
|
1369
1374
|
costUsd: result.usd,
|
|
1370
1375
|
inputUsd: result.inputUsd,
|
|
@@ -1534,7 +1539,7 @@ export async function recordManualChatTelemetry(
|
|
|
1534
1539
|
});
|
|
1535
1540
|
if (!span) return undefined;
|
|
1536
1541
|
if (options.span && options.attributes) span.setAttributes(options.attributes);
|
|
1537
|
-
if (options.stepNumber != null) span.setAttribute(
|
|
1542
|
+
if (options.stepNumber != null) span.setAttribute(PiGenAIAttr.AgentStepNumber, options.stepNumber);
|
|
1538
1543
|
span.setAttribute(GenAIAttr.ResponseModel, options.responseModel ?? options.model.name);
|
|
1539
1544
|
if (options.responseId) span.setAttribute(GenAIAttr.ResponseId, options.responseId);
|
|
1540
1545
|
const finishReason = mapStopReason(options.finishReason);
|
|
@@ -1567,7 +1572,7 @@ export async function recordManualChatTelemetry(
|
|
|
1567
1572
|
}
|
|
1568
1573
|
if (options.responseText) {
|
|
1569
1574
|
const responseText = stringifyJsonAttribute(summarizeTelemetryTexts([options.responseText]));
|
|
1570
|
-
if (responseText) span.setAttribute(
|
|
1575
|
+
if (responseText) span.setAttribute(PiGenAIAttr.ResponseText, responseText);
|
|
1571
1576
|
}
|
|
1572
1577
|
if (options.responseToolCalls && options.responseToolCalls.length > 0) {
|
|
1573
1578
|
const calls = options.responseToolCalls.map(call => ({
|
|
@@ -1576,7 +1581,7 @@ export async function recordManualChatTelemetry(
|
|
|
1576
1581
|
input: summarizeTelemetryValue(call.input),
|
|
1577
1582
|
}));
|
|
1578
1583
|
const responseToolCalls = stringifyJsonAttribute(limitTelemetryToolCalls(calls));
|
|
1579
|
-
if (responseToolCalls) span.setAttribute(
|
|
1584
|
+
if (responseToolCalls) span.setAttribute(PiGenAIAttr.ResponseToolCalls, responseToolCalls);
|
|
1580
1585
|
}
|
|
1581
1586
|
applyTerminalStatus(span, options.finishReason, undefined);
|
|
1582
1587
|
if (options.endSpan ?? options.span === undefined) span.end();
|
|
@@ -1595,7 +1600,7 @@ export interface InstrumentedChatSpanOptions {
|
|
|
1595
1600
|
/** Step index recorded on the span; defaults to `-1` for non-loop calls. */
|
|
1596
1601
|
readonly stepNumber?: number;
|
|
1597
1602
|
/**
|
|
1598
|
-
* Tag stamped onto `
|
|
1603
|
+
* Tag stamped onto `pi.gen_ai.oneshot.kind`. Values used by the agent:
|
|
1599
1604
|
* `compaction_summary`, `compaction_short_summary`, `compaction_turn_prefix`,
|
|
1600
1605
|
* `handoff`, `branch_summary`, `inspect_image`. Free-form to allow callers
|
|
1601
1606
|
* outside this package to add new kinds without bumping the helper.
|
|
@@ -1629,6 +1634,13 @@ export async function instrumentedCompleteSimple<TApi extends Api>(
|
|
|
1629
1634
|
options: SimpleStreamOptions,
|
|
1630
1635
|
span: InstrumentedChatSpanOptions,
|
|
1631
1636
|
): Promise<AssistantMessage> {
|
|
1637
|
+
// Oneshot LLM calls (handoff, compaction/branch summaries) run outside the
|
|
1638
|
+
// agent `#runLoop`, which is where the EventLoopKeepalive normally lives.
|
|
1639
|
+
// Without it, Bun's JSC loop stops servicing timers while parked on the
|
|
1640
|
+
// long-lived completion promise, freezing any host spinner (e.g. the
|
|
1641
|
+
// `/handoff` Loader) until an unrelated I/O event (a terminal resize)
|
|
1642
|
+
// pokes the loop. Keep the loop healthy for the duration of the call.
|
|
1643
|
+
using _keepalive = new EventLoopKeepalive();
|
|
1632
1644
|
const { telemetry, parent, oneshotKind } = span;
|
|
1633
1645
|
const stepNumber = span.stepNumber ?? -1;
|
|
1634
1646
|
const reasoning = options.reasoning;
|
|
@@ -1650,7 +1662,7 @@ export async function instrumentedCompleteSimple<TApi extends Api>(
|
|
|
1650
1662
|
},
|
|
1651
1663
|
});
|
|
1652
1664
|
if (chatSpan) {
|
|
1653
|
-
if (oneshotKind) chatSpan.setAttribute(
|
|
1665
|
+
if (oneshotKind) chatSpan.setAttribute(PiGenAIAttr.OneshotKind, oneshotKind);
|
|
1654
1666
|
if (span.attributes) chatSpan.setAttributes(span.attributes);
|
|
1655
1667
|
}
|
|
1656
1668
|
|
|
@@ -1788,7 +1800,7 @@ export function finishExecuteToolSpan(
|
|
|
1788
1800
|
}
|
|
1789
1801
|
|
|
1790
1802
|
/** Span attribute carrying the terminal {@link ToolStatus}. */
|
|
1791
|
-
export const EXECUTE_TOOL_STATUS_ATTR =
|
|
1803
|
+
export const EXECUTE_TOOL_STATUS_ATTR = PiGenAIAttr.ToolStatus;
|
|
1792
1804
|
|
|
1793
1805
|
/**
|
|
1794
1806
|
* Mapping from non-ok {@link ToolStatus} values to the `error.type` attribute
|
|
@@ -1861,7 +1873,8 @@ export function finishInvokeAgentSpan(
|
|
|
1861
1873
|
|
|
1862
1874
|
/**
|
|
1863
1875
|
* Invoke {@link AgentTelemetryConfig.onRunEnd} on `telemetry` if set. Throws
|
|
1864
|
-
are caught and
|
|
1876
|
+
* are caught and surfaced via the `onTelemetryWarning` hook (falling back to `console.warn`
|
|
1877
|
+
* when no hook is set) — telemetry callbacks NEVER turn a
|
|
1865
1878
|
* successful agent run into a failed one. Idempotent at the call site via
|
|
1866
1879
|
* {@link AgentRunCollector.markRunEnded}; callers must check that before
|
|
1867
1880
|
* calling this helper.
|
|
@@ -1880,66 +1893,66 @@ export function fireOnRunEnd(telemetry: AgentTelemetry, summary: AgentRunSummary
|
|
|
1880
1893
|
}
|
|
1881
1894
|
}
|
|
1882
1895
|
|
|
1883
|
-
/** Aggregate `
|
|
1884
|
-
export const enum
|
|
1885
|
-
ChatsCount = "
|
|
1886
|
-
ChatsTotalLatencyMs = "
|
|
1887
|
-
ChatsStopReasonPrefix = "
|
|
1888
|
-
ToolsCount = "
|
|
1889
|
-
ToolsOkCount = "
|
|
1890
|
-
ToolsErrorCount = "
|
|
1891
|
-
ToolsSkippedCount = "
|
|
1892
|
-
ToolsBlockedCount = "
|
|
1893
|
-
ToolsTimeoutCount = "
|
|
1894
|
-
ToolsAbortedCount = "
|
|
1895
|
-
ToolsTotalLatencyMs = "
|
|
1896
|
-
ToolsInvoked = "
|
|
1897
|
-
ToolsAvailable = "
|
|
1898
|
-
ToolsUnused = "
|
|
1899
|
-
UsageInputTokensTotal = "
|
|
1900
|
-
UsageOutputTokensTotal = "
|
|
1901
|
-
UsageCacheReadInputTokensTotal = "
|
|
1902
|
-
UsageCacheCreationInputTokensTotal = "
|
|
1903
|
-
UsageReasoningOutputTokensTotal = "
|
|
1904
|
-
UsageTotalTokensTotal = "
|
|
1905
|
-
CostEstimatedUsdTotal = "
|
|
1906
|
-
ErrorsCount = "
|
|
1907
|
-
}
|
|
1908
|
-
|
|
1909
|
-
/** Stamp the aggregate `
|
|
1896
|
+
/** Aggregate `pi.gen_ai.agent.*` attributes stamped on the `invoke_agent` span. */
|
|
1897
|
+
export const enum PiGenAIAggregateAttr {
|
|
1898
|
+
ChatsCount = "pi.gen_ai.agent.chats.count",
|
|
1899
|
+
ChatsTotalLatencyMs = "pi.gen_ai.agent.chats.total_latency_ms",
|
|
1900
|
+
ChatsStopReasonPrefix = "pi.gen_ai.agent.chats.stop_reason.",
|
|
1901
|
+
ToolsCount = "pi.gen_ai.agent.tools.count",
|
|
1902
|
+
ToolsOkCount = "pi.gen_ai.agent.tools.ok.count",
|
|
1903
|
+
ToolsErrorCount = "pi.gen_ai.agent.tools.error.count",
|
|
1904
|
+
ToolsSkippedCount = "pi.gen_ai.agent.tools.skipped.count",
|
|
1905
|
+
ToolsBlockedCount = "pi.gen_ai.agent.tools.blocked.count",
|
|
1906
|
+
ToolsTimeoutCount = "pi.gen_ai.agent.tools.timeout.count",
|
|
1907
|
+
ToolsAbortedCount = "pi.gen_ai.agent.tools.aborted.count",
|
|
1908
|
+
ToolsTotalLatencyMs = "pi.gen_ai.agent.tools.total_latency_ms",
|
|
1909
|
+
ToolsInvoked = "pi.gen_ai.agent.tools.invoked",
|
|
1910
|
+
ToolsAvailable = "pi.gen_ai.agent.tools.available",
|
|
1911
|
+
ToolsUnused = "pi.gen_ai.agent.tools.unused",
|
|
1912
|
+
UsageInputTokensTotal = "pi.gen_ai.agent.usage.input_tokens.total",
|
|
1913
|
+
UsageOutputTokensTotal = "pi.gen_ai.agent.usage.output_tokens.total",
|
|
1914
|
+
UsageCacheReadInputTokensTotal = "pi.gen_ai.agent.usage.cache_read.input_tokens.total",
|
|
1915
|
+
UsageCacheCreationInputTokensTotal = "pi.gen_ai.agent.usage.cache_creation.input_tokens.total",
|
|
1916
|
+
UsageReasoningOutputTokensTotal = "pi.gen_ai.agent.usage.reasoning.output_tokens.total",
|
|
1917
|
+
UsageTotalTokensTotal = "pi.gen_ai.agent.usage.total_tokens.total",
|
|
1918
|
+
CostEstimatedUsdTotal = "pi.gen_ai.agent.cost.estimated_usd.total",
|
|
1919
|
+
ErrorsCount = "pi.gen_ai.agent.errors.count",
|
|
1920
|
+
}
|
|
1921
|
+
|
|
1922
|
+
/** Stamp the aggregate `pi.gen_ai.agent.*` attributes on the given span. */
|
|
1910
1923
|
function applyAggregateAttributes(span: Span, summary: AgentRunSummary, coverage: AgentRunCoverage): void {
|
|
1911
|
-
span.setAttribute(
|
|
1912
|
-
span.setAttribute(
|
|
1924
|
+
span.setAttribute(PiGenAIAggregateAttr.ChatsCount, summary.chats.total);
|
|
1925
|
+
span.setAttribute(PiGenAIAggregateAttr.ChatsTotalLatencyMs, summary.chats.totalLatencyMs);
|
|
1913
1926
|
for (const [reason, count] of Object.entries(summary.chats.byStopReason)) {
|
|
1914
|
-
span.setAttribute(`${
|
|
1915
|
-
}
|
|
1916
|
-
span.setAttribute(
|
|
1917
|
-
span.setAttribute(
|
|
1918
|
-
span.setAttribute(
|
|
1919
|
-
span.setAttribute(
|
|
1920
|
-
span.setAttribute(
|
|
1921
|
-
span.setAttribute(
|
|
1922
|
-
span.setAttribute(
|
|
1923
|
-
span.setAttribute(
|
|
1927
|
+
span.setAttribute(`${PiGenAIAggregateAttr.ChatsStopReasonPrefix}${reason}.count`, count);
|
|
1928
|
+
}
|
|
1929
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsCount, summary.tools.total);
|
|
1930
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsOkCount, summary.tools.ok);
|
|
1931
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsErrorCount, summary.tools.error);
|
|
1932
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsSkippedCount, summary.tools.skipped);
|
|
1933
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsBlockedCount, summary.tools.blocked);
|
|
1934
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsTimeoutCount, summary.tools.timeout);
|
|
1935
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsAbortedCount, summary.tools.aborted);
|
|
1936
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsTotalLatencyMs, summary.tools.totalLatencyMs);
|
|
1924
1937
|
if (coverage.toolsInvoked.length > 0) {
|
|
1925
|
-
span.setAttribute(
|
|
1938
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsInvoked, [...coverage.toolsInvoked]);
|
|
1926
1939
|
}
|
|
1927
1940
|
if (coverage.toolsAvailable.length > 0) {
|
|
1928
|
-
span.setAttribute(
|
|
1941
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsAvailable, [...coverage.toolsAvailable]);
|
|
1929
1942
|
}
|
|
1930
1943
|
if (coverage.toolsUnused.length > 0) {
|
|
1931
|
-
span.setAttribute(
|
|
1932
|
-
}
|
|
1933
|
-
span.setAttribute(
|
|
1934
|
-
span.setAttribute(
|
|
1935
|
-
span.setAttribute(
|
|
1936
|
-
span.setAttribute(
|
|
1937
|
-
span.setAttribute(
|
|
1938
|
-
span.setAttribute(
|
|
1944
|
+
span.setAttribute(PiGenAIAggregateAttr.ToolsUnused, [...coverage.toolsUnused]);
|
|
1945
|
+
}
|
|
1946
|
+
span.setAttribute(PiGenAIAggregateAttr.UsageInputTokensTotal, summary.usage.inputTokens);
|
|
1947
|
+
span.setAttribute(PiGenAIAggregateAttr.UsageOutputTokensTotal, summary.usage.outputTokens);
|
|
1948
|
+
span.setAttribute(PiGenAIAggregateAttr.UsageCacheReadInputTokensTotal, summary.usage.cachedInputTokens);
|
|
1949
|
+
span.setAttribute(PiGenAIAggregateAttr.UsageCacheCreationInputTokensTotal, summary.usage.cacheWriteTokens);
|
|
1950
|
+
span.setAttribute(PiGenAIAggregateAttr.UsageReasoningOutputTokensTotal, summary.usage.reasoningOutputTokens);
|
|
1951
|
+
span.setAttribute(PiGenAIAggregateAttr.UsageTotalTokensTotal, summary.usage.totalTokens);
|
|
1939
1952
|
if (summary.cost.estimatedUsd > 0) {
|
|
1940
|
-
span.setAttribute(
|
|
1953
|
+
span.setAttribute(PiGenAIAggregateAttr.CostEstimatedUsdTotal, summary.cost.estimatedUsd);
|
|
1941
1954
|
}
|
|
1942
|
-
span.setAttribute(
|
|
1955
|
+
span.setAttribute(PiGenAIAggregateAttr.ErrorsCount, summary.errors.total);
|
|
1943
1956
|
}
|
|
1944
1957
|
|
|
1945
1958
|
/**
|
|
@@ -1974,10 +1987,10 @@ export function recordHandoff(
|
|
|
1974
1987
|
const attrs: Attributes = {};
|
|
1975
1988
|
const fromAgent = options.fromAgent ? normalizeAgentIdentity(telemetry, options.fromAgent) : undefined;
|
|
1976
1989
|
const toAgent = normalizeAgentIdentity(telemetry, options.toAgent);
|
|
1977
|
-
if (fromAgent?.name) attrs[
|
|
1978
|
-
if (fromAgent?.id) attrs[
|
|
1979
|
-
if (toAgent.name) attrs[
|
|
1980
|
-
if (toAgent.id) attrs[
|
|
1990
|
+
if (fromAgent?.name) attrs[PiGenAIAttr.HandoffFromAgentName] = fromAgent.name;
|
|
1991
|
+
if (fromAgent?.id) attrs[PiGenAIAttr.HandoffFromAgentId] = fromAgent.id;
|
|
1992
|
+
if (toAgent.name) attrs[PiGenAIAttr.HandoffToAgentName] = toAgent.name;
|
|
1993
|
+
if (toAgent.id) attrs[PiGenAIAttr.HandoffToAgentId] = toAgent.id;
|
|
1981
1994
|
const name = toAgent.name
|
|
1982
1995
|
? fromAgent?.name
|
|
1983
1996
|
? `handoff ${fromAgent.name} → ${toAgent.name}`
|
package/src/types.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import type {
|
|
2
|
+
ApiKeyResolveContext,
|
|
2
3
|
AssistantMessage,
|
|
3
4
|
AssistantMessageEvent,
|
|
4
5
|
AssistantMessageEventStream,
|
|
6
|
+
Context,
|
|
5
7
|
Effort,
|
|
6
8
|
ImageContent,
|
|
7
9
|
Message,
|
|
@@ -25,6 +27,14 @@ export type StreamFn = (
|
|
|
25
27
|
...args: Parameters<typeof streamSimple>
|
|
26
28
|
) => AssistantMessageEventStream | Promise<AssistantMessageEventStream>;
|
|
27
29
|
|
|
30
|
+
/**
|
|
31
|
+
* An aside entry: a ready {@link AgentMessage}, or a sync thunk evaluated at
|
|
32
|
+
* injection time that returns the message to inject or `null` to skip it. Thunks
|
|
33
|
+
* let the producer make the final inject-or-drop decision against current state
|
|
34
|
+
* (e.g. dropping late diagnostics a newer edit superseded).
|
|
35
|
+
*/
|
|
36
|
+
export type AsideMessage = AgentMessage | (() => AgentMessage | null);
|
|
37
|
+
|
|
28
38
|
/**
|
|
29
39
|
* Configuration for the agent loop.
|
|
30
40
|
*/
|
|
@@ -38,14 +48,6 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
38
48
|
*/
|
|
39
49
|
interruptMode?: "immediate" | "wait";
|
|
40
50
|
|
|
41
|
-
/**
|
|
42
|
-
* Maximum completed tool calls to accept from one streamed assistant turn before
|
|
43
|
-
* cutting the provider stream and executing that batch. The cap is enforced on
|
|
44
|
-
* `toolcall_end` so every executed call has complete arguments. Undefined disables
|
|
45
|
-
* batching.
|
|
46
|
-
*/
|
|
47
|
-
maxToolCallsPerTurn?: number;
|
|
48
|
-
|
|
49
51
|
/**
|
|
50
52
|
* Optional session identifier forwarded to LLM providers.
|
|
51
53
|
* Used by providers that support session-based caching (e.g., OpenAI Codex).
|
|
@@ -106,23 +108,46 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
106
108
|
*/
|
|
107
109
|
transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise<AgentMessage[]>;
|
|
108
110
|
|
|
111
|
+
/**
|
|
112
|
+
* Optional transform applied to the final provider context after conversion,
|
|
113
|
+
* normalization, and append-only context handling, but before telemetry capture
|
|
114
|
+
* and provider send.
|
|
115
|
+
*/
|
|
116
|
+
transformProviderContext?: (context: Context, model: Model) => Context;
|
|
117
|
+
|
|
109
118
|
/**
|
|
110
119
|
* Resolves an API key dynamically for each LLM call.
|
|
111
120
|
*
|
|
112
121
|
* Useful for short-lived OAuth tokens (e.g., GitHub Copilot) that may expire
|
|
113
122
|
* during long-running tool execution phases.
|
|
114
123
|
*/
|
|
115
|
-
getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined;
|
|
124
|
+
getApiKey?: (provider: string, ctx?: ApiKeyResolveContext) => Promise<string | undefined> | string | undefined;
|
|
116
125
|
|
|
117
126
|
/**
|
|
118
127
|
* Returns steering messages to inject into the conversation mid-run.
|
|
119
128
|
*
|
|
120
|
-
* Called
|
|
121
|
-
*
|
|
122
|
-
*
|
|
129
|
+
* Called at injection boundaries only (loop start and after a tool batch
|
|
130
|
+
* fully settles), so dequeued messages are immediately injected. The
|
|
131
|
+
* mid-batch interrupt poll uses {@link hasSteeringMessages} instead and
|
|
132
|
+
* never consumes the queue.
|
|
123
133
|
*/
|
|
124
134
|
getSteeringMessages?: () => Promise<AgentMessage[]>;
|
|
125
135
|
|
|
136
|
+
/**
|
|
137
|
+
* Peeks whether steering messages are queued, without consuming them.
|
|
138
|
+
*
|
|
139
|
+
* Called after each tool execution (unless interruptMode is "wait") to decide
|
|
140
|
+
* whether to skip the remaining tool calls in the batch. The queue keeps
|
|
141
|
+
* owning its messages until the loop reaches the next injection boundary and
|
|
142
|
+
* dequeues via {@link getSteeringMessages} — so callers can still cancel or
|
|
143
|
+
* restore queued messages while in-flight tools settle, and an external
|
|
144
|
+
* abort in that window leaves the queue intact for a post-abort continue.
|
|
145
|
+
*
|
|
146
|
+
* When omitted, steering never interrupts a running tool batch; queued
|
|
147
|
+
* messages are still delivered at the next injection boundary.
|
|
148
|
+
*/
|
|
149
|
+
hasSteeringMessages?: () => boolean | Promise<boolean>;
|
|
150
|
+
|
|
126
151
|
/**
|
|
127
152
|
* Returns follow-up messages to process after the agent would otherwise stop.
|
|
128
153
|
*
|
|
@@ -131,6 +156,17 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
131
156
|
* continues with another turn.
|
|
132
157
|
*/
|
|
133
158
|
getFollowUpMessages?: () => Promise<AgentMessage[]>;
|
|
159
|
+
/**
|
|
160
|
+
* Returns non-interrupting "aside" messages to inject at a step boundary.
|
|
161
|
+
*
|
|
162
|
+
* Polled after each tool batch (before the next LLM call) AND at the yield
|
|
163
|
+
* check. Unlike steering, these NEVER abort in-flight tools — they are passive
|
|
164
|
+
* notifications (e.g. background-job completions, late LSP diagnostics) that
|
|
165
|
+
* should reach the model between requests without waiting for the agent to
|
|
166
|
+
* fully stop. Returned messages are appended to the context with normal
|
|
167
|
+
* message events and keep the loop running so the model can react.
|
|
168
|
+
*/
|
|
169
|
+
getAsideMessages?: () => Promise<AsideMessage[]>;
|
|
134
170
|
/**
|
|
135
171
|
* Hook fired right before the loop would exit.
|
|
136
172
|
*
|
|
@@ -198,6 +234,15 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
198
234
|
*/
|
|
199
235
|
getReasoning?: () => Effort | undefined;
|
|
200
236
|
|
|
237
|
+
/**
|
|
238
|
+
* Dynamic reasoning-disable override, resolved per LLM call. When set,
|
|
239
|
+
* its return value overrides the static `disableReasoning` from
|
|
240
|
+
* `SimpleStreamOptions` for that request. Pair with `getReasoning` so
|
|
241
|
+
* mid-run transitions into and out of the explicit `off` state propagate
|
|
242
|
+
* to the next provider call.
|
|
243
|
+
*/
|
|
244
|
+
getDisableReasoning?: () => boolean | undefined;
|
|
245
|
+
|
|
201
246
|
/**
|
|
202
247
|
* Called after a tool call has been validated and is about to execute.
|
|
203
248
|
*
|
|
@@ -281,6 +326,8 @@ export interface AfterToolCallResult {
|
|
|
281
326
|
details?: unknown;
|
|
282
327
|
/** If provided, replaces the error flag carried with the tool result. */
|
|
283
328
|
isError?: boolean;
|
|
329
|
+
/** If provided, replaces the contextually-useless flag carried with the tool result. */
|
|
330
|
+
useless?: boolean;
|
|
284
331
|
}
|
|
285
332
|
|
|
286
333
|
/** Context passed to `beforeToolCall`. */
|
|
@@ -320,7 +367,7 @@ export interface AfterToolCallContext {
|
|
|
320
367
|
*
|
|
321
368
|
* @example
|
|
322
369
|
* ```typescript
|
|
323
|
-
* declare module "@prometheus-ai/agent
|
|
370
|
+
* declare module "@prometheus-ai/agent" {
|
|
324
371
|
* interface CustomAgentMessages {
|
|
325
372
|
* artifact: ArtifactMessage;
|
|
326
373
|
* notification: NotificationMessage;
|
|
@@ -346,6 +393,7 @@ export interface AgentState {
|
|
|
346
393
|
systemPrompt: string[];
|
|
347
394
|
model: Model;
|
|
348
395
|
thinkingLevel?: Effort;
|
|
396
|
+
disableReasoning?: boolean;
|
|
349
397
|
tools: AgentTool<any>[];
|
|
350
398
|
messages: AgentMessage[]; // Can include attachments + custom message types
|
|
351
399
|
isStreaming: boolean;
|
|
@@ -362,6 +410,8 @@ export interface AgentToolResult<T = any, _TInput = unknown> {
|
|
|
362
410
|
// Marks a non-throwing failure (e.g. an aggregator catching per-entry errors).
|
|
363
411
|
// agent-loop honors this and surfaces it as a tool error on the wire.
|
|
364
412
|
isError?: boolean;
|
|
413
|
+
/** Marks the result as contextually useless: safe for compaction to elide once consumed (e.g. zero matches, wait timeout). Ignored when isError is set. */
|
|
414
|
+
useless?: boolean;
|
|
365
415
|
}
|
|
366
416
|
|
|
367
417
|
// Callback for streaming tool execution updates
|
|
@@ -422,14 +472,13 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
|
|
|
422
472
|
loadMode?: "essential" | "discoverable";
|
|
423
473
|
/** Short one-line summary used for tool discovery indexes. */
|
|
424
474
|
summary?: string;
|
|
425
|
-
/** If true, tool execution ignores abort signals (runs to completion) */
|
|
426
|
-
nonAbortable?: boolean;
|
|
427
475
|
/**
|
|
428
476
|
* Concurrency mode for tool scheduling when multiple calls are in one turn.
|
|
429
477
|
* - "shared": can run alongside other shared tools (default)
|
|
430
478
|
* - "exclusive": runs alone; other tools wait until it finishes
|
|
479
|
+
* - function: resolved per call from the (raw, pre-validation) arguments
|
|
431
480
|
*/
|
|
432
|
-
concurrency?: "shared" | "exclusive";
|
|
481
|
+
concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
|
|
433
482
|
/** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
|
|
434
483
|
lenientArgValidation?: boolean;
|
|
435
484
|
/**
|