@prometheus-ai/agent-core 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/telemetry.ts CHANGED
@@ -2,7 +2,7 @@
2
2
  * OpenTelemetry instrumentation for the agent loop.
3
3
  *
4
4
  * Implements the OpenTelemetry GenAI semantic conventions
5
- * (https://opentelemetry.io/docs/specs/semconv/gen-ai/) plus `prometheus.gen_ai.*`
5
+ * (https://opentelemetry.io/docs/specs/semconv/gen-ai/) plus `pi.gen_ai.*`
6
6
  * extension attributes for run summaries, dashboard summaries, and cost hints
7
7
  * that are useful to downstream observability UIs.
8
8
  *
@@ -50,6 +50,7 @@ import {
50
50
  } from "@prometheus-ai/ai";
51
51
  import { AgentRunCollector, type AgentRunCoverage, type AgentRunSummary, type ToolStatus } from "./run-collector";
52
52
  import type { AgentTool } from "./types";
53
+ import { EventLoopKeepalive } from "./utils/yield";
53
54
 
54
55
  /** Default tracer name. Override via {@link AgentTelemetryConfig.tracerName}. */
55
56
  export const DEFAULT_TRACER_NAME = "@prometheus-ai/agent-core";
@@ -123,39 +124,40 @@ export const enum OpenAIAttr {
123
124
  }
124
125
 
125
126
  /** Project extension attributes. Kept out of the reserved `gen_ai.*` namespace. */
126
- export const enum PrometheusGenAIAttr {
127
- AgentStepNumber = "prometheus.gen_ai.agent.step.number",
128
- AgentStepCount = "prometheus.gen_ai.agent.step.count",
129
- RequestReasoningEffort = "prometheus.gen_ai.request.reasoning.effort",
130
- RequestToolChoice = "prometheus.gen_ai.request.tool.choice",
131
- RequestAvailableTools = "prometheus.gen_ai.request.available_tools",
132
- RequestMessages = "prometheus.gen_ai.request.messages",
133
- ResponseText = "prometheus.gen_ai.response.text",
134
- ResponseToolCalls = "prometheus.gen_ai.response.tool_calls",
135
- UsageTotalTokens = "prometheus.gen_ai.usage.total_tokens",
136
- UsageServerSideTools = "prometheus.gen_ai.usage.server_tool_requests",
137
- CostEstimatedUsd = "prometheus.gen_ai.cost.estimated_usd",
138
- CostInputUsd = "prometheus.gen_ai.cost.input_usd",
139
- CostOutputUsd = "prometheus.gen_ai.cost.output_usd",
140
- CostUnavailableReason = "prometheus.gen_ai.cost.unavailable_reason",
141
- ToolStatus = "prometheus.gen_ai.tool.status",
142
- ToolCallIntent = "prometheus.gen_ai.tool.call.intent",
143
- HandoffFromAgentName = "prometheus.gen_ai.handoff.from_agent.name",
144
- HandoffFromAgentId = "prometheus.gen_ai.handoff.from_agent.id",
145
- HandoffToAgentName = "prometheus.gen_ai.handoff.to_agent.name",
146
- HandoffToAgentId = "prometheus.gen_ai.handoff.to_agent.id",
127
+ export const enum PiGenAIAttr {
128
+ AgentStepNumber = "pi.gen_ai.agent.step.number",
129
+ AgentStepCount = "pi.gen_ai.agent.step.count",
130
+ RequestReasoningEffort = "pi.gen_ai.request.reasoning.effort",
131
+ RequestToolChoice = "pi.gen_ai.request.tool.choice",
132
+ RequestAvailableTools = "pi.gen_ai.request.available_tools",
133
+ RequestMessages = "pi.gen_ai.request.messages",
134
+ ResponseText = "pi.gen_ai.response.text",
135
+ ResponseToolCalls = "pi.gen_ai.response.tool_calls",
136
+ ResponseUpstreamProvider = "pi.gen_ai.response.upstream_provider",
137
+ UsageTotalTokens = "pi.gen_ai.usage.total_tokens",
138
+ UsageServerSideTools = "pi.gen_ai.usage.server_tool_requests",
139
+ CostEstimatedUsd = "pi.gen_ai.cost.estimated_usd",
140
+ CostInputUsd = "pi.gen_ai.cost.input_usd",
141
+ CostOutputUsd = "pi.gen_ai.cost.output_usd",
142
+ CostUnavailableReason = "pi.gen_ai.cost.unavailable_reason",
143
+ ToolStatus = "pi.gen_ai.tool.status",
144
+ ToolCallIntent = "pi.gen_ai.tool.call.intent",
145
+ HandoffFromAgentName = "pi.gen_ai.handoff.from_agent.name",
146
+ HandoffFromAgentId = "pi.gen_ai.handoff.from_agent.id",
147
+ HandoffToAgentName = "pi.gen_ai.handoff.to_agent.name",
148
+ HandoffToAgentId = "pi.gen_ai.handoff.to_agent.id",
147
149
  // Marks chat spans emitted outside the agent loop (compaction, handoff, branch
148
150
  // summary, image inspection, …). Lets dashboards split oneshot cost / latency
149
151
  // from main-turn cost without overloading the semconv `gen_ai.operation.name`.
150
- OneshotKind = "prometheus.gen_ai.oneshot.kind",
152
+ OneshotKind = "pi.gen_ai.oneshot.kind",
151
153
  // Gateway / proxy (LiteLLM, Helicone, Portkey, …) — populated when a known
152
154
  // gateway header pattern is detected on the upstream response. The base
153
155
  // `gen_ai.provider.name` continues to track the *upstream* provider (e.g.
154
156
  // `anthropic`) that the gateway routed to.
155
- GatewayName = "prometheus.gen_ai.gateway.name",
156
- GatewayEndpoint = "prometheus.gen_ai.gateway.endpoint",
157
- GatewayCallId = "prometheus.gen_ai.gateway.call_id",
158
- GatewayRoutedTo = "prometheus.gen_ai.gateway.routed_to",
157
+ GatewayName = "pi.gen_ai.gateway.name",
158
+ GatewayEndpoint = "pi.gen_ai.gateway.endpoint",
159
+ GatewayCallId = "pi.gen_ai.gateway.call_id",
160
+ GatewayRoutedTo = "pi.gen_ai.gateway.routed_to",
159
161
  }
160
162
 
161
163
  /** GenAI operation names — values for {@link GenAIAttr.OperationName}. */
@@ -199,9 +201,9 @@ export interface CostEstimatorContext {
199
201
 
200
202
  /**
201
203
  * Cost estimator result.
202
- * { usd: number } — cost is known; emitted as prometheus.gen_ai.cost.estimated_usd
204
+ * { usd: number } — cost is known; emitted as pi.gen_ai.cost.estimated_usd
203
205
  * { unavailable: string } — cost is intentionally unknown; emitted as
204
- * prometheus.gen_ai.cost.unavailable_reason
206
+ * pi.gen_ai.cost.unavailable_reason
205
207
  * undefined — no opinion; nothing emitted
206
208
  */
207
209
  export type CostEstimate =
@@ -248,7 +250,7 @@ export interface ChatUsageEvent {
248
250
  *
249
251
  * Use this to reconcile gateway-issued ids (e.g. `x-litellm-call-id`) with
250
252
  * downstream billing / spend dashboards. Known gateway patterns are also
251
- * auto-stamped on the chat span as `prometheus.gen_ai.gateway.*` attributes.
253
+ * auto-stamped on the chat span as `pi.gen_ai.gateway.*` attributes.
252
254
  */
253
255
  readonly headers: Readonly<Record<string, string>> | undefined;
254
256
  }
@@ -626,14 +628,14 @@ export function recordTelemetryWarning(telemetry: AgentTelemetry | undefined, wa
626
628
  function emitTelemetryWarning(telemetry: AgentTelemetry | undefined, warning: AgentTelemetryWarning): void {
627
629
  const hook = telemetry?.config.onTelemetryWarning;
628
630
  if (!hook) {
629
- if (warning.error === undefined) console.warn(`[prometheus-agent] ${warning.message}`);
630
- else console.warn(`[prometheus-agent] ${warning.message}`, warning.error);
631
+ if (warning.error === undefined) console.warn(`[pi-agent] ${warning.message}`);
632
+ else console.warn(`[pi-agent] ${warning.message}`, warning.error);
631
633
  return;
632
634
  }
633
635
  try {
634
636
  hook(warning);
635
637
  } catch (err) {
636
- console.warn("[prometheus-agent] onTelemetryWarning threw; swallowing:", err);
638
+ console.warn("[pi-agent] onTelemetryWarning threw; swallowing:", err);
637
639
  }
638
640
  }
639
641
 
@@ -678,7 +680,7 @@ export function startInvokeAgentSpan(telemetry: AgentTelemetry | undefined, mode
678
680
  /** Stamp the final step count on the `invoke_agent` span. */
679
681
  export function applyInvokeAgentFinish(span: Span | undefined, stepCount: number): void {
680
682
  if (!span) return;
681
- span.setAttribute(PrometheusGenAIAttr.AgentStepCount, stepCount);
683
+ span.setAttribute(PiGenAIAttr.AgentStepCount, stepCount);
682
684
  }
683
685
 
684
686
  /**
@@ -735,7 +737,7 @@ export interface ChatRequestSnapshot {
735
737
 
736
738
  function buildChatRequestAttributes(stepNumber: number, request: ChatRequestSnapshot, provider: string): Attributes {
737
739
  const attrs: Attributes = {
738
- [PrometheusGenAIAttr.AgentStepNumber]: stepNumber,
740
+ [PiGenAIAttr.AgentStepNumber]: stepNumber,
739
741
  [GenAIAttr.OutputType]: "text",
740
742
  [GenAIAttr.RequestStream]: true,
741
743
  };
@@ -753,11 +755,11 @@ function buildChatRequestAttributes(stepNumber: number, request: ChatRequestSnap
753
755
  const resolved = resolveServiceTier(request.serviceTier, provider);
754
756
  if (resolved) attrs[OpenAIAttr.RequestServiceTier] = resolved;
755
757
  }
756
- if (request.reasoningEffort) attrs[PrometheusGenAIAttr.RequestReasoningEffort] = request.reasoningEffort;
758
+ if (request.reasoningEffort) attrs[PiGenAIAttr.RequestReasoningEffort] = request.reasoningEffort;
757
759
  const toolChoice = serializeToolChoice(request.toolChoice);
758
- if (toolChoice) attrs[PrometheusGenAIAttr.RequestToolChoice] = toolChoice;
760
+ if (toolChoice) attrs[PiGenAIAttr.RequestToolChoice] = toolChoice;
759
761
  if (request.tools && request.tools.length > 0) {
760
- attrs[PrometheusGenAIAttr.RequestAvailableTools] = request.tools.map(tool => tool.name);
762
+ attrs[PiGenAIAttr.RequestAvailableTools] = request.tools.map(tool => tool.name);
761
763
  }
762
764
  return attrs;
763
765
  }
@@ -775,7 +777,7 @@ function serializeToolChoice(toolChoice: ToolChoice | undefined): string | undef
775
777
 
776
778
  function applyContentCaptureForRequest(telemetry: AgentTelemetry, span: Span, request: ChatRequestSnapshot): void {
777
779
  const requestMessages = serializeRequestMessagesForTelemetry(telemetry, request);
778
- if (requestMessages) span.setAttribute(PrometheusGenAIAttr.RequestMessages, requestMessages);
780
+ if (requestMessages) span.setAttribute(PiGenAIAttr.RequestMessages, requestMessages);
779
781
  if (telemetry.contentCapture !== "full") return;
780
782
  const systemInstructions = serializeFullSystemInstructionsForTelemetry(request);
781
783
  if (systemInstructions) span.setAttribute(GenAIAttr.SystemInstructions, systemInstructions);
@@ -785,9 +787,9 @@ function applyContentCaptureForRequest(telemetry: AgentTelemetry, span: Span, re
785
787
 
786
788
  function applyContentCaptureForResponse(telemetry: AgentTelemetry, span: Span, message: AssistantMessage): void {
787
789
  const responseText = serializeResponseTextForTelemetry(telemetry, message);
788
- if (responseText) span.setAttribute(PrometheusGenAIAttr.ResponseText, responseText);
790
+ if (responseText) span.setAttribute(PiGenAIAttr.ResponseText, responseText);
789
791
  const responseToolCalls = serializeResponseToolCallsForTelemetry(telemetry, message);
790
- if (responseToolCalls) span.setAttribute(PrometheusGenAIAttr.ResponseToolCalls, responseToolCalls);
792
+ if (responseToolCalls) span.setAttribute(PiGenAIAttr.ResponseToolCalls, responseToolCalls);
791
793
  if (telemetry.contentCapture === "full") {
792
794
  const outputMessages = serializeFullOutputMessagesForTelemetry(message);
793
795
  if (outputMessages) span.setAttribute(GenAIAttr.OutputMessages, outputMessages);
@@ -1188,6 +1190,9 @@ export function failChatSpan(
1188
1190
  function applyChatResponseAttributes(span: Span, message: AssistantMessage): void {
1189
1191
  span.setAttribute(GenAIAttr.ResponseModel, message.model);
1190
1192
  if (message.responseId) span.setAttribute(GenAIAttr.ResponseId, message.responseId);
1193
+ if (message.upstreamProvider) {
1194
+ span.setAttribute(PiGenAIAttr.ResponseUpstreamProvider, message.upstreamProvider);
1195
+ }
1191
1196
  if (message.ttft != null) span.setAttribute(GenAIAttr.ResponseTimeToFirstChunk, message.ttft / 1000);
1192
1197
  const finishReason = mapStopReason(message.stopReason);
1193
1198
  if (finishReason) span.setAttribute(GenAIAttr.ResponseFinishReasons, [finishReason]);
@@ -1202,7 +1207,7 @@ function applyUsageAttributes(span: Span, usage: Usage | undefined): void {
1202
1207
  span.setAttribute(GenAIAttr.UsageInputTokens, inputTokens);
1203
1208
  span.setAttribute(GenAIAttr.UsageOutputTokens, outputTokens);
1204
1209
  const total = usage.totalTokens ?? inputTokens + outputTokens;
1205
- span.setAttribute(PrometheusGenAIAttr.UsageTotalTokens, total);
1210
+ span.setAttribute(PiGenAIAttr.UsageTotalTokens, total);
1206
1211
  if (usage.cacheRead != null) span.setAttribute(GenAIAttr.UsageCacheReadInputTokens, usage.cacheRead);
1207
1212
  if (usage.cacheWrite != null) span.setAttribute(GenAIAttr.UsageCacheCreationInputTokens, usage.cacheWrite);
1208
1213
  if (usage.reasoningTokens != null) {
@@ -1210,7 +1215,7 @@ function applyUsageAttributes(span: Span, usage: Usage | undefined): void {
1210
1215
  }
1211
1216
  if (usage.server) {
1212
1217
  const sums = (usage.server.webSearch ?? 0) + (usage.server.webFetch ?? 0);
1213
- if (sums > 0) span.setAttribute(PrometheusGenAIAttr.UsageServerSideTools, sums);
1218
+ if (sums > 0) span.setAttribute(PiGenAIAttr.UsageServerSideTools, sums);
1214
1219
  }
1215
1220
  }
1216
1221
 
@@ -1276,10 +1281,10 @@ function applyGatewayAttributes(
1276
1281
  ): void {
1277
1282
  const gateway = detectGatewayFromHeaders(headers);
1278
1283
  if (!gateway) return;
1279
- span.setAttribute(PrometheusGenAIAttr.GatewayName, gateway.name);
1280
- if (baseUrl) span.setAttribute(PrometheusGenAIAttr.GatewayEndpoint, baseUrl);
1281
- if (gateway.callId) span.setAttribute(PrometheusGenAIAttr.GatewayCallId, gateway.callId);
1282
- if (gateway.routedTo) span.setAttribute(PrometheusGenAIAttr.GatewayRoutedTo, gateway.routedTo);
1284
+ span.setAttribute(PiGenAIAttr.GatewayName, gateway.name);
1285
+ if (baseUrl) span.setAttribute(PiGenAIAttr.GatewayEndpoint, baseUrl);
1286
+ if (gateway.callId) span.setAttribute(PiGenAIAttr.GatewayCallId, gateway.callId);
1287
+ if (gateway.routedTo) span.setAttribute(PiGenAIAttr.GatewayRoutedTo, gateway.routedTo);
1283
1288
  }
1284
1289
 
1285
1290
  interface AppliedCostEstimate {
@@ -1340,7 +1345,7 @@ function applyCostEstimateForUsage(
1340
1345
  }
1341
1346
  if (!result) return EMPTY_COST;
1342
1347
  if ("unavailable" in result) {
1343
- span.setAttribute(PrometheusGenAIAttr.CostUnavailableReason, result.unavailable);
1348
+ span.setAttribute(PiGenAIAttr.CostUnavailableReason, result.unavailable);
1344
1349
  const cost: AppliedCostEstimate = {
1345
1350
  costUsd: undefined,
1346
1351
  inputUsd: undefined,
@@ -1362,9 +1367,9 @@ function applyCostEstimateForUsage(
1362
1367
  });
1363
1368
  return cost;
1364
1369
  }
1365
- span.setAttribute(PrometheusGenAIAttr.CostEstimatedUsd, result.usd);
1366
- if (result.inputUsd != null) span.setAttribute(PrometheusGenAIAttr.CostInputUsd, result.inputUsd);
1367
- if (result.outputUsd != null) span.setAttribute(PrometheusGenAIAttr.CostOutputUsd, result.outputUsd);
1370
+ span.setAttribute(PiGenAIAttr.CostEstimatedUsd, result.usd);
1371
+ if (result.inputUsd != null) span.setAttribute(PiGenAIAttr.CostInputUsd, result.inputUsd);
1372
+ if (result.outputUsd != null) span.setAttribute(PiGenAIAttr.CostOutputUsd, result.outputUsd);
1368
1373
  const cost: AppliedCostEstimate = {
1369
1374
  costUsd: result.usd,
1370
1375
  inputUsd: result.inputUsd,
@@ -1534,7 +1539,7 @@ export async function recordManualChatTelemetry(
1534
1539
  });
1535
1540
  if (!span) return undefined;
1536
1541
  if (options.span && options.attributes) span.setAttributes(options.attributes);
1537
- if (options.stepNumber != null) span.setAttribute(PrometheusGenAIAttr.AgentStepNumber, options.stepNumber);
1542
+ if (options.stepNumber != null) span.setAttribute(PiGenAIAttr.AgentStepNumber, options.stepNumber);
1538
1543
  span.setAttribute(GenAIAttr.ResponseModel, options.responseModel ?? options.model.name);
1539
1544
  if (options.responseId) span.setAttribute(GenAIAttr.ResponseId, options.responseId);
1540
1545
  const finishReason = mapStopReason(options.finishReason);
@@ -1567,7 +1572,7 @@ export async function recordManualChatTelemetry(
1567
1572
  }
1568
1573
  if (options.responseText) {
1569
1574
  const responseText = stringifyJsonAttribute(summarizeTelemetryTexts([options.responseText]));
1570
- if (responseText) span.setAttribute(PrometheusGenAIAttr.ResponseText, responseText);
1575
+ if (responseText) span.setAttribute(PiGenAIAttr.ResponseText, responseText);
1571
1576
  }
1572
1577
  if (options.responseToolCalls && options.responseToolCalls.length > 0) {
1573
1578
  const calls = options.responseToolCalls.map(call => ({
@@ -1576,7 +1581,7 @@ export async function recordManualChatTelemetry(
1576
1581
  input: summarizeTelemetryValue(call.input),
1577
1582
  }));
1578
1583
  const responseToolCalls = stringifyJsonAttribute(limitTelemetryToolCalls(calls));
1579
- if (responseToolCalls) span.setAttribute(PrometheusGenAIAttr.ResponseToolCalls, responseToolCalls);
1584
+ if (responseToolCalls) span.setAttribute(PiGenAIAttr.ResponseToolCalls, responseToolCalls);
1580
1585
  }
1581
1586
  applyTerminalStatus(span, options.finishReason, undefined);
1582
1587
  if (options.endSpan ?? options.span === undefined) span.end();
@@ -1595,7 +1600,7 @@ export interface InstrumentedChatSpanOptions {
1595
1600
  /** Step index recorded on the span; defaults to `-1` for non-loop calls. */
1596
1601
  readonly stepNumber?: number;
1597
1602
  /**
1598
- * Tag stamped onto `prometheus.gen_ai.oneshot.kind`. Values used by the agent:
1603
+ * Tag stamped onto `pi.gen_ai.oneshot.kind`. Values used by the agent:
1599
1604
  * `compaction_summary`, `compaction_short_summary`, `compaction_turn_prefix`,
1600
1605
  * `handoff`, `branch_summary`, `inspect_image`. Free-form to allow callers
1601
1606
  * outside this package to add new kinds without bumping the helper.
@@ -1629,6 +1634,13 @@ export async function instrumentedCompleteSimple<TApi extends Api>(
1629
1634
  options: SimpleStreamOptions,
1630
1635
  span: InstrumentedChatSpanOptions,
1631
1636
  ): Promise<AssistantMessage> {
1637
+ // Oneshot LLM calls (handoff, compaction/branch summaries) run outside the
1638
+ // agent `#runLoop`, which is where the EventLoopKeepalive normally lives.
1639
+ // Without it, Bun's JSC loop stops servicing timers while parked on the
1640
+ // long-lived completion promise, freezing any host spinner (e.g. the
1641
+ // `/handoff` Loader) until an unrelated I/O event (a terminal resize)
1642
+ // pokes the loop. Keep the loop healthy for the duration of the call.
1643
+ using _keepalive = new EventLoopKeepalive();
1632
1644
  const { telemetry, parent, oneshotKind } = span;
1633
1645
  const stepNumber = span.stepNumber ?? -1;
1634
1646
  const reasoning = options.reasoning;
@@ -1650,7 +1662,7 @@ export async function instrumentedCompleteSimple<TApi extends Api>(
1650
1662
  },
1651
1663
  });
1652
1664
  if (chatSpan) {
1653
- if (oneshotKind) chatSpan.setAttribute(PrometheusGenAIAttr.OneshotKind, oneshotKind);
1665
+ if (oneshotKind) chatSpan.setAttribute(PiGenAIAttr.OneshotKind, oneshotKind);
1654
1666
  if (span.attributes) chatSpan.setAttributes(span.attributes);
1655
1667
  }
1656
1668
 
@@ -1788,7 +1800,7 @@ export function finishExecuteToolSpan(
1788
1800
  }
1789
1801
 
1790
1802
  /** Span attribute carrying the terminal {@link ToolStatus}. */
1791
- export const EXECUTE_TOOL_STATUS_ATTR = PrometheusGenAIAttr.ToolStatus;
1803
+ export const EXECUTE_TOOL_STATUS_ATTR = PiGenAIAttr.ToolStatus;
1792
1804
 
1793
1805
  /**
1794
1806
  * Mapping from non-ok {@link ToolStatus} values to the `error.type` attribute
@@ -1861,7 +1873,8 @@ export function finishInvokeAgentSpan(
1861
1873
 
1862
1874
  /**
1863
1875
  * Invoke {@link AgentTelemetryConfig.onRunEnd} on `telemetry` if set. Throws
1864
- * are caught and logged via `console.warn` — telemetry callbacks NEVER turn a
1876
+ * are caught and surfaced via the `onTelemetryWarning` hook (falling back to `console.warn`
1877
+ * when no hook is set) — telemetry callbacks NEVER turn a
1865
1878
  * successful agent run into a failed one. Idempotent at the call site via
1866
1879
  * {@link AgentRunCollector.markRunEnded}; callers must check that before
1867
1880
  * calling this helper.
@@ -1880,66 +1893,66 @@ export function fireOnRunEnd(telemetry: AgentTelemetry, summary: AgentRunSummary
1880
1893
  }
1881
1894
  }
1882
1895
 
1883
- /** Aggregate `prometheus.gen_ai.agent.*` attributes stamped on the `invoke_agent` span. */
1884
- export const enum PrometheusGenAIAggregateAttr {
1885
- ChatsCount = "prometheus.gen_ai.agent.chats.count",
1886
- ChatsTotalLatencyMs = "prometheus.gen_ai.agent.chats.total_latency_ms",
1887
- ChatsStopReasonPrefix = "prometheus.gen_ai.agent.chats.stop_reason.",
1888
- ToolsCount = "prometheus.gen_ai.agent.tools.count",
1889
- ToolsOkCount = "prometheus.gen_ai.agent.tools.ok.count",
1890
- ToolsErrorCount = "prometheus.gen_ai.agent.tools.error.count",
1891
- ToolsSkippedCount = "prometheus.gen_ai.agent.tools.skipped.count",
1892
- ToolsBlockedCount = "prometheus.gen_ai.agent.tools.blocked.count",
1893
- ToolsTimeoutCount = "prometheus.gen_ai.agent.tools.timeout.count",
1894
- ToolsAbortedCount = "prometheus.gen_ai.agent.tools.aborted.count",
1895
- ToolsTotalLatencyMs = "prometheus.gen_ai.agent.tools.total_latency_ms",
1896
- ToolsInvoked = "prometheus.gen_ai.agent.tools.invoked",
1897
- ToolsAvailable = "prometheus.gen_ai.agent.tools.available",
1898
- ToolsUnused = "prometheus.gen_ai.agent.tools.unused",
1899
- UsageInputTokensTotal = "prometheus.gen_ai.agent.usage.input_tokens.total",
1900
- UsageOutputTokensTotal = "prometheus.gen_ai.agent.usage.output_tokens.total",
1901
- UsageCacheReadInputTokensTotal = "prometheus.gen_ai.agent.usage.cache_read.input_tokens.total",
1902
- UsageCacheCreationInputTokensTotal = "prometheus.gen_ai.agent.usage.cache_creation.input_tokens.total",
1903
- UsageReasoningOutputTokensTotal = "prometheus.gen_ai.agent.usage.reasoning.output_tokens.total",
1904
- UsageTotalTokensTotal = "prometheus.gen_ai.agent.usage.total_tokens.total",
1905
- CostEstimatedUsdTotal = "prometheus.gen_ai.agent.cost.estimated_usd.total",
1906
- ErrorsCount = "prometheus.gen_ai.agent.errors.count",
1907
- }
1908
-
1909
- /** Stamp the aggregate `prometheus.gen_ai.agent.*` attributes on the given span. */
1896
+ /** Aggregate `pi.gen_ai.agent.*` attributes stamped on the `invoke_agent` span. */
1897
+ export const enum PiGenAIAggregateAttr {
1898
+ ChatsCount = "pi.gen_ai.agent.chats.count",
1899
+ ChatsTotalLatencyMs = "pi.gen_ai.agent.chats.total_latency_ms",
1900
+ ChatsStopReasonPrefix = "pi.gen_ai.agent.chats.stop_reason.",
1901
+ ToolsCount = "pi.gen_ai.agent.tools.count",
1902
+ ToolsOkCount = "pi.gen_ai.agent.tools.ok.count",
1903
+ ToolsErrorCount = "pi.gen_ai.agent.tools.error.count",
1904
+ ToolsSkippedCount = "pi.gen_ai.agent.tools.skipped.count",
1905
+ ToolsBlockedCount = "pi.gen_ai.agent.tools.blocked.count",
1906
+ ToolsTimeoutCount = "pi.gen_ai.agent.tools.timeout.count",
1907
+ ToolsAbortedCount = "pi.gen_ai.agent.tools.aborted.count",
1908
+ ToolsTotalLatencyMs = "pi.gen_ai.agent.tools.total_latency_ms",
1909
+ ToolsInvoked = "pi.gen_ai.agent.tools.invoked",
1910
+ ToolsAvailable = "pi.gen_ai.agent.tools.available",
1911
+ ToolsUnused = "pi.gen_ai.agent.tools.unused",
1912
+ UsageInputTokensTotal = "pi.gen_ai.agent.usage.input_tokens.total",
1913
+ UsageOutputTokensTotal = "pi.gen_ai.agent.usage.output_tokens.total",
1914
+ UsageCacheReadInputTokensTotal = "pi.gen_ai.agent.usage.cache_read.input_tokens.total",
1915
+ UsageCacheCreationInputTokensTotal = "pi.gen_ai.agent.usage.cache_creation.input_tokens.total",
1916
+ UsageReasoningOutputTokensTotal = "pi.gen_ai.agent.usage.reasoning.output_tokens.total",
1917
+ UsageTotalTokensTotal = "pi.gen_ai.agent.usage.total_tokens.total",
1918
+ CostEstimatedUsdTotal = "pi.gen_ai.agent.cost.estimated_usd.total",
1919
+ ErrorsCount = "pi.gen_ai.agent.errors.count",
1920
+ }
1921
+
1922
+ /** Stamp the aggregate `pi.gen_ai.agent.*` attributes on the given span. */
1910
1923
  function applyAggregateAttributes(span: Span, summary: AgentRunSummary, coverage: AgentRunCoverage): void {
1911
- span.setAttribute(PrometheusGenAIAggregateAttr.ChatsCount, summary.chats.total);
1912
- span.setAttribute(PrometheusGenAIAggregateAttr.ChatsTotalLatencyMs, summary.chats.totalLatencyMs);
1924
+ span.setAttribute(PiGenAIAggregateAttr.ChatsCount, summary.chats.total);
1925
+ span.setAttribute(PiGenAIAggregateAttr.ChatsTotalLatencyMs, summary.chats.totalLatencyMs);
1913
1926
  for (const [reason, count] of Object.entries(summary.chats.byStopReason)) {
1914
- span.setAttribute(`${PrometheusGenAIAggregateAttr.ChatsStopReasonPrefix}${reason}.count`, count);
1915
- }
1916
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsCount, summary.tools.total);
1917
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsOkCount, summary.tools.ok);
1918
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsErrorCount, summary.tools.error);
1919
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsSkippedCount, summary.tools.skipped);
1920
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsBlockedCount, summary.tools.blocked);
1921
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsTimeoutCount, summary.tools.timeout);
1922
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsAbortedCount, summary.tools.aborted);
1923
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsTotalLatencyMs, summary.tools.totalLatencyMs);
1927
+ span.setAttribute(`${PiGenAIAggregateAttr.ChatsStopReasonPrefix}${reason}.count`, count);
1928
+ }
1929
+ span.setAttribute(PiGenAIAggregateAttr.ToolsCount, summary.tools.total);
1930
+ span.setAttribute(PiGenAIAggregateAttr.ToolsOkCount, summary.tools.ok);
1931
+ span.setAttribute(PiGenAIAggregateAttr.ToolsErrorCount, summary.tools.error);
1932
+ span.setAttribute(PiGenAIAggregateAttr.ToolsSkippedCount, summary.tools.skipped);
1933
+ span.setAttribute(PiGenAIAggregateAttr.ToolsBlockedCount, summary.tools.blocked);
1934
+ span.setAttribute(PiGenAIAggregateAttr.ToolsTimeoutCount, summary.tools.timeout);
1935
+ span.setAttribute(PiGenAIAggregateAttr.ToolsAbortedCount, summary.tools.aborted);
1936
+ span.setAttribute(PiGenAIAggregateAttr.ToolsTotalLatencyMs, summary.tools.totalLatencyMs);
1924
1937
  if (coverage.toolsInvoked.length > 0) {
1925
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsInvoked, [...coverage.toolsInvoked]);
1938
+ span.setAttribute(PiGenAIAggregateAttr.ToolsInvoked, [...coverage.toolsInvoked]);
1926
1939
  }
1927
1940
  if (coverage.toolsAvailable.length > 0) {
1928
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsAvailable, [...coverage.toolsAvailable]);
1941
+ span.setAttribute(PiGenAIAggregateAttr.ToolsAvailable, [...coverage.toolsAvailable]);
1929
1942
  }
1930
1943
  if (coverage.toolsUnused.length > 0) {
1931
- span.setAttribute(PrometheusGenAIAggregateAttr.ToolsUnused, [...coverage.toolsUnused]);
1932
- }
1933
- span.setAttribute(PrometheusGenAIAggregateAttr.UsageInputTokensTotal, summary.usage.inputTokens);
1934
- span.setAttribute(PrometheusGenAIAggregateAttr.UsageOutputTokensTotal, summary.usage.outputTokens);
1935
- span.setAttribute(PrometheusGenAIAggregateAttr.UsageCacheReadInputTokensTotal, summary.usage.cachedInputTokens);
1936
- span.setAttribute(PrometheusGenAIAggregateAttr.UsageCacheCreationInputTokensTotal, summary.usage.cacheWriteTokens);
1937
- span.setAttribute(PrometheusGenAIAggregateAttr.UsageReasoningOutputTokensTotal, summary.usage.reasoningOutputTokens);
1938
- span.setAttribute(PrometheusGenAIAggregateAttr.UsageTotalTokensTotal, summary.usage.totalTokens);
1944
+ span.setAttribute(PiGenAIAggregateAttr.ToolsUnused, [...coverage.toolsUnused]);
1945
+ }
1946
+ span.setAttribute(PiGenAIAggregateAttr.UsageInputTokensTotal, summary.usage.inputTokens);
1947
+ span.setAttribute(PiGenAIAggregateAttr.UsageOutputTokensTotal, summary.usage.outputTokens);
1948
+ span.setAttribute(PiGenAIAggregateAttr.UsageCacheReadInputTokensTotal, summary.usage.cachedInputTokens);
1949
+ span.setAttribute(PiGenAIAggregateAttr.UsageCacheCreationInputTokensTotal, summary.usage.cacheWriteTokens);
1950
+ span.setAttribute(PiGenAIAggregateAttr.UsageReasoningOutputTokensTotal, summary.usage.reasoningOutputTokens);
1951
+ span.setAttribute(PiGenAIAggregateAttr.UsageTotalTokensTotal, summary.usage.totalTokens);
1939
1952
  if (summary.cost.estimatedUsd > 0) {
1940
- span.setAttribute(PrometheusGenAIAggregateAttr.CostEstimatedUsdTotal, summary.cost.estimatedUsd);
1953
+ span.setAttribute(PiGenAIAggregateAttr.CostEstimatedUsdTotal, summary.cost.estimatedUsd);
1941
1954
  }
1942
- span.setAttribute(PrometheusGenAIAggregateAttr.ErrorsCount, summary.errors.total);
1955
+ span.setAttribute(PiGenAIAggregateAttr.ErrorsCount, summary.errors.total);
1943
1956
  }
1944
1957
 
1945
1958
  /**
@@ -1974,10 +1987,10 @@ export function recordHandoff(
1974
1987
  const attrs: Attributes = {};
1975
1988
  const fromAgent = options.fromAgent ? normalizeAgentIdentity(telemetry, options.fromAgent) : undefined;
1976
1989
  const toAgent = normalizeAgentIdentity(telemetry, options.toAgent);
1977
- if (fromAgent?.name) attrs[PrometheusGenAIAttr.HandoffFromAgentName] = fromAgent.name;
1978
- if (fromAgent?.id) attrs[PrometheusGenAIAttr.HandoffFromAgentId] = fromAgent.id;
1979
- if (toAgent.name) attrs[PrometheusGenAIAttr.HandoffToAgentName] = toAgent.name;
1980
- if (toAgent.id) attrs[PrometheusGenAIAttr.HandoffToAgentId] = toAgent.id;
1990
+ if (fromAgent?.name) attrs[PiGenAIAttr.HandoffFromAgentName] = fromAgent.name;
1991
+ if (fromAgent?.id) attrs[PiGenAIAttr.HandoffFromAgentId] = fromAgent.id;
1992
+ if (toAgent.name) attrs[PiGenAIAttr.HandoffToAgentName] = toAgent.name;
1993
+ if (toAgent.id) attrs[PiGenAIAttr.HandoffToAgentId] = toAgent.id;
1981
1994
  const name = toAgent.name
1982
1995
  ? fromAgent?.name
1983
1996
  ? `handoff ${fromAgent.name} → ${toAgent.name}`
package/src/types.ts CHANGED
@@ -1,7 +1,9 @@
1
1
  import type {
2
+ ApiKeyResolveContext,
2
3
  AssistantMessage,
3
4
  AssistantMessageEvent,
4
5
  AssistantMessageEventStream,
6
+ Context,
5
7
  Effort,
6
8
  ImageContent,
7
9
  Message,
@@ -25,6 +27,14 @@ export type StreamFn = (
25
27
  ...args: Parameters<typeof streamSimple>
26
28
  ) => AssistantMessageEventStream | Promise<AssistantMessageEventStream>;
27
29
 
30
+ /**
31
+ * An aside entry: a ready {@link AgentMessage}, or a sync thunk evaluated at
32
+ * injection time that returns the message to inject or `null` to skip it. Thunks
33
+ * let the producer make the final inject-or-drop decision against current state
34
+ * (e.g. dropping late diagnostics a newer edit superseded).
35
+ */
36
+ export type AsideMessage = AgentMessage | (() => AgentMessage | null);
37
+
28
38
  /**
29
39
  * Configuration for the agent loop.
30
40
  */
@@ -38,14 +48,6 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
38
48
  */
39
49
  interruptMode?: "immediate" | "wait";
40
50
 
41
- /**
42
- * Maximum completed tool calls to accept from one streamed assistant turn before
43
- * cutting the provider stream and executing that batch. The cap is enforced on
44
- * `toolcall_end` so every executed call has complete arguments. Undefined disables
45
- * batching.
46
- */
47
- maxToolCallsPerTurn?: number;
48
-
49
51
  /**
50
52
  * Optional session identifier forwarded to LLM providers.
51
53
  * Used by providers that support session-based caching (e.g., OpenAI Codex).
@@ -106,23 +108,46 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
106
108
  */
107
109
  transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise<AgentMessage[]>;
108
110
 
111
+ /**
112
+ * Optional transform applied to the final provider context after conversion,
113
+ * normalization, and append-only context handling, but before telemetry capture
114
+ * and provider send.
115
+ */
116
+ transformProviderContext?: (context: Context, model: Model) => Context;
117
+
109
118
  /**
110
119
  * Resolves an API key dynamically for each LLM call.
111
120
  *
112
121
  * Useful for short-lived OAuth tokens (e.g., GitHub Copilot) that may expire
113
122
  * during long-running tool execution phases.
114
123
  */
115
- getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined;
124
+ getApiKey?: (provider: string, ctx?: ApiKeyResolveContext) => Promise<string | undefined> | string | undefined;
116
125
 
117
126
  /**
118
127
  * Returns steering messages to inject into the conversation mid-run.
119
128
  *
120
- * Called after each tool execution to check for user interruptions unless interruptMode is "wait".
121
- * If messages are returned, remaining tool calls are skipped and
122
- * these messages are added to the context before the next LLM call.
129
+ * Called at injection boundaries only (loop start and after a tool batch
130
+ * fully settles), so dequeued messages are immediately injected. The
131
+ * mid-batch interrupt poll uses {@link hasSteeringMessages} instead and
132
+ * never consumes the queue.
123
133
  */
124
134
  getSteeringMessages?: () => Promise<AgentMessage[]>;
125
135
 
136
+ /**
137
+ * Peeks whether steering messages are queued, without consuming them.
138
+ *
139
+ * Called after each tool execution (unless interruptMode is "wait") to decide
140
+ * whether to skip the remaining tool calls in the batch. The queue keeps
141
+ * owning its messages until the loop reaches the next injection boundary and
142
+ * dequeues via {@link getSteeringMessages} — so callers can still cancel or
143
+ * restore queued messages while in-flight tools settle, and an external
144
+ * abort in that window leaves the queue intact for a post-abort continue.
145
+ *
146
+ * When omitted, steering never interrupts a running tool batch; queued
147
+ * messages are still delivered at the next injection boundary.
148
+ */
149
+ hasSteeringMessages?: () => boolean | Promise<boolean>;
150
+
126
151
  /**
127
152
  * Returns follow-up messages to process after the agent would otherwise stop.
128
153
  *
@@ -131,6 +156,17 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
131
156
  * continues with another turn.
132
157
  */
133
158
  getFollowUpMessages?: () => Promise<AgentMessage[]>;
159
+ /**
160
+ * Returns non-interrupting "aside" messages to inject at a step boundary.
161
+ *
162
+ * Polled after each tool batch (before the next LLM call) AND at the yield
163
+ * check. Unlike steering, these NEVER abort in-flight tools — they are passive
164
+ * notifications (e.g. background-job completions, late LSP diagnostics) that
165
+ * should reach the model between requests without waiting for the agent to
166
+ * fully stop. Returned messages are appended to the context with normal
167
+ * message events and keep the loop running so the model can react.
168
+ */
169
+ getAsideMessages?: () => Promise<AsideMessage[]>;
134
170
  /**
135
171
  * Hook fired right before the loop would exit.
136
172
  *
@@ -198,6 +234,15 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
198
234
  */
199
235
  getReasoning?: () => Effort | undefined;
200
236
 
237
+ /**
238
+ * Dynamic reasoning-disable override, resolved per LLM call. When set,
239
+ * its return value overrides the static `disableReasoning` from
240
+ * `SimpleStreamOptions` for that request. Pair with `getReasoning` so
241
+ * mid-run transitions into and out of the explicit `off` state propagate
242
+ * to the next provider call.
243
+ */
244
+ getDisableReasoning?: () => boolean | undefined;
245
+
201
246
  /**
202
247
  * Called after a tool call has been validated and is about to execute.
203
248
  *
@@ -281,6 +326,8 @@ export interface AfterToolCallResult {
281
326
  details?: unknown;
282
327
  /** If provided, replaces the error flag carried with the tool result. */
283
328
  isError?: boolean;
329
+ /** If provided, replaces the contextually-useless flag carried with the tool result. */
330
+ useless?: boolean;
284
331
  }
285
332
 
286
333
  /** Context passed to `beforeToolCall`. */
@@ -320,7 +367,7 @@ export interface AfterToolCallContext {
320
367
  *
321
368
  * @example
322
369
  * ```typescript
323
- * declare module "@prometheus-ai/agent-core" {
370
+ * declare module "@prometheus-ai/agent" {
324
371
  * interface CustomAgentMessages {
325
372
  * artifact: ArtifactMessage;
326
373
  * notification: NotificationMessage;
@@ -346,6 +393,7 @@ export interface AgentState {
346
393
  systemPrompt: string[];
347
394
  model: Model;
348
395
  thinkingLevel?: Effort;
396
+ disableReasoning?: boolean;
349
397
  tools: AgentTool<any>[];
350
398
  messages: AgentMessage[]; // Can include attachments + custom message types
351
399
  isStreaming: boolean;
@@ -362,6 +410,8 @@ export interface AgentToolResult<T = any, _TInput = unknown> {
362
410
  // Marks a non-throwing failure (e.g. an aggregator catching per-entry errors).
363
411
  // agent-loop honors this and surfaces it as a tool error on the wire.
364
412
  isError?: boolean;
413
+ /** Marks the result as contextually useless: safe for compaction to elide once consumed (e.g. zero matches, wait timeout). Ignored when isError is set. */
414
+ useless?: boolean;
365
415
  }
366
416
 
367
417
  // Callback for streaming tool execution updates
@@ -422,14 +472,13 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
422
472
  loadMode?: "essential" | "discoverable";
423
473
  /** Short one-line summary used for tool discovery indexes. */
424
474
  summary?: string;
425
- /** If true, tool execution ignores abort signals (runs to completion) */
426
- nonAbortable?: boolean;
427
475
  /**
428
476
  * Concurrency mode for tool scheduling when multiple calls are in one turn.
429
477
  * - "shared": can run alongside other shared tools (default)
430
478
  * - "exclusive": runs alone; other tools wait until it finishes
479
+ * - function: resolved per call from the (raw, pre-validation) arguments
431
480
  */
432
- concurrency?: "shared" | "exclusive";
481
+ concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
433
482
  /** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
434
483
  lenientArgValidation?: boolean;
435
484
  /**