npm - @prometheus-ai/agent-core - Versions diffs - 0.5.4 → 0.5.8 - Mend

@prometheus-ai/agent-core 0.5.4 → 0.5.8

This diff compares the contents of two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (31)

package/dist/types/agent-loop.d.ts +7 -0
package/dist/types/agent.d.ts +41 -13
package/dist/types/compaction/branch-summarization.d.ts +3 -3
package/dist/types/compaction/compaction.d.ts +11 -9
package/dist/types/compaction/messages.d.ts +14 -2
package/dist/types/compaction/openai.d.ts +18 -3
package/dist/types/compaction/pruning.d.ts +55 -0
package/dist/types/compaction/shake.d.ts +3 -1
package/dist/types/compaction/utils.d.ts +18 -2
package/dist/types/proxy.d.ts +4 -3
package/dist/types/telemetry.d.ts +59 -57
package/dist/types/types.d.ts +60 -16
package/package.json +6 -4
package/src/agent-loop.ts +660 -181
package/src/agent.ts +103 -30
package/src/compaction/branch-summarization.ts +8 -7
package/src/compaction/compaction.ts +69 -34
package/src/compaction/messages.ts +78 -64
package/src/compaction/openai.ts +88 -74
package/src/compaction/prompts/branch-summary.md +1 -1
package/src/compaction/prompts/compaction-summary-context.md +1 -1
package/src/compaction/prompts/compaction-summary.md +2 -2
package/src/compaction/prompts/compaction-update-summary.md +3 -3
package/src/compaction/prompts/file-operations.md +3 -8
package/src/compaction/prompts/summarization-system.md +1 -1
package/src/compaction/pruning.ts +240 -8
package/src/compaction/shake.ts +7 -3
package/src/compaction/utils.ts +97 -19
package/src/proxy.ts +13 -7
package/src/telemetry.ts +126 -113
package/src/types.ts +65 -16

package/src/telemetry.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * OpenTelemetry instrumentation for the agent loop.
  *
  * Implements the OpenTelemetry GenAI semantic conventions
- * (https://opentelemetry.io/docs/specs/semconv/gen-ai/) plus `prometheus.gen_ai.*`
+ * (https://opentelemetry.io/docs/specs/semconv/gen-ai/) plus `pi.gen_ai.*`
  * extension attributes for run summaries, dashboard summaries, and cost hints
  * that are useful to downstream observability UIs.
  *
@@ -50,6 +50,7 @@ import {
 } from "@prometheus-ai/ai";
 import { AgentRunCollector, type AgentRunCoverage, type AgentRunSummary, type ToolStatus } from "./run-collector";
 import type { AgentTool } from "./types";
+import { EventLoopKeepalive } from "./utils/yield";
 /** Default tracer name. Override via {@link AgentTelemetryConfig.tracerName}. */
 export const DEFAULT_TRACER_NAME = "@prometheus-ai/agent-core";
@@ -123,39 +124,40 @@ export const enum OpenAIAttr {
 }
 /** Project extension attributes. Kept out of the reserved `gen_ai.*` namespace. */
-export const enum PrometheusGenAIAttr {
-	AgentStepNumber = "prometheus.gen_ai.agent.step.number",
-	AgentStepCount = "prometheus.gen_ai.agent.step.count",
-	RequestReasoningEffort = "prometheus.gen_ai.request.reasoning.effort",
-	RequestToolChoice = "prometheus.gen_ai.request.tool.choice",
-	RequestAvailableTools = "prometheus.gen_ai.request.available_tools",
-	RequestMessages = "prometheus.gen_ai.request.messages",
-	ResponseText = "prometheus.gen_ai.response.text",
-	ResponseToolCalls = "prometheus.gen_ai.response.tool_calls",
-	UsageTotalTokens = "prometheus.gen_ai.usage.total_tokens",
-	UsageServerSideTools = "prometheus.gen_ai.usage.server_tool_requests",
-	CostEstimatedUsd = "prometheus.gen_ai.cost.estimated_usd",
-	CostInputUsd = "prometheus.gen_ai.cost.input_usd",
-	CostOutputUsd = "prometheus.gen_ai.cost.output_usd",
-	CostUnavailableReason = "prometheus.gen_ai.cost.unavailable_reason",
-	ToolStatus = "prometheus.gen_ai.tool.status",
-	ToolCallIntent = "prometheus.gen_ai.tool.call.intent",
-	HandoffFromAgentName = "prometheus.gen_ai.handoff.from_agent.name",
-	HandoffFromAgentId = "prometheus.gen_ai.handoff.from_agent.id",
-	HandoffToAgentName = "prometheus.gen_ai.handoff.to_agent.name",
-	HandoffToAgentId = "prometheus.gen_ai.handoff.to_agent.id",
+export const enum PiGenAIAttr {
+	AgentStepNumber = "pi.gen_ai.agent.step.number",
+	AgentStepCount = "pi.gen_ai.agent.step.count",
+	RequestReasoningEffort = "pi.gen_ai.request.reasoning.effort",
+	RequestToolChoice = "pi.gen_ai.request.tool.choice",
+	RequestAvailableTools = "pi.gen_ai.request.available_tools",
+	RequestMessages = "pi.gen_ai.request.messages",
+	ResponseText = "pi.gen_ai.response.text",
+	ResponseToolCalls = "pi.gen_ai.response.tool_calls",
+	ResponseUpstreamProvider = "pi.gen_ai.response.upstream_provider",
+	UsageTotalTokens = "pi.gen_ai.usage.total_tokens",
+	UsageServerSideTools = "pi.gen_ai.usage.server_tool_requests",
+	CostEstimatedUsd = "pi.gen_ai.cost.estimated_usd",
+	CostInputUsd = "pi.gen_ai.cost.input_usd",
+	CostOutputUsd = "pi.gen_ai.cost.output_usd",
+	CostUnavailableReason = "pi.gen_ai.cost.unavailable_reason",
+	ToolStatus = "pi.gen_ai.tool.status",
+	ToolCallIntent = "pi.gen_ai.tool.call.intent",
+	HandoffFromAgentName = "pi.gen_ai.handoff.from_agent.name",
+	HandoffFromAgentId = "pi.gen_ai.handoff.from_agent.id",
+	HandoffToAgentName = "pi.gen_ai.handoff.to_agent.name",
+	HandoffToAgentId = "pi.gen_ai.handoff.to_agent.id",
 	// Marks chat spans emitted outside the agent loop (compaction, handoff, branch
 	// summary, image inspection, …). Lets dashboards split oneshot cost / latency
 	// from main-turn cost without overloading the semconv `gen_ai.operation.name`.
-	OneshotKind = "prometheus.gen_ai.oneshot.kind",
+	OneshotKind = "pi.gen_ai.oneshot.kind",
 	// Gateway / proxy (LiteLLM, Helicone, Portkey, …) — populated when a known
 	// gateway header pattern is detected on the upstream response. The base
 	// `gen_ai.provider.name` continues to track the *upstream* provider (e.g.
 	// `anthropic`) that the gateway routed to.
-	GatewayName = "prometheus.gen_ai.gateway.name",
-	GatewayEndpoint = "prometheus.gen_ai.gateway.endpoint",
-	GatewayCallId = "prometheus.gen_ai.gateway.call_id",
-	GatewayRoutedTo = "prometheus.gen_ai.gateway.routed_to",
+	GatewayName = "pi.gen_ai.gateway.name",
+	GatewayEndpoint = "pi.gen_ai.gateway.endpoint",
+	GatewayCallId = "pi.gen_ai.gateway.call_id",
+	GatewayRoutedTo = "pi.gen_ai.gateway.routed_to",
 }
 /** GenAI operation names — values for {@link GenAIAttr.OperationName}. */
@@ -199,9 +201,9 @@ export interface CostEstimatorContext {
 /**
  * Cost estimator result.
- *   { usd: number }                — cost is known; emitted as prometheus.gen_ai.cost.estimated_usd
+ *   { usd: number }                — cost is known; emitted as pi.gen_ai.cost.estimated_usd
  *   { unavailable: string }        — cost is intentionally unknown; emitted as
- *                                    prometheus.gen_ai.cost.unavailable_reason
+ *                                    pi.gen_ai.cost.unavailable_reason
  *   undefined                      — no opinion; nothing emitted
  */
 export type CostEstimate =
@@ -248,7 +250,7 @@ export interface ChatUsageEvent {
 	 *
 	 * Use this to reconcile gateway-issued ids (e.g. `x-litellm-call-id`) with
 	 * downstream billing / spend dashboards. Known gateway patterns are also
-	 * auto-stamped on the chat span as `prometheus.gen_ai.gateway.*` attributes.
+	 * auto-stamped on the chat span as `pi.gen_ai.gateway.*` attributes.
 	 */
 	readonly headers: Readonly<Record<string, string>> | undefined;
 }
@@ -626,14 +628,14 @@ export function recordTelemetryWarning(telemetry: AgentTelemetry | undefined, wa
 function emitTelemetryWarning(telemetry: AgentTelemetry | undefined, warning: AgentTelemetryWarning): void {
 	const hook = telemetry?.config.onTelemetryWarning;
 	if (!hook) {
-		if (warning.error === undefined) console.warn(`[prometheus-agent] ${warning.message}`);
-		else console.warn(`[prometheus-agent] ${warning.message}`, warning.error);
+		if (warning.error === undefined) console.warn(`[pi-agent] ${warning.message}`);
+		else console.warn(`[pi-agent] ${warning.message}`, warning.error);
 		return;
 	}
 	try {
 		hook(warning);
 	} catch (err) {
-		console.warn("[prometheus-agent] onTelemetryWarning threw; swallowing:", err);
+		console.warn("[pi-agent] onTelemetryWarning threw; swallowing:", err);
 	}
 }
@@ -678,7 +680,7 @@ export function startInvokeAgentSpan(telemetry: AgentTelemetry | undefined, mode
 /** Stamp the final step count on the `invoke_agent` span. */
 export function applyInvokeAgentFinish(span: Span | undefined, stepCount: number): void {
 	if (!span) return;
-	span.setAttribute(PrometheusGenAIAttr.AgentStepCount, stepCount);
+	span.setAttribute(PiGenAIAttr.AgentStepCount, stepCount);
 }
 /**
@@ -735,7 +737,7 @@ export interface ChatRequestSnapshot {
 function buildChatRequestAttributes(stepNumber: number, request: ChatRequestSnapshot, provider: string): Attributes {
 	const attrs: Attributes = {
-		[PrometheusGenAIAttr.AgentStepNumber]: stepNumber,
+		[PiGenAIAttr.AgentStepNumber]: stepNumber,
 		[GenAIAttr.OutputType]: "text",
 		[GenAIAttr.RequestStream]: true,
 	};
@@ -753,11 +755,11 @@ function buildChatRequestAttributes(stepNumber: number, request: ChatRequestSnap
 		const resolved = resolveServiceTier(request.serviceTier, provider);
 		if (resolved) attrs[OpenAIAttr.RequestServiceTier] = resolved;
 	}
-	if (request.reasoningEffort) attrs[PrometheusGenAIAttr.RequestReasoningEffort] = request.reasoningEffort;
+	if (request.reasoningEffort) attrs[PiGenAIAttr.RequestReasoningEffort] = request.reasoningEffort;
 	const toolChoice = serializeToolChoice(request.toolChoice);
-	if (toolChoice) attrs[PrometheusGenAIAttr.RequestToolChoice] = toolChoice;
+	if (toolChoice) attrs[PiGenAIAttr.RequestToolChoice] = toolChoice;
 	if (request.tools && request.tools.length > 0) {
-		attrs[PrometheusGenAIAttr.RequestAvailableTools] = request.tools.map(tool => tool.name);
+		attrs[PiGenAIAttr.RequestAvailableTools] = request.tools.map(tool => tool.name);
 	}
 	return attrs;
 }
@@ -775,7 +777,7 @@ function serializeToolChoice(toolChoice: ToolChoice | undefined): string | undef
 function applyContentCaptureForRequest(telemetry: AgentTelemetry, span: Span, request: ChatRequestSnapshot): void {
 	const requestMessages = serializeRequestMessagesForTelemetry(telemetry, request);
-	if (requestMessages) span.setAttribute(PrometheusGenAIAttr.RequestMessages, requestMessages);
+	if (requestMessages) span.setAttribute(PiGenAIAttr.RequestMessages, requestMessages);
 	if (telemetry.contentCapture !== "full") return;
 	const systemInstructions = serializeFullSystemInstructionsForTelemetry(request);
 	if (systemInstructions) span.setAttribute(GenAIAttr.SystemInstructions, systemInstructions);
@@ -785,9 +787,9 @@ function applyContentCaptureForRequest(telemetry: AgentTelemetry, span: Span, re
 function applyContentCaptureForResponse(telemetry: AgentTelemetry, span: Span, message: AssistantMessage): void {
 	const responseText = serializeResponseTextForTelemetry(telemetry, message);
-	if (responseText) span.setAttribute(PrometheusGenAIAttr.ResponseText, responseText);
+	if (responseText) span.setAttribute(PiGenAIAttr.ResponseText, responseText);
 	const responseToolCalls = serializeResponseToolCallsForTelemetry(telemetry, message);
-	if (responseToolCalls) span.setAttribute(PrometheusGenAIAttr.ResponseToolCalls, responseToolCalls);
+	if (responseToolCalls) span.setAttribute(PiGenAIAttr.ResponseToolCalls, responseToolCalls);
 	if (telemetry.contentCapture === "full") {
 		const outputMessages = serializeFullOutputMessagesForTelemetry(message);
 		if (outputMessages) span.setAttribute(GenAIAttr.OutputMessages, outputMessages);
@@ -1188,6 +1190,9 @@ export function failChatSpan(
 function applyChatResponseAttributes(span: Span, message: AssistantMessage): void {
 	span.setAttribute(GenAIAttr.ResponseModel, message.model);
 	if (message.responseId) span.setAttribute(GenAIAttr.ResponseId, message.responseId);
+	if (message.upstreamProvider) {
+		span.setAttribute(PiGenAIAttr.ResponseUpstreamProvider, message.upstreamProvider);
+	}
 	if (message.ttft != null) span.setAttribute(GenAIAttr.ResponseTimeToFirstChunk, message.ttft / 1000);
 	const finishReason = mapStopReason(message.stopReason);
 	if (finishReason) span.setAttribute(GenAIAttr.ResponseFinishReasons, [finishReason]);
@@ -1202,7 +1207,7 @@ function applyUsageAttributes(span: Span, usage: Usage | undefined): void {
 	span.setAttribute(GenAIAttr.UsageInputTokens, inputTokens);
 	span.setAttribute(GenAIAttr.UsageOutputTokens, outputTokens);
 	const total = usage.totalTokens ?? inputTokens + outputTokens;
-	span.setAttribute(PrometheusGenAIAttr.UsageTotalTokens, total);
+	span.setAttribute(PiGenAIAttr.UsageTotalTokens, total);
 	if (usage.cacheRead != null) span.setAttribute(GenAIAttr.UsageCacheReadInputTokens, usage.cacheRead);
 	if (usage.cacheWrite != null) span.setAttribute(GenAIAttr.UsageCacheCreationInputTokens, usage.cacheWrite);
 	if (usage.reasoningTokens != null) {
@@ -1210,7 +1215,7 @@ function applyUsageAttributes(span: Span, usage: Usage | undefined): void {
 	}
 	if (usage.server) {
 		const sums = (usage.server.webSearch ?? 0) + (usage.server.webFetch ?? 0);
-		if (sums > 0) span.setAttribute(PrometheusGenAIAttr.UsageServerSideTools, sums);
+		if (sums > 0) span.setAttribute(PiGenAIAttr.UsageServerSideTools, sums);
 	}
 }
@@ -1276,10 +1281,10 @@ function applyGatewayAttributes(
 ): void {
 	const gateway = detectGatewayFromHeaders(headers);
 	if (!gateway) return;
-	span.setAttribute(PrometheusGenAIAttr.GatewayName, gateway.name);
-	if (baseUrl) span.setAttribute(PrometheusGenAIAttr.GatewayEndpoint, baseUrl);
-	if (gateway.callId) span.setAttribute(PrometheusGenAIAttr.GatewayCallId, gateway.callId);
-	if (gateway.routedTo) span.setAttribute(PrometheusGenAIAttr.GatewayRoutedTo, gateway.routedTo);
+	span.setAttribute(PiGenAIAttr.GatewayName, gateway.name);
+	if (baseUrl) span.setAttribute(PiGenAIAttr.GatewayEndpoint, baseUrl);
+	if (gateway.callId) span.setAttribute(PiGenAIAttr.GatewayCallId, gateway.callId);
+	if (gateway.routedTo) span.setAttribute(PiGenAIAttr.GatewayRoutedTo, gateway.routedTo);
 }
 interface AppliedCostEstimate {
@@ -1340,7 +1345,7 @@ function applyCostEstimateForUsage(
 	}
 	if (!result) return EMPTY_COST;
 	if ("unavailable" in result) {
-		span.setAttribute(PrometheusGenAIAttr.CostUnavailableReason, result.unavailable);
+		span.setAttribute(PiGenAIAttr.CostUnavailableReason, result.unavailable);
 		const cost: AppliedCostEstimate = {
 			costUsd: undefined,
 			inputUsd: undefined,
@@ -1362,9 +1367,9 @@ function applyCostEstimateForUsage(
 		});
 		return cost;
 	}
-	span.setAttribute(PrometheusGenAIAttr.CostEstimatedUsd, result.usd);
-	if (result.inputUsd != null) span.setAttribute(PrometheusGenAIAttr.CostInputUsd, result.inputUsd);
-	if (result.outputUsd != null) span.setAttribute(PrometheusGenAIAttr.CostOutputUsd, result.outputUsd);
+	span.setAttribute(PiGenAIAttr.CostEstimatedUsd, result.usd);
+	if (result.inputUsd != null) span.setAttribute(PiGenAIAttr.CostInputUsd, result.inputUsd);
+	if (result.outputUsd != null) span.setAttribute(PiGenAIAttr.CostOutputUsd, result.outputUsd);
 	const cost: AppliedCostEstimate = {
 		costUsd: result.usd,
 		inputUsd: result.inputUsd,
@@ -1534,7 +1539,7 @@ export async function recordManualChatTelemetry(
 		});
 	if (!span) return undefined;
 	if (options.span && options.attributes) span.setAttributes(options.attributes);
-	if (options.stepNumber != null) span.setAttribute(PrometheusGenAIAttr.AgentStepNumber, options.stepNumber);
+	if (options.stepNumber != null) span.setAttribute(PiGenAIAttr.AgentStepNumber, options.stepNumber);
 	span.setAttribute(GenAIAttr.ResponseModel, options.responseModel ?? options.model.name);
 	if (options.responseId) span.setAttribute(GenAIAttr.ResponseId, options.responseId);
 	const finishReason = mapStopReason(options.finishReason);
@@ -1567,7 +1572,7 @@ export async function recordManualChatTelemetry(
 	}
 	if (options.responseText) {
 		const responseText = stringifyJsonAttribute(summarizeTelemetryTexts([options.responseText]));
-		if (responseText) span.setAttribute(PrometheusGenAIAttr.ResponseText, responseText);
+		if (responseText) span.setAttribute(PiGenAIAttr.ResponseText, responseText);
 	}
 	if (options.responseToolCalls && options.responseToolCalls.length > 0) {
 		const calls = options.responseToolCalls.map(call => ({
@@ -1576,7 +1581,7 @@ export async function recordManualChatTelemetry(
 			input: summarizeTelemetryValue(call.input),
 		}));
 		const responseToolCalls = stringifyJsonAttribute(limitTelemetryToolCalls(calls));
-		if (responseToolCalls) span.setAttribute(PrometheusGenAIAttr.ResponseToolCalls, responseToolCalls);
+		if (responseToolCalls) span.setAttribute(PiGenAIAttr.ResponseToolCalls, responseToolCalls);
 	}
 	applyTerminalStatus(span, options.finishReason, undefined);
 	if (options.endSpan ?? options.span === undefined) span.end();
@@ -1595,7 +1600,7 @@ export interface InstrumentedChatSpanOptions {
 	/** Step index recorded on the span; defaults to `-1` for non-loop calls. */
 	readonly stepNumber?: number;
 	/**
-	 * Tag stamped onto `prometheus.gen_ai.oneshot.kind`. Values used by the agent:
+	 * Tag stamped onto `pi.gen_ai.oneshot.kind`. Values used by the agent:
 	 * `compaction_summary`, `compaction_short_summary`, `compaction_turn_prefix`,
 	 * `handoff`, `branch_summary`, `inspect_image`. Free-form to allow callers
 	 * outside this package to add new kinds without bumping the helper.
@@ -1629,6 +1634,13 @@ export async function instrumentedCompleteSimple<TApi extends Api>(
 	options: SimpleStreamOptions,
 	span: InstrumentedChatSpanOptions,
 ): Promise<AssistantMessage> {
+	// Oneshot LLM calls (handoff, compaction/branch summaries) run outside the
+	// agent `#runLoop`, which is where the EventLoopKeepalive normally lives.
+	// Without it, Bun's JSC loop stops servicing timers while parked on the
+	// long-lived completion promise, freezing any host spinner (e.g. the
+	// `/handoff` Loader) until an unrelated I/O event (a terminal resize)
+	// pokes the loop. Keep the loop healthy for the duration of the call.
+	using _keepalive = new EventLoopKeepalive();
 	const { telemetry, parent, oneshotKind } = span;
 	const stepNumber = span.stepNumber ?? -1;
 	const reasoning = options.reasoning;
@@ -1650,7 +1662,7 @@ export async function instrumentedCompleteSimple<TApi extends Api>(
 		},
 	});
 	if (chatSpan) {
-		if (oneshotKind) chatSpan.setAttribute(PrometheusGenAIAttr.OneshotKind, oneshotKind);
+		if (oneshotKind) chatSpan.setAttribute(PiGenAIAttr.OneshotKind, oneshotKind);
 		if (span.attributes) chatSpan.setAttributes(span.attributes);
 	}
@@ -1788,7 +1800,7 @@ export function finishExecuteToolSpan(
 }
 /** Span attribute carrying the terminal {@link ToolStatus}. */
-export const EXECUTE_TOOL_STATUS_ATTR = PrometheusGenAIAttr.ToolStatus;
+export const EXECUTE_TOOL_STATUS_ATTR = PiGenAIAttr.ToolStatus;
 /**
  * Mapping from non-ok {@link ToolStatus} values to the `error.type` attribute
@@ -1861,7 +1873,8 @@ export function finishInvokeAgentSpan(
 /**
  * Invoke {@link AgentTelemetryConfig.onRunEnd} on `telemetry` if set. Throws
- are caught and logged via `console.warn` — telemetry callbacks NEVER turn a
+ * are caught and surfaced via the `onTelemetryWarning` hook (falling back to `console.warn`
+ * when no hook is set) — telemetry callbacks NEVER turn a
  * successful agent run into a failed one. Idempotent at the call site via
  * {@link AgentRunCollector.markRunEnded}; callers must check that before
  * calling this helper.
@@ -1880,66 +1893,66 @@ export function fireOnRunEnd(telemetry: AgentTelemetry, summary: AgentRunSummary
 	}
 }
-/** Aggregate `prometheus.gen_ai.agent.*` attributes stamped on the `invoke_agent` span. */
-export const enum PrometheusGenAIAggregateAttr {
-	ChatsCount = "prometheus.gen_ai.agent.chats.count",
-	ChatsTotalLatencyMs = "prometheus.gen_ai.agent.chats.total_latency_ms",
-	ChatsStopReasonPrefix = "prometheus.gen_ai.agent.chats.stop_reason.",
-	ToolsCount = "prometheus.gen_ai.agent.tools.count",
-	ToolsOkCount = "prometheus.gen_ai.agent.tools.ok.count",
-	ToolsErrorCount = "prometheus.gen_ai.agent.tools.error.count",
-	ToolsSkippedCount = "prometheus.gen_ai.agent.tools.skipped.count",
-	ToolsBlockedCount = "prometheus.gen_ai.agent.tools.blocked.count",
-	ToolsTimeoutCount = "prometheus.gen_ai.agent.tools.timeout.count",
-	ToolsAbortedCount = "prometheus.gen_ai.agent.tools.aborted.count",
-	ToolsTotalLatencyMs = "prometheus.gen_ai.agent.tools.total_latency_ms",
-	ToolsInvoked = "prometheus.gen_ai.agent.tools.invoked",
-	ToolsAvailable = "prometheus.gen_ai.agent.tools.available",
-	ToolsUnused = "prometheus.gen_ai.agent.tools.unused",
-	UsageInputTokensTotal = "prometheus.gen_ai.agent.usage.input_tokens.total",
-	UsageOutputTokensTotal = "prometheus.gen_ai.agent.usage.output_tokens.total",
-	UsageCacheReadInputTokensTotal = "prometheus.gen_ai.agent.usage.cache_read.input_tokens.total",
-	UsageCacheCreationInputTokensTotal = "prometheus.gen_ai.agent.usage.cache_creation.input_tokens.total",
-	UsageReasoningOutputTokensTotal = "prometheus.gen_ai.agent.usage.reasoning.output_tokens.total",
-	UsageTotalTokensTotal = "prometheus.gen_ai.agent.usage.total_tokens.total",
-	CostEstimatedUsdTotal = "prometheus.gen_ai.agent.cost.estimated_usd.total",
-	ErrorsCount = "prometheus.gen_ai.agent.errors.count",
-}
-/** Stamp the aggregate `prometheus.gen_ai.agent.*` attributes on the given span. */
+/** Aggregate `pi.gen_ai.agent.*` attributes stamped on the `invoke_agent` span. */
+export const enum PiGenAIAggregateAttr {
+	ChatsCount = "pi.gen_ai.agent.chats.count",
+	ChatsTotalLatencyMs = "pi.gen_ai.agent.chats.total_latency_ms",
+	ChatsStopReasonPrefix = "pi.gen_ai.agent.chats.stop_reason.",
+	ToolsCount = "pi.gen_ai.agent.tools.count",
+	ToolsOkCount = "pi.gen_ai.agent.tools.ok.count",
+	ToolsErrorCount = "pi.gen_ai.agent.tools.error.count",
+	ToolsSkippedCount = "pi.gen_ai.agent.tools.skipped.count",
+	ToolsBlockedCount = "pi.gen_ai.agent.tools.blocked.count",
+	ToolsTimeoutCount = "pi.gen_ai.agent.tools.timeout.count",
+	ToolsAbortedCount = "pi.gen_ai.agent.tools.aborted.count",
+	ToolsTotalLatencyMs = "pi.gen_ai.agent.tools.total_latency_ms",
+	ToolsInvoked = "pi.gen_ai.agent.tools.invoked",
+	ToolsAvailable = "pi.gen_ai.agent.tools.available",
+	ToolsUnused = "pi.gen_ai.agent.tools.unused",
+	UsageInputTokensTotal = "pi.gen_ai.agent.usage.input_tokens.total",
+	UsageOutputTokensTotal = "pi.gen_ai.agent.usage.output_tokens.total",
+	UsageCacheReadInputTokensTotal = "pi.gen_ai.agent.usage.cache_read.input_tokens.total",
+	UsageCacheCreationInputTokensTotal = "pi.gen_ai.agent.usage.cache_creation.input_tokens.total",
+	UsageReasoningOutputTokensTotal = "pi.gen_ai.agent.usage.reasoning.output_tokens.total",
+	UsageTotalTokensTotal = "pi.gen_ai.agent.usage.total_tokens.total",
+	CostEstimatedUsdTotal = "pi.gen_ai.agent.cost.estimated_usd.total",
+	ErrorsCount = "pi.gen_ai.agent.errors.count",
+}
+/** Stamp the aggregate `pi.gen_ai.agent.*` attributes on the given span. */
 function applyAggregateAttributes(span: Span, summary: AgentRunSummary, coverage: AgentRunCoverage): void {
-	span.setAttribute(PrometheusGenAIAggregateAttr.ChatsCount, summary.chats.total);
-	span.setAttribute(PrometheusGenAIAggregateAttr.ChatsTotalLatencyMs, summary.chats.totalLatencyMs);
+	span.setAttribute(PiGenAIAggregateAttr.ChatsCount, summary.chats.total);
+	span.setAttribute(PiGenAIAggregateAttr.ChatsTotalLatencyMs, summary.chats.totalLatencyMs);
 	for (const [reason, count] of Object.entries(summary.chats.byStopReason)) {
-		span.setAttribute(`${PrometheusGenAIAggregateAttr.ChatsStopReasonPrefix}${reason}.count`, count);
-	}
-	span.setAttribute(PrometheusGenAIAggregateAttr.ToolsCount, summary.tools.total);
-	span.setAttribute(PrometheusGenAIAggregateAttr.ToolsOkCount, summary.tools.ok);
-	span.setAttribute(PrometheusGenAIAggregateAttr.ToolsErrorCount, summary.tools.error);
-	span.setAttribute(PrometheusGenAIAggregateAttr.ToolsSkippedCount, summary.tools.skipped);
-	span.setAttribute(PrometheusGenAIAggregateAttr.ToolsBlockedCount, summary.tools.blocked);
-	span.setAttribute(PrometheusGenAIAggregateAttr.ToolsTimeoutCount, summary.tools.timeout);
-	span.setAttribute(PrometheusGenAIAggregateAttr.ToolsAbortedCount, summary.tools.aborted);
-	span.setAttribute(PrometheusGenAIAggregateAttr.ToolsTotalLatencyMs, summary.tools.totalLatencyMs);
+		span.setAttribute(`${PiGenAIAggregateAttr.ChatsStopReasonPrefix}${reason}.count`, count);
+	}
+	span.setAttribute(PiGenAIAggregateAttr.ToolsCount, summary.tools.total);
+	span.setAttribute(PiGenAIAggregateAttr.ToolsOkCount, summary.tools.ok);
+	span.setAttribute(PiGenAIAggregateAttr.ToolsErrorCount, summary.tools.error);
+	span.setAttribute(PiGenAIAggregateAttr.ToolsSkippedCount, summary.tools.skipped);
+	span.setAttribute(PiGenAIAggregateAttr.ToolsBlockedCount, summary.tools.blocked);
+	span.setAttribute(PiGenAIAggregateAttr.ToolsTimeoutCount, summary.tools.timeout);
+	span.setAttribute(PiGenAIAggregateAttr.ToolsAbortedCount, summary.tools.aborted);
+	span.setAttribute(PiGenAIAggregateAttr.ToolsTotalLatencyMs, summary.tools.totalLatencyMs);
 	if (coverage.toolsInvoked.length > 0) {
-		span.setAttribute(PrometheusGenAIAggregateAttr.ToolsInvoked, [...coverage.toolsInvoked]);
+		span.setAttribute(PiGenAIAggregateAttr.ToolsInvoked, [...coverage.toolsInvoked]);
 	}
 	if (coverage.toolsAvailable.length > 0) {
-		span.setAttribute(PrometheusGenAIAggregateAttr.ToolsAvailable, [...coverage.toolsAvailable]);
+		span.setAttribute(PiGenAIAggregateAttr.ToolsAvailable, [...coverage.toolsAvailable]);
 	}
 	if (coverage.toolsUnused.length > 0) {
-		span.setAttribute(PrometheusGenAIAggregateAttr.ToolsUnused, [...coverage.toolsUnused]);
-	}
-	span.setAttribute(PrometheusGenAIAggregateAttr.UsageInputTokensTotal, summary.usage.inputTokens);
-	span.setAttribute(PrometheusGenAIAggregateAttr.UsageOutputTokensTotal, summary.usage.outputTokens);
-	span.setAttribute(PrometheusGenAIAggregateAttr.UsageCacheReadInputTokensTotal, summary.usage.cachedInputTokens);
-	span.setAttribute(PrometheusGenAIAggregateAttr.UsageCacheCreationInputTokensTotal, summary.usage.cacheWriteTokens);
-	span.setAttribute(PrometheusGenAIAggregateAttr.UsageReasoningOutputTokensTotal, summary.usage.reasoningOutputTokens);
-	span.setAttribute(PrometheusGenAIAggregateAttr.UsageTotalTokensTotal, summary.usage.totalTokens);
+		span.setAttribute(PiGenAIAggregateAttr.ToolsUnused, [...coverage.toolsUnused]);
+	}
+	span.setAttribute(PiGenAIAggregateAttr.UsageInputTokensTotal, summary.usage.inputTokens);
+	span.setAttribute(PiGenAIAggregateAttr.UsageOutputTokensTotal, summary.usage.outputTokens);
+	span.setAttribute(PiGenAIAggregateAttr.UsageCacheReadInputTokensTotal, summary.usage.cachedInputTokens);
+	span.setAttribute(PiGenAIAggregateAttr.UsageCacheCreationInputTokensTotal, summary.usage.cacheWriteTokens);
+	span.setAttribute(PiGenAIAggregateAttr.UsageReasoningOutputTokensTotal, summary.usage.reasoningOutputTokens);
+	span.setAttribute(PiGenAIAggregateAttr.UsageTotalTokensTotal, summary.usage.totalTokens);
 	if (summary.cost.estimatedUsd > 0) {
-		span.setAttribute(PrometheusGenAIAggregateAttr.CostEstimatedUsdTotal, summary.cost.estimatedUsd);
+		span.setAttribute(PiGenAIAggregateAttr.CostEstimatedUsdTotal, summary.cost.estimatedUsd);
 	}
-	span.setAttribute(PrometheusGenAIAggregateAttr.ErrorsCount, summary.errors.total);
+	span.setAttribute(PiGenAIAggregateAttr.ErrorsCount, summary.errors.total);
 }
 /**
@@ -1974,10 +1987,10 @@ export function recordHandoff(
 	const attrs: Attributes = {};
 	const fromAgent = options.fromAgent ? normalizeAgentIdentity(telemetry, options.fromAgent) : undefined;
 	const toAgent = normalizeAgentIdentity(telemetry, options.toAgent);
-	if (fromAgent?.name) attrs[PrometheusGenAIAttr.HandoffFromAgentName] = fromAgent.name;
-	if (fromAgent?.id) attrs[PrometheusGenAIAttr.HandoffFromAgentId] = fromAgent.id;
-	if (toAgent.name) attrs[PrometheusGenAIAttr.HandoffToAgentName] = toAgent.name;
-	if (toAgent.id) attrs[PrometheusGenAIAttr.HandoffToAgentId] = toAgent.id;
+	if (fromAgent?.name) attrs[PiGenAIAttr.HandoffFromAgentName] = fromAgent.name;
+	if (fromAgent?.id) attrs[PiGenAIAttr.HandoffFromAgentId] = fromAgent.id;
+	if (toAgent.name) attrs[PiGenAIAttr.HandoffToAgentName] = toAgent.name;
+	if (toAgent.id) attrs[PiGenAIAttr.HandoffToAgentId] = toAgent.id;
 	const name = toAgent.name
 		? fromAgent?.name
 			? `handoff ${fromAgent.name} → ${toAgent.name}`

package/src/types.ts CHANGED Viewed

@@ -1,7 +1,9 @@
 import type {
+	ApiKeyResolveContext,
 	AssistantMessage,
 	AssistantMessageEvent,
 	AssistantMessageEventStream,
+	Context,
 	Effort,
 	ImageContent,
 	Message,
@@ -25,6 +27,14 @@ export type StreamFn = (
 	...args: Parameters<typeof streamSimple>
 ) => AssistantMessageEventStream | Promise<AssistantMessageEventStream>;
+/**
+ * An aside entry: a ready {@link AgentMessage}, or a sync thunk evaluated at
+ * injection time that returns the message to inject or `null` to skip it. Thunks
+ * let the producer make the final inject-or-drop decision against current state
+ * (e.g. dropping late diagnostics a newer edit superseded).
+ */
+export type AsideMessage = AgentMessage | (() => AgentMessage | null);
 /**
  * Configuration for the agent loop.
  */
@@ -38,14 +48,6 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
 	 */
 	interruptMode?: "immediate" | "wait";
-	/**
-	 * Maximum completed tool calls to accept from one streamed assistant turn before
-	 * cutting the provider stream and executing that batch. The cap is enforced on
-	 * `toolcall_end` so every executed call has complete arguments. Undefined disables
-	 * batching.
-	 */
-	maxToolCallsPerTurn?: number;
 	/**
 	 * Optional session identifier forwarded to LLM providers.
 	 * Used by providers that support session-based caching (e.g., OpenAI Codex).
@@ -106,23 +108,46 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
 	 */
 	transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise<AgentMessage[]>;
+	/**
+	 * Optional transform applied to the final provider context after conversion,
+	 * normalization, and append-only context handling, but before telemetry capture
+	 * and provider send.
+	 */
+	transformProviderContext?: (context: Context, model: Model) => Context;
 	/**
 	 * Resolves an API key dynamically for each LLM call.
 	 *
 	 * Useful for short-lived OAuth tokens (e.g., GitHub Copilot) that may expire
 	 * during long-running tool execution phases.
 	 */
-	getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined;
+	getApiKey?: (provider: string, ctx?: ApiKeyResolveContext) => Promise<string | undefined> | string | undefined;
 	/**
 	 * Returns steering messages to inject into the conversation mid-run.
 	 *
-	 * Called after each tool execution to check for user interruptions unless interruptMode is "wait".
-	 * If messages are returned, remaining tool calls are skipped and
-	 * these messages are added to the context before the next LLM call.
+	 * Called at injection boundaries only (loop start and after a tool batch
+	 * fully settles), so dequeued messages are immediately injected. The
+	 * mid-batch interrupt poll uses {@link hasSteeringMessages} instead and
+	 * never consumes the queue.
 	 */
 	getSteeringMessages?: () => Promise<AgentMessage[]>;
+	/**
+	 * Peeks whether steering messages are queued, without consuming them.
+	 *
+	 * Called after each tool execution (unless interruptMode is "wait") to decide
+	 * whether to skip the remaining tool calls in the batch. The queue keeps
+	 * owning its messages until the loop reaches the next injection boundary and
+	 * dequeues via {@link getSteeringMessages} — so callers can still cancel or
+	 * restore queued messages while in-flight tools settle, and an external
+	 * abort in that window leaves the queue intact for a post-abort continue.
+	 *
+	 * When omitted, steering never interrupts a running tool batch; queued
+	 * messages are still delivered at the next injection boundary.
+	 */
+	hasSteeringMessages?: () => boolean | Promise<boolean>;
 	/**
 	 * Returns follow-up messages to process after the agent would otherwise stop.
 	 *
@@ -131,6 +156,17 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
 	 * continues with another turn.
 	 */
 	getFollowUpMessages?: () => Promise<AgentMessage[]>;
+	/**
+	 * Returns non-interrupting "aside" messages to inject at a step boundary.
+	 *
+	 * Polled after each tool batch (before the next LLM call) AND at the yield
+	 * check. Unlike steering, these NEVER abort in-flight tools — they are passive
+	 * notifications (e.g. background-job completions, late LSP diagnostics) that
+	 * should reach the model between requests without waiting for the agent to
+	 * fully stop. Returned messages are appended to the context with normal
+	 * message events and keep the loop running so the model can react.
+	 */
+	getAsideMessages?: () => Promise<AsideMessage[]>;
 	/**
 	 * Hook fired right before the loop would exit.
 	 *
@@ -198,6 +234,15 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
 	 */
 	getReasoning?: () => Effort | undefined;
+	/**
+	 * Dynamic reasoning-disable override, resolved per LLM call. When set,
+	 * its return value overrides the static `disableReasoning` from
+	 * `SimpleStreamOptions` for that request. Pair with `getReasoning` so
+	 * mid-run transitions into and out of the explicit `off` state propagate
+	 * to the next provider call.
+	 */
+	getDisableReasoning?: () => boolean | undefined;
 	/**
 	 * Called after a tool call has been validated and is about to execute.
 	 *
@@ -281,6 +326,8 @@ export interface AfterToolCallResult {
 	details?: unknown;
 	/** If provided, replaces the error flag carried with the tool result. */
 	isError?: boolean;
+	/** If provided, replaces the contextually-useless flag carried with the tool result. */
+	useless?: boolean;
 }
 /** Context passed to `beforeToolCall`. */
@@ -320,7 +367,7 @@ export interface AfterToolCallContext {
  *
  * @example
  * ```typescript
- * declare module "@prometheus-ai/agent-core" {
+ * declare module "@prometheus-ai/agent" {
  *   interface CustomAgentMessages {
  *     artifact: ArtifactMessage;
  *     notification: NotificationMessage;
@@ -346,6 +393,7 @@ export interface AgentState {
 	systemPrompt: string[];
 	model: Model;
 	thinkingLevel?: Effort;
+	disableReasoning?: boolean;
 	tools: AgentTool<any>[];
 	messages: AgentMessage[]; // Can include attachments + custom message types
 	isStreaming: boolean;
@@ -362,6 +410,8 @@ export interface AgentToolResult<T = any, _TInput = unknown> {
 	// Marks a non-throwing failure (e.g. an aggregator catching per-entry errors).
 	// agent-loop honors this and surfaces it as a tool error on the wire.
 	isError?: boolean;
+	/** Marks the result as contextually useless: safe for compaction to elide once consumed (e.g. zero matches, wait timeout). Ignored when isError is set. */
+	useless?: boolean;
 }
 // Callback for streaming tool execution updates
@@ -422,14 +472,13 @@ export interface AgentTool<TParameters extends TSchema = TSchema, TDetails = any
 	loadMode?: "essential" | "discoverable";
 	/** Short one-line summary used for tool discovery indexes. */
 	summary?: string;
-	/** If true, tool execution ignores abort signals (runs to completion) */
-	nonAbortable?: boolean;
 	/**
 	 * Concurrency mode for tool scheduling when multiple calls are in one turn.
 	 * - "shared": can run alongside other shared tools (default)
 	 * - "exclusive": runs alone; other tools wait until it finishes
+	 * - function: resolved per call from the (raw, pre-validation) arguments
 	 */
-	concurrency?: "shared" | "exclusive";
+	concurrency?: "shared" | "exclusive" | ((args: Partial<Static<TParameters>>) => "shared" | "exclusive");
 	/** If true, argument validation errors are non-fatal: raw args are passed to execute() instead of returning an error to the LLM. */
 	lenientArgValidation?: boolean;
 	/**