npm - @oh-my-pi/pi-agent-core - Versions diffs - 15.1.2 → 15.1.3 - Mend

@oh-my-pi/pi-agent-core 15.1.2 → 15.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/CHANGELOG.md +6 -0
package/README.md +1 -1
package/dist/types/compaction/branch-summarization.d.ts +6 -0
package/dist/types/compaction/compaction.d.ts +13 -0
package/dist/types/run-collector.d.ts +0 -10
package/dist/types/telemetry.d.ts +38 -1
package/package.json +4 -4
package/src/agent-loop.ts +1 -1
package/src/compaction/branch-summarization.ts +8 -2
package/src/compaction/compaction.ts +23 -5
package/src/run-collector.ts +15 -12
package/src/telemetry.ts +113 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,12 @@
 ## [Unreleased]
+## [15.1.3] - 2026-05-17
+### Added
+- Added optional `telemetry` support to `generateSummary`, `generateHandoff`, `generateBranchSummary`, and `compact` options so compaction, handoff, and branch summary one-shot LLM calls can emit OpenTelemetry chat telemetry when enabled
+- Added shared oneshot telemetry instrumentation for compaction, handoff, and branch summary calls, tagging spans with `pi.gen_ai.oneshot.kind` values such as `compaction_summary`, `compaction_short_summary`, `compaction_turn_prefix`, `handoff`, and `branch_summary`
 ## [15.1.2] - 2026-05-15
 ### Added

package/README.md CHANGED Viewed

@@ -279,7 +279,7 @@ const agent = new Agent({
 ## Tools
-Define tools using `AgentTool` with a Zod parameter schema (via `z` from `@oh-my-pi/pi-ai`). Legacy TypeBox-authored schemas are still accepted at runtime and are lifted to Zod internally.
+Define tools using `AgentTool` with a Zod parameter schema (via `z` from `@oh-my-pi/pi-ai`).
 ```typescript
 import { z } from "@oh-my-pi/pi-ai";

package/dist/types/compaction/branch-summarization.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@
  * a summary of the branch being left so context isn't lost.
  */
 import type { Model } from "@oh-my-pi/pi-ai";
+import { type AgentTelemetry } from "../telemetry";
 import type { AgentMessage } from "../types";
 import type { ReadonlySessionManager, SessionEntry } from "./entries";
 import { type ConvertToLlm } from "./messages";
@@ -51,6 +52,11 @@ export interface GenerateBranchSummaryOptions {
     metadata?: Record<string, unknown>;
     /** Convert app-specific messages before serializing the branch summary prompt. */
     convertToLlm?: ConvertToLlm;
+    /**
+     * Optional telemetry handle. When provided, the branch summary LLM call is
+     * wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "branch_summary"`.
+     */
+    telemetry?: AgentTelemetry;
 }
 /**
  * Collect entries that should be summarized when navigating from one position to another.

package/dist/types/compaction/compaction.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@
  * and after compaction the session is reloaded.
  */
 import { type MessageAttribution, type Model, type Usage } from "@oh-my-pi/pi-ai";
+import { type AgentTelemetry } from "../telemetry";
 import type { AgentMessage, AgentTool } from "../types";
 import type { SessionEntry } from "./entries";
 import { type ConvertToLlm } from "./messages";
@@ -107,6 +108,13 @@ export interface SummaryOptions {
     initiatorOverride?: MessageAttribution;
     metadata?: Record<string, unknown>;
     convertToLlm?: ConvertToLlm;
+    /**
+     * Optional telemetry handle. When provided, every LLM call emitted during
+     * compaction is wrapped in an OTEL chat span tagged with
+     * `pi.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
+     * or `compaction_turn_prefix`). `undefined` keeps the call paths zero-cost.
+     */
+    telemetry?: AgentTelemetry;
 }
 export declare function generateSummary(currentMessages: AgentMessage[], model: Model, reserveTokens: number, apiKey: string, signal?: AbortSignal, customInstructions?: string, previousSummary?: string, options?: SummaryOptions): Promise<string>;
 export interface HandoffOptions {
@@ -118,6 +126,11 @@ export interface HandoffOptions {
     convertToLlm?: ConvertToLlm;
     initiatorOverride?: MessageAttribution;
     metadata?: Record<string, unknown>;
+    /**
+     * Optional telemetry handle. When provided, the handoff LLM call is
+     * wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "handoff"`.
+     */
+    telemetry?: AgentTelemetry;
 }
 export declare function renderHandoffPrompt(customInstructions?: string): string;
 export declare function generateHandoff(messages: AgentMessage[], model: Model, apiKey: string, options: HandoffOptions, signal?: AbortSignal): Promise<string>;

package/dist/types/run-collector.d.ts CHANGED Viewed

@@ -108,16 +108,6 @@ export interface AgentRunCoverage {
     readonly modelsUsed: readonly string[];
     readonly providersUsed: readonly string[];
 }
-/**
- * Per-invocation event buffer. Constructed unconditionally inside
- * {@link resolveTelemetry}; cost is one allocation per `agentLoop` call.
- *
- * Methods are intentionally non-throwing — telemetry must never turn a
- * successful agent run into a failed one. WeakMap keys keep span-state
- * lookups bounded; if a finish path is somehow reached without a matching
- * begin (provider crash, tracer swap mid-run), the corresponding record is
- * still emitted with `latencyMs: 0` rather than throwing.
- */
 export declare class AgentRunCollector {
     #private;
     /** True once `markRunEnded()` has been called for this invocation. */

package/dist/types/telemetry.d.ts CHANGED Viewed

@@ -22,7 +22,7 @@
  * registered, `@opentelemetry/api` returns a no-op tracer and all calls are
  * cheap pass-throughs.
  */
-import { type AssistantMessage, type Message, type Model, type ServiceTier, type StopReason, type ToolChoice, type Usage } from "@oh-my-pi/pi-ai";
+import { type Api, type AssistantMessage, type Context, type Message, type Model, type ServiceTier, type SimpleStreamOptions, type StopReason, type ToolChoice, type Usage } from "@oh-my-pi/pi-ai";
 import { type Attributes, type AttributeValue, type Span, SpanKind, SpanStatusCode, type Tracer, trace } from "@opentelemetry/api";
 import { AgentRunCollector, type AgentRunCoverage, type AgentRunSummary, type ToolStatus } from "./run-collector";
 import type { AgentTool } from "./types";
@@ -99,6 +99,7 @@ export declare const enum PiGenAIAttr {
     HandoffFromAgentId = "pi.gen_ai.handoff.from_agent.id",
     HandoffToAgentName = "pi.gen_ai.handoff.to_agent.name",
     HandoffToAgentId = "pi.gen_ai.handoff.to_agent.id",
+    OneshotKind = "pi.gen_ai.oneshot.kind",
     GatewayName = "pi.gen_ai.gateway.name",
     GatewayEndpoint = "pi.gen_ai.gateway.endpoint",
     GatewayCallId = "pi.gen_ai.gateway.call_id",
@@ -433,6 +434,42 @@ export interface ManualChatTelemetryOptions {
     readonly endSpan?: boolean;
 }
 export declare function recordManualChatTelemetry(telemetry: AgentTelemetry | undefined, options: ManualChatTelemetryOptions): Promise<Span | undefined>;
+/**
+ * Options accepted by {@link instrumentedCompleteSimple}. Mirrors the
+ * `streamAssistantResponse` chat-span lifecycle for oneshot LLM calls
+ * (compaction summaries, handoff document, branch summary, inspect_image).
+ */
+export interface InstrumentedChatSpanOptions {
+    readonly telemetry: AgentTelemetry | undefined;
+    /** Optional explicit parent span. Defaults to `context.active()`. */
+    readonly parent?: Span;
+    /** Step index recorded on the span; defaults to `-1` for non-loop calls. */
+    readonly stepNumber?: number;
+    /**
+     * Tag stamped onto `pi.gen_ai.oneshot.kind`. Values used by the agent:
+     * `compaction_summary`, `compaction_short_summary`, `compaction_turn_prefix`,
+     * `handoff`, `branch_summary`, `inspect_image`. Free-form to allow callers
+     * outside this package to add new kinds without bumping the helper.
+     */
+    readonly oneshotKind?: string;
+    /** Extra span attributes applied verbatim. */
+    readonly attributes?: Attributes;
+    /**
+     * Override for the underlying {@link completeSimple} call. Defaults to
+     * `completeSimple` from `@oh-my-pi/pi-ai`. Use to retain a test injection
+     * seam while still going through the chat-span lifecycle.
+     */
+    readonly completeImpl?: <TApi extends Api>(model: Model<TApi>, ctx: Context, options: SimpleStreamOptions) => Promise<AssistantMessage>;
+}
+/**
+ * Wrap a {@link completeSimple} round-trip with the same chat-span lifecycle
+ * the agent loop uses for streamed turns: `startChatSpan` → run inside the
+ * active span → `finishChatSpan` on success, `failChatSpan` on throw.
+ *
+ * Short-circuits when `telemetry` is `undefined` so cost / overhead stays at
+ * zero for installations without an OTEL SDK.
+ */
+export declare function instrumentedCompleteSimple<TApi extends Api>(model: Model<TApi>, ctx: Context, options: SimpleStreamOptions, span: InstrumentedChatSpanOptions): Promise<AssistantMessage>;
 /**
  * Start an `execute_tool` span representing one tool invocation. Parented
  * under the supplied `invoke_agent` span by default — pass `parent` to

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-agent-core",
-	"version": "15.1.2",
+	"version": "15.1.3",
 	"description": "General-purpose agent with transport abstraction, state management, and attachment support",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -35,9 +35,9 @@
 		"fmt": "biome format --write ."
 	},
 	"dependencies": {
-		"@oh-my-pi/pi-ai": "15.1.2",
-		"@oh-my-pi/pi-natives": "15.1.2",
-		"@oh-my-pi/pi-utils": "15.1.2",
+		"@oh-my-pi/pi-ai": "15.1.3",
+		"@oh-my-pi/pi-natives": "15.1.3",
+		"@oh-my-pi/pi-utils": "15.1.3",
 		"@opentelemetry/api": "^1.9.0"
 	},
 	"devDependencies": {

package/src/agent-loop.ts CHANGED Viewed

@@ -14,7 +14,7 @@ import {
 	validateToolArguments,
 	zodToWireSchema,
 } from "@oh-my-pi/pi-ai";
-import { sanitizeText } from "@oh-my-pi/pi-natives";
+import { sanitizeText } from "@oh-my-pi/pi-utils";
 import {
 	createHarmonyAuditEvent,
 	type HarmonyDetection,

package/src/compaction/branch-summarization.ts CHANGED Viewed

@@ -6,8 +6,8 @@
  */
 import type { Model } from "@oh-my-pi/pi-ai";
-import { completeSimple } from "@oh-my-pi/pi-ai";
 import { prompt } from "@oh-my-pi/pi-utils";
+import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
 import type { AgentMessage } from "../types";
 import { estimateTokens } from "./compaction";
 import type { ReadonlySessionManager, SessionEntry } from "./entries";
@@ -81,6 +81,11 @@ export interface GenerateBranchSummaryOptions {
 	metadata?: Record<string, unknown>;
 	/** Convert app-specific messages before serializing the branch summary prompt. */
 	convertToLlm?: ConvertToLlm;
+	/**
+	 * Optional telemetry handle. When provided, the branch summary LLM call is
+	 * wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "branch_summary"`.
+	 */
+	telemetry?: AgentTelemetry;
 }
 // ============================================================================
@@ -299,10 +304,11 @@ export async function generateBranchSummary(
 	];
 	// Call LLM for summarization
-	const response = await completeSimple(
+	const response = await instrumentedCompleteSimple(
 		model,
 		{ systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT], messages: summarizationMessages },
 		{ apiKey, signal, maxTokens: 2048, metadata },
+		{ telemetry: options.telemetry, oneshotKind: "branch_summary" },
 	);
 	// Check if aborted or errored

package/src/compaction/compaction.ts CHANGED Viewed

@@ -7,7 +7,6 @@
 import {
 	type AssistantMessage,
-	completeSimple,
 	Effort,
 	type Message,
 	type MessageAttribution,
@@ -16,6 +15,7 @@ import {
 } from "@oh-my-pi/pi-ai";
 import { countTokens } from "@oh-my-pi/pi-natives";
 import { logger, prompt } from "@oh-my-pi/pi-utils";
+import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
 import type { AgentMessage, AgentTool } from "../types";
 import type { CompactionEntry, SessionEntry } from "./entries";
 import { type ConvertToLlm, convertToLlm, createBranchSummaryMessage, createCustomMessage } from "./messages";
@@ -514,6 +514,13 @@ export interface SummaryOptions {
 	initiatorOverride?: MessageAttribution;
 	metadata?: Record<string, unknown>;
 	convertToLlm?: ConvertToLlm;
+	/**
+	 * Optional telemetry handle. When provided, every LLM call emitted during
+	 * compaction is wrapped in an OTEL chat span tagged with
+	 * `pi.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
+	 * or `compaction_turn_prefix`). `undefined` keeps the call paths zero-cost.
+	 */
+	telemetry?: AgentTelemetry;
 }
 export async function generateSummary(
@@ -570,7 +577,7 @@ export async function generateSummary(
 		return remote.summary;
 	}
-	const response = await completeSimple(
+	const response = await instrumentedCompleteSimple(
 		model,
 		{ systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT], messages: summarizationMessages },
 		{
@@ -581,6 +588,7 @@ export async function generateSummary(
 			initiatorOverride: options?.initiatorOverride,
 			metadata: options?.metadata,
 		},
+		{ telemetry: options?.telemetry, oneshotKind: "compaction_summary" },
 	);
 	if (response.stopReason === "error") {
@@ -608,6 +616,11 @@ export interface HandoffOptions {
 	convertToLlm?: ConvertToLlm;
 	initiatorOverride?: MessageAttribution;
 	metadata?: Record<string, unknown>;
+	/**
+	 * Optional telemetry handle. When provided, the handoff LLM call is
+	 * wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "handoff"`.
+	 */
+	telemetry?: AgentTelemetry;
 }
 export function renderHandoffPrompt(customInstructions?: string): string {
@@ -635,7 +648,7 @@ export async function generateHandoff(
 		},
 	];
-	const response = await completeSimple(
+	const response = await instrumentedCompleteSimple(
 		model,
 		{
 			systemPrompt: options.systemPrompt,
@@ -650,6 +663,7 @@ export async function generateHandoff(
 			initiatorOverride: options.initiatorOverride,
 			metadata: options.metadata,
 		},
+		{ telemetry: options.telemetry, oneshotKind: "handoff" },
 	);
 	if (response.stopReason === "error") {
@@ -694,7 +708,7 @@ async function generateShortSummary(
 		return remote.summary;
 	}
-	const response = await completeSimple(
+	const response = await instrumentedCompleteSimple(
 		model,
 		{
 			systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
@@ -708,6 +722,7 @@ async function generateShortSummary(
 			initiatorOverride: options?.initiatorOverride,
 			metadata: options?.metadata,
 		},
+		{ telemetry: options?.telemetry, oneshotKind: "compaction_short_summary" },
 	);
 	if (response.stopReason === "error") {
@@ -889,6 +904,7 @@ export async function compact(
 		initiatorOverride: options?.initiatorOverride,
 		metadata: options?.metadata,
 		convertToLlm: options?.convertToLlm,
+		telemetry: options?.telemetry,
 	};
 	let preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, undefined);
@@ -978,6 +994,7 @@ export async function compact(
 			remoteEndpoint: summaryOptions.remoteEndpoint,
 			initiatorOverride: summaryOptions.initiatorOverride,
 			metadata: summaryOptions.metadata,
+			telemetry: summaryOptions.telemetry,
 		},
 	);
@@ -1023,7 +1040,7 @@ async function generateTurnPrefixSummary(
 		},
 	];
-	const response = await completeSimple(
+	const response = await instrumentedCompleteSimple(
 		model,
 		{ systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT], messages: summarizationMessages },
 		{
@@ -1034,6 +1051,7 @@ async function generateTurnPrefixSummary(
 			initiatorOverride: options?.initiatorOverride,
 			metadata: options?.metadata,
 		},
+		{ telemetry: options?.telemetry, oneshotKind: "compaction_turn_prefix" },
 	);
 	if (response.stopReason === "error") {

package/src/run-collector.ts CHANGED Viewed

@@ -139,9 +139,12 @@ interface ToolStart {
  * begin (provider crash, tracer swap mid-run), the corresponding record is
  * still emitted with `latencyMs: 0` rather than throwing.
  */
+const kChatStart = Symbol("agent.run-collector.chatStart");
+const kToolStart = Symbol("agent.run-collector.toolStart");
+type SpanWithChatStart = Span & { [kChatStart]?: ChatStart };
+type SpanWithToolStart = Span & { [kToolStart]?: ToolStart };
 export class AgentRunCollector {
-	readonly #chatStarts = new WeakMap<Span, ChatStart>();
-	readonly #toolStarts = new WeakMap<Span, ToolStart>();
 	readonly #chats: ChatRecord[] = [];
 	readonly #tools: ToolRecord[] = [];
 	readonly #availableTools = new Set<string>();
@@ -179,12 +182,12 @@ export class AgentRunCollector {
 		init: { readonly stepNumber: number; readonly model: Model; readonly provider?: string },
 	): void {
 		const provider = init.provider ?? init.model.provider;
-		this.#chatStarts.set(span, {
+		(span as SpanWithChatStart)[kChatStart] = {
 			stepNumber: init.stepNumber,
 			startedAtMs: performance.now(),
 			model: init.model.id,
 			provider,
-		});
+		};
 		this.#modelsUsed.add(init.model.id);
 		if (provider) this.#providersUsed.add(provider);
 	}
@@ -197,8 +200,8 @@ export class AgentRunCollector {
 			readonly costUnavailableReason: string | undefined;
 		},
 	): void {
-		const start = this.#chatStarts.get(span);
-		this.#chatStarts.delete(span);
+		const start = (span as SpanWithChatStart)[kChatStart];
+		(span as SpanWithChatStart)[kChatStart] = undefined;
 		const usage = message.usage;
 		// Public surface: `inputTokens` is the total cost-bearing input the
 		// provider charged for, so it must include cache_read + cache_write.
@@ -237,8 +240,8 @@ export class AgentRunCollector {
 	 * appear in the run summary.
 	 */
 	failChat(span: Span, fields: { readonly errorType: string }): void {
-		const start = this.#chatStarts.get(span);
-		this.#chatStarts.delete(span);
+		const start = (span as SpanWithChatStart)[kChatStart];
+		(span as SpanWithChatStart)[kChatStart] = undefined;
 		this.#chats.push({
 			stepNumber: start?.stepNumber ?? -1,
 			model: start?.model ?? "",
@@ -258,17 +261,17 @@ export class AgentRunCollector {
 	}
 	beginTool(span: Span, init: { readonly toolCallId: string; readonly toolName: string }): void {
-		this.#toolStarts.set(span, {
+		(span as SpanWithToolStart)[kToolStart] = {
 			toolCallId: init.toolCallId,
 			toolName: init.toolName,
 			startedAtMs: performance.now(),
-		});
+		};
 		this.#invokedTools.add(init.toolName);
 	}
 	endTool(span: Span, fields: { readonly status: ToolStatus; readonly errorType: string | undefined }): void {
-		const start = this.#toolStarts.get(span);
-		this.#toolStarts.delete(span);
+		const start = (span as SpanWithToolStart)[kToolStart];
+		(span as SpanWithToolStart)[kToolStart] = undefined;
 		this.#tools.push({
 			toolCallId: start?.toolCallId ?? "",
 			toolName: start?.toolName ?? "",

package/src/telemetry.ts CHANGED Viewed

@@ -24,10 +24,14 @@
  */
 import {
+	type Api,
 	type AssistantMessage,
+	type Context,
+	completeSimple,
 	type Message,
 	type Model,
 	type ServiceTier,
+	type SimpleStreamOptions,
 	type StopReason,
 	shouldSendServiceTier,
 	type ToolChoice,
@@ -139,6 +143,10 @@ export const enum PiGenAIAttr {
 	HandoffFromAgentId = "pi.gen_ai.handoff.from_agent.id",
 	HandoffToAgentName = "pi.gen_ai.handoff.to_agent.name",
 	HandoffToAgentId = "pi.gen_ai.handoff.to_agent.id",
+	// Marks chat spans emitted outside the agent loop (compaction, handoff, branch
+	// summary, image inspection, …). Lets dashboards split oneshot cost / latency
+	// from main-turn cost without overloading the semconv `gen_ai.operation.name`.
+	OneshotKind = "pi.gen_ai.oneshot.kind",
 	// Gateway / proxy (LiteLLM, Helicone, Portkey, …) — populated when a known
 	// gateway header pattern is detected on the upstream response. The base
 	// `gen_ai.provider.name` continues to track the *upstream* provider (e.g.
@@ -1573,6 +1581,111 @@ export async function recordManualChatTelemetry(
 	return span;
 }
+/**
+ * Options accepted by {@link instrumentedCompleteSimple}. Mirrors the
+ * `streamAssistantResponse` chat-span lifecycle for oneshot LLM calls
+ * (compaction summaries, handoff document, branch summary, inspect_image).
+ */
+export interface InstrumentedChatSpanOptions {
+	readonly telemetry: AgentTelemetry | undefined;
+	/** Optional explicit parent span. Defaults to `context.active()`. */
+	readonly parent?: Span;
+	/** Step index recorded on the span; defaults to `-1` for non-loop calls. */
+	readonly stepNumber?: number;
+	/**
+	 * Tag stamped onto `pi.gen_ai.oneshot.kind`. Values used by the agent:
+	 * `compaction_summary`, `compaction_short_summary`, `compaction_turn_prefix`,
+	 * `handoff`, `branch_summary`, `inspect_image`. Free-form to allow callers
+	 * outside this package to add new kinds without bumping the helper.
+	 */
+	readonly oneshotKind?: string;
+	/** Extra span attributes applied verbatim. */
+	readonly attributes?: Attributes;
+	/**
+	 * Override for the underlying {@link completeSimple} call. Defaults to
+	 * `completeSimple` from `@oh-my-pi/pi-ai`. Use to retain a test injection
+	 * seam while still going through the chat-span lifecycle.
+	 */
+	readonly completeImpl?: <TApi extends Api>(
+		model: Model<TApi>,
+		ctx: Context,
+		options: SimpleStreamOptions,
+	) => Promise<AssistantMessage>;
+}
+/**
+ * Wrap a {@link completeSimple} round-trip with the same chat-span lifecycle
+ * the agent loop uses for streamed turns: `startChatSpan` → run inside the
+ * active span → `finishChatSpan` on success, `failChatSpan` on throw.
+ *
+ * Short-circuits when `telemetry` is `undefined` so cost / overhead stays at
+ * zero for installations without an OTEL SDK.
+ */
+export async function instrumentedCompleteSimple<TApi extends Api>(
+	model: Model<TApi>,
+	ctx: Context,
+	options: SimpleStreamOptions,
+	span: InstrumentedChatSpanOptions,
+): Promise<AssistantMessage> {
+	const { telemetry, parent, oneshotKind } = span;
+	const stepNumber = span.stepNumber ?? -1;
+	const reasoning = options.reasoning;
+	const chatSpan = startChatSpan(telemetry, model, {
+		parent,
+		stepNumber,
+		request: {
+			maxTokens: options.maxTokens,
+			temperature: options.temperature,
+			topP: options.topP,
+			topK: options.topK,
+			presencePenalty: options.presencePenalty,
+			serviceTier: options.serviceTier,
+			reasoningEffort: typeof reasoning === "string" ? reasoning : undefined,
+			toolChoice: options.toolChoice,
+			tools: ctx.tools,
+			systemPrompt: ctx.systemPrompt,
+			messages: ctx.messages,
+		},
+	});
+	if (chatSpan) {
+		if (oneshotKind) chatSpan.setAttribute(PiGenAIAttr.OneshotKind, oneshotKind);
+		if (span.attributes) chatSpan.setAttributes(span.attributes);
+	}
+	// Wrap the user-supplied onResponse so we always capture response headers
+	// for the cost / gateway hooks without stealing them from the caller.
+	let capturedHeaders: Readonly<Record<string, string>> | undefined;
+	const userOnResponse = options.onResponse;
+	const captureOnResponse: NonNullable<SimpleStreamOptions["onResponse"]> = (response, modelInfo) => {
+		capturedHeaders = response.headers;
+		return userOnResponse?.(response, modelInfo);
+	};
+	try {
+		return await runInActiveSpan(chatSpan, async () => {
+			const complete = span.completeImpl ?? completeSimple;
+			const message = await complete(model, ctx, {
+				...options,
+				onResponse: captureOnResponse,
+			});
+			await finishChatSpan(telemetry, chatSpan, message, {
+				stepNumber,
+				serviceTier: options.serviceTier,
+				responseHeaders: capturedHeaders,
+				baseUrl: model.baseUrl,
+			});
+			return message;
+		});
+	} catch (err) {
+		failChatSpan(telemetry, chatSpan, {
+			errorObject: err,
+			responseHeaders: capturedHeaders,
+			baseUrl: model.baseUrl,
+		});
+		throw err;
+	}
+}
 /**
  * Start an `execute_tool` span representing one tool invocation. Parented
  * under the supplied `invoke_agent` span by default — pass `parent` to