npm - @prometheus-ai/agent-core - Versions diffs - 0.5.4 → 0.5.8 - Mend

@prometheus-ai/agent-core 0.5.4 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/dist/types/agent-loop.d.ts +7 -0
package/dist/types/agent.d.ts +41 -13
package/dist/types/compaction/branch-summarization.d.ts +3 -3
package/dist/types/compaction/compaction.d.ts +11 -9
package/dist/types/compaction/messages.d.ts +14 -2
package/dist/types/compaction/openai.d.ts +18 -3
package/dist/types/compaction/pruning.d.ts +55 -0
package/dist/types/compaction/shake.d.ts +3 -1
package/dist/types/compaction/utils.d.ts +18 -2
package/dist/types/proxy.d.ts +4 -3
package/dist/types/telemetry.d.ts +59 -57
package/dist/types/types.d.ts +60 -16
package/package.json +6 -4
package/src/agent-loop.ts +660 -181
package/src/agent.ts +103 -30
package/src/compaction/branch-summarization.ts +8 -7
package/src/compaction/compaction.ts +69 -34
package/src/compaction/messages.ts +78 -64
package/src/compaction/openai.ts +88 -74
package/src/compaction/prompts/branch-summary.md +1 -1
package/src/compaction/prompts/compaction-summary-context.md +1 -1
package/src/compaction/prompts/compaction-summary.md +2 -2
package/src/compaction/prompts/compaction-update-summary.md +3 -3
package/src/compaction/prompts/file-operations.md +3 -8
package/src/compaction/prompts/summarization-system.md +1 -1
package/src/compaction/pruning.ts +240 -8
package/src/compaction/shake.ts +7 -3
package/src/compaction/utils.ts +97 -19
package/src/proxy.ts +13 -7
package/src/telemetry.ts +126 -113
package/src/types.ts +65 -16

package/src/compaction/compaction.ts CHANGED

@@ -6,21 +6,27 @@
  */
 import {
+	type ApiKey,
 	type AssistantMessage,
-	clampThinkingLevelForModel,
 	Effort,
+	type FetchImpl,
 	type Message,
 	type MessageAttribution,
 	type Model,
+	ProviderHttpError,
+	type Tool,
 	type Usage,
+	withAuth,
 } from "@prometheus-ai/ai";
+import { clampThinkingLevelForModel } from "@prometheus-ai/catalog/model-thinking";
 import { countTokens } from "@prometheus-ai/natives";
+import * as snapcompact from "@prometheus-ai/snapcompact";
 import { logger, prompt } from "@prometheus-ai/utils";
 import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
 import { ThinkingLevel } from "../thinking";
-import type { AgentMessage, AgentTool } from "../types";
+import type { AgentMessage } from "../types";
 import type { CompactionEntry, SessionEntry } from "./entries";
-import { type ConvertToLlm, convertToLlm, createBranchSummaryMessage, createCustomMessage } from "./messages";
+import { type ConvertToLlm, createBranchSummaryMessage, createCustomMessage, defaultConvertToLlm } from "./messages";
 import {
 	buildOpenAiNativeHistory,
 	getPreservedOpenAiRemoteCompactionData,
@@ -43,6 +49,7 @@ import {
 	type FileOperations,
 	SUMMARIZATION_SYSTEM_PROMPT,
 	serializeConversation,
+	stripReadSelector,
 	upsertFileOperations,
 } from "./utils";
@@ -72,7 +79,7 @@ function extractFileOperations(
 		if (!prevCompaction.fromExtension && prevCompaction.details) {
 			const details = prevCompaction.details as CompactionDetails;
 			if (Array.isArray(details.readFiles)) {
-				for (const f of details.readFiles) fileOps.read.add(f);
+				for (const f of details.readFiles) fileOps.read.add(stripReadSelector(f));
 			}
 			if (Array.isArray(details.modifiedFiles)) {
 				for (const f of details.modifiedFiles) fileOps.edited.add(f);
@@ -135,7 +142,7 @@ export interface CompactionResult<T = unknown> {
 export interface CompactionSettings {
 	enabled: boolean;
-	strategy?: "context-full" | "handoff" | "shake" | "off";
+	strategy?: "context-full" | "handoff" | "shake" | "snapcompact" | "off";
 	thresholdPercent?: number;
 	thresholdTokens?: number;
 	reserveTokens: number;
@@ -283,9 +290,19 @@ export function estimateTokens(message: AgentMessage): number {
 					fragments.push(block.text);
 				} else if (block.type === "thinking") {
 					fragments.push(block.thinking);
+					// Providers charge for the opaque signature/reasoning payload that
+					// rides alongside the thinking text (OpenAI Responses encrypted
+					// reasoning items, Anthropic signed thinking blocks, etc.). Without
+					// counting it, this estimator can read ~half of the provider-reported
+					// usage on thinking-heavy turns — see #2275 for the resulting
+					// compaction-trigger / post-check metric divergence.
+					if (block.thinkingSignature) fragments.push(block.thinkingSignature);
 				} else if (block.type === "toolCall") {
 					fragments.push(block.name);
 					fragments.push(JSON.stringify(block.arguments));
+				} else if (block.type === "redactedThinking") {
+					// Encrypted reasoning blob the provider still bills for on replay.
+					fragments.push(block.data);
 				}
 			}
 			break;
@@ -308,6 +325,10 @@ export function estimateTokens(message: AgentMessage): number {
 		case "branchSummary":
 		case "compactionSummary": {
 			fragments.push(message.summary);
+			if (message.role === "compactionSummary" && message.images) {
+				// Snapcompact frames render at ≥1568px; providers bill the downscaled cap.
+				extra += message.images.length * snapcompact.FRAME_TOKEN_ESTIMATE;
+			}
 			break;
 		}
 		default:
@@ -538,10 +559,11 @@ function effortFromThinkingLevel(level: ThinkingLevel): Effort {
  * - Explicit effort → respect user choice → clamped per model.
  *
  * The clamp routes through `clampThinkingLevelForModel`, which returns
- * `undefined` for models with `compat.supportsReasoningEffort: false`
- * (e.g. `xai-oauth/grok-build`). That `undefined` then flows through to the
- * openai-responses mapper where `modelOmitsReasoningEffort` short-circuits
- * the wire param — no `requireSupportedEffort` throw.
+ * `undefined` for reasoning models without a thinking config — the build-time
+ * encoding of `compat.supportsReasoningEffort: false` (e.g.
+ * `xai-oauth/grok-build`). That `undefined` then flows through to the
+ * openai-responses mapper, which omits the wire param — no
+ * `requireSupportedEffort` throw.
  */
 function resolveCompactionEffort(model: Model, level: ThinkingLevel | undefined): Effort | undefined {
 	if (level === ThinkingLevel.Off) return undefined;
@@ -556,15 +578,12 @@ function resolveCompactionEffort(model: Model, level: ThinkingLevel | undefined)
  * onto a top-level `.status` field so callers (notably
  * `AgentSession.#isCompactionAuthFailure`) can branch on 401/403 without
  * regex-scraping `error.message`. The `auth_unavailable` synthetic
- * (Prometheus native gateway) does not populate `errorStatus`, hence the legacy
+ * (prometheus-native gateway) does not populate `errorStatus`, hence the legacy
  * message-based check is still required upstream — see issue #986.
  */
 function createSummarizationError(prefix: string, response: AssistantMessage): Error {
-	const error: Error & { status?: number } = new Error(`${prefix}: ${response.errorMessage || "Unknown error"}`);
-	if (response.errorStatus !== undefined) {
-		error.status = response.errorStatus;
-	}
-	return error;
+	const text = `${prefix}: ${response.errorMessage || "Unknown error"}`;
+	return response.errorStatus === undefined ? new Error(text) : new ProviderHttpError(text, response.errorStatus);
 }
 /**
@@ -582,7 +601,7 @@ export interface SummaryOptions {
 	/**
 	 * Optional telemetry handle. When provided, every LLM call emitted during
 	 * compaction is wrapped in an OTEL chat span tagged with
-	 * `prometheus.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
+	 * `pi.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
 	 * or `compaction_turn_prefix`). `undefined` keeps the call paths zero-cost.
 	 */
 	telemetry?: AgentTelemetry;
@@ -595,13 +614,15 @@ export interface SummaryOptions {
 	 * `resolveCompactionEffort` for the conversion contract.
 	 */
 	thinkingLevel?: ThinkingLevel;
+	/** Optional fetch implementation threaded into remote compaction calls. */
+	fetch?: FetchImpl;
 }
 export async function generateSummary(
 	currentMessages: AgentMessage[],
 	model: Model,
 	reserveTokens: number,
-	apiKey: string,
+	apiKey: ApiKey,
 	signal?: AbortSignal,
 	customInstructions?: string,
 	previousSummary?: string,
@@ -620,7 +641,7 @@ export async function generateSummary(
 	// Serialize conversation to text so model doesn't try to continue it
 	// Convert to LLM messages first (handles custom app messages when caller provides a transformer).
-	const llmMessages = (options?.convertToLlm ?? convertToLlm)(currentMessages);
+	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(currentMessages);
 	const conversationText = serializeConversation(llmMessages);
 	// Build the prompt with conversation wrapped in tags
@@ -647,6 +668,7 @@ export async function generateSummary(
 				prompt: promptText,
 			},
 			signal,
+			{ fetch: options.fetch },
 		);
 		return remote.summary;
 	}
@@ -685,14 +707,14 @@ export interface HandoffOptions {
 	/** Live agent system prompt — passed verbatim so providers hit the cached prefix. */
 	systemPrompt: string[];
 	/** Live agent tool list — same purpose. Forced to `toolChoice: "none"`. */
-	tools?: AgentTool<any>[];
+	tools?: Tool[];
 	customInstructions?: string;
 	convertToLlm?: ConvertToLlm;
 	initiatorOverride?: MessageAttribution;
 	metadata?: Record<string, unknown>;
 	/**
 	 * Optional telemetry handle. When provided, the handoff LLM call is
-	 * wrapped in an OTEL chat span tagged with `prometheus.gen_ai.oneshot.kind = "handoff"`.
+	 * wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "handoff"`.
 	 */
 	telemetry?: AgentTelemetry;
 	/**
@@ -714,11 +736,11 @@ export function renderHandoffPrompt(customInstructions?: string): string {
 export async function generateHandoff(
 	messages: AgentMessage[],
 	model: Model,
-	apiKey: string,
+	apiKey: ApiKey,
 	options: HandoffOptions,
 	signal?: AbortSignal,
 ): Promise<string> {
-	const llmMessages = (options.convertToLlm ?? convertToLlm)(messages);
+	const llmMessages = (options.convertToLlm ?? defaultConvertToLlm)(messages);
 	const requestMessages: Message[] = [
 		...llmMessages,
 		{
@@ -762,12 +784,12 @@ async function generateShortSummary(
 	historySummary: string | undefined,
 	model: Model,
 	reserveTokens: number,
-	apiKey: string,
+	apiKey: ApiKey,
 	signal?: AbortSignal,
 	options?: SummaryOptions,
 ): Promise<string> {
 	const maxTokens = Math.min(512, Math.floor(0.2 * reserveTokens));
-	const llmMessages = (options?.convertToLlm ?? convertToLlm)(recentMessages);
+	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(recentMessages);
 	const conversationText = serializeConversation(llmMessages);
 	let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
@@ -785,6 +807,7 @@ async function generateShortSummary(
 				prompt: promptText,
 			},
 			signal,
+			{ fetch: options?.fetch },
 		);
 		return remote.summary;
 	}
@@ -959,7 +982,7 @@ const TURN_PREFIX_SUMMARIZATION_PROMPT = prompt.render(compactionTurnPrefixPromp
 export async function compact(
 	preparation: CompactionPreparation,
 	model: Model,
-	apiKey: string,
+	apiKey: ApiKey,
 	customInstructions?: string,
 	signal?: AbortSignal,
 	options?: SummaryOptions,
@@ -992,6 +1015,7 @@ export async function compact(
 		// silently falls back to Effort.High — the same defect e07b47ee4 fixed
 		// at the call sites, leaked back in here. See resolveCompactionEffort.
 		thinkingLevel: options?.thinkingLevel,
+		fetch: options?.fetch,
 	};
 	let preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, undefined);
@@ -1003,21 +1027,31 @@ export async function compact(
 				? previousRemoteCompaction.replacementHistory
 				: undefined;
 		const remoteHistory = buildOpenAiNativeHistory(
-			(summaryOptions.convertToLlm ?? convertToLlm)(remoteMessages),
+			(summaryOptions.convertToLlm ?? defaultConvertToLlm)(remoteMessages),
 			model,
 			previousReplacementHistory,
 		);
 		if (remoteHistory.length > 0) {
 			try {
-				const remote = await requestOpenAiRemoteCompaction(
-					model,
+				const remote = await withAuth(
 					apiKey,
-					remoteHistory,
-					summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
-					signal,
+					key =>
+						requestOpenAiRemoteCompaction(
+							model,
+							key,
+							remoteHistory,
+							summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
+							signal,
+							{ fetch: summaryOptions.fetch },
+						),
+					{ signal },
 				);
 				preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, remote);
 			} catch (err) {
+				// A user/session abort is a cancellation, not a remote failure —
+				// swallowing it here would downgrade Esc into "fall back to local
+				// summarization" and keep compaction running on an aborted signal.
+				if (signal?.aborted) throw err;
 				logger.warn("OpenAI remote compaction failed, falling back to local summarization", {
 					error: err instanceof Error ? err.message : String(err),
 					model: model.id,
@@ -1085,12 +1119,13 @@ export async function compact(
 			// Same propagation as summaryOptions above — generateShortSummary
 			// resolves its own reasoning via resolveCompactionEffort.
 			thinkingLevel: options?.thinkingLevel,
+			fetch: summaryOptions.fetch,
 		},
 	);
 	// Compute file lists and append to summary
 	const { readFiles, modifiedFiles } = computeFileLists(fileOps);
-	summary = upsertFileOperations(summary, readFiles, modifiedFiles);
+	summary = upsertFileOperations(summary, readFiles, modifiedFiles, fileOps.read);
 	if (!firstKeptEntryId) {
 		throw new Error("First kept entry has no ID - session may need migration");
@@ -1113,13 +1148,13 @@ async function generateTurnPrefixSummary(
 	messages: AgentMessage[],
 	model: Model,
 	reserveTokens: number,
-	apiKey: string,
+	apiKey: ApiKey,
 	signal?: AbortSignal,
 	options?: SummaryOptions,
 ): Promise<string> {
 	const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
-	const llmMessages = (options?.convertToLlm ?? convertToLlm)(messages);
+	const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(messages);
 	const conversationText = serializeConversation(llmMessages);
 	const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;
 	const summarizationMessages = [

package/src/compaction/messages.ts CHANGED

@@ -51,6 +51,8 @@ export interface CompactionSummaryMessage {
 	shortSummary?: string;
 	tokensBefore: number;
 	providerPayload?: ProviderPayload;
+	/** Snapcompact frames archived by this compaction; appended as image blocks after the summary text. */
+	images?: ImageContent[];
 	timestamp: number;
 }
@@ -98,6 +100,7 @@ export function createCompactionSummaryMessage(
 	timestamp: string,
 	shortSummary?: string,
 	providerPayload?: ProviderPayload,
+	images?: ImageContent[],
 ): CompactionSummaryMessage {
 	return {
 		role: "compactionSummary",
@@ -105,6 +108,7 @@ export function createCompactionSummaryMessage(
 		shortSummary,
 		tokensBefore,
 		providerPayload,
+		images: images && images.length > 0 ? images : undefined,
 		timestamp: new Date(timestamp).getTime(),
 	};
 }
@@ -137,6 +141,79 @@ function isCoreCompactionMessage(message: AgentMessage): message is AgentMessage
 	);
 }
+/**
+ * Transform a single core-domain agent message to its LLM form; `undefined`
+ * drops it from the provider request.
+ *
+ * Single source of truth for the core roles (user/developer/assistant/
+ * toolResult) and the compaction messages owned by this package. Embedders
+ * with their own app messages (e.g. the coding agent) handle their custom
+ * roles and delegate every core role here — duplicating these cases is how
+ * snapcompact frames once silently fell off the provider request.
+ */
+export function convertMessageToLlm(message: AgentMessage): Message | undefined {
+	if (isCoreCompactionMessage(message)) {
+		switch (message.role) {
+			case "custom":
+			case "hookMessage": {
+				const content =
+					typeof message.content === "string"
+						? [{ type: "text" as const, text: message.content }]
+						: message.content;
+				return {
+					role: "developer",
+					content,
+					attribution: message.attribution,
+					timestamp: message.timestamp,
+				};
+			}
+			case "branchSummary":
+				return {
+					role: "user",
+					content: [
+						{
+							type: "text" as const,
+							text: renderBranchSummaryContext(message.summary),
+						},
+					],
+					attribution: "agent",
+					timestamp: message.timestamp,
+				};
+			case "compactionSummary":
+				return {
+					role: "user",
+					content: [
+						{
+							type: "text" as const,
+							text: renderCompactionSummaryContext(message.summary),
+						},
+						...(message.images ?? []),
+					],
+					attribution: "agent",
+					providerPayload: message.providerPayload,
+					timestamp: message.timestamp,
+				};
+		}
+	}
+	switch (message.role) {
+		case "user":
+			return { ...message, attribution: message.attribution ?? "user" };
+		case "developer":
+			return { ...message, attribution: message.attribution ?? "agent" };
+		case "assistant":
+			return message as AssistantMessage;
+		case "toolResult":
+			return {
+				...message,
+				content: getPrunedToolResultContent(message as ToolResultMessage),
+				attribution: message.attribution ?? "agent",
+			};
+		default:
+			return undefined;
+	}
+}
 /**
  * Default compaction-domain transformer.
  *
@@ -145,68 +222,5 @@ function isCoreCompactionMessage(message: AgentMessage): message is AgentMessage
  * core LLM roles and the compaction messages owned by this package.
  */
 export function defaultConvertToLlm(messages: AgentMessage[]): Message[] {
-	return messages
-		.map((message): Message | undefined => {
-			if (isCoreCompactionMessage(message)) {
-				switch (message.role) {
-					case "custom":
-					case "hookMessage": {
-						const content =
-							typeof message.content === "string"
-								? [{ type: "text" as const, text: message.content }]
-								: message.content;
-						return {
-							role: "user",
-							content,
-							attribution: message.attribution,
-							timestamp: message.timestamp,
-						};
-					}
-					case "branchSummary":
-						return {
-							role: "user",
-							content: [
-								{
-									type: "text" as const,
-									text: renderBranchSummaryContext(message.summary),
-								},
-							],
-							attribution: "agent",
-							timestamp: message.timestamp,
-						};
-					case "compactionSummary":
-						return {
-							role: "user",
-							content: [
-								{
-									type: "text" as const,
-									text: renderCompactionSummaryContext(message.summary),
-								},
-							],
-							attribution: "agent",
-							providerPayload: message.providerPayload,
-							timestamp: message.timestamp,
-						};
-				}
-			}
-			switch (message.role) {
-				case "user":
-					return { ...message, attribution: message.attribution ?? "user" };
-				case "developer":
-					return { ...message, attribution: message.attribution ?? "agent" };
-				case "assistant":
-					return message as AssistantMessage;
-				case "toolResult":
-					return {
-						...message,
-						content: getPrunedToolResultContent(message as ToolResultMessage),
-						attribution: message.attribution ?? "agent",
-					};
-				default:
-					return undefined;
-			}
-		})
-		.filter(message => message !== undefined);
+	return messages.map(convertMessageToLlm).filter(message => message !== undefined);
 }
-export const convertToLlm = defaultConvertToLlm;