@prometheus-ai/agent-core 0.5.4 → 0.5.8

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -6,21 +6,27 @@
6
6
  */
7
7
 
8
8
  import {
9
+ type ApiKey,
9
10
  type AssistantMessage,
10
- clampThinkingLevelForModel,
11
11
  Effort,
12
+ type FetchImpl,
12
13
  type Message,
13
14
  type MessageAttribution,
14
15
  type Model,
16
+ ProviderHttpError,
17
+ type Tool,
15
18
  type Usage,
19
+ withAuth,
16
20
  } from "@prometheus-ai/ai";
21
+ import { clampThinkingLevelForModel } from "@prometheus-ai/catalog/model-thinking";
17
22
  import { countTokens } from "@prometheus-ai/natives";
23
+ import * as snapcompact from "@prometheus-ai/snapcompact";
18
24
  import { logger, prompt } from "@prometheus-ai/utils";
19
25
  import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
20
26
  import { ThinkingLevel } from "../thinking";
21
- import type { AgentMessage, AgentTool } from "../types";
27
+ import type { AgentMessage } from "../types";
22
28
  import type { CompactionEntry, SessionEntry } from "./entries";
23
- import { type ConvertToLlm, convertToLlm, createBranchSummaryMessage, createCustomMessage } from "./messages";
29
+ import { type ConvertToLlm, createBranchSummaryMessage, createCustomMessage, defaultConvertToLlm } from "./messages";
24
30
  import {
25
31
  buildOpenAiNativeHistory,
26
32
  getPreservedOpenAiRemoteCompactionData,
@@ -43,6 +49,7 @@ import {
43
49
  type FileOperations,
44
50
  SUMMARIZATION_SYSTEM_PROMPT,
45
51
  serializeConversation,
52
+ stripReadSelector,
46
53
  upsertFileOperations,
47
54
  } from "./utils";
48
55
 
@@ -72,7 +79,7 @@ function extractFileOperations(
72
79
  if (!prevCompaction.fromExtension && prevCompaction.details) {
73
80
  const details = prevCompaction.details as CompactionDetails;
74
81
  if (Array.isArray(details.readFiles)) {
75
- for (const f of details.readFiles) fileOps.read.add(f);
82
+ for (const f of details.readFiles) fileOps.read.add(stripReadSelector(f));
76
83
  }
77
84
  if (Array.isArray(details.modifiedFiles)) {
78
85
  for (const f of details.modifiedFiles) fileOps.edited.add(f);
@@ -135,7 +142,7 @@ export interface CompactionResult<T = unknown> {
135
142
 
136
143
  export interface CompactionSettings {
137
144
  enabled: boolean;
138
- strategy?: "context-full" | "handoff" | "shake" | "off";
145
+ strategy?: "context-full" | "handoff" | "shake" | "snapcompact" | "off";
139
146
  thresholdPercent?: number;
140
147
  thresholdTokens?: number;
141
148
  reserveTokens: number;
@@ -283,9 +290,19 @@ export function estimateTokens(message: AgentMessage): number {
283
290
  fragments.push(block.text);
284
291
  } else if (block.type === "thinking") {
285
292
  fragments.push(block.thinking);
293
+ // Providers charge for the opaque signature/reasoning payload that
294
+ // rides alongside the thinking text (OpenAI Responses encrypted
295
+ // reasoning items, Anthropic signed thinking blocks, etc.). Without
296
+ // counting it, this estimator can read ~half of the provider-reported
297
+ // usage on thinking-heavy turns — see #2275 for the resulting
298
+ // compaction-trigger / post-check metric divergence.
299
+ if (block.thinkingSignature) fragments.push(block.thinkingSignature);
286
300
  } else if (block.type === "toolCall") {
287
301
  fragments.push(block.name);
288
302
  fragments.push(JSON.stringify(block.arguments));
303
+ } else if (block.type === "redactedThinking") {
304
+ // Encrypted reasoning blob the provider still bills for on replay.
305
+ fragments.push(block.data);
289
306
  }
290
307
  }
291
308
  break;
@@ -308,6 +325,10 @@ export function estimateTokens(message: AgentMessage): number {
308
325
  case "branchSummary":
309
326
  case "compactionSummary": {
310
327
  fragments.push(message.summary);
328
+ if (message.role === "compactionSummary" && message.images) {
329
+ // Snapcompact frames render at ≥1568px; providers bill the downscaled cap.
330
+ extra += message.images.length * snapcompact.FRAME_TOKEN_ESTIMATE;
331
+ }
311
332
  break;
312
333
  }
313
334
  default:
@@ -538,10 +559,11 @@ function effortFromThinkingLevel(level: ThinkingLevel): Effort {
538
559
  * - Explicit effort → respect user choice → clamped per model.
539
560
  *
540
561
  * The clamp routes through `clampThinkingLevelForModel`, which returns
541
- * `undefined` for models with `compat.supportsReasoningEffort: false`
542
- * (e.g. `xai-oauth/grok-build`). That `undefined` then flows through to the
543
- * openai-responses mapper where `modelOmitsReasoningEffort` short-circuits
544
- * the wire param — no `requireSupportedEffort` throw.
562
+ * `undefined` for reasoning models without a thinking config — the build-time
563
+ * encoding of `compat.supportsReasoningEffort: false` (e.g.
564
+ * `xai-oauth/grok-build`). That `undefined` then flows through to the
565
+ * openai-responses mapper, which omits the wire param — no
566
+ * `requireSupportedEffort` throw.
545
567
  */
546
568
  function resolveCompactionEffort(model: Model, level: ThinkingLevel | undefined): Effort | undefined {
547
569
  if (level === ThinkingLevel.Off) return undefined;
@@ -556,15 +578,12 @@ function resolveCompactionEffort(model: Model, level: ThinkingLevel | undefined)
556
578
  * onto a top-level `.status` field so callers (notably
557
579
  * `AgentSession.#isCompactionAuthFailure`) can branch on 401/403 without
558
580
  * regex-scraping `error.message`. The `auth_unavailable` synthetic
559
- * (Prometheus native gateway) does not populate `errorStatus`, hence the legacy
581
+ * (prometheus-native gateway) does not populate `errorStatus`, hence the legacy
560
582
  * message-based check is still required upstream — see issue #986.
561
583
  */
562
584
  function createSummarizationError(prefix: string, response: AssistantMessage): Error {
563
- const error: Error & { status?: number } = new Error(`${prefix}: ${response.errorMessage || "Unknown error"}`);
564
- if (response.errorStatus !== undefined) {
565
- error.status = response.errorStatus;
566
- }
567
- return error;
585
+ const text = `${prefix}: ${response.errorMessage || "Unknown error"}`;
586
+ return response.errorStatus === undefined ? new Error(text) : new ProviderHttpError(text, response.errorStatus);
568
587
  }
569
588
 
570
589
  /**
@@ -582,7 +601,7 @@ export interface SummaryOptions {
582
601
  /**
583
602
  * Optional telemetry handle. When provided, every LLM call emitted during
584
603
  * compaction is wrapped in an OTEL chat span tagged with
585
- * `prometheus.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
604
+ * `pi.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
586
605
  * or `compaction_turn_prefix`). `undefined` keeps the call paths zero-cost.
587
606
  */
588
607
  telemetry?: AgentTelemetry;
@@ -595,13 +614,15 @@ export interface SummaryOptions {
595
614
  * `resolveCompactionEffort` for the conversion contract.
596
615
  */
597
616
  thinkingLevel?: ThinkingLevel;
617
+ /** Optional fetch implementation threaded into remote compaction calls. */
618
+ fetch?: FetchImpl;
598
619
  }
599
620
 
600
621
  export async function generateSummary(
601
622
  currentMessages: AgentMessage[],
602
623
  model: Model,
603
624
  reserveTokens: number,
604
- apiKey: string,
625
+ apiKey: ApiKey,
605
626
  signal?: AbortSignal,
606
627
  customInstructions?: string,
607
628
  previousSummary?: string,
@@ -620,7 +641,7 @@ export async function generateSummary(
620
641
 
621
642
  // Serialize conversation to text so model doesn't try to continue it
622
643
  // Convert to LLM messages first (handles custom app messages when caller provides a transformer).
623
- const llmMessages = (options?.convertToLlm ?? convertToLlm)(currentMessages);
644
+ const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(currentMessages);
624
645
  const conversationText = serializeConversation(llmMessages);
625
646
 
626
647
  // Build the prompt with conversation wrapped in tags
@@ -647,6 +668,7 @@ export async function generateSummary(
647
668
  prompt: promptText,
648
669
  },
649
670
  signal,
671
+ { fetch: options.fetch },
650
672
  );
651
673
  return remote.summary;
652
674
  }
@@ -685,14 +707,14 @@ export interface HandoffOptions {
685
707
  /** Live agent system prompt — passed verbatim so providers hit the cached prefix. */
686
708
  systemPrompt: string[];
687
709
  /** Live agent tool list — same purpose. Forced to `toolChoice: "none"`. */
688
- tools?: AgentTool<any>[];
710
+ tools?: Tool[];
689
711
  customInstructions?: string;
690
712
  convertToLlm?: ConvertToLlm;
691
713
  initiatorOverride?: MessageAttribution;
692
714
  metadata?: Record<string, unknown>;
693
715
  /**
694
716
  * Optional telemetry handle. When provided, the handoff LLM call is
695
- * wrapped in an OTEL chat span tagged with `prometheus.gen_ai.oneshot.kind = "handoff"`.
717
+ * wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "handoff"`.
696
718
  */
697
719
  telemetry?: AgentTelemetry;
698
720
  /**
@@ -714,11 +736,11 @@ export function renderHandoffPrompt(customInstructions?: string): string {
714
736
  export async function generateHandoff(
715
737
  messages: AgentMessage[],
716
738
  model: Model,
717
- apiKey: string,
739
+ apiKey: ApiKey,
718
740
  options: HandoffOptions,
719
741
  signal?: AbortSignal,
720
742
  ): Promise<string> {
721
- const llmMessages = (options.convertToLlm ?? convertToLlm)(messages);
743
+ const llmMessages = (options.convertToLlm ?? defaultConvertToLlm)(messages);
722
744
  const requestMessages: Message[] = [
723
745
  ...llmMessages,
724
746
  {
@@ -762,12 +784,12 @@ async function generateShortSummary(
762
784
  historySummary: string | undefined,
763
785
  model: Model,
764
786
  reserveTokens: number,
765
- apiKey: string,
787
+ apiKey: ApiKey,
766
788
  signal?: AbortSignal,
767
789
  options?: SummaryOptions,
768
790
  ): Promise<string> {
769
791
  const maxTokens = Math.min(512, Math.floor(0.2 * reserveTokens));
770
- const llmMessages = (options?.convertToLlm ?? convertToLlm)(recentMessages);
792
+ const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(recentMessages);
771
793
  const conversationText = serializeConversation(llmMessages);
772
794
 
773
795
  let promptText = `<conversation>\n${conversationText}\n</conversation>\n\n`;
@@ -785,6 +807,7 @@ async function generateShortSummary(
785
807
  prompt: promptText,
786
808
  },
787
809
  signal,
810
+ { fetch: options?.fetch },
788
811
  );
789
812
  return remote.summary;
790
813
  }
@@ -959,7 +982,7 @@ const TURN_PREFIX_SUMMARIZATION_PROMPT = prompt.render(compactionTurnPrefixPromp
959
982
  export async function compact(
960
983
  preparation: CompactionPreparation,
961
984
  model: Model,
962
- apiKey: string,
985
+ apiKey: ApiKey,
963
986
  customInstructions?: string,
964
987
  signal?: AbortSignal,
965
988
  options?: SummaryOptions,
@@ -992,6 +1015,7 @@ export async function compact(
992
1015
  // silently falls back to Effort.High — the same defect e07b47ee4 fixed
993
1016
  // at the call sites, leaked back in here. See resolveCompactionEffort.
994
1017
  thinkingLevel: options?.thinkingLevel,
1018
+ fetch: options?.fetch,
995
1019
  };
996
1020
 
997
1021
  let preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, undefined);
@@ -1003,21 +1027,31 @@ export async function compact(
1003
1027
  ? previousRemoteCompaction.replacementHistory
1004
1028
  : undefined;
1005
1029
  const remoteHistory = buildOpenAiNativeHistory(
1006
- (summaryOptions.convertToLlm ?? convertToLlm)(remoteMessages),
1030
+ (summaryOptions.convertToLlm ?? defaultConvertToLlm)(remoteMessages),
1007
1031
  model,
1008
1032
  previousReplacementHistory,
1009
1033
  );
1010
1034
  if (remoteHistory.length > 0) {
1011
1035
  try {
1012
- const remote = await requestOpenAiRemoteCompaction(
1013
- model,
1036
+ const remote = await withAuth(
1014
1037
  apiKey,
1015
- remoteHistory,
1016
- summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
1017
- signal,
1038
+ key =>
1039
+ requestOpenAiRemoteCompaction(
1040
+ model,
1041
+ key,
1042
+ remoteHistory,
1043
+ summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
1044
+ signal,
1045
+ { fetch: summaryOptions.fetch },
1046
+ ),
1047
+ { signal },
1018
1048
  );
1019
1049
  preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, remote);
1020
1050
  } catch (err) {
1051
+ // A user/session abort is a cancellation, not a remote failure —
1052
+ // swallowing it here would downgrade Esc into "fall back to local
1053
+ // summarization" and keep compaction running on an aborted signal.
1054
+ if (signal?.aborted) throw err;
1021
1055
  logger.warn("OpenAI remote compaction failed, falling back to local summarization", {
1022
1056
  error: err instanceof Error ? err.message : String(err),
1023
1057
  model: model.id,
@@ -1085,12 +1119,13 @@ export async function compact(
1085
1119
  // Same propagation as summaryOptions above — generateShortSummary
1086
1120
  // resolves its own reasoning via resolveCompactionEffort.
1087
1121
  thinkingLevel: options?.thinkingLevel,
1122
+ fetch: summaryOptions.fetch,
1088
1123
  },
1089
1124
  );
1090
1125
 
1091
1126
  // Compute file lists and append to summary
1092
1127
  const { readFiles, modifiedFiles } = computeFileLists(fileOps);
1093
- summary = upsertFileOperations(summary, readFiles, modifiedFiles);
1128
+ summary = upsertFileOperations(summary, readFiles, modifiedFiles, fileOps.read);
1094
1129
 
1095
1130
  if (!firstKeptEntryId) {
1096
1131
  throw new Error("First kept entry has no ID - session may need migration");
@@ -1113,13 +1148,13 @@ async function generateTurnPrefixSummary(
1113
1148
  messages: AgentMessage[],
1114
1149
  model: Model,
1115
1150
  reserveTokens: number,
1116
- apiKey: string,
1151
+ apiKey: ApiKey,
1117
1152
  signal?: AbortSignal,
1118
1153
  options?: SummaryOptions,
1119
1154
  ): Promise<string> {
1120
1155
  const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
1121
1156
 
1122
- const llmMessages = (options?.convertToLlm ?? convertToLlm)(messages);
1157
+ const llmMessages = (options?.convertToLlm ?? defaultConvertToLlm)(messages);
1123
1158
  const conversationText = serializeConversation(llmMessages);
1124
1159
  const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;
1125
1160
  const summarizationMessages = [
@@ -51,6 +51,8 @@ export interface CompactionSummaryMessage {
51
51
  shortSummary?: string;
52
52
  tokensBefore: number;
53
53
  providerPayload?: ProviderPayload;
54
+ /** Snapcompact frames archived by this compaction; appended as image blocks after the summary text. */
55
+ images?: ImageContent[];
54
56
  timestamp: number;
55
57
  }
56
58
 
@@ -98,6 +100,7 @@ export function createCompactionSummaryMessage(
98
100
  timestamp: string,
99
101
  shortSummary?: string,
100
102
  providerPayload?: ProviderPayload,
103
+ images?: ImageContent[],
101
104
  ): CompactionSummaryMessage {
102
105
  return {
103
106
  role: "compactionSummary",
@@ -105,6 +108,7 @@ export function createCompactionSummaryMessage(
105
108
  shortSummary,
106
109
  tokensBefore,
107
110
  providerPayload,
111
+ images: images && images.length > 0 ? images : undefined,
108
112
  timestamp: new Date(timestamp).getTime(),
109
113
  };
110
114
  }
@@ -137,6 +141,79 @@ function isCoreCompactionMessage(message: AgentMessage): message is AgentMessage
137
141
  );
138
142
  }
139
143
 
144
+ /**
145
+ * Transform a single core-domain agent message to its LLM form; `undefined`
146
+ * drops it from the provider request.
147
+ *
148
+ * Single source of truth for the core roles (user/developer/assistant/
149
+ * toolResult) and the compaction messages owned by this package. Embedders
150
+ * with their own app messages (e.g. the coding agent) handle their custom
151
+ * roles and delegate every core role here — duplicating these cases is how
152
+ * snapcompact frames once silently fell off the provider request.
153
+ */
154
+ export function convertMessageToLlm(message: AgentMessage): Message | undefined {
155
+ if (isCoreCompactionMessage(message)) {
156
+ switch (message.role) {
157
+ case "custom":
158
+ case "hookMessage": {
159
+ const content =
160
+ typeof message.content === "string"
161
+ ? [{ type: "text" as const, text: message.content }]
162
+ : message.content;
163
+ return {
164
+ role: "developer",
165
+ content,
166
+ attribution: message.attribution,
167
+ timestamp: message.timestamp,
168
+ };
169
+ }
170
+ case "branchSummary":
171
+ return {
172
+ role: "user",
173
+ content: [
174
+ {
175
+ type: "text" as const,
176
+ text: renderBranchSummaryContext(message.summary),
177
+ },
178
+ ],
179
+ attribution: "agent",
180
+ timestamp: message.timestamp,
181
+ };
182
+ case "compactionSummary":
183
+ return {
184
+ role: "user",
185
+ content: [
186
+ {
187
+ type: "text" as const,
188
+ text: renderCompactionSummaryContext(message.summary),
189
+ },
190
+ ...(message.images ?? []),
191
+ ],
192
+ attribution: "agent",
193
+ providerPayload: message.providerPayload,
194
+ timestamp: message.timestamp,
195
+ };
196
+ }
197
+ }
198
+
199
+ switch (message.role) {
200
+ case "user":
201
+ return { ...message, attribution: message.attribution ?? "user" };
202
+ case "developer":
203
+ return { ...message, attribution: message.attribution ?? "agent" };
204
+ case "assistant":
205
+ return message as AssistantMessage;
206
+ case "toolResult":
207
+ return {
208
+ ...message,
209
+ content: getPrunedToolResultContent(message as ToolResultMessage),
210
+ attribution: message.attribution ?? "agent",
211
+ };
212
+ default:
213
+ return undefined;
214
+ }
215
+ }
216
+
140
217
  /**
141
218
  * Default compaction-domain transformer.
142
219
  *
@@ -145,68 +222,5 @@ function isCoreCompactionMessage(message: AgentMessage): message is AgentMessage
145
222
  * core LLM roles and the compaction messages owned by this package.
146
223
  */
147
224
  export function defaultConvertToLlm(messages: AgentMessage[]): Message[] {
148
- return messages
149
- .map((message): Message | undefined => {
150
- if (isCoreCompactionMessage(message)) {
151
- switch (message.role) {
152
- case "custom":
153
- case "hookMessage": {
154
- const content =
155
- typeof message.content === "string"
156
- ? [{ type: "text" as const, text: message.content }]
157
- : message.content;
158
- return {
159
- role: "user",
160
- content,
161
- attribution: message.attribution,
162
- timestamp: message.timestamp,
163
- };
164
- }
165
- case "branchSummary":
166
- return {
167
- role: "user",
168
- content: [
169
- {
170
- type: "text" as const,
171
- text: renderBranchSummaryContext(message.summary),
172
- },
173
- ],
174
- attribution: "agent",
175
- timestamp: message.timestamp,
176
- };
177
- case "compactionSummary":
178
- return {
179
- role: "user",
180
- content: [
181
- {
182
- type: "text" as const,
183
- text: renderCompactionSummaryContext(message.summary),
184
- },
185
- ],
186
- attribution: "agent",
187
- providerPayload: message.providerPayload,
188
- timestamp: message.timestamp,
189
- };
190
- }
191
- }
192
-
193
- switch (message.role) {
194
- case "user":
195
- return { ...message, attribution: message.attribution ?? "user" };
196
- case "developer":
197
- return { ...message, attribution: message.attribution ?? "agent" };
198
- case "assistant":
199
- return message as AssistantMessage;
200
- case "toolResult":
201
- return {
202
- ...message,
203
- content: getPrunedToolResultContent(message as ToolResultMessage),
204
- attribution: message.attribution ?? "agent",
205
- };
206
- default:
207
- return undefined;
208
- }
209
- })
210
- .filter(message => message !== undefined);
225
+ return messages.map(convertMessageToLlm).filter(message => message !== undefined);
211
226
  }
212
- export const convertToLlm = defaultConvertToLlm;