@prometheus-ai/agent-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +7 -0
  2. package/README.md +473 -0
  3. package/dist/types/agent-loop.d.ts +55 -0
  4. package/dist/types/agent.d.ts +331 -0
  5. package/dist/types/append-only-context.d.ts +113 -0
  6. package/dist/types/compaction/branch-summarization.d.ts +94 -0
  7. package/dist/types/compaction/compaction.d.ts +183 -0
  8. package/dist/types/compaction/entries.d.ts +103 -0
  9. package/dist/types/compaction/errors.d.ts +26 -0
  10. package/dist/types/compaction/index.d.ts +12 -0
  11. package/dist/types/compaction/messages.d.ts +61 -0
  12. package/dist/types/compaction/openai.d.ts +58 -0
  13. package/dist/types/compaction/pruning.d.ts +19 -0
  14. package/dist/types/compaction/shake.d.ts +82 -0
  15. package/dist/types/compaction/tool-protection.d.ts +17 -0
  16. package/dist/types/compaction/utils.d.ts +32 -0
  17. package/dist/types/compaction.d.ts +1 -0
  18. package/dist/types/harmony-leak.d.ts +118 -0
  19. package/dist/types/index.d.ts +11 -0
  20. package/dist/types/proxy.d.ts +84 -0
  21. package/dist/types/run-collector.d.ts +196 -0
  22. package/dist/types/telemetry.d.ts +588 -0
  23. package/dist/types/thinking.d.ts +17 -0
  24. package/dist/types/types.d.ts +443 -0
  25. package/dist/types/utils/yield.d.ts +52 -0
  26. package/package.json +75 -0
  27. package/src/agent-loop.ts +1418 -0
  28. package/src/agent.ts +1236 -0
  29. package/src/append-only-context.ts +297 -0
  30. package/src/compaction/branch-summarization.ts +339 -0
  31. package/src/compaction/compaction.ts +1155 -0
  32. package/src/compaction/entries.ts +133 -0
  33. package/src/compaction/errors.ts +31 -0
  34. package/src/compaction/index.ts +13 -0
  35. package/src/compaction/messages.ts +212 -0
  36. package/src/compaction/openai.ts +552 -0
  37. package/src/compaction/prompts/auto-handoff-threshold-focus.md +1 -0
  38. package/src/compaction/prompts/branch-summary-context.md +5 -0
  39. package/src/compaction/prompts/branch-summary-preamble.md +2 -0
  40. package/src/compaction/prompts/branch-summary.md +30 -0
  41. package/src/compaction/prompts/compaction-short-summary.md +9 -0
  42. package/src/compaction/prompts/compaction-summary-context.md +5 -0
  43. package/src/compaction/prompts/compaction-summary.md +38 -0
  44. package/src/compaction/prompts/compaction-turn-prefix.md +17 -0
  45. package/src/compaction/prompts/compaction-update-summary.md +45 -0
  46. package/src/compaction/prompts/file-operations.md +10 -0
  47. package/src/compaction/prompts/handoff-document.md +49 -0
  48. package/src/compaction/prompts/summarization-system.md +3 -0
  49. package/src/compaction/pruning.ts +99 -0
  50. package/src/compaction/shake.ts +406 -0
  51. package/src/compaction/tool-protection.ts +55 -0
  52. package/src/compaction/utils.ts +185 -0
  53. package/src/compaction.ts +1 -0
  54. package/src/harmony-leak.ts +456 -0
  55. package/src/index.ts +21 -0
  56. package/src/proxy.ts +326 -0
  57. package/src/run-collector.ts +631 -0
  58. package/src/telemetry.ts +2020 -0
  59. package/src/thinking.ts +19 -0
  60. package/src/types.ts +505 -0
  61. package/src/utils/yield.ts +146 -0
@@ -0,0 +1,1155 @@
1
+ /**
2
+ * Context compaction for long sessions.
3
+ *
4
+ * Pure functions for compaction logic. The session manager handles I/O,
5
+ * and after compaction the session is reloaded.
6
+ */
7
+
8
+ import {
9
+ type AssistantMessage,
10
+ clampThinkingLevelForModel,
11
+ Effort,
12
+ type Message,
13
+ type MessageAttribution,
14
+ type Model,
15
+ type Usage,
16
+ } from "@prometheus-ai/ai";
17
+ import { countTokens } from "@prometheus-ai/natives";
18
+ import { logger, prompt } from "@prometheus-ai/utils";
19
+ import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
20
+ import { ThinkingLevel } from "../thinking";
21
+ import type { AgentMessage, AgentTool } from "../types";
22
+ import type { CompactionEntry, SessionEntry } from "./entries";
23
+ import { type ConvertToLlm, convertToLlm, createBranchSummaryMessage, createCustomMessage } from "./messages";
24
+ import {
25
+ buildOpenAiNativeHistory,
26
+ getPreservedOpenAiRemoteCompactionData,
27
+ requestOpenAiRemoteCompaction,
28
+ requestRemoteCompaction,
29
+ shouldUseOpenAiRemoteCompaction,
30
+ withOpenAiRemoteCompactionPreserveData,
31
+ } from "./openai";
32
+ import autoHandoffThresholdFocusPrompt from "./prompts/auto-handoff-threshold-focus.md" with { type: "text" };
33
+ import compactionShortSummaryPrompt from "./prompts/compaction-short-summary.md" with { type: "text" };
34
+ import compactionSummaryPrompt from "./prompts/compaction-summary.md" with { type: "text" };
35
+ import compactionTurnPrefixPrompt from "./prompts/compaction-turn-prefix.md" with { type: "text" };
36
+ import compactionUpdateSummaryPrompt from "./prompts/compaction-update-summary.md" with { type: "text" };
37
+ import handoffDocumentPrompt from "./prompts/handoff-document.md" with { type: "text" };
38
+
39
+ import {
40
+ computeFileLists,
41
+ createFileOps,
42
+ extractFileOpsFromMessage,
43
+ type FileOperations,
44
+ SUMMARIZATION_SYSTEM_PROMPT,
45
+ serializeConversation,
46
+ upsertFileOperations,
47
+ } from "./utils";
48
+
49
+ // ============================================================================
50
+ // File Operation Tracking
51
+ // ============================================================================
52
+
53
/**
 * File-tracking payload kept in `CompactionEntry.details`.
 *
 * `extractFileOperations` reads both lists back from the previous compaction
 * entry so file history carries over from one compaction to the next.
 */
export interface CompactionDetails {
	/** Paths that were read. */
	readFiles: string[];
	/** Paths that were modified. */
	modifiedFiles: string[];
}
58
+
59
/**
 * Collect the file operations relevant to a compaction pass.
 *
 * The result is seeded from the previous compaction entry's `details` (only
 * when that entry was not produced by an extension) and then extended with
 * whatever `extractFileOpsFromMessage` finds in each of `messages`.
 *
 * @param messages - Messages to scan for file operations.
 * @param entries - Session entries that `prevCompactionIndex` indexes into.
 * @param prevCompactionIndex - Index of the previous compaction entry, or a
 *   negative value when there is none.
 * @returns The accumulated file operations.
 */
function extractFileOperations(
	messages: AgentMessage[],
	entries: SessionEntry[],
	prevCompactionIndex: number,
): FileOperations {
	const fileOps = createFileOps();

	// Look the entry up defensively: an out-of-range index, or an index that
	// points at some other entry type, must neither crash nor be misread as
	// compaction details.
	const candidate = prevCompactionIndex >= 0 ? entries[prevCompactionIndex] : undefined;
	if (candidate?.type === "compaction") {
		const prevCompaction = candidate as CompactionEntry;
		// Extension-produced compactions own the shape of `details`, so only
		// details written by the built-in compaction are trusted here.
		if (!prevCompaction.fromExtension && prevCompaction.details) {
			const details = prevCompaction.details as CompactionDetails;
			if (Array.isArray(details.readFiles)) {
				for (const file of details.readFiles) fileOps.read.add(file);
			}
			if (Array.isArray(details.modifiedFiles)) {
				for (const file of details.modifiedFiles) fileOps.edited.add(file);
			}
		}
	}

	// Fold in operations found in the messages themselves.
	for (const msg of messages) {
		extractFileOpsFromMessage(msg, fileOps);
	}

	return fileOps;
}
90
+
91
+ // ============================================================================
92
+ // Message Extraction
93
+ // ============================================================================
94
+
95
/**
 * Turn a session entry into the `AgentMessage` it stands for, if any.
 *
 * - `message` entries yield their stored message unchanged.
 * - `custom_message` entries are rebuilt through `createCustomMessage`.
 * - `branch_summary` entries are rebuilt through `createBranchSummaryMessage`.
 *
 * Every other entry type yields `undefined`.
 */
function getMessageFromEntry(entry: SessionEntry): AgentMessage | undefined {
	switch (entry.type) {
		case "message":
			return entry.message;
		case "custom_message": {
			const { customType, content, display, details, timestamp, attribution } = entry;
			return createCustomMessage(customType, content, display, details, timestamp, attribution);
		}
		case "branch_summary":
			return createBranchSummaryMessage(entry.summary, entry.fromId, entry.timestamp);
		default:
			return undefined;
	}
}
118
+
119
/**
 * Outcome of a compaction pass.
 *
 * Per the original author's note, the session manager adds `uuid` /
 * `parentUuid` when it persists this result.
 *
 * @typeParam T - Shape of the optional hook-specific `details` payload.
 */
export interface CompactionResult<T = unknown> {
	/** Full summary text. */
	summary: string;
	/** Short PR-style summary intended for display. */
	shortSummary?: string;
	/** Id of the first entry that is kept. */
	firstKeptEntryId: string;
	/** Token count before compaction. */
	tokensBefore: number;
	/** Hook-specific data (e.g. ArtifactIndex, version markers for structured compaction). */
	details?: T;
	/** Hook-provided data to persist alongside the compaction entry. */
	preserveData?: Record<string, unknown>;
}
131
+
132
+ // ============================================================================
133
+ // Types
134
+ // ============================================================================
135
+
136
/** User-facing knobs that control when and how compaction runs. */
export interface CompactionSettings {
	/** Master switch; `shouldCompact` returns `false` whenever this is `false`. */
	enabled: boolean;
	/** Compaction strategy. `"off"` also makes `shouldCompact` return `false`. */
	strategy?: "context-full" | "handoff" | "shake" | "off";
	/**
	 * Trigger threshold as a percentage of the context window. Used by
	 * `resolveThresholdTokens` only when `thresholdTokens` is not a positive
	 * finite number; clamped to 1–99. Non-positive values fall back to the
	 * reserve-based threshold.
	 */
	thresholdPercent?: number;
	/**
	 * Trigger threshold as an absolute token count. A positive finite value
	 * takes priority over `thresholdPercent`.
	 */
	thresholdTokens?: number;
	/** Floor for the reserve computed by `effectiveReserveTokens`. */
	reserveTokens: number;
	/** Token budget for recent history to keep — presumably what callers pass to `findCutPoint`; confirm at the call site. */
	keepRecentTokens: number;
	/** NOTE(review): not consumed in this part of the file; semantics live with the caller. */
	autoContinue?: boolean;
	/** NOTE(review): not consumed in this part of the file; semantics live with the caller. */
	remoteEnabled?: boolean;
	/** NOTE(review): not consumed in this part of the file; presumably the remote compaction endpoint — confirm. */
	remoteEndpoint?: string;
}
147
+
148
/**
 * Baseline compaction settings.
 *
 * Both thresholds default to `-1`, which `resolveThresholdTokens` treats as
 * "unset", so the trigger point falls back to the context window minus the
 * effective reserve.
 */
export const DEFAULT_COMPACTION_SETTINGS: CompactionSettings = {
	enabled: true,
	strategy: "context-full",
	// Non-positive means "not configured".
	thresholdPercent: -1,
	thresholdTokens: -1,
	reserveTokens: 16_384,
	keepRecentTokens: 20_000,
	autoContinue: true,
	remoteEnabled: true,
};
158
+
159
+ // ============================================================================
160
+ // Token calculation
161
+ // ============================================================================
162
+
163
/**
 * Total context tokens represented by a usage record.
 *
 * Prefers the reported `totalTokens`; when that is missing or zero, sums the
 * input, output, cache-read and cache-write components instead.
 */
export function calculateContextTokens(usage: Usage): number {
	if (usage.totalTokens) {
		return usage.totalTokens;
	}
	const { input, output, cacheRead, cacheWrite } = usage;
	return input + output + cacheRead + cacheWrite;
}
170
+
171
/**
 * Prompt-side tokens of a usage record: input plus cache reads and cache
 * writes. When that sum is not positive, falls back to
 * `calculateContextTokens(usage)`.
 */
export function calculatePromptTokens(usage: Usage): number {
	const promptSide = usage.input + usage.cacheRead + usage.cacheWrite;
	return promptSide > 0 ? promptSide : calculateContextTokens(usage);
}
178
+
179
/**
 * Usage attached to an assistant message, when it can be trusted.
 *
 * Returns `undefined` for non-assistant messages, for messages without a
 * `usage` property, and for assistant messages whose `stopReason` is
 * `"aborted"` or `"error"`.
 */
function getAssistantUsage(msg: AgentMessage): Usage | undefined {
	if (msg.role !== "assistant" || !("usage" in msg)) {
		return undefined;
	}
	const { stopReason, usage } = msg as AssistantMessage;
	const interrupted = stopReason === "aborted" || stopReason === "error";
	if (interrupted || !usage) {
		return undefined;
	}
	return usage;
}
192
+
193
/**
 * Scan session entries from newest to oldest and return the first usage
 * record that `getAssistantUsage` accepts (i.e. from an assistant message
 * that was neither aborted nor errored). Returns `undefined` when none exists.
 */
export function getLastAssistantUsage(entries: SessionEntry[]): Usage | undefined {
	let cursor = entries.length;
	while (cursor-- > 0) {
		const candidate = entries[cursor];
		if (candidate.type !== "message") continue;
		const found = getAssistantUsage(candidate.message);
		if (found) return found;
	}
	return undefined;
}
206
+
207
/**
 * Reserve actually applied for a given context window: the larger of 15% of
 * the window (rounded down) and the configured `settings.reserveTokens`.
 */
export function effectiveReserveTokens(contextWindow: number, settings: CompactionSettings): number {
	const proportionalReserve = Math.floor(contextWindow * 0.15);
	const configuredFloor = settings.reserveTokens;
	return Math.max(proportionalReserve, configuredFloor);
}
213
+
214
/**
 * Decide whether compaction should trigger for the current context usage.
 *
 * Never triggers when compaction is disabled, the strategy is `"off"`, or the
 * context window is not positive. Otherwise triggers once `contextTokens`
 * strictly exceeds the threshold from `resolveThresholdTokens`.
 */
export function shouldCompact(contextTokens: number, contextWindow: number, settings: CompactionSettings): boolean {
	const switchedOff = !settings.enabled || settings.strategy === "off";
	if (switchedOff || contextWindow <= 0) {
		return false;
	}
	return contextTokens > resolveThresholdTokens(contextWindow, settings);
}
222
+
223
/**
 * Resolve the token count above which compaction triggers.
 *
 * Precedence:
 * 1. `settings.thresholdTokens`, when it is a positive finite number — raised
 *    to at least 1, then capped at `contextWindow - 1`.
 * 2. `settings.thresholdPercent`, when it is a positive finite number —
 *    clamped to 1–99 and applied to the context window (rounded down).
 * 3. Otherwise the context window minus `effectiveReserveTokens`.
 */
export function resolveThresholdTokens(contextWindow: number, settings: CompactionSettings): number {
	const isConfigured = (value: number | undefined): value is number =>
		typeof value === "number" && Number.isFinite(value) && value > 0;

	const { thresholdTokens: fixedLimit, thresholdPercent: percent } = settings;

	// A fixed token limit wins over the percentage.
	if (isConfigured(fixedLimit)) {
		const atLeastOne = Math.max(1, fixedLimit);
		return Math.min(contextWindow - 1, atLeastOne);
	}

	if (isConfigured(percent)) {
		const boundedPercent = Math.min(99, Math.max(1, percent));
		return Math.floor(contextWindow * (boundedPercent / 100));
	}

	// Nothing configured: leave the effective reserve free.
	return contextWindow - effectiveReserveTokens(contextWindow, settings);
}
239
+
240
+ // ============================================================================
241
+ // Cut point detection
242
+ // ============================================================================
243
+
244
/**
 * Flat token charge applied per image block by `estimateTokens`. Image
 * content is never handed to the tokenizer, so a fixed estimate is used
 * instead (chosen by the original author to approximate what providers
 * typically bill for inline images).
 */
const IMAGE_TOKEN_ESTIMATE = 1_200;
249
+
250
/**
 * Estimate the token count of a single message.
 *
 * Text is gathered into fragments and counted with the native `countTokens`
 * tokenizer (described by the original author as cl100k_base — not Claude's
 * first-party tokenizer, but within roughly 5–10% on English/code text).
 * Image blocks are not tokenized; each one is charged `IMAGE_TOKEN_ESTIMATE`.
 *
 * What is counted per role:
 * - `bashExecution`: the command and its output, when they are strings.
 * - `user`: string content, text blocks, and image blocks.
 * - `assistant`: text, thinking, and tool calls (name plus JSON arguments).
 * - `hookMessage` / `toolResult`: string content, text blocks, image blocks.
 * - `branchSummary` / `compactionSummary`: the summary text.
 * - anything else: `0`.
 *
 * @param message - Message to measure.
 * @returns Estimated token count; `0` when nothing countable is present.
 */
export function estimateTokens(message: AgentMessage): number {
	const fragments: string[] = [];
	let extra = 0;

	// `bashExecution` is checked through a loose cast before the typed switch below.
	if ((message as { role?: string }).role === "bashExecution") {
		const bash = message as { command?: unknown; output?: unknown };
		if (typeof bash.command === "string") fragments.push(bash.command);
		if (typeof bash.output === "string") fragments.push(bash.output);
		return fragments.length === 0 ? 0 : countTokens(fragments);
	}

	switch (message.role) {
		case "user": {
			const content = (message as { content: string | Array<{ type: string; text?: string }> }).content;
			if (typeof content === "string") {
				fragments.push(content);
			} else if (Array.isArray(content)) {
				for (const block of content) {
					if (block.type === "text" && block.text) {
						fragments.push(block.text);
					} else if (block.type === "image") {
						// User-attached images occupy context just like images in
						// tool results; charge them the same flat estimate.
						extra += IMAGE_TOKEN_ESTIMATE;
					}
				}
			}
			break;
		}
		case "assistant": {
			const assistant = message as AssistantMessage;
			for (const block of assistant.content) {
				if (block.type === "text") {
					fragments.push(block.text);
				} else if (block.type === "thinking") {
					fragments.push(block.thinking);
				} else if (block.type === "toolCall") {
					fragments.push(block.name);
					// JSON.stringify yields `undefined` for undefined arguments;
					// never hand a non-string fragment to the tokenizer.
					const serializedArgs: unknown = JSON.stringify(block.arguments);
					if (typeof serializedArgs === "string") fragments.push(serializedArgs);
				}
			}
			break;
		}
		case "hookMessage":
		case "toolResult": {
			if (typeof message.content === "string") {
				fragments.push(message.content);
			} else {
				for (const block of message.content) {
					if (block.type === "text" && block.text) {
						fragments.push(block.text);
					} else if (block.type === "image") {
						extra += IMAGE_TOKEN_ESTIMATE;
					}
				}
			}
			break;
		}
		case "branchSummary":
		case "compactionSummary": {
			fragments.push(message.summary);
			break;
		}
		default:
			return 0;
	}

	if (fragments.length === 0) return extra;
	return extra + countTokens(fragments);
}
320
+
321
/**
 * Sum the estimated tokens of every entry in `[startIndex, endIndex)` that
 * maps to a message via `getMessageFromEntry`. Entries that do not map to a
 * message contribute nothing.
 */
function estimateEntriesTokens(entries: SessionEntry[], startIndex: number, endIndex: number): number {
	let sum = 0;
	let cursor = startIndex;
	while (cursor < endIndex) {
		const message = getMessageFromEntry(entries[cursor]);
		if (message) sum += estimateTokens(message);
		cursor++;
	}
	return sum;
}
331
+
332
/**
 * List the entry indices in `[startIndex, endIndex)` where history may be cut.
 *
 * An index qualifies when the entry is:
 * - a `message` whose role is `user`, `assistant`, `bashExecution`,
 *   `hookMessage`, `branchSummary` or `compactionSummary`; or
 * - a `branch_summary` or `custom_message` entry.
 *
 * `toolResult` messages never qualify, so a tool result is never separated
 * from the assistant message that precedes it. All other entry types are
 * skipped.
 */
function findValidCutPoints(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
	const cuttableRoles = new Set<string>([
		"bashExecution",
		"hookMessage",
		"branchSummary",
		"compactionSummary",
		"user",
		"assistant",
	]);

	const found: number[] = [];
	for (let idx = startIndex; idx < endIndex; idx++) {
		const current = entries[idx];
		if (current.type === "message") {
			if (cuttableRoles.has(current.message.role as string)) {
				found.push(idx);
			}
			continue;
		}
		// These entry types are user-role messages, hence valid cut points.
		if (current.type === "branch_summary" || current.type === "custom_message") {
			found.push(idx);
		}
	}
	return found;
}
375
+
376
/**
 * Walk backwards from `entryIndex` (inclusive) down to `startIndex`
 * (inclusive) and return the index of the first entry that can start a turn:
 * a `branch_summary` entry, a `custom_message` entry, or a `message` whose
 * role is `user` or `bashExecution`.
 *
 * @returns The turn-start index, or `-1` when none is found in range.
 */
export function findTurnStartIndex(entries: SessionEntry[], entryIndex: number, startIndex: number): number {
	for (let cursor = entryIndex; cursor >= startIndex; cursor--) {
		const candidate = entries[cursor];
		switch (candidate.type) {
			case "branch_summary":
			case "custom_message":
				return cursor;
			case "message": {
				const role = candidate.message.role as string;
				if (role === "user" || role === "bashExecution") {
					return cursor;
				}
				break;
			}
			default:
				break;
		}
	}
	return -1;
}
397
+
398
/** Where `findCutPoint` decided to cut, and whether that cut splits a turn. */
export interface CutPointResult {
	/** Index of the first entry that is kept. */
	firstKeptEntryIndex: number;
	/** Index of the entry that starts the turn being split, or `-1` when no turn is split. */
	turnStartIndex: number;
	/** `true` when the kept region does not begin on a user message and a turn start was found before it. */
	isSplitTurn: boolean;
}
406
+
407
/**
 * Choose where to cut session history so that roughly `keepRecentTokens` of
 * the newest content is kept.
 *
 * Steps:
 * 1. Collect valid cut points in `[startIndex, endIndex)`. With none, the
 *    result keeps everything from `startIndex` and reports no split.
 * 2. Walk backwards from the newest entry, adding the estimated size of each
 *    `message` entry (other entry types are not counted). Once the running
 *    total reaches `keepRecentTokens`, snap to the first valid cut point at
 *    or after that entry. If the budget is never reached, or no cut point
 *    lies at or after that entry, the earliest valid cut point is used.
 * 3. Pull the cut backwards over directly preceding non-message entries,
 *    stopping at a `message` or `compaction` entry, so they stay with the
 *    kept region.
 * 4. If the entry at the final cut is not a `user` message, look up the turn
 *    start with `findTurnStartIndex`; a hit marks the cut as a split turn.
 *
 * @param entries - Session entries.
 * @param startIndex - First index considered (inclusive).
 * @param endIndex - End of the considered range (exclusive).
 * @param keepRecentTokens - Approximate token budget to keep.
 */
export function findCutPoint(
	entries: SessionEntry[],
	startIndex: number,
	endIndex: number,
	keepRecentTokens: number,
): CutPointResult {
	const candidates = findValidCutPoints(entries, startIndex, endIndex);
	if (candidates.length === 0) {
		return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false };
	}

	// Fallback: keep from the earliest valid cut point.
	let keepFrom = candidates[0];

	let runningTotal = 0;
	for (let pos = endIndex - 1; pos >= startIndex; pos--) {
		const current = entries[pos];
		if (current.type !== "message") continue;

		runningTotal += estimateTokens(current.message);
		if (runningTotal >= keepRecentTokens) {
			// Budget reached: snap forward to the nearest valid cut point.
			const snapped = candidates.find(candidate => candidate >= pos);
			if (snapped !== undefined) {
				keepFrom = snapped;
			}
			break;
		}
	}

	// Keep non-message entries that sit directly before the cut together with it.
	while (keepFrom > startIndex) {
		const before = entries[keepFrom - 1];
		if (before.type === "compaction" || before.type === "message") break;
		keepFrom--;
	}

	const firstKept = entries[keepFrom];
	const startsOnUserMessage = firstKept.type === "message" && firstKept.message.role === "user";
	if (startsOnUserMessage) {
		return { firstKeptEntryIndex: keepFrom, turnStartIndex: -1, isSplitTurn: false };
	}

	const turnStartIndex = findTurnStartIndex(entries, keepFrom, startIndex);
	return {
		firstKeptEntryIndex: keepFrom,
		turnStartIndex,
		isSplitTurn: turnStartIndex !== -1,
	};
}
486
+
487
+ // ============================================================================
488
+ // Summarization
489
+ // ============================================================================
490
+
491
/** Rendered prompt `generateSummary` uses when there is no previous summary to merge. */
const SUMMARIZATION_PROMPT = prompt.render(compactionSummaryPrompt);

/** Rendered prompt `generateSummary` uses when a previous summary is being updated. */
const UPDATE_SUMMARIZATION_PROMPT = prompt.render(compactionUpdateSummaryPrompt);

/** Rendered prompt appended by `generateShortSummary`. */
const SHORT_SUMMARY_PROMPT = prompt.render(compactionShortSummaryPrompt);

/** Rendered handoff prompt with no additional focus; returned as-is by `renderHandoffPrompt`. */
const HANDOFF_DOCUMENT_PROMPT = prompt.render(handoffDocumentPrompt);

/** Rendered auto-handoff threshold focus text, exported for callers outside this module. */
export const AUTO_HANDOFF_THRESHOLD_FOCUS = prompt.render(autoHandoffThresholdFocusPrompt);
500
+
501
/**
 * Render extra context lines as a bullet list wrapped in an
 * `<additional-context>` element, followed by a blank line. Returns an empty
 * string when there is nothing to render.
 */
function formatAdditionalContext(context: string[] | undefined): string {
	if (!context?.length) {
		return "";
	}
	const bullets = context.map(item => `- ${item}`);
	return ["<additional-context>", ...bullets, "</additional-context>", "", ""].join("\n");
}
506
+
507
/**
 * Map a non-special `ThinkingLevel` to its `Effort` counterpart.
 *
 * `Off` and `Inherit` are not efforts; callers must filter them out first,
 * and passing either one throws to surface that logic error. Do not replace
 * this mapping with a type cast — `ThinkingLevel` covers distinct concepts
 * and a cast would hide the contract.
 *
 * @param level - Thinking level to convert.
 * @returns The matching effort.
 * @throws Error when `level` is `Off`, `Inherit`, or any value outside the
 *   known set.
 */
function effortFromThinkingLevel(level: ThinkingLevel): Effort {
	switch (level) {
		case ThinkingLevel.Minimal:
			return Effort.Minimal;
		case ThinkingLevel.Low:
			return Effort.Low;
		case ThinkingLevel.Medium:
			return Effort.Medium;
		case ThinkingLevel.High:
			return Effort.High;
		case ThinkingLevel.XHigh:
			return Effort.XHigh;
		case ThinkingLevel.Off:
		case ThinkingLevel.Inherit:
			throw new Error(`effortFromThinkingLevel: ${level} must be handled by caller`);
		default:
			// Unreachable while the switch stays exhaustive. Fail loudly rather
			// than fall off the end and return `undefined` typed as `Effort` if
			// a new level is added or an invalid value arrives at runtime.
			throw new Error(`effortFromThinkingLevel: unsupported thinking level ${String(level)}`);
	}
}
531
+
532
/**
 * Work out the reasoning effort to send on a compaction LLM call.
 *
 * - `ThinkingLevel.Off` → `undefined`, so reasoning is omitted entirely.
 * - `undefined` or `ThinkingLevel.Inherit` → `Effort.High` (the historical
 *   default), clamped for the model.
 * - Any other level → its `Effort` counterpart, clamped for the model.
 *
 * Clamping goes through `clampThinkingLevelForModel`. Per the original
 * author's notes, that helper returns `undefined` for models that do not
 * support a reasoning effort, which downstream results in the parameter being
 * omitted rather than an error.
 */
function resolveCompactionEffort(model: Model, level: ThinkingLevel | undefined): Effort | undefined {
	if (level === ThinkingLevel.Off) {
		return undefined;
	}

	let requested: Effort;
	if (level === undefined || level === ThinkingLevel.Inherit) {
		requested = Effort.High;
	} else {
		requested = effortFromThinkingLevel(level);
	}
	return clampThinkingLevelForModel(model, requested);
}
552
+
553
/**
 * Build the error thrown when a summarization LLM call ends with
 * `stopReason === "error"`.
 *
 * The message is `"<prefix>: <errorMessage>"`, with `"Unknown error"` standing
 * in for a missing message. When the response carries an `errorStatus`, it is
 * copied onto a top-level `status` property so callers can branch on HTTP
 * status (e.g. 401/403) without parsing the message. Per the original
 * author's note (issue #986), the gateway's synthetic `auth_unavailable`
 * error does not populate `errorStatus`, so message-based checks are still
 * needed upstream.
 */
function createSummarizationError(prefix: string, response: AssistantMessage): Error {
	const detail = response.errorMessage || "Unknown error";
	const failure: Error & { status?: number } = new Error(`${prefix}: ${detail}`);

	const { errorStatus } = response;
	if (errorStatus !== undefined) {
		failure.status = errorStatus;
	}
	return failure;
}
569
+
570
/** Optional knobs shared by the summary-generating helpers in this module. */
export interface SummaryOptions {
	/** Replaces the built-in base prompt in `generateSummary`. */
	promptOverride?: string;
	/** Extra lines rendered as a bullet list inside an `<additional-context>` element of the prompt. */
	extraContext?: string[];
	/** When set, the request is sent through `requestRemoteCompaction` instead of the model call. */
	remoteEndpoint?: string;
	/** NOTE(review): not consumed in this part of the file; semantics live with the caller. */
	remoteInstructions?: string;
	/** Forwarded as `initiatorOverride` on the LLM call options. */
	initiatorOverride?: MessageAttribution;
	/** Forwarded as `metadata` on the LLM call options. */
	metadata?: Record<string, unknown>;
	/** Replaces the default `convertToLlm` when turning agent messages into LLM messages. */
	convertToLlm?: ConvertToLlm;
	/**
	 * Optional telemetry handle. When provided, every LLM call emitted during
	 * compaction is wrapped in an OTEL chat span tagged with
	 * `prometheus.gen_ai.oneshot.kind` (`compaction_summary`,
	 * `compaction_short_summary`, or `compaction_turn_prefix`). Leaving it
	 * `undefined` keeps the call paths zero-cost.
	 */
	telemetry?: AgentTelemetry;
	/**
	 * Active session thinking level, threaded in so compaction honors the
	 * user's thinking selection instead of always using `Effort.High`.
	 * `undefined` / `ThinkingLevel.Inherit` fall back to that historical
	 * default; `ThinkingLevel.Off` omits reasoning entirely. See
	 * `resolveCompactionEffort` for the conversion contract.
	 */
	thinkingLevel?: ThinkingLevel;
}
599
+
600
/**
 * Generate a summary of `currentMessages`.
 *
 * The conversation is serialized to text and wrapped in `<conversation>` tags
 * so the model summarizes it rather than continuing it. When
 * `previousSummary` is given it is included in `<previous-summary>` tags and
 * the update prompt is used, so the model merges instead of starting over.
 *
 * With `options.remoteEndpoint` set, the prompt is sent through
 * `requestRemoteCompaction` and its `summary` is returned; otherwise the model
 * is called with `maxTokens` set to 80% of `reserveTokens` (rounded down).
 *
 * @param currentMessages - Messages to summarize.
 * @param model - Model used for the non-remote call.
 * @param reserveTokens - Token reserve from which the output budget is derived.
 * @param apiKey - API key for the non-remote call.
 * @param signal - Optional abort signal, forwarded to either call.
 * @param customInstructions - Appended to the prompt as "Additional focus".
 * @param previousSummary - Earlier summary to merge with, if any.
 * @param options - See `SummaryOptions`.
 * @returns The text blocks of the response joined with newlines, or the
 *   remote summary.
 * @throws Error (from `createSummarizationError`) when the model call ends
 *   with `stopReason === "error"`.
 */
export async function generateSummary(
	currentMessages: AgentMessage[],
	model: Model,
	reserveTokens: number,
	apiKey: string,
	signal?: AbortSignal,
	customInstructions?: string,
	previousSummary?: string,
	options?: SummaryOptions,
): Promise<string> {
	// Instruction text: explicit override, else update prompt when merging, else initial prompt.
	const defaultPrompt = previousSummary ? UPDATE_SUMMARIZATION_PROMPT : SUMMARIZATION_PROMPT;
	const chosenPrompt = options?.promptOverride ? options.promptOverride : defaultPrompt;
	const instructions = customInstructions
		? `${chosenPrompt}\n\nAdditional focus: ${customInstructions}`
		: chosenPrompt;

	// Convert first (a caller-provided converter handles custom app messages),
	// then flatten to text.
	const toLlm = options?.convertToLlm ?? convertToLlm;
	const transcript = serializeConversation(toLlm(currentMessages));

	const sections = [`<conversation>\n${transcript}\n</conversation>\n\n`];
	if (previousSummary) {
		sections.push(`<previous-summary>\n${previousSummary}\n</previous-summary>\n\n`);
	}
	sections.push(formatAdditionalContext(options?.extraContext), instructions);
	const promptText = sections.join("");

	if (options?.remoteEndpoint) {
		const { summary } = await requestRemoteCompaction(
			options.remoteEndpoint,
			{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, prompt: promptText },
			signal,
		);
		return summary;
	}

	const response = await instrumentedCompleteSimple(
		model,
		{
			systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
			messages: [
				{
					role: "user" as const,
					content: [{ type: "text" as const, text: promptText }],
					timestamp: Date.now(),
				},
			],
		},
		{
			maxTokens: Math.floor(0.8 * reserveTokens),
			signal,
			apiKey,
			reasoning: resolveCompactionEffort(model, options?.thinkingLevel),
			initiatorOverride: options?.initiatorOverride,
			metadata: options?.metadata,
		},
		{ telemetry: options?.telemetry, oneshotKind: "compaction_summary" },
	);

	if (response.stopReason === "error") {
		throw createSummarizationError("Summarization failed", response);
	}

	const texts: string[] = [];
	for (const part of response.content) {
		if (part.type === "text") texts.push(part.text);
	}
	return texts.join("\n");
}
679
+
680
+ // ============================================================================
681
+ // Handoff generation
682
+ // ============================================================================
683
+
684
/** Inputs for `generateHandoff` beyond the messages, model and API key. */
export interface HandoffOptions {
	/** Live agent system prompt, passed through verbatim (per the original author, so providers hit the cached prefix). */
	systemPrompt: string[];
	/** Live agent tool list, passed for the same reason. The call itself uses `toolChoice: "none"`. */
	tools?: AgentTool<any>[];
	/** Additional focus rendered into the handoff prompt by `renderHandoffPrompt`. */
	customInstructions?: string;
	/** Replaces the default `convertToLlm` when turning agent messages into LLM messages. */
	convertToLlm?: ConvertToLlm;
	/** Forwarded as `initiatorOverride` on the LLM call options. */
	initiatorOverride?: MessageAttribution;
	/** Forwarded as `metadata` on the LLM call options. */
	metadata?: Record<string, unknown>;
	/**
	 * Optional telemetry handle. When provided, the handoff LLM call is wrapped
	 * in an OTEL chat span tagged with
	 * `prometheus.gen_ai.oneshot.kind = "handoff"`.
	 */
	telemetry?: AgentTelemetry;
	/**
	 * Active session thinking level, threaded in so handoff generation honors
	 * the user's thinking selection instead of always using `Effort.High`. See
	 * `resolveCompactionEffort` for the conversion contract.
	 */
	thinkingLevel?: ThinkingLevel;
}
706
+
707
/**
 * Produce the handoff prompt text.
 *
 * Without custom instructions this is the pre-rendered
 * `HANDOFF_DOCUMENT_PROMPT`; with them, the handoff template is rendered again
 * with the instructions supplied as `additionalFocus`.
 */
export function renderHandoffPrompt(customInstructions?: string): string {
	return customInstructions
		? prompt.render(handoffDocumentPrompt, { additionalFocus: customInstructions })
		: HANDOFF_DOCUMENT_PROMPT;
}
713
+
714
/**
 * Ask the model for a handoff document covering `messages`.
 *
 * The converted conversation is sent as-is, followed by one extra user
 * message (attributed to the agent) carrying the handoff prompt. The live
 * system prompt and tool list from `options` are passed along, with
 * `toolChoice: "none"`.
 *
 * @param messages - Conversation to hand off.
 * @param model - Model to call.
 * @param apiKey - API key for the call.
 * @param options - See `HandoffOptions`.
 * @param signal - Optional abort signal.
 * @returns The text blocks of the response joined with newlines.
 * @throws Error (from `createSummarizationError`) when the call ends with
 *   `stopReason === "error"`.
 */
export async function generateHandoff(
	messages: AgentMessage[],
	model: Model,
	apiKey: string,
	options: HandoffOptions,
	signal?: AbortSignal,
): Promise<string> {
	const toLlm = options.convertToLlm ?? convertToLlm;
	const history = toLlm(messages);

	const handoffRequest: Message = {
		role: "user",
		content: [{ type: "text", text: renderHandoffPrompt(options.customInstructions) }],
		attribution: "agent",
		timestamp: Date.now(),
	};
	const requestMessages: Message[] = [...history, handoffRequest];

	const response = await instrumentedCompleteSimple(
		model,
		{
			systemPrompt: options.systemPrompt,
			messages: requestMessages,
			tools: options.tools,
		},
		{
			apiKey,
			signal,
			reasoning: resolveCompactionEffort(model, options.thinkingLevel),
			toolChoice: "none",
			initiatorOverride: options.initiatorOverride,
			metadata: options.metadata,
		},
		{ telemetry: options.telemetry, oneshotKind: "handoff" },
	);

	if (response.stopReason === "error") {
		throw createSummarizationError("Handoff generation failed", response);
	}

	const texts: string[] = [];
	for (const part of response.content) {
		if (part.type === "text") texts.push(part.text);
	}
	return texts.join("\n");
}
759
+
760
/**
 * Produce a short summary of the recent messages, optionally informed by the
 * full history summary.
 *
 * The prompt consists of the serialized conversation, the previous summary
 * (when non-empty), any formatted extra context, and `SHORT_SUMMARY_PROMPT`.
 * If `options.remoteEndpoint` is set the prompt is delegated to
 * `requestRemoteCompaction`; otherwise a local one-shot completion is made
 * with an output budget of `min(512, floor(0.2 * reserveTokens))` tokens.
 *
 * @param recentMessages - Messages to describe.
 * @param historySummary - Summary of older history to include, if any.
 * @param model - Model used for the local completion.
 * @param reserveTokens - Reserve budget the output cap is derived from.
 * @param apiKey - API key forwarded to the local completion.
 * @param signal - Optional abort signal.
 * @param options - Conversion, remote, telemetry and thinking-level options.
 * @returns The remote summary, or the local response's text parts joined with newlines.
 * @throws The error built by `createSummarizationError` when the local
 *   response stops with reason `"error"`.
 */
async function generateShortSummary(
	recentMessages: AgentMessage[],
	historySummary: string | undefined,
	model: Model,
	reserveTokens: number,
	apiKey: string,
	signal?: AbortSignal,
	options?: SummaryOptions,
): Promise<string> {
	const tokenBudget = Math.min(512, Math.floor(0.2 * reserveTokens));
	const toLlm = options?.convertToLlm ?? convertToLlm;
	const transcript = serializeConversation(toLlm(recentMessages));

	const previousBlock = historySummary ? `<previous-summary>\n${historySummary}\n</previous-summary>\n\n` : "";
	const promptText =
		`<conversation>\n${transcript}\n</conversation>\n\n` +
		previousBlock +
		formatAdditionalContext(options?.extraContext) +
		SHORT_SUMMARY_PROMPT;

	// A configured remote endpoint takes precedence over the local model call.
	const remoteEndpoint = options?.remoteEndpoint;
	if (remoteEndpoint) {
		const remote = await requestRemoteCompaction(
			remoteEndpoint,
			{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, prompt: promptText },
			signal,
		);
		return remote.summary;
	}

	const userMessage: Message = {
		role: "user",
		content: [{ type: "text", text: promptText }],
		timestamp: Date.now(),
	};
	const response = await instrumentedCompleteSimple(
		model,
		{ systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT], messages: [userMessage] },
		{
			maxTokens: tokenBudget,
			signal,
			apiKey,
			reasoning: resolveCompactionEffort(model, options?.thinkingLevel),
			initiatorOverride: options?.initiatorOverride,
			metadata: options?.metadata,
		},
		{ telemetry: options?.telemetry, oneshotKind: "compaction_short_summary" },
	);

	if (response.stopReason === "error") {
		throw createSummarizationError("Short summary failed", response);
	}

	const textParts: string[] = [];
	for (const part of response.content) {
		if (part.type === "text") textParts.push(part.text);
	}
	return textParts.join("\n");
}
818
+
819
+ // ============================================================================
820
+ // Compaction Preparation (for hooks)
821
+ // ============================================================================
822
+
823
/**
 * Pre-computed inputs for a compaction run, as returned by
 * `prepareCompaction()` and consumed by `compact()`.
 */
export interface CompactionPreparation {
	/** UUID of first entry to keep */
	firstKeptEntryId: string;
	/** Messages that will be summarized and discarded */
	messagesToSummarize: AgentMessage[];
	/** Messages that will be turned into turn prefix summary (if splitting) */
	turnPrefixMessages: AgentMessage[];
	/** Messages kept in full after compaction (recent history) */
	recentMessages: AgentMessage[];
	/** Whether this is a split turn (cut point in middle of turn) */
	isSplitTurn: boolean;
	/** Context token count derived from the last assistant usage (0 when no usage is available) */
	tokensBefore: number;
	/** Summary from previous compaction, for iterative update */
	previousSummary?: string;
	/** Preserved opaque compaction payload from the previous compaction, if any. */
	previousPreserveData?: Record<string, unknown>;
	/** File operations extracted from messagesToSummarize */
	fileOps: FileOperations;
	/** Compaction settings from settings.jsonl */
	settings: CompactionSettings;
}
844
+
845
/**
 * Work out what a compaction of the given session path would summarize and
 * what it would keep.
 *
 * Only entries after the most recent compaction entry are considered. The
 * keep-recent budget is scaled down when the last reported prompt token count
 * exceeds the local estimate for those entries, then `findCutPoint` picks the
 * first entry to keep.
 *
 * @param pathEntries - Session entries along the active path, oldest first.
 * @param settings - Compaction settings; `keepRecentTokens` drives the cut point.
 * @returns The preparation, or `undefined` when the path already ends in a
 *   compaction, the first kept entry has no id, or nothing would be summarized.
 */
export function prepareCompaction(
	pathEntries: SessionEntry[],
	settings: CompactionSettings,
): CompactionPreparation | undefined {
	const entryCount = pathEntries.length;
	const endsWithCompaction = entryCount > 0 && pathEntries[entryCount - 1].type === "compaction";
	if (endsWithCompaction) {
		return undefined;
	}

	// Scan backwards for the latest compaction entry; ends at -1 when there is none.
	let prevCompactionIndex = entryCount - 1;
	while (prevCompactionIndex >= 0 && pathEntries[prevCompactionIndex].type !== "compaction") {
		prevCompactionIndex--;
	}
	const boundaryStart = prevCompactionIndex + 1;
	const boundaryEnd = entryCount;

	const lastUsage = getLastAssistantUsage(pathEntries);
	const tokensBefore = lastUsage ? calculateContextTokens(lastUsage) : 0;

	// Shrink the keep budget when reported prompt tokens exceed the local estimate.
	let keepRecentTokens = settings.keepRecentTokens;
	if (lastUsage) {
		const estimatedTokens = estimateEntriesTokens(pathEntries, boundaryStart, boundaryEnd);
		const promptTokens = calculatePromptTokens(lastUsage);
		if (estimatedTokens > 0) {
			const ratio = promptTokens / estimatedTokens;
			if (Number.isFinite(ratio) && ratio > 1) {
				keepRecentTokens = Math.max(1, Math.floor(keepRecentTokens / ratio));
			}
		}
	}

	const cutPoint = findCutPoint(pathEntries, boundaryStart, boundaryEnd, keepRecentTokens);

	const firstKeptEntryId = pathEntries[cutPoint.firstKeptEntryIndex]?.id;
	if (!firstKeptEntryId) {
		return undefined; // Session needs migration
	}

	// Gather the messages carried by entries in the half-open range [from, to).
	const collectMessages = (from: number, to: number): AgentMessage[] => {
		const collected: AgentMessage[] = [];
		for (let index = from; index < to; index++) {
			const message = getMessageFromEntry(pathEntries[index]);
			if (message) collected.push(message);
		}
		return collected;
	};

	const { isSplitTurn, turnStartIndex, firstKeptEntryIndex } = cutPoint;
	const historyEnd = isSplitTurn ? turnStartIndex : firstKeptEntryIndex;

	// History that is summarized and then discarded.
	const messagesToSummarize = collectMessages(boundaryStart, historyEnd);
	// Leading part of a split turn, summarized separately.
	const turnPrefixMessages = isSplitTurn ? collectMessages(turnStartIndex, firstKeptEntryIndex) : [];
	// Recent history kept verbatim.
	const recentMessages = collectMessages(firstKeptEntryIndex, boundaryEnd);

	// With nothing to summarize, compaction would be a no-op.
	if (messagesToSummarize.length === 0 && turnPrefixMessages.length === 0) {
		return undefined;
	}

	// Carry the previous compaction's summary and preserved payload forward.
	const prevCompaction =
		prevCompactionIndex >= 0 ? (pathEntries[prevCompactionIndex] as CompactionEntry) : undefined;
	const previousSummary: string | undefined = prevCompaction?.summary;
	const previousPreserveData: Record<string, unknown> | undefined = prevCompaction?.preserveData;

	// File operations from the summarized messages and the previous compaction,
	// plus those in the turn prefix (empty unless the turn is split).
	const fileOps = extractFileOperations(messagesToSummarize, pathEntries, prevCompactionIndex);
	for (const prefixMessage of turnPrefixMessages) {
		extractFileOpsFromMessage(prefixMessage, fileOps);
	}

	return {
		firstKeptEntryId,
		messagesToSummarize,
		turnPrefixMessages,
		recentMessages,
		isSplitTurn,
		tokensBefore,
		previousSummary,
		previousPreserveData,
		fileOps,
		settings,
	};
}
945
+
946
+ // ============================================================================
947
+ // Main compaction function
948
+ // ============================================================================
949
+
950
/** Turn-prefix summarization prompt, rendered once from the `compactionTurnPrefixPrompt` template with no variables. */
const TURN_PREFIX_SUMMARIZATION_PROMPT = prompt.render(compactionTurnPrefixPrompt);
951
+
952
/**
 * Generate summaries for compaction using prepared data.
 * Returns CompactionResult - SessionManager adds id/parentId when saving.
 *
 * Steps:
 * 1. Optionally request OpenAI remote compaction (best effort; a failure is
 *    logged and local summarization continues).
 * 2. Summarize the discarded history and, for a split turn, the turn prefix.
 * 3. Generate a short summary of the kept messages.
 * 4. Upsert the read/modified file lists into the summary.
 *
 * @param preparation - Pre-calculated preparation from prepareCompaction()
 * @param model - Model used for all summarization calls
 * @param apiKey - API key forwarded to every model call
 * @param customInstructions - Optional custom focus for the summary
 * @param signal - Optional abort signal forwarded to every model call
 * @param options - Optional overrides (prompt, extra context, converter, telemetry, thinking level, ...)
 * @returns The compaction result to persist
 * @throws Error when the preparation carries no first-kept entry id, or when a
 *   summarization call fails
 */
export async function compact(
	preparation: CompactionPreparation,
	model: Model,
	apiKey: string,
	customInstructions?: string,
	signal?: AbortSignal,
	options?: SummaryOptions,
): Promise<CompactionResult> {
	const {
		firstKeptEntryId,
		messagesToSummarize,
		turnPrefixMessages,
		recentMessages,
		isSplitTurn,
		tokensBefore,
		previousSummary,
		previousPreserveData,
		fileOps,
		settings,
	} = preparation;

	// Validate up front so an unusable preparation fails before any model
	// requests are spent on it.
	if (!firstKeptEntryId) {
		throw new Error("First kept entry has no ID - session may need migration");
	}

	const summaryOptions: SummaryOptions = {
		promptOverride: options?.promptOverride,
		extraContext: options?.extraContext,
		remoteEndpoint: settings.remoteEnabled === false ? undefined : settings.remoteEndpoint,
		remoteInstructions: options?.remoteInstructions,
		initiatorOverride: options?.initiatorOverride,
		metadata: options?.metadata,
		convertToLlm: options?.convertToLlm,
		telemetry: options?.telemetry,
		// Honor /model thinking selection on every fan-out summarizer.
		// Without this propagation, generateSummary / generateTurnPrefixSummary
		// see options?.thinkingLevel === undefined and resolveCompactionEffort
		// silently falls back to Effort.High — the same defect e07b47ee4 fixed
		// at the call sites, leaked back in here. See resolveCompactionEffort.
		thinkingLevel: options?.thinkingLevel,
	};

	let preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, undefined);
	if (settings.remoteEnabled !== false && shouldUseOpenAiRemoteCompaction(model)) {
		const previousRemoteCompaction = getPreservedOpenAiRemoteCompactionData(previousPreserveData);
		const remoteMessages = [...messagesToSummarize, ...turnPrefixMessages, ...recentMessages];
		// Replacement history is only reused when it came from the same provider.
		const previousReplacementHistory =
			previousRemoteCompaction?.provider === model.provider
				? previousRemoteCompaction.replacementHistory
				: undefined;
		const remoteHistory = buildOpenAiNativeHistory(
			(summaryOptions.convertToLlm ?? convertToLlm)(remoteMessages),
			model,
			previousReplacementHistory,
		);
		if (remoteHistory.length > 0) {
			try {
				const remote = await requestOpenAiRemoteCompaction(
					model,
					apiKey,
					remoteHistory,
					summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
					signal,
				);
				preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, remote);
			} catch (err) {
				// Deliberately best effort: local summarization below still runs.
				logger.warn("OpenAI remote compaction failed, falling back to local summarization", {
					error: err instanceof Error ? err.message : String(err),
					model: model.id,
					provider: model.provider,
				});
			}
		}
	}

	// Generate summaries (can be parallel if both needed) and merge into one
	let summary: string;

	if (isSplitTurn && turnPrefixMessages.length > 0) {
		// Generate both summaries in parallel
		const [historyResult, turnPrefixResult] = await Promise.all([
			messagesToSummarize.length > 0
				? generateSummary(
						messagesToSummarize,
						model,
						settings.reserveTokens,
						apiKey,
						signal,
						customInstructions,
						previousSummary,
						summaryOptions,
					)
				: // No new history before the split turn: carry the previous
					// compaction summary forward instead of discarding it. `||`
					// mirrors the truthiness check of the non-split branch below.
					Promise.resolve(previousSummary || "No prior history."),
			generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal, summaryOptions),
		]);
		// Merge into single summary
		summary = `${historyResult}\n\n---\n\n**Turn Context (split turn):**\n\n${turnPrefixResult}`;
	} else if (messagesToSummarize.length > 0) {
		// Generate history summary from messages to summarize
		summary = await generateSummary(
			messagesToSummarize,
			model,
			settings.reserveTokens,
			apiKey,
			signal,
			customInstructions,
			previousSummary,
			summaryOptions,
		);
	} else if (previousSummary) {
		// No new messages to summarize, preserve previous summary
		summary = previousSummary;
	} else {
		// No messages and no previous summary
		summary = "No prior history.";
	}

	const shortSummary = await generateShortSummary(
		recentMessages,
		summary,
		model,
		settings.reserveTokens,
		apiKey,
		signal,
		{
			extraContext: options?.extraContext,
			remoteEndpoint: summaryOptions.remoteEndpoint,
			initiatorOverride: summaryOptions.initiatorOverride,
			metadata: summaryOptions.metadata,
			// Use the same message converter as the other summarizers so a
			// caller-supplied converter is honored here too.
			convertToLlm: summaryOptions.convertToLlm,
			telemetry: summaryOptions.telemetry,
			// Same propagation as summaryOptions above — generateShortSummary
			// resolves its own reasoning via resolveCompactionEffort.
			thinkingLevel: options?.thinkingLevel,
		},
	);

	// Compute file lists and append to summary
	const { readFiles, modifiedFiles } = computeFileLists(fileOps);
	summary = upsertFileOperations(summary, readFiles, modifiedFiles);

	return {
		summary,
		shortSummary,
		firstKeptEntryId,
		tokensBefore,
		details: { readFiles, modifiedFiles } as CompactionDetails,
		preserveData,
	};
}
1108
+
1109
/**
 * Summarize the leading part of a turn that is being split by compaction.
 *
 * The messages are converted and serialized into a `<conversation>` block,
 * followed by `TURN_PREFIX_SUMMARIZATION_PROMPT`, and sent as a single user
 * message with an output budget of `floor(0.5 * reserveTokens)` tokens.
 *
 * @param messages - Turn-prefix messages to summarize.
 * @param model - Model used for the completion.
 * @param reserveTokens - Reserve budget the output cap is derived from.
 * @param apiKey - API key forwarded to the completion call.
 * @param signal - Optional abort signal.
 * @param options - Conversion, telemetry and thinking-level options.
 * @returns The response's text parts joined with newlines.
 * @throws The error built by `createSummarizationError` when the response
 *   stops with reason `"error"`.
 */
async function generateTurnPrefixSummary(
	messages: AgentMessage[],
	model: Model,
	reserveTokens: number,
	apiKey: string,
	signal?: AbortSignal,
	options?: SummaryOptions,
): Promise<string> {
	// The turn prefix gets a smaller budget: half of the reserve.
	const tokenBudget = Math.floor(0.5 * reserveTokens);

	const toLlm = options?.convertToLlm ?? convertToLlm;
	const transcript = serializeConversation(toLlm(messages));
	const promptText = `<conversation>\n${transcript}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`;

	const response = await instrumentedCompleteSimple(
		model,
		{
			systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
			messages: [
				{
					role: "user" as const,
					content: [{ type: "text" as const, text: promptText }],
					timestamp: Date.now(),
				},
			],
		},
		{
			maxTokens: tokenBudget,
			signal,
			apiKey,
			reasoning: resolveCompactionEffort(model, options?.thinkingLevel),
			initiatorOverride: options?.initiatorOverride,
			metadata: options?.metadata,
		},
		{ telemetry: options?.telemetry, oneshotKind: "compaction_turn_prefix" },
	);

	if (response.stopReason === "error") {
		throw createSummarizationError("Turn prefix summarization failed", response);
	}

	const textParts: string[] = [];
	for (const part of response.content) {
		if (part.type === "text") textParts.push(part.text);
	}
	return textParts.join("\n");
}