@gajae-code/agent-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +482 -0
  2. package/README.md +473 -0
  3. package/dist/types/agent-loop.d.ts +55 -0
  4. package/dist/types/agent.d.ts +334 -0
  5. package/dist/types/append-only-context.d.ts +113 -0
  6. package/dist/types/compaction/branch-summarization.d.ts +94 -0
  7. package/dist/types/compaction/compaction.d.ts +166 -0
  8. package/dist/types/compaction/entries.d.ts +103 -0
  9. package/dist/types/compaction/errors.d.ts +26 -0
  10. package/dist/types/compaction/index.d.ts +11 -0
  11. package/dist/types/compaction/messages.d.ts +61 -0
  12. package/dist/types/compaction/openai.d.ts +58 -0
  13. package/dist/types/compaction/pruning.d.ts +18 -0
  14. package/dist/types/compaction/utils.d.ts +32 -0
  15. package/dist/types/compaction.d.ts +1 -0
  16. package/dist/types/harmony-leak.d.ts +99 -0
  17. package/dist/types/index.d.ts +10 -0
  18. package/dist/types/proxy.d.ts +84 -0
  19. package/dist/types/run-collector.d.ts +196 -0
  20. package/dist/types/telemetry.d.ts +588 -0
  21. package/dist/types/thinking.d.ts +17 -0
  22. package/dist/types/types.d.ts +407 -0
  23. package/package.json +75 -0
  24. package/src/agent-loop.ts +1279 -0
  25. package/src/agent.ts +1399 -0
  26. package/src/append-only-context.ts +297 -0
  27. package/src/compaction/branch-summarization.ts +339 -0
  28. package/src/compaction/compaction.ts +1065 -0
  29. package/src/compaction/entries.ts +133 -0
  30. package/src/compaction/errors.ts +31 -0
  31. package/src/compaction/index.ts +12 -0
  32. package/src/compaction/messages.ts +212 -0
  33. package/src/compaction/openai.ts +552 -0
  34. package/src/compaction/prompts/auto-handoff-threshold-focus.md +1 -0
  35. package/src/compaction/prompts/branch-summary-context.md +5 -0
  36. package/src/compaction/prompts/branch-summary-preamble.md +2 -0
  37. package/src/compaction/prompts/branch-summary.md +30 -0
  38. package/src/compaction/prompts/compaction-short-summary.md +9 -0
  39. package/src/compaction/prompts/compaction-summary-context.md +5 -0
  40. package/src/compaction/prompts/compaction-summary.md +38 -0
  41. package/src/compaction/prompts/compaction-turn-prefix.md +17 -0
  42. package/src/compaction/prompts/compaction-update-summary.md +45 -0
  43. package/src/compaction/prompts/file-operations.md +10 -0
  44. package/src/compaction/prompts/handoff-document.md +49 -0
  45. package/src/compaction/prompts/summarization-system.md +3 -0
  46. package/src/compaction/pruning.ts +92 -0
  47. package/src/compaction/utils.ts +185 -0
  48. package/src/compaction.ts +1 -0
  49. package/src/harmony-leak.ts +427 -0
  50. package/src/index.ts +19 -0
  51. package/src/proxy.ts +326 -0
  52. package/src/run-collector.ts +631 -0
  53. package/src/telemetry.ts +2018 -0
  54. package/src/thinking.ts +19 -0
  55. package/src/types.ts +467 -0
@@ -0,0 +1,1065 @@
1
+ /**
2
+ * Context compaction for long sessions.
3
+ *
4
+ * Pure functions for compaction logic. The session manager handles I/O,
5
+ * and after compaction the session is reloaded.
6
+ */
7
+
8
+ import {
9
+ type AssistantMessage,
10
+ Effort,
11
+ type Message,
12
+ type MessageAttribution,
13
+ type Model,
14
+ type Usage,
15
+ } from "@gajae-code/ai";
16
+ import { countTokens } from "@gajae-code/natives";
17
+ import { logger, prompt } from "@gajae-code/utils";
18
+ import { type AgentTelemetry, instrumentedCompleteSimple } from "../telemetry";
19
+ import type { AgentMessage, AgentTool } from "../types";
20
+ import type { CompactionEntry, SessionEntry } from "./entries";
21
+ import { type ConvertToLlm, convertToLlm, createBranchSummaryMessage, createCustomMessage } from "./messages";
22
+ import {
23
+ buildOpenAiNativeHistory,
24
+ getPreservedOpenAiRemoteCompactionData,
25
+ requestOpenAiRemoteCompaction,
26
+ requestRemoteCompaction,
27
+ shouldUseOpenAiRemoteCompaction,
28
+ withOpenAiRemoteCompactionPreserveData,
29
+ } from "./openai";
30
+ import autoHandoffThresholdFocusPrompt from "./prompts/auto-handoff-threshold-focus.md" with { type: "text" };
31
+ import compactionShortSummaryPrompt from "./prompts/compaction-short-summary.md" with { type: "text" };
32
+ import compactionSummaryPrompt from "./prompts/compaction-summary.md" with { type: "text" };
33
+ import compactionTurnPrefixPrompt from "./prompts/compaction-turn-prefix.md" with { type: "text" };
34
+ import compactionUpdateSummaryPrompt from "./prompts/compaction-update-summary.md" with { type: "text" };
35
+ import handoffDocumentPrompt from "./prompts/handoff-document.md" with { type: "text" };
36
+
37
+ import {
38
+ computeFileLists,
39
+ createFileOps,
40
+ extractFileOpsFromMessage,
41
+ type FileOperations,
42
+ SUMMARIZATION_SYSTEM_PROMPT,
43
+ serializeConversation,
44
+ upsertFileOperations,
45
+ } from "./utils";
46
+
47
+ // ============================================================================
48
+ // File Operation Tracking
49
+ // ============================================================================
50
+
51
/**
 * Details stored in CompactionEntry.details for file tracking.
 *
 * extractFileOperations() reads these lists back from the previous compaction
 * entry so that file tracking carries over into the next compaction.
 */
export interface CompactionDetails {
	/** Paths that are seeded into the `read` set on the next compaction. */
	readFiles: string[];
	/** Paths that are seeded into the `edited` set on the next compaction. */
	modifiedFiles: string[];
}
56
+
57
/**
 * Build the file-operation record for a compaction pass.
 *
 * The record is seeded from the read/modified lists stored on the previous
 * compaction entry (skipped when that entry came from an extension or has no
 * details), then every message is scanned for further file operations.
 *
 * @param messages - Messages whose tool calls are scanned for file operations.
 * @param entries - Session entries; only `entries[prevCompactionIndex]` is read.
 * @param prevCompactionIndex - Index of the previous compaction entry, or a
 *   negative value when there is none.
 * @returns The accumulated file operations.
 */
function extractFileOperations(
	messages: AgentMessage[],
	entries: SessionEntry[],
	prevCompactionIndex: number,
): FileOperations {
	const ops = createFileOps();

	if (prevCompactionIndex >= 0) {
		const previous = entries[prevCompactionIndex] as CompactionEntry;
		const carriesDetails = !previous.fromExtension && previous.details;
		if (carriesDetails) {
			const stored = previous.details as CompactionDetails;
			// Persisted details are untyped at rest, so each list is validated before use.
			if (Array.isArray(stored.readFiles)) {
				stored.readFiles.forEach(path => {
					ops.read.add(path);
				});
			}
			if (Array.isArray(stored.modifiedFiles)) {
				stored.modifiedFiles.forEach(path => {
					ops.edited.add(path);
				});
			}
		}
	}

	// Fold in operations found in the messages themselves.
	messages.forEach(message => {
		extractFileOpsFromMessage(message, ops);
	});

	return ops;
}
88
+
89
+ // ============================================================================
90
+ // Message Extraction
91
+ // ============================================================================
92
+
93
/**
 * Map a session entry to the AgentMessage it contributes, if any.
 *
 * - `message` entries yield their stored message unchanged.
 * - `custom_message` and `branch_summary` entries are converted through the
 *   corresponding message factories.
 * - Every other entry type yields `undefined`.
 */
function getMessageFromEntry(entry: SessionEntry): AgentMessage | undefined {
	switch (entry.type) {
		case "message":
			return entry.message;
		case "custom_message": {
			const { customType, content, display, details, timestamp, attribution } = entry;
			return createCustomMessage(customType, content, display, details, timestamp, attribution);
		}
		case "branch_summary":
			return createBranchSummaryMessage(entry.summary, entry.fromId, entry.timestamp);
		default:
			return undefined;
	}
}
116
+
117
/**
 * Result from compact() - SessionManager adds uuid/parentUuid when saving.
 *
 * @typeParam T - Shape of the optional hook-specific `details` payload.
 */
export interface CompactionResult<T = unknown> {
	/** Full summary text that stands in for the compacted history. */
	summary: string;
	/** Short PR-style summary for display purposes. */
	shortSummary?: string;
	/** Id of the first session entry that is kept (not summarized). */
	firstKeptEntryId: string;
	/** Context token count before compaction. NOTE(review): presumably copied from CompactionPreparation.tokensBefore — confirm in compact(). */
	tokensBefore: number;
	/** Hook-specific data (e.g., ArtifactIndex, version markers for structured compaction) */
	details?: T;
	/** Hook-provided data to persist alongside compaction entry. */
	preserveData?: Record<string, unknown>;
}
129
+
130
+ // ============================================================================
131
+ // Types
132
+ // ============================================================================
133
+
134
/** User-configurable knobs controlling when and how compaction runs. */
export interface CompactionSettings {
	/** Master switch; shouldCompact() returns false when this is false. */
	enabled: boolean;
	/** Compaction strategy. `"off"` makes shouldCompact() return false; handling of the other values is outside this section. */
	strategy?: "context-full" | "handoff" | "off";
	/**
	 * Threshold as a percentage of the context window. Only used when
	 * `thresholdTokens` is not a positive finite number; clamped to 1..99.
	 * Non-positive or non-finite values mean "unset".
	 */
	thresholdPercent?: number;
	/**
	 * Fixed token threshold. Takes priority over `thresholdPercent` when it is a
	 * positive finite number; clamped to at most `contextWindow - 1`.
	 */
	thresholdTokens?: number;
	/** Floor for the reserve computed by effectiveReserveTokens(). */
	reserveTokens: number;
	/** Approximate number of recent tokens to keep un-summarized (drives findCutPoint via prepareCompaction). */
	keepRecentTokens: number;
	/** NOTE(review): not consumed in this section — presumably resumes the agent after compaction; confirm against the caller. */
	autoContinue?: boolean;
	/** NOTE(review): not consumed in this section — presumably enables provider/remote compaction; confirm against the caller. */
	remoteEnabled?: boolean;
	/** NOTE(review): not consumed in this section — presumably forwarded as SummaryOptions.remoteEndpoint; confirm against the caller. */
	remoteEndpoint?: string;
}
145
+
146
/**
 * Default compaction settings.
 *
 * Both thresholds default to -1, which resolveThresholdTokens() treats as
 * "unset": the trigger point then becomes the context window minus the
 * effective reserve.
 */
export const DEFAULT_COMPACTION_SETTINGS: CompactionSettings = {
	enabled: true,
	strategy: "context-full",
	// -1 = unset; fall through to the reserve-based threshold.
	thresholdPercent: -1,
	thresholdTokens: -1,
	// Floor for the reserve; effectiveReserveTokens() may raise it to 15% of the window.
	reserveTokens: 16384,
	keepRecentTokens: 20000,
	autoContinue: true,
	remoteEnabled: true,
};
156
+
157
+ // ============================================================================
158
+ // Token calculation
159
+ // ============================================================================
160
+
161
/**
 * Total context tokens represented by a usage record.
 *
 * Prefers the provider-reported `totalTokens`; when that is missing or zero,
 * sums input, output, cache-read and cache-write tokens instead.
 */
export function calculateContextTokens(usage: Usage): number {
	const reportedTotal = usage.totalTokens;
	if (reportedTotal) {
		return reportedTotal;
	}
	const { input, output, cacheRead, cacheWrite } = usage;
	return input + output + cacheRead + cacheWrite;
}
168
+
169
/**
 * Prompt-side token count of a usage record: input plus cache reads and writes.
 *
 * When that sum is not positive, falls back to calculateContextTokens() so a
 * usage record that only reports a total still yields a usable number.
 */
export function calculatePromptTokens(usage: Usage): number {
	const promptSide = usage.input + usage.cacheRead + usage.cacheWrite;
	return promptSide > 0 ? promptSide : calculateContextTokens(usage);
}
176
+
177
/**
 * Return the usage attached to an assistant message, or `undefined`.
 *
 * Non-assistant messages, messages without a `usage` property, and messages
 * whose stop reason is `"aborted"` or `"error"` all yield `undefined`, since
 * their usage data is not considered valid.
 */
function getAssistantUsage(msg: AgentMessage): Usage | undefined {
	if (msg.role !== "assistant" || !("usage" in msg)) {
		return undefined;
	}
	const { stopReason, usage } = msg as AssistantMessage;
	const endedAbnormally = stopReason === "aborted" || stopReason === "error";
	if (endedAbnormally || !usage) {
		return undefined;
	}
	return usage;
}
190
+
191
/**
 * Scan session entries from newest to oldest and return the first valid
 * assistant usage found (aborted and errored assistant messages are skipped).
 *
 * @returns The most recent usable usage record, or `undefined` when none exists.
 */
export function getLastAssistantUsage(entries: SessionEntry[]): Usage | undefined {
	let cursor = entries.length;
	while (cursor-- > 0) {
		const candidate = entries[cursor];
		if (candidate.type !== "message") continue;
		const found = getAssistantUsage(candidate.message);
		if (found) {
			return found;
		}
	}
	return undefined;
}
204
+
205
/**
 * Reserve actually applied for a given context window: the larger of 15% of
 * the window (rounded down) and the configured `reserveTokens` floor.
 */
export function effectiveReserveTokens(contextWindow: number, settings: CompactionSettings): number {
	const proportionalReserve = Math.floor(contextWindow * 0.15);
	const configuredFloor = settings.reserveTokens;
	return Math.max(proportionalReserve, configuredFloor);
}
211
+
212
/**
 * Decide whether compaction should trigger for the current context usage.
 *
 * Returns false when compaction is disabled, the strategy is `"off"`, or the
 * context window is not positive. Otherwise compares `contextTokens` against
 * the threshold from resolveThresholdTokens() (strictly greater triggers).
 */
export function shouldCompact(contextTokens: number, contextWindow: number, settings: CompactionSettings): boolean {
	const switchedOff = !settings.enabled || settings.strategy === "off";
	if (switchedOff || contextWindow <= 0) {
		return false;
	}
	const limit = resolveThresholdTokens(contextWindow, settings);
	return contextTokens > limit;
}
220
+
221
/**
 * Resolve the token count above which compaction triggers.
 *
 * Precedence:
 * 1. `thresholdTokens`, when it is a positive finite number — clamped so it is
 *    at least 1 and at most `contextWindow - 1`.
 * 2. `thresholdPercent`, when it is a positive finite number — clamped to
 *    1..99 and applied to the context window (rounded down).
 * 3. Otherwise the context window minus the effective reserve.
 */
export function resolveThresholdTokens(contextWindow: number, settings: CompactionSettings): number {
	const isPositiveFinite = (value: unknown): value is number =>
		typeof value === "number" && Number.isFinite(value) && value > 0;

	const { thresholdTokens, thresholdPercent } = settings;

	// A fixed token limit wins over the percentage.
	if (isPositiveFinite(thresholdTokens)) {
		const atLeastOne = Math.max(1, thresholdTokens);
		return Math.min(contextWindow - 1, atLeastOne);
	}

	if (isPositiveFinite(thresholdPercent)) {
		const boundedPercent = Math.min(99, Math.max(1, thresholdPercent));
		return Math.floor(contextWindow * (boundedPercent / 100));
	}

	// Neither threshold configured: leave the effective reserve free.
	return contextWindow - effectiveReserveTokens(contextWindow, settings);
}
237
+
238
+ // ============================================================================
239
+ // Cut point detection
240
+ // ============================================================================
241
+
242
/**
 * Image content has no tokenizer representation; charge a fixed estimate
 * matching what providers typically bill for inline images.
 *
 * estimateTokens() adds this amount once per image block it counts.
 */
const IMAGE_TOKEN_ESTIMATE = 1200;
247
+
248
/**
 * Estimate token count for a message using cl100k_base via the native
 * tokenizer. This is not Anthropic's first-party tokenizer (Anthropic doesn't
 * publish one) but is within ~5–10% across English/code text.
 *
 * What is counted per role:
 * - `bashExecution`: the command and output strings.
 * - `user`: string content or text blocks, plus a fixed charge per image block.
 * - `assistant`: text, thinking, and tool-call name + JSON-serialized arguments.
 * - `hookMessage` / `toolResult`: string content or text blocks, plus a fixed
 *   charge per image block.
 * - `branchSummary` / `compactionSummary`: the summary text.
 * - any other role: 0.
 *
 * @param message - Message to size.
 * @returns Estimated token count (tokenizer count plus image surcharges).
 */
export function estimateTokens(message: AgentMessage): number {
	const fragments: string[] = [];
	let extra = 0;
	// bashExecution is checked via a cast because it is handled outside the role switch below.
	if ((message as { role?: string }).role === "bashExecution") {
		const bash = message as { command?: unknown; output?: unknown };
		if (typeof bash.command === "string") fragments.push(bash.command);
		if (typeof bash.output === "string") fragments.push(bash.output);
		return fragments.length === 0 ? 0 : countTokens(fragments);
	}

	switch (message.role) {
		case "user": {
			const content = (message as { content: string | Array<{ type: string; text?: string }> }).content;
			if (typeof content === "string") {
				fragments.push(content);
			} else if (Array.isArray(content)) {
				for (const block of content) {
					if (block.type === "text" && block.text) {
						fragments.push(block.text);
					} else if (block.type === "image") {
						// Charge user-attached images the same fixed estimate as tool-result
						// images; skipping them under-counts image-heavy turns.
						extra += IMAGE_TOKEN_ESTIMATE;
					}
				}
			}
			break;
		}
		case "assistant": {
			const assistant = message as AssistantMessage;
			for (const block of assistant.content) {
				if (block.type === "text") {
					fragments.push(block.text);
				} else if (block.type === "thinking") {
					fragments.push(block.thinking);
				} else if (block.type === "toolCall") {
					fragments.push(block.name);
					fragments.push(JSON.stringify(block.arguments));
				}
			}
			break;
		}
		case "hookMessage":
		case "toolResult": {
			if (typeof message.content === "string") {
				fragments.push(message.content);
			} else {
				for (const block of message.content) {
					if (block.type === "text" && block.text) {
						fragments.push(block.text);
					} else if (block.type === "image") {
						extra += IMAGE_TOKEN_ESTIMATE;
					}
				}
			}
			break;
		}
		case "branchSummary":
		case "compactionSummary": {
			fragments.push(message.summary);
			break;
		}
		default:
			return 0;
	}

	// Skip the tokenizer call when there is no text to count.
	if (fragments.length === 0) return extra;
	return extra + countTokens(fragments);
}
318
+
319
/**
 * Sum the estimated tokens of every entry in `[startIndex, endIndex)` that
 * produces a message; entries that yield no message contribute nothing.
 */
function estimateEntriesTokens(entries: SessionEntry[], startIndex: number, endIndex: number): number {
	let sum = 0;
	let cursor = startIndex;
	while (cursor < endIndex) {
		const message = getMessageFromEntry(entries[cursor]);
		if (message) {
			sum += estimateTokens(message);
		}
		cursor += 1;
	}
	return sum;
}
329
+
330
/**
 * Collect the indices in `[startIndex, endIndex)` where history may be cut.
 *
 * Valid cut points:
 * - `branch_summary` and `custom_message` entries (user-role messages);
 * - `message` entries whose role is user, assistant, bashExecution,
 *   hookMessage, branchSummary or compactionSummary.
 *
 * Tool results are never cut points because they must follow their tool call;
 * cutting at the assistant message that issued the call keeps its results.
 * All other entry types (model/thinking changes, labels, compaction, custom)
 * are not cut points.
 */
function findValidCutPoints(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
	const cuttableRoles = new Set<string>([
		"bashExecution",
		"hookMessage",
		"branchSummary",
		"compactionSummary",
		"user",
		"assistant",
	]);

	const indices: number[] = [];
	for (let idx = startIndex; idx < endIndex; idx++) {
		const current = entries[idx];
		if (current.type === "branch_summary" || current.type === "custom_message") {
			indices.push(idx);
			continue;
		}
		if (current.type !== "message") {
			continue;
		}
		if (cuttableRoles.has(current.message.role as string)) {
			indices.push(idx);
		}
	}
	return indices;
}
373
+
374
/**
 * Walk backwards from `entryIndex` (inclusive) down to `startIndex` (inclusive)
 * and return the index of the entry that opens the surrounding turn.
 *
 * A turn is opened by a `branch_summary` or `custom_message` entry, or by a
 * `message` entry whose role is `user` or `bashExecution`.
 *
 * @returns The turn-start index, or -1 when none is found in the range.
 */
export function findTurnStartIndex(entries: SessionEntry[], entryIndex: number, startIndex: number): number {
	let cursor = entryIndex;
	while (cursor >= startIndex) {
		const candidate = entries[cursor];
		switch (candidate.type) {
			case "branch_summary":
			case "custom_message":
				return cursor;
			case "message": {
				const role = candidate.message.role as string;
				if (role === "user" || role === "bashExecution") {
					return cursor;
				}
				break;
			}
			default:
				break;
		}
		cursor -= 1;
	}
	return -1;
}
395
+
396
/** Outcome of findCutPoint(): where kept history begins and whether a turn is split. */
export interface CutPointResult {
	/** Index of first entry to keep */
	firstKeptEntryIndex: number;
	/** Index of user message that starts the turn being split, or -1 if not splitting */
	turnStartIndex: number;
	/** Whether this cut splits a turn (cut point is not a user message) */
	isSplitTurn: boolean;
}
404
+
405
/**
 * Choose where to cut session history so that roughly `keepRecentTokens` of
 * the newest messages stay un-summarized.
 *
 * Steps:
 * 1. Collect the valid cut points in `[startIndex, endIndex)`. With none, the
 *    result keeps everything from `startIndex` and reports no split.
 * 2. Walk `message` entries from newest to oldest, summing their estimated
 *    size. Once the sum reaches `keepRecentTokens`, pick the first valid cut
 *    point at or after that entry. If the budget is never reached, the first
 *    valid cut point is used.
 * 3. Move the cut earlier over directly preceding non-message entries,
 *    stopping at a message or a compaction entry.
 * 4. Report a split turn when the entry at the cut is not a user message and
 *    a turn start exists at or before it.
 *
 * Cuts can land on user or assistant messages but never on tool results; when
 * the cut is an assistant message with tool calls, its results follow and are kept.
 *
 * @param entries - Session entries along the current path.
 * @param startIndex - First index considered (inclusive).
 * @param endIndex - End of the considered range (exclusive).
 * @param keepRecentTokens - Approximate token budget for kept history.
 */
export function findCutPoint(
	entries: SessionEntry[],
	startIndex: number,
	endIndex: number,
	keepRecentTokens: number,
): CutPointResult {
	const candidates = findValidCutPoints(entries, startIndex, endIndex);
	if (candidates.length === 0) {
		return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false };
	}

	// Fallback when the budget is never reached: keep from the first valid cut point.
	let firstKept = candidates[0];
	let runningTokens = 0;

	for (let i = endIndex - 1; i >= startIndex; i--) {
		const entry = entries[i];
		if (entry.type !== "message") continue;

		runningTokens += estimateTokens(entry.message);
		if (runningTokens >= keepRecentTokens) {
			// Candidates are in ascending order, so the first match is the closest one at or after i.
			const nearest = candidates.find(point => point >= i);
			if (nearest !== undefined) {
				firstKept = nearest;
			}
			break;
		}
	}

	// Pull directly preceding non-message entries (settings changes etc.) into the kept range.
	while (firstKept > startIndex) {
		const precedingType = entries[firstKept - 1].type;
		if (precedingType === "compaction" || precedingType === "message") {
			break;
		}
		firstKept--;
	}

	const keptEntry = entries[firstKept];
	const startsOnUserMessage = keptEntry.type === "message" && keptEntry.message.role === "user";
	const turnStartIndex = startsOnUserMessage ? -1 : findTurnStartIndex(entries, firstKept, startIndex);

	return {
		firstKeptEntryIndex: firstKept,
		turnStartIndex,
		isSplitTurn: !startsOnUserMessage && turnStartIndex !== -1,
	};
}
484
+
485
+ // ============================================================================
486
+ // Summarization
487
+ // ============================================================================
488
+
489
// Prompt templates below are rendered once at module load, without template variables.

/** Instruction prompt used by generateSummary() when there is no previous summary. */
const SUMMARIZATION_PROMPT = prompt.render(compactionSummaryPrompt);

/** Instruction prompt used by generateSummary() when a previous summary must be updated. */
const UPDATE_SUMMARIZATION_PROMPT = prompt.render(compactionUpdateSummaryPrompt);

/** Instruction prompt appended by generateShortSummary(). */
const SHORT_SUMMARY_PROMPT = prompt.render(compactionShortSummaryPrompt);

/** Handoff prompt returned by renderHandoffPrompt() when no custom instructions are given. */
const HANDOFF_DOCUMENT_PROMPT = prompt.render(handoffDocumentPrompt);

/** Rendered auto-handoff threshold focus prompt. NOTE(review): not used in this section — presumably passed as custom instructions by the caller; confirm. */
export const AUTO_HANDOFF_THRESHOLD_FOCUS = prompt.render(autoHandoffThresholdFocusPrompt);
498
+
499
/**
 * Render extra context lines as a bulleted `<additional-context>` block
 * followed by a blank line. Returns an empty string when there is nothing to
 * render.
 */
function formatAdditionalContext(context: string[] | undefined): string {
	if (!context?.length) {
		return "";
	}
	const bulletList = context.map(item => `- ${item}`).join("\n");
	return `<additional-context>\n${bulletList}\n</additional-context>\n\n`;
}
504
+
505
/**
 * Optional knobs for generateSummary(), which summarizes a conversation with
 * the LLM and, when a previous summary is supplied, uses the update prompt to
 * merge into it.
 */
export interface SummaryOptions {
	/** Replaces the built-in summarization prompt (initial or update) when set. */
	promptOverride?: string;
	/** Extra lines rendered into an `<additional-context>` block before the prompt. */
	extraContext?: string[];
	/** When set, the summary is requested from this endpoint instead of calling the model. */
	remoteEndpoint?: string;
	/** NOTE(review): not read in this section — presumably used by the OpenAI remote compaction path; confirm. */
	remoteInstructions?: string;
	/** Forwarded to the LLM call options as `initiatorOverride`. */
	initiatorOverride?: MessageAttribution;
	/** Forwarded to the LLM call options as `metadata`. */
	metadata?: Record<string, unknown>;
	/** Replaces the default AgentMessage-to-LLM converter when set. */
	convertToLlm?: ConvertToLlm;
	/**
	 * Optional telemetry handle. When provided, every LLM call emitted during
	 * compaction is wrapped in an OTEL chat span tagged with
	 * `pi.gen_ai.oneshot.kind` (`compaction_summary`, `compaction_short_summary`,
	 * or `compaction_turn_prefix`). `undefined` keeps the call paths zero-cost.
	 */
	telemetry?: AgentTelemetry;
}
525
+
526
/**
 * Generate a summary of the conversation using the LLM.
 *
 * Prompt selection: `options.promptOverride` if set, otherwise the update
 * prompt when `previousSummary` is provided, otherwise the initial prompt.
 * `customInstructions` are appended as an "Additional focus" line.
 *
 * The conversation is serialized to text and wrapped in tags so the model
 * summarizes it rather than continuing it. When `options.remoteEndpoint` is
 * set the request goes to that endpoint instead of the model.
 *
 * @param currentMessages - Messages to summarize.
 * @param model - Model used for the local summarization call.
 * @param reserveTokens - Reserve budget; 80% of it caps the response length.
 * @param apiKey - API key for the model call.
 * @param signal - Optional abort signal.
 * @param customInstructions - Optional extra focus for the summary.
 * @param previousSummary - Earlier summary to update, if any.
 * @param options - See SummaryOptions.
 * @returns The summary text (text blocks of the response joined by newlines).
 * @throws Error when the model response has stop reason `"error"`.
 */
export async function generateSummary(
	currentMessages: AgentMessage[],
	model: Model,
	reserveTokens: number,
	apiKey: string,
	signal?: AbortSignal,
	customInstructions?: string,
	previousSummary?: string,
	options?: SummaryOptions,
): Promise<string> {
	// Resolve the instruction text: override > update prompt > initial prompt.
	const builtInPrompt = previousSummary ? UPDATE_SUMMARIZATION_PROMPT : SUMMARIZATION_PROMPT;
	const selectedPrompt = options?.promptOverride ? options.promptOverride : builtInPrompt;
	const instructions = customInstructions
		? `${selectedPrompt}\n\nAdditional focus: ${customInstructions}`
		: selectedPrompt;

	// Convert to LLM messages first (handles custom app messages when the caller
	// provides a transformer), then flatten to text.
	const toLlm = options?.convertToLlm ?? convertToLlm;
	const conversationText = serializeConversation(toLlm(currentMessages));

	const sections: string[] = [`<conversation>\n${conversationText}\n</conversation>\n\n`];
	if (previousSummary) {
		sections.push(`<previous-summary>\n${previousSummary}\n</previous-summary>\n\n`);
	}
	sections.push(formatAdditionalContext(options?.extraContext), instructions);
	const promptText = sections.join("");

	if (options?.remoteEndpoint) {
		const { summary } = await requestRemoteCompaction(
			options.remoteEndpoint,
			{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, prompt: promptText },
			signal,
		);
		return summary;
	}

	const response = await instrumentedCompleteSimple(
		model,
		{
			systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
			messages: [
				{
					role: "user" as const,
					content: [{ type: "text" as const, text: promptText }],
					timestamp: Date.now(),
				},
			],
		},
		{
			maxTokens: Math.floor(0.8 * reserveTokens),
			signal,
			apiKey,
			reasoning: Effort.High,
			initiatorOverride: options?.initiatorOverride,
			metadata: options?.metadata,
		},
		{ telemetry: options?.telemetry, oneshotKind: "compaction_summary" },
	);

	if (response.stopReason === "error") {
		throw new Error(`Summarization failed: ${response.errorMessage || "Unknown error"}`);
	}

	// Keep only text blocks; thinking and other block types are dropped.
	const texts: string[] = [];
	for (const part of response.content) {
		if (part.type === "text") texts.push(part.text);
	}
	return texts.join("\n");
}
605
+
606
+ // ============================================================================
607
+ // Handoff generation
608
+ // ============================================================================
609
+
610
/** Options for generateHandoff(). */
export interface HandoffOptions {
	/** Live agent system prompt — passed verbatim so providers hit the cached prefix. */
	systemPrompt: string[];
	/** Live agent tool list — same purpose. Forced to `toolChoice: "none"`. */
	tools?: AgentTool<any>[];
	/** Extra focus text; passed to the handoff prompt template as `additionalFocus`. */
	customInstructions?: string;
	/** Replaces the default AgentMessage-to-LLM converter when set. */
	convertToLlm?: ConvertToLlm;
	/** Forwarded to the LLM call options as `initiatorOverride`. */
	initiatorOverride?: MessageAttribution;
	/** Forwarded to the LLM call options as `metadata`. */
	metadata?: Record<string, unknown>;
	/**
	 * Optional telemetry handle. When provided, the handoff LLM call is
	 * wrapped in an OTEL chat span tagged with `pi.gen_ai.oneshot.kind = "handoff"`.
	 */
	telemetry?: AgentTelemetry;
}
625
+
626
/**
 * Produce the handoff-document prompt.
 *
 * Without custom instructions the pre-rendered prompt is returned as is;
 * otherwise the template is rendered again with the instructions supplied as
 * `additionalFocus`.
 */
export function renderHandoffPrompt(customInstructions?: string): string {
	return customInstructions
		? prompt.render(handoffDocumentPrompt, { additionalFocus: customInstructions })
		: HANDOFF_DOCUMENT_PROMPT;
}
632
+
633
/**
 * Ask the model to write a handoff document for the conversation so far.
 *
 * The converted conversation is sent as real message history (not serialized
 * text), followed by a user message carrying the handoff prompt. The live
 * system prompt and tool list are passed through unchanged, with tool use
 * disabled via `toolChoice: "none"`.
 *
 * @param messages - Conversation to hand off.
 * @param model - Model used for the call.
 * @param apiKey - API key for the model call.
 * @param options - See HandoffOptions.
 * @param signal - Optional abort signal.
 * @returns The handoff text (text blocks of the response joined by newlines).
 * @throws Error when the model response has stop reason `"error"`.
 */
export async function generateHandoff(
	messages: AgentMessage[],
	model: Model,
	apiKey: string,
	options: HandoffOptions,
	signal?: AbortSignal,
): Promise<string> {
	const toLlm = options.convertToLlm ?? convertToLlm;
	const history = toLlm(messages);

	const handoffRequest: Message = {
		role: "user",
		content: [{ type: "text", text: renderHandoffPrompt(options.customInstructions) }],
		attribution: "agent",
		timestamp: Date.now(),
	};
	const requestMessages: Message[] = [...history, handoffRequest];

	const context = {
		systemPrompt: options.systemPrompt,
		messages: requestMessages,
		tools: options.tools,
	};
	const response = await instrumentedCompleteSimple(
		model,
		context,
		{
			apiKey,
			signal,
			reasoning: Effort.High,
			toolChoice: "none",
			initiatorOverride: options.initiatorOverride,
			metadata: options.metadata,
		},
		{ telemetry: options.telemetry, oneshotKind: "handoff" },
	);

	if (response.stopReason === "error") {
		throw new Error(`Handoff generation failed: ${response.errorMessage || "Unknown error"}`);
	}

	// Keep only text blocks of the response.
	const texts: string[] = [];
	for (const part of response.content) {
		if (part.type === "text") texts.push(part.text);
	}
	return texts.join("\n");
}
678
+
679
/**
 * Generate the short display summary for a compaction.
 *
 * The recent messages are serialized into a `<conversation>` block, optionally
 * followed by the history summary and extra context, then the short-summary
 * prompt. When `options.remoteEndpoint` is set the request goes to that
 * endpoint; otherwise the model is called with a response cap of
 * min(512, 20% of `reserveTokens`).
 *
 * @returns The short summary text (text blocks joined by newlines).
 * @throws Error when the model response has stop reason `"error"`.
 */
async function generateShortSummary(
	recentMessages: AgentMessage[],
	historySummary: string | undefined,
	model: Model,
	reserveTokens: number,
	apiKey: string,
	signal?: AbortSignal,
	options?: SummaryOptions,
): Promise<string> {
	const toLlm = options?.convertToLlm ?? convertToLlm;
	const conversationText = serializeConversation(toLlm(recentMessages));

	const sections: string[] = [`<conversation>\n${conversationText}\n</conversation>\n\n`];
	if (historySummary) {
		sections.push(`<previous-summary>\n${historySummary}\n</previous-summary>\n\n`);
	}
	sections.push(formatAdditionalContext(options?.extraContext), SHORT_SUMMARY_PROMPT);
	const promptText = sections.join("");

	if (options?.remoteEndpoint) {
		const { summary } = await requestRemoteCompaction(
			options.remoteEndpoint,
			{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, prompt: promptText },
			signal,
		);
		return summary;
	}

	const responseCap = Math.min(512, Math.floor(0.2 * reserveTokens));
	const response = await instrumentedCompleteSimple(
		model,
		{
			systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
			messages: [{ role: "user", content: [{ type: "text", text: promptText }], timestamp: Date.now() }],
		},
		{
			maxTokens: responseCap,
			signal,
			apiKey,
			reasoning: Effort.High,
			initiatorOverride: options?.initiatorOverride,
			metadata: options?.metadata,
		},
		{ telemetry: options?.telemetry, oneshotKind: "compaction_short_summary" },
	);

	if (response.stopReason === "error") {
		throw new Error(`Short summary failed: ${response.errorMessage || "Unknown error"}`);
	}

	// Keep only text blocks of the response.
	const texts: string[] = [];
	for (const part of response.content) {
		if (part.type === "text") texts.push(part.text);
	}
	return texts.join("\n");
}
737
+
738
+ // ============================================================================
739
+ // Compaction Preparation (for hooks)
740
+ // ============================================================================
741
+
742
/** Everything prepareCompaction() computes up front so hooks or compact() can act on it. */
export interface CompactionPreparation {
	/** UUID of first entry to keep */
	firstKeptEntryId: string;
	/** Messages that will be summarized and discarded */
	messagesToSummarize: AgentMessage[];
	/** Messages that will be turned into turn prefix summary (if splitting) */
	turnPrefixMessages: AgentMessage[];
	/** Messages kept in full after compaction (recent history) */
	recentMessages: AgentMessage[];
	/** Whether this is a split turn (cut point in middle of turn) */
	isSplitTurn: boolean;
	/** Context tokens reported by the last valid assistant usage, or 0 when none exists. */
	tokensBefore: number;
	/** Summary from previous compaction, for iterative update */
	previousSummary?: string;
	/** Preserved opaque compaction payload from the previous compaction, if any. */
	previousPreserveData?: Record<string, unknown>;
	/** File operations extracted from messagesToSummarize */
	fileOps: FileOperations;
	/** Compaction settings from settings.jsonl */
	settings: CompactionSettings;
}
763
+
764
/**
 * Work out what a compaction of the given session path would summarize and keep.
 *
 * @param pathEntries - Session entries along the current path, oldest first.
 * @param settings - Compaction settings; `keepRecentTokens` is the budget for
 *   the part of the history that is kept verbatim.
 * @returns The prepared inputs for `compact()`, or `undefined` when the path
 *   already ends in a compaction entry, the first kept entry has no id, or
 *   there is nothing to summarize.
 */
export function prepareCompaction(
	pathEntries: SessionEntry[],
	settings: CompactionSettings,
): CompactionPreparation | undefined {
	const entryCount = pathEntries.length;

	// A path that already ends in a compaction entry has nothing new to compact.
	if (entryCount > 0 && pathEntries[entryCount - 1].type === "compaction") {
		return undefined;
	}

	// Walk backwards to the most recent compaction entry; ends at -1 when there is none.
	let lastCompactionAt = entryCount - 1;
	while (lastCompactionAt >= 0 && pathEntries[lastCompactionAt].type !== "compaction") {
		lastCompactionAt--;
	}

	// Only entries after the previous compaction are considered.
	const windowStart = lastCompactionAt + 1;
	const windowEnd = entryCount;

	const usage = getLastAssistantUsage(pathEntries);
	const tokensBefore = usage ? calculateContextTokens(usage) : 0;

	// When the reported prompt tokens exceed the local estimate, shrink the
	// keep-recent budget by the same factor (never below 1).
	let recentBudget = settings.keepRecentTokens;
	if (usage) {
		const estimated = estimateEntriesTokens(pathEntries, windowStart, windowEnd);
		const reported = calculatePromptTokens(usage);
		const scale = estimated > 0 ? reported / estimated : 0;
		if (Number.isFinite(scale) && scale > 1) {
			recentBudget = Math.max(1, Math.floor(recentBudget / scale));
		}
	}

	const cut = findCutPoint(pathEntries, windowStart, windowEnd, recentBudget);

	// An entry without an id cannot be referenced as the keep boundary.
	const firstKeptEntryId = pathEntries[cut.firstKeptEntryIndex]?.id;
	if (!firstKeptEntryId) {
		return undefined; // Session needs migration
	}

	/** Collect the messages carried by entries in the half-open range [from, to). */
	const messagesInRange = (from: number, to: number): AgentMessage[] => {
		const collected: AgentMessage[] = [];
		for (let index = from; index < to; index++) {
			const message = getMessageFromEntry(pathEntries[index]);
			if (message) collected.push(message);
		}
		return collected;
	};

	// For a split turn the summarized history stops where that turn starts;
	// the turn's leading part is handled separately as the turn prefix.
	const historyEnd = cut.isSplitTurn ? cut.turnStartIndex : cut.firstKeptEntryIndex;
	const messagesToSummarize = messagesInRange(windowStart, historyEnd);
	const turnPrefixMessages: AgentMessage[] = cut.isSplitTurn
		? messagesInRange(cut.turnStartIndex, cut.firstKeptEntryIndex)
		: [];
	const recentMessages = messagesInRange(cut.firstKeptEntryIndex, windowEnd);

	// Nothing to summarize means compaction would be a no-op.
	if (messagesToSummarize.length === 0 && turnPrefixMessages.length === 0) {
		return undefined;
	}

	// Carry the previous compaction's summary and preserved payload forward.
	const previous = lastCompactionAt >= 0 ? (pathEntries[lastCompactionAt] as CompactionEntry) : undefined;

	// File operations come from the summarized messages and the previous
	// compaction, plus the turn prefix (empty unless the turn is split).
	const fileOps = extractFileOperations(messagesToSummarize, pathEntries, lastCompactionAt);
	for (const prefixMessage of turnPrefixMessages) {
		extractFileOpsFromMessage(prefixMessage, fileOps);
	}

	return {
		firstKeptEntryId,
		messagesToSummarize,
		turnPrefixMessages,
		recentMessages,
		isSplitTurn: cut.isSplitTurn,
		tokensBefore,
		previousSummary: previous?.summary,
		previousPreserveData: previous?.preserveData,
		fileOps,
		settings,
	};
}
864
+
865
+ // ============================================================================
866
+ // Main compaction function
867
+ // ============================================================================
868
+
869
// Instruction text for turn-prefix summaries, rendered once at module load from
// the compactionTurnPrefixPrompt template.
const TURN_PREFIX_SUMMARIZATION_PROMPT = prompt.render(compactionTurnPrefixPrompt);
870
+
871
/**
 * Generate summaries for compaction using prepared data.
 * Returns CompactionResult - SessionManager adds id/parentId when saving.
 *
 * When remote compaction is enabled and applies to `model`, a remote compaction
 * request is attempted first and its result stored in `preserveData`; a failure
 * there is logged and ignored. A textual summary is always produced locally.
 *
 * @param preparation - Pre-calculated preparation from prepareCompaction()
 * @param model - Model used for the summarization requests
 * @param apiKey - API key passed through to the summarization requests
 * @param customInstructions - Optional custom focus for the summary
 * @param signal - Optional abort signal forwarded to the requests
 * @param options - Optional overrides (prompt, extra context, telemetry, ...)
 * @returns Summary, short summary, keep boundary, token count, file lists and
 *   preserved data for the new compaction entry
 * @throws Error if `preparation.firstKeptEntryId` is empty. This is checked
 *   before any request is issued. Errors from the summarization helpers are
 *   not caught here.
 */
export async function compact(
	preparation: CompactionPreparation,
	model: Model,
	apiKey: string,
	customInstructions?: string,
	signal?: AbortSignal,
	options?: SummaryOptions,
): Promise<CompactionResult> {
	const {
		firstKeptEntryId,
		messagesToSummarize,
		turnPrefixMessages,
		recentMessages,
		isSplitTurn,
		tokensBefore,
		previousSummary,
		previousPreserveData,
		fileOps,
		settings,
	} = preparation;

	// Fail fast: without a keep boundary the result is unusable, so do not
	// spend any remote or model requests first.
	if (!firstKeptEntryId) {
		throw new Error("First kept entry has no ID - session may need migration");
	}

	const summaryOptions: SummaryOptions = {
		promptOverride: options?.promptOverride,
		extraContext: options?.extraContext,
		// An explicit `remoteEnabled: false` disables the remote endpoint entirely.
		remoteEndpoint: settings.remoteEnabled === false ? undefined : settings.remoteEndpoint,
		remoteInstructions: options?.remoteInstructions,
		initiatorOverride: options?.initiatorOverride,
		metadata: options?.metadata,
		convertToLlm: options?.convertToLlm,
		telemetry: options?.telemetry,
	};

	let preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, undefined);
	if (settings.remoteEnabled !== false && shouldUseOpenAiRemoteCompaction(model)) {
		const previousRemoteCompaction = getPreservedOpenAiRemoteCompactionData(previousPreserveData);
		const remoteMessages = [...messagesToSummarize, ...turnPrefixMessages, ...recentMessages];
		// Replacement history is only reused when it came from the same provider.
		const previousReplacementHistory =
			previousRemoteCompaction?.provider === model.provider
				? previousRemoteCompaction.replacementHistory
				: undefined;
		const remoteHistory = buildOpenAiNativeHistory(
			(summaryOptions.convertToLlm ?? convertToLlm)(remoteMessages),
			model,
			previousReplacementHistory,
		);
		if (remoteHistory.length > 0) {
			try {
				const remote = await requestOpenAiRemoteCompaction(
					model,
					apiKey,
					remoteHistory,
					summaryOptions.remoteInstructions ?? SUMMARIZATION_SYSTEM_PROMPT,
					signal,
				);
				preserveData = withOpenAiRemoteCompactionPreserveData(previousPreserveData, remote);
			} catch (err) {
				// Best effort: the local summary below is still generated.
				logger.warn("OpenAI remote compaction failed, falling back to local summarization", {
					error: err instanceof Error ? err.message : String(err),
					model: model.id,
					provider: model.provider,
				});
			}
		}
	}

	// History text to use when there are no new messages to summarize: keep the
	// previous compaction's summary if there is one instead of discarding it.
	const carriedSummary = previousSummary || "No prior history.";

	// Generate summaries (can be parallel if both needed) and merge into one
	let summary: string;

	if (isSplitTurn && turnPrefixMessages.length > 0) {
		// Generate both summaries in parallel
		const [historyResult, turnPrefixResult] = await Promise.all([
			messagesToSummarize.length > 0
				? generateSummary(
						messagesToSummarize,
						model,
						settings.reserveTokens,
						apiKey,
						signal,
						customInstructions,
						previousSummary,
						summaryOptions,
					)
				: Promise.resolve(carriedSummary),
			generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal, summaryOptions),
		]);
		// Merge into single summary
		summary = `${historyResult}\n\n---\n\n**Turn Context (split turn):**\n\n${turnPrefixResult}`;
	} else if (messagesToSummarize.length > 0) {
		// Generate history summary from messages to summarize
		summary = await generateSummary(
			messagesToSummarize,
			model,
			settings.reserveTokens,
			apiKey,
			signal,
			customInstructions,
			previousSummary,
			summaryOptions,
		);
	} else {
		// No new messages to summarize: previous summary, or the placeholder
		summary = carriedSummary;
	}

	const shortSummary = await generateShortSummary(
		recentMessages,
		summary,
		model,
		settings.reserveTokens,
		apiKey,
		signal,
		{
			extraContext: options?.extraContext,
			remoteEndpoint: summaryOptions.remoteEndpoint,
			initiatorOverride: summaryOptions.initiatorOverride,
			metadata: summaryOptions.metadata,
			telemetry: summaryOptions.telemetry,
		},
	);

	// Compute file lists and append to summary
	const { readFiles, modifiedFiles } = computeFileLists(fileOps);
	summary = upsertFileOperations(summary, readFiles, modifiedFiles);

	return {
		summary,
		shortSummary,
		firstKeptEntryId,
		tokensBefore,
		details: { readFiles, modifiedFiles } as CompactionDetails,
		preserveData,
	};
}
1018
+
1019
/**
 * Summarize the leading part of a turn that compaction cuts in two.
 *
 * The messages are converted and serialized into a single `<conversation>`
 * block, followed by the turn-prefix prompt, and sent as one user message.
 *
 * @param messages - Turn-prefix messages to condense
 * @param model - Model used for the request
 * @param reserveTokens - Reserve budget; half of it (rounded down) caps the output
 * @param apiKey - API key passed to the request
 * @param signal - Optional abort signal
 * @param options - Optional converter override, initiator, metadata and telemetry
 * @returns The text parts of the response joined with newlines
 * @throws Error when the response reports `stopReason === "error"`
 */
async function generateTurnPrefixSummary(
	messages: AgentMessage[],
	model: Model,
	reserveTokens: number,
	apiKey: string,
	signal?: AbortSignal,
	options?: SummaryOptions,
): Promise<string> {
	// Smaller budget for turn prefix
	const outputBudget = Math.floor(0.5 * reserveTokens);

	const toLlm = options?.convertToLlm ?? convertToLlm;
	const transcript = serializeConversation(toLlm(messages));

	const request = {
		systemPrompt: [SUMMARIZATION_SYSTEM_PROMPT],
		messages: [
			{
				role: "user" as const,
				content: [
					{
						type: "text" as const,
						text: `<conversation>\n${transcript}\n</conversation>\n\n${TURN_PREFIX_SUMMARIZATION_PROMPT}`,
					},
				],
				timestamp: Date.now(),
			},
		],
	};

	const completion = await instrumentedCompleteSimple(
		model,
		request,
		{
			maxTokens: outputBudget,
			signal,
			apiKey,
			reasoning: Effort.High,
			initiatorOverride: options?.initiatorOverride,
			metadata: options?.metadata,
		},
		{ telemetry: options?.telemetry, oneshotKind: "compaction_turn_prefix" },
	);

	if (completion.stopReason === "error") {
		throw new Error(`Turn prefix summarization failed: ${completion.errorMessage || "Unknown error"}`);
	}

	// Keep only the text parts of the response, one per line.
	const textParts: string[] = [];
	for (const part of completion.content) {
		if (part.type === "text") {
			textParts.push(part.text);
		}
	}
	return textParts.join("\n");
}