@agi-cli/server 0.1.112 → 0.1.113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agi-cli/server",
3
- "version": "0.1.112",
3
+ "version": "0.1.113",
4
4
  "description": "HTTP API server for AGI CLI",
5
5
  "type": "module",
6
6
  "main": "./src/index.ts",
@@ -29,8 +29,8 @@
29
29
  "typecheck": "tsc --noEmit"
30
30
  },
31
31
  "dependencies": {
32
- "@agi-cli/sdk": "0.1.112",
33
- "@agi-cli/database": "0.1.112",
32
+ "@agi-cli/sdk": "0.1.113",
33
+ "@agi-cli/database": "0.1.113",
34
34
  "drizzle-orm": "^0.44.5",
35
35
  "hono": "^4.9.9",
36
36
  "zod": "^4.1.8"
@@ -1,29 +1,32 @@
1
1
  /**
2
2
  * Context compaction module for managing token usage.
3
3
  *
4
- * This module implements OpenCode-style context management:
5
- * 1. Detects when context is overflowing (tokens > context_limit - output_limit)
6
- * 2. Prunes old tool outputs by marking them as "compacted"
7
- * 3. History builder returns "[Old tool result content cleared]" for compacted parts
4
+ * This module implements intelligent context management:
5
+ * 1. Detects /compact command and builds summarization context
6
+ * 2. After LLM responds with summary, marks old parts as compacted
7
+ * 3. History builder skips compacted parts entirely
8
8
  *
9
- * Pruning strategy:
10
- * - Protect the last PRUNE_PROTECT tokens worth of tool calls (40,000)
11
- * - Only prune if we'd save at least PRUNE_MINIMUM tokens (20,000)
12
- * - Skip the last 2 turns to preserve recent context
13
- * - Never prune "skill" or other protected tools
9
+ * Flow:
10
+ * - User sends "/compact" stored as regular user message
11
+ * - Runner detects command, builds context for LLM to summarize
12
+ * - LLM streams summary response naturally
13
+ * - On completion, markSessionCompacted() marks old tool_call/tool_result parts
14
+ * - Future history builds skip compacted parts
14
15
  */
15
16
 
16
17
  import type { getDb } from '@agi-cli/database';
17
18
  import { messages, messageParts } from '@agi-cli/database/schema';
18
- import { eq, desc } from 'drizzle-orm';
19
+ import { eq, desc, asc, and, lt } from 'drizzle-orm';
19
20
  import { debugLog } from './debug.ts';
21
+ import { streamText } from 'ai';
22
+ import { resolveModel } from './provider.ts';
23
+ import { loadConfig } from '@agi-cli/sdk';
20
24
 
21
- // Token thresholds (matching OpenCode)
22
- export const PRUNE_MINIMUM = 20_000; // Only prune if we'd save at least this many tokens
25
+ // Token thresholds
23
26
  export const PRUNE_PROTECT = 40_000; // Protect last N tokens worth of tool calls
24
27
 
25
- // Tools that should never be pruned
26
- const PRUNE_PROTECTED_TOOLS = ['skill'];
28
+ // Tools that should never be compacted
29
+ const PROTECTED_TOOLS = ['skill'];
27
30
 
28
31
  // Simple token estimation: ~4 chars per token
29
32
  export function estimateTokens(text: string): number {
@@ -44,51 +47,151 @@ export interface ModelLimits {
44
47
  }
45
48
 
46
49
  /**
47
- * Check if context is overflowing based on token usage and model limits.
48
- * Returns true if we've used more tokens than (context_limit - output_limit).
50
+ * Check if a message content is the /compact command.
49
51
  */
50
- export function isOverflow(tokens: TokenUsage, limits: ModelLimits): boolean {
51
- if (limits.context === 0) return false;
52
+ export function isCompactCommand(content: string): boolean {
53
+ const trimmed = content.trim().toLowerCase();
54
+ return trimmed === '/compact';
55
+ }
52
56
 
53
- const count = tokens.input + (tokens.cacheRead ?? 0) + tokens.output;
54
- const usableContext = limits.context - limits.output;
57
+ /**
58
+ * Build context for the LLM to generate a summary.
59
+ * Returns a prompt that describes what to summarize.
60
+ * Includes tool calls and results with appropriate truncation to fit within model limits.
61
+ * @param contextTokenLimit - Max tokens for context (uses ~4 chars per token estimate)
62
+ */
63
+ export async function buildCompactionContext(
64
+ db: Awaited<ReturnType<typeof getDb>>,
65
+ sessionId: string,
66
+ contextTokenLimit?: number,
67
+ ): Promise<string> {
68
+ const allMessages = await db
69
+ .select()
70
+ .from(messages)
71
+ .where(eq(messages.sessionId, sessionId))
72
+ .orderBy(asc(messages.createdAt));
55
73
 
56
- const overflow = count > usableContext;
57
- if (overflow) {
58
- debugLog(
59
- `[compaction] Context overflow detected: ${count} tokens used, ${usableContext} usable (${limits.context} context - ${limits.output} output)`,
60
- );
74
+ const lines: string[] = [];
75
+ let totalChars = 0;
76
+ // Use provided limit or default to 60k chars (~15k tokens)
77
+ // We use ~50% of model context for compaction, leaving room for system prompt + response
78
+ const maxChars = contextTokenLimit ? contextTokenLimit * 4 : 60000;
79
+
80
+ for (const msg of allMessages) {
81
+ if (totalChars > maxChars) {
82
+ lines.unshift('[...earlier content truncated...]');
83
+ break;
84
+ }
85
+
86
+ const parts = await db
87
+ .select()
88
+ .from(messageParts)
89
+ .where(eq(messageParts.messageId, msg.id))
90
+ .orderBy(asc(messageParts.index));
91
+
92
+ for (const part of parts) {
93
+ if (part.compactedAt) continue; // Skip already compacted
94
+
95
+ try {
96
+ const content = JSON.parse(part.content ?? '{}');
97
+
98
+ if (part.type === 'text' && content.text) {
99
+ const text = `[${msg.role.toUpperCase()}]: ${content.text}`;
100
+ lines.push(text.slice(0, 3000)); // Allow more text content
101
+ totalChars += text.length;
102
+ } else if (part.type === 'tool_call' && content.name) {
103
+ // Include tool name and relevant args (file paths, commands, etc.)
104
+ const argsStr =
105
+ typeof content.args === 'object'
106
+ ? JSON.stringify(content.args).slice(0, 500)
107
+ : '';
108
+ const text = `[TOOL ${content.name}]: ${argsStr}`;
109
+ lines.push(text);
110
+ totalChars += text.length;
111
+ } else if (part.type === 'tool_result' && content.result !== null) {
112
+ // Include enough result context for the LLM to understand what happened
113
+ const resultStr =
114
+ typeof content.result === 'string'
115
+ ? content.result.slice(0, 1500)
116
+ : JSON.stringify(content.result ?? '').slice(0, 1500);
117
+ const text = `[RESULT]: ${resultStr}`;
118
+ lines.push(text);
119
+ totalChars += text.length;
120
+ }
121
+ } catch {}
122
+ }
61
123
  }
62
124
 
63
- return overflow;
125
+ return lines.join('\n');
64
126
  }
65
127
 
66
128
  /**
67
- * Prune old tool outputs from a session to reduce context size.
129
+ * Get the system prompt addition for compaction.
130
+ */
131
+ export function getCompactionSystemPrompt(): string {
132
+ return `
133
+ The user has requested to compact the conversation. Generate a comprehensive summary that captures:
134
+
135
+ 1. **Main Goals**: What was the user trying to accomplish?
136
+ 2. **Key Actions**: What files were created, modified, or deleted?
137
+ 3. **Important Decisions**: What approaches or solutions were chosen and why?
138
+ 4. **Current State**: What is done and what might be pending?
139
+ 5. **Critical Context**: Any gotchas, errors encountered, or important details for continuing.
140
+
141
+ Format your response as a clear, structured summary. Start with "📦 **Context Compacted**" header.
142
+ Keep under 2000 characters but be thorough. This summary will replace detailed tool history.
143
+ `;
144
+ }
145
+
146
+ /**
147
+ * Mark old tool_call and tool_result parts as compacted.
148
+ * Called after the compaction summary response is complete.
68
149
  *
69
- * Goes backwards through tool results, protecting the last PRUNE_PROTECT tokens.
70
- * Marks older tool results as "compacted" so history builder returns placeholder text.
150
+ * Protects:
151
+ * - Last N tokens of tool results (PRUNE_PROTECT)
152
+ * - Last 2 user turns
153
+ * - Protected tool names (skill, etc.)
71
154
  */
72
- export async function pruneSession(
155
+ export async function markSessionCompacted(
73
156
  db: Awaited<ReturnType<typeof getDb>>,
74
157
  sessionId: string,
75
- ): Promise<{ pruned: number; saved: number }> {
76
- debugLog(`[compaction] Starting prune for session ${sessionId}`);
158
+ compactMessageId: string,
159
+ ): Promise<{ compacted: number; saved: number }> {
160
+ debugLog(`[compaction] Marking session ${sessionId} as compacted`);
77
161
 
78
- // Get all messages in the session ordered by creation time
79
- const allMessages = await db
162
+ // Get the compact message to find the cutoff point
163
+ const compactMsg = await db
80
164
  .select()
81
165
  .from(messages)
82
- .where(eq(messages.sessionId, sessionId))
166
+ .where(eq(messages.id, compactMessageId))
167
+ .limit(1);
168
+
169
+ if (!compactMsg.length) {
170
+ debugLog('[compaction] Compact message not found');
171
+ return { compacted: 0, saved: 0 };
172
+ }
173
+
174
+ const cutoffTime = compactMsg[0].createdAt;
175
+
176
+ // Get all messages before the compact command
177
+ const oldMessages = await db
178
+ .select()
179
+ .from(messages)
180
+ .where(
181
+ and(
182
+ eq(messages.sessionId, sessionId),
183
+ lt(messages.createdAt, cutoffTime),
184
+ ),
185
+ )
83
186
  .orderBy(desc(messages.createdAt));
84
187
 
85
188
  let totalTokens = 0;
86
- let prunedTokens = 0;
87
- const toPrune: Array<{ id: string; content: string }> = [];
189
+ let compactedTokens = 0;
190
+ const toCompact: Array<{ id: string; content: string }> = [];
88
191
  let turns = 0;
89
192
 
90
193
  // Go backwards through messages
91
- for (const msg of allMessages) {
194
+ for (const msg of oldMessages) {
92
195
  // Count user messages as turns
93
196
  if (msg.role === 'user') {
94
197
  turns++;
@@ -105,31 +208,113 @@ export async function pruneSession(
105
208
  .orderBy(desc(messageParts.index));
106
209
 
107
210
  for (const part of parts) {
108
- // Only process tool results
109
- if (part.type !== 'tool_result') continue;
211
+ // Only compact tool_call and tool_result
212
+ if (part.type !== 'tool_call' && part.type !== 'tool_result') continue;
110
213
 
111
214
  // Skip protected tools
112
- if (part.toolName && PRUNE_PROTECTED_TOOLS.includes(part.toolName)) {
215
+ if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) {
113
216
  continue;
114
217
  }
115
218
 
116
- // Parse content to check if already compacted
117
- let content: { result?: unknown; compactedAt?: number };
219
+ // Skip already compacted
220
+ if (part.compactedAt) continue;
221
+
222
+ // Parse content
223
+ let content: { result?: unknown; args?: unknown };
118
224
  try {
119
225
  content = JSON.parse(part.content ?? '{}');
120
226
  } catch {
121
227
  continue;
122
228
  }
123
229
 
124
- // Stop if we hit already compacted content (we've pruned before)
125
- if (content.compactedAt) {
230
+ // Estimate tokens
231
+ const contentStr =
232
+ part.type === 'tool_result'
233
+ ? typeof content.result === 'string'
234
+ ? content.result
235
+ : JSON.stringify(content.result ?? '')
236
+ : JSON.stringify(content.args ?? '');
237
+
238
+ const estimate = estimateTokens(contentStr);
239
+ totalTokens += estimate;
240
+
241
+ // If we've exceeded the protection threshold, mark for compaction
242
+ if (totalTokens > PRUNE_PROTECT) {
243
+ compactedTokens += estimate;
244
+ toCompact.push({ id: part.id, content: part.content ?? '{}' });
245
+ }
246
+ }
247
+ }
248
+
249
+ debugLog(
250
+ `[compaction] Found ${toCompact.length} parts to compact, saving ~${compactedTokens} tokens`,
251
+ );
252
+
253
+ if (toCompact.length > 0) {
254
+ const compactedAt = Date.now();
255
+
256
+ for (const part of toCompact) {
257
+ try {
258
+ await db
259
+ .update(messageParts)
260
+ .set({ compactedAt })
261
+ .where(eq(messageParts.id, part.id));
262
+ } catch (err) {
126
263
  debugLog(
127
- `[compaction] Hit previously compacted content, stopping prune`,
264
+ `[compaction] Failed to mark part ${part.id}: ${err instanceof Error ? err.message : String(err)}`,
128
265
  );
129
- break;
266
+ }
267
+ }
268
+
269
+ debugLog(`[compaction] Marked ${toCompact.length} parts as compacted`);
270
+ }
271
+
272
+ return { compacted: toCompact.length, saved: compactedTokens };
273
+ }
274
+
275
+ /**
276
+ * Legacy prune function - marks tool results as compacted.
277
+ * Used for automatic overflow-triggered compaction.
278
+ */
279
+ export async function pruneSession(
280
+ db: Awaited<ReturnType<typeof getDb>>,
281
+ sessionId: string,
282
+ ): Promise<{ pruned: number; saved: number }> {
283
+ debugLog(`[compaction] Auto-pruning session ${sessionId}`);
284
+
285
+ const allMessages = await db
286
+ .select()
287
+ .from(messages)
288
+ .where(eq(messages.sessionId, sessionId))
289
+ .orderBy(desc(messages.createdAt));
290
+
291
+ let totalTokens = 0;
292
+ let prunedTokens = 0;
293
+ const toPrune: Array<{ id: string }> = [];
294
+ let turns = 0;
295
+
296
+ for (const msg of allMessages) {
297
+ if (msg.role === 'user') turns++;
298
+ if (turns < 2) continue;
299
+
300
+ const parts = await db
301
+ .select()
302
+ .from(messageParts)
303
+ .where(eq(messageParts.messageId, msg.id))
304
+ .orderBy(desc(messageParts.index));
305
+
306
+ for (const part of parts) {
307
+ if (part.type !== 'tool_result') continue;
308
+ if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) continue;
309
+ if (part.compactedAt) continue;
310
+
311
+ let content: { result?: unknown };
312
+ try {
313
+ content = JSON.parse(part.content ?? '{}');
314
+ } catch {
315
+ continue;
130
316
  }
131
317
 
132
- // Estimate tokens for this result
133
318
  const estimate = estimateTokens(
134
319
  typeof content.result === 'string'
135
320
  ? content.result
@@ -137,118 +322,215 @@ export async function pruneSession(
137
322
  );
138
323
  totalTokens += estimate;
139
324
 
140
- // If we've exceeded the protection threshold, mark for pruning
141
325
  if (totalTokens > PRUNE_PROTECT) {
142
326
  prunedTokens += estimate;
143
- toPrune.push({ id: part.id, content: part.content ?? '{}' });
327
+ toPrune.push({ id: part.id });
144
328
  }
145
329
  }
146
330
  }
147
331
 
148
- debugLog(
149
- `[compaction] Found ${toPrune.length} tool results to prune, saving ~${prunedTokens} tokens`,
150
- );
151
-
152
- // Only prune if we'd save enough tokens to be worthwhile
153
- if (prunedTokens > PRUNE_MINIMUM) {
332
+ if (toPrune.length > 0) {
154
333
  const compactedAt = Date.now();
155
-
156
334
  for (const part of toPrune) {
157
335
  try {
158
- const content = JSON.parse(part.content);
159
- // Keep the structure but mark as compacted
160
- content.compactedAt = compactedAt;
161
- // Keep a small summary if it was a string result
162
- if (typeof content.result === 'string' && content.result.length > 100) {
163
- content.resultSummary = `${content.result.slice(0, 100)}...`;
164
- }
165
- // Clear the actual result to save space
166
- content.result = null;
167
-
168
336
  await db
169
337
  .update(messageParts)
170
- .set({ content: JSON.stringify(content) })
338
+ .set({ compactedAt })
171
339
  .where(eq(messageParts.id, part.id));
172
- } catch (err) {
173
- debugLog(
174
- `[compaction] Failed to prune part ${part.id}: ${err instanceof Error ? err.message : String(err)}`,
175
- );
176
- }
340
+ } catch {}
177
341
  }
178
-
179
- debugLog(
180
- `[compaction] Pruned ${toPrune.length} tool results, saved ~${prunedTokens} tokens`,
181
- );
182
- } else {
183
- debugLog(
184
- `[compaction] Skipping prune, would only save ${prunedTokens} tokens (min: ${PRUNE_MINIMUM})`,
185
- );
186
342
  }
187
343
 
188
344
  return { pruned: toPrune.length, saved: prunedTokens };
189
345
  }
190
346
 
347
+ /**
348
+ * Check if context is overflowing based on token usage and model limits.
349
+ */
350
+ export function isOverflow(tokens: TokenUsage, limits: ModelLimits): boolean {
351
+ if (limits.context === 0) return false;
352
+
353
+ const count = tokens.input + (tokens.cacheRead ?? 0) + tokens.output;
354
+ const usableContext = limits.context - limits.output;
355
+
356
+ return count > usableContext;
357
+ }
358
+
191
359
  /**
192
360
  * Get model limits from provider catalog or use defaults.
193
361
  */
194
362
  export function getModelLimits(
195
- provider: string,
363
+ _provider: string,
196
364
  model: string,
197
365
  ): ModelLimits | null {
198
- // Default limits for common models
199
- // These should ideally come from the provider catalog
200
366
  const defaults: Record<string, ModelLimits> = {
201
- // Anthropic
202
367
  'claude-sonnet-4-20250514': { context: 200000, output: 16000 },
203
368
  'claude-3-5-sonnet-20241022': { context: 200000, output: 8192 },
204
369
  'claude-3-5-haiku-20241022': { context: 200000, output: 8192 },
205
- 'claude-3-opus-20240229': { context: 200000, output: 4096 },
206
- // OpenAI
207
370
  'gpt-4o': { context: 128000, output: 16384 },
208
371
  'gpt-4o-mini': { context: 128000, output: 16384 },
209
- 'gpt-4-turbo': { context: 128000, output: 4096 },
210
372
  o1: { context: 200000, output: 100000 },
211
- 'o1-mini': { context: 128000, output: 65536 },
212
- 'o1-pro': { context: 200000, output: 100000 },
213
373
  'o3-mini': { context: 200000, output: 100000 },
214
- // Google
215
374
  'gemini-2.0-flash': { context: 1000000, output: 8192 },
216
375
  'gemini-1.5-pro': { context: 2000000, output: 8192 },
217
- 'gemini-1.5-flash': { context: 1000000, output: 8192 },
218
376
  };
219
377
 
220
- // Try exact match first
221
- if (defaults[model]) {
222
- return defaults[model];
223
- }
378
+ if (defaults[model]) return defaults[model];
224
379
 
225
- // Try partial match (e.g., "claude-3-5-sonnet" matches "claude-3-5-sonnet-20241022")
226
380
  for (const [key, limits] of Object.entries(defaults)) {
227
- if (model.includes(key) || key.includes(model)) {
228
- return limits;
229
- }
381
+ if (model.includes(key) || key.includes(model)) return limits;
230
382
  }
231
383
 
232
- // Return null if no match - caller should handle
233
- debugLog(
234
- `[compaction] No model limits found for ${provider}/${model}, skipping overflow check`,
235
- );
236
384
  return null;
237
385
  }
238
386
 
239
387
  /**
240
- * Check if a tool result content is compacted.
388
+ * Check if a part is compacted.
241
389
  */
242
- export function isCompacted(content: string): boolean {
243
- try {
244
- const parsed = JSON.parse(content);
245
- return !!parsed.compactedAt;
246
- } catch {
247
- return false;
248
- }
390
+ export function isCompacted(part: { compactedAt?: number | null }): boolean {
391
+ return !!part.compactedAt;
249
392
  }
250
393
 
394
+ export const COMPACTED_PLACEHOLDER = '[Compacted]';
395
+
251
396
  /**
252
- * Get the placeholder text for compacted tool results.
397
+ * Perform auto-compaction when context overflows.
398
+ * Streams the compaction summary (like /compact does), marks old parts as compacted.
399
+ * Returns info needed for caller to trigger a retry.
400
+ * Uses the session's model for consistency with /compact command.
253
401
  */
254
- export const COMPACTED_PLACEHOLDER = '[Old tool result content cleared]';
402
+ export async function performAutoCompaction(
403
+ db: Awaited<ReturnType<typeof getDb>>,
404
+ sessionId: string,
405
+ assistantMessageId: string,
406
+ publishFn: (event: {
407
+ type: string;
408
+ sessionId: string;
409
+ payload: Record<string, unknown>;
410
+ }) => void,
411
+ provider: string,
412
+ modelId: string,
413
+ ): Promise<{
414
+ success: boolean;
415
+ summary?: string;
416
+ error?: string;
417
+ compactMessageId?: string;
418
+ }> {
419
+ debugLog(`[compaction] Starting auto-compaction for session ${sessionId}`);
420
+
421
+ try {
422
+ // 1. Get model limits and build compaction context
423
+ const limits = getModelLimits(provider, modelId);
424
+ // Use 50% of context window for compaction, minimum 15k tokens
425
+ const contextTokenLimit = limits
426
+ ? Math.max(Math.floor(limits.context * 0.5), 15000)
427
+ : 15000;
428
+ debugLog(
429
+ `[compaction] Model ${modelId} context limit: ${limits?.context ?? 'unknown'}, using ${contextTokenLimit} tokens for compaction`,
430
+ );
431
+
432
+ const context = await buildCompactionContext(
433
+ db,
434
+ sessionId,
435
+ contextTokenLimit,
436
+ );
437
+ if (!context || context.length < 100) {
438
+ debugLog('[compaction] Not enough context to compact');
439
+ return { success: false, error: 'Not enough context to compact' };
440
+ }
441
+
442
+ // 2. Stream the compaction summary
443
+
444
+ // Use the session's model for consistency
445
+ const cfg = await loadConfig();
446
+ debugLog(
447
+ `[compaction] Using session model ${provider}/${modelId} for auto-compaction`,
448
+ );
449
+ const model = await resolveModel(
450
+ provider as Parameters<typeof resolveModel>[0],
451
+ modelId,
452
+ cfg,
453
+ );
454
+
455
+ // Create a text part for the compaction summary (after model created successfully)
456
+ const compactPartId = crypto.randomUUID();
457
+ const now = Date.now();
458
+
459
+ await db.insert(messageParts).values({
460
+ id: compactPartId,
461
+ messageId: assistantMessageId,
462
+ index: 0,
463
+ stepIndex: 0,
464
+ type: 'text',
465
+ content: JSON.stringify({ text: '' }),
466
+ agent: 'system',
467
+ provider: provider,
468
+ model: modelId,
469
+ startedAt: now,
470
+ });
471
+
472
+ const prompt = getCompactionSystemPrompt();
473
+ const result = streamText({
474
+ model,
475
+ system: `${prompt}\n\nIMPORTANT: Generate a comprehensive summary. This will replace the detailed conversation history.`,
476
+ messages: [
477
+ {
478
+ role: 'user',
479
+ content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${context}\n</conversation-to-summarize>`,
480
+ },
481
+ ],
482
+ maxTokens: 2000,
483
+ });
484
+
485
+ // Stream the summary
486
+ let summary = '';
487
+ for await (const chunk of result.textStream) {
488
+ summary += chunk;
489
+
490
+ // Publish delta event so UI updates in real-time
491
+ publishFn({
492
+ type: 'message.part.delta',
493
+ sessionId,
494
+ payload: {
495
+ messageId: assistantMessageId,
496
+ partId: compactPartId,
497
+ stepIndex: 0,
498
+ type: 'text',
499
+ delta: chunk,
500
+ },
501
+ });
502
+ }
503
+
504
+ // Update the part with final content
505
+ await db
506
+ .update(messageParts)
507
+ .set({
508
+ content: JSON.stringify({ text: summary }),
509
+ completedAt: Date.now(),
510
+ })
511
+ .where(eq(messageParts.id, compactPartId));
512
+
513
+ if (!summary || summary.length < 50) {
514
+ debugLog('[compaction] Failed to generate summary');
515
+ return { success: false, error: 'Failed to generate summary' };
516
+ }
517
+
518
+ debugLog(`[compaction] Generated summary: ${summary.slice(0, 100)}...`);
519
+
520
+ // 3. Mark old parts as compacted (using the assistant message as the cutoff)
521
+ const compactResult = await markSessionCompacted(
522
+ db,
523
+ sessionId,
524
+ assistantMessageId,
525
+ );
526
+ debugLog(
527
+ `[compaction] Marked ${compactResult.compacted} parts as compacted, saved ~${compactResult.saved} tokens`,
528
+ );
529
+
530
+ return { success: true, summary, compactMessageId: assistantMessageId };
531
+ } catch (err) {
532
+ const errorMsg = err instanceof Error ? err.message : String(err);
533
+ debugLog(`[compaction] Auto-compaction failed: ${errorMsg}`);
534
+ return { success: false, error: errorMsg };
535
+ }
536
+ }
@@ -4,7 +4,6 @@ import { messages, messageParts } from '@agi-cli/database/schema';
4
4
  import { eq, asc } from 'drizzle-orm';
5
5
  import { debugLog } from './debug.ts';
6
6
  import { ToolHistoryTracker } from './history/tool-history-tracker.ts';
7
- import { COMPACTED_PLACEHOLDER } from './compaction.ts';
8
7
 
9
8
  /**
10
9
  * Builds the conversation history for a session from the database,
@@ -89,6 +88,9 @@ export async function buildHistoryMessages(
89
88
  if (t) assistantParts.push({ type: 'text', text: t });
90
89
  } catch {}
91
90
  } else if (p.type === 'tool_call') {
91
+ // Skip compacted tool calls entirely
92
+ if (p.compactedAt) continue;
93
+
92
94
  try {
93
95
  const obj = JSON.parse(p.content ?? '{}') as {
94
96
  name?: string;
@@ -104,22 +106,20 @@ export async function buildHistoryMessages(
104
106
  }
105
107
  } catch {}
106
108
  } else if (p.type === 'tool_result') {
109
+ // Skip compacted tool results entirely
110
+ if (p.compactedAt) continue;
111
+
107
112
  try {
108
113
  const obj = JSON.parse(p.content ?? '{}') as {
109
114
  name?: string;
110
115
  callId?: string;
111
116
  result?: unknown;
112
- compactedAt?: number;
113
117
  };
114
118
  if (obj.callId) {
115
- // If this tool result was compacted, return placeholder instead
116
- const result = obj.compactedAt
117
- ? COMPACTED_PLACEHOLDER
118
- : obj.result;
119
119
  toolResults.push({
120
120
  name: obj.name ?? 'tool',
121
121
  callId: obj.callId,
122
- result,
122
+ result: obj.result,
123
123
  });
124
124
  }
125
125
  } catch {}
@@ -1,4 +1,4 @@
1
- import { generateText } from 'ai';
1
+ import { generateText, streamText } from 'ai';
2
2
  import { eq } from 'drizzle-orm';
3
3
  import type { AGIConfig } from '@agi-cli/sdk';
4
4
  import type { DB } from '@agi-cli/database';
@@ -9,6 +9,7 @@ import { runSessionLoop } from './runner.ts';
9
9
  import { resolveModel } from './provider.ts';
10
10
  import { getFastModel, type ProviderId } from '@agi-cli/sdk';
11
11
  import { debugLog } from './debug.ts';
12
+ import { isCompactCommand, buildCompactionContext } from './compaction.ts';
12
13
 
13
14
  type SessionRow = typeof sessions.$inferSelect;
14
15
 
@@ -119,6 +120,28 @@ export async function dispatchAssistantMessage(
119
120
  `[MESSAGE_SERVICE] Enqueuing assistant run with userContext: ${userContext ? `${userContext.substring(0, 50)}...` : 'NONE'}`,
120
121
  );
121
122
 
123
+ // Detect /compact command and build context with model-aware limits
124
+ const isCompact = isCompactCommand(content);
125
+ let compactionContext: string | undefined;
126
+
127
+ if (isCompact) {
128
+ debugLog('[MESSAGE_SERVICE] Detected /compact command, building context');
129
+ const { getModelLimits } = await import('./compaction.ts');
130
+ const limits = getModelLimits(provider, model);
131
+ // Use 50% of context window for compaction, minimum 15k tokens
132
+ const contextTokenLimit = limits
133
+ ? Math.max(Math.floor(limits.context * 0.5), 15000)
134
+ : 15000;
135
+ compactionContext = await buildCompactionContext(
136
+ db,
137
+ sessionId,
138
+ contextTokenLimit,
139
+ );
140
+ debugLog(
141
+ `[message-service] /compact context length: ${compactionContext.length}, limit: ${contextTokenLimit} tokens`,
142
+ );
143
+ }
144
+
122
145
  enqueueAssistantRun(
123
146
  {
124
147
  sessionId,
@@ -130,6 +153,8 @@ export async function dispatchAssistantMessage(
130
153
  oneShot: Boolean(oneShot),
131
154
  userContext,
132
155
  reasoning,
156
+ isCompactCommand: isCompact,
157
+ compactionContext,
133
158
  },
134
159
  runSessionLoop,
135
160
  );
@@ -240,7 +265,11 @@ async function generateSessionTitle(args: {
240
265
 
241
266
  // Use a smaller, faster model for title generation
242
267
  // Look up the cheapest/fastest model from the catalog for this provider
243
- const titleModel = getFastModel(provider) ?? modelName;
268
+ // For OpenAI OAuth, use codex-mini as it works with ChatGPT backend
269
+ const titleModel =
270
+ needsSpoof && provider === 'openai'
271
+ ? 'gpt-5.1-codex-mini'
272
+ : (getFastModel(provider) ?? modelName);
244
273
  debugLog(`[TITLE_GEN] Using title model: ${titleModel}`);
245
274
  const model = await resolveModel(provider, titleModel, cfg);
246
275
 
@@ -291,15 +320,29 @@ async function generateSessionTitle(args: {
291
320
  );
292
321
  }
293
322
 
294
- debugLog('[TITLE_GEN] Calling generateText...');
295
323
  let modelTitle = '';
296
324
  try {
297
- const out = await generateText({
298
- model,
299
- system,
300
- messages: messagesArray,
301
- });
302
- modelTitle = (out?.text || '').trim();
325
+ // ChatGPT backend requires streaming - use streamText for OAuth
326
+ if (needsSpoof) {
327
+ debugLog('[TITLE_GEN] Using streamText for OAuth...');
328
+ const result = streamText({
329
+ model,
330
+ system,
331
+ messages: messagesArray,
332
+ });
333
+ for await (const chunk of result.textStream) {
334
+ modelTitle += chunk;
335
+ }
336
+ modelTitle = modelTitle.trim();
337
+ } else {
338
+ debugLog('[TITLE_GEN] Using generateText...');
339
+ const out = await generateText({
340
+ model,
341
+ system,
342
+ messages: messagesArray,
343
+ });
344
+ modelTitle = (out?.text || '').trim();
345
+ }
303
346
 
304
347
  debugLog('[TITLE_GEN] Raw response from model:');
305
348
  debugLog(`[TITLE_GEN] "${modelTitle}"`);
@@ -28,6 +28,7 @@ export async function composeSystemPrompt(options: {
28
28
  includeEnvironment?: boolean;
29
29
  includeProjectTree?: boolean;
30
30
  userContext?: string;
31
+ contextSummary?: string;
31
32
  }): Promise<ComposedSystemPrompt> {
32
33
  const components: string[] = [];
33
34
  if (options.spoofPrompt) {
@@ -105,6 +106,19 @@ export async function composeSystemPrompt(options: {
105
106
  components.push('user-context');
106
107
  }
107
108
 
109
+ // Add compacted conversation summary if present
110
+ if (options.contextSummary?.trim()) {
111
+ const summaryBlock = [
112
+ '<compacted-conversation-summary>',
113
+ 'The conversation was compacted to save context. Here is a summary of the previous context:',
114
+ '',
115
+ options.contextSummary.trim(),
116
+ '</compacted-conversation-summary>',
117
+ ].join('\n');
118
+ parts.push(summaryBlock);
119
+ components.push('context-summary');
120
+ }
121
+
108
122
  // Add terminal context if available
109
123
  const terminalManager = getTerminalManager();
110
124
  if (terminalManager) {
@@ -1,7 +1,7 @@
1
1
  import { hasToolCall, streamText } from 'ai';
2
2
  import { loadConfig } from '@agi-cli/sdk';
3
3
  import { getDb } from '@agi-cli/database';
4
- import { messageParts } from '@agi-cli/database/schema';
4
+ import { messageParts, sessions } from '@agi-cli/database/schema';
5
5
  import { eq } from 'drizzle-orm';
6
6
  import { resolveModel } from './provider.ts';
7
7
  import { resolveAgentConfig } from './agent-registry.ts';
@@ -32,6 +32,7 @@ import {
32
32
  createAbortHandler,
33
33
  createFinishHandler,
34
34
  } from './stream-handlers.ts';
35
+ import { getCompactionSystemPrompt, pruneSession } from './compaction.ts';
35
36
 
36
37
  export { enqueueAssistantRun, abortSession } from './session-queue.ts';
37
38
  export { getRunnerState } from './session-queue.ts';
@@ -78,10 +79,30 @@ async function runAssistant(opts: RunOpts) {
78
79
 
79
80
  const agentPrompt = agentCfg.prompt || '';
80
81
 
82
+ // For /compact command, use minimal history - the compaction context has everything needed
81
83
  const historyTimer = time('runner:buildHistory');
82
- const history = await buildHistoryMessages(db, opts.sessionId);
84
+ let history: Awaited<ReturnType<typeof buildHistoryMessages>>;
85
+ if (opts.isCompactCommand && opts.compactionContext) {
86
+ debugLog('[RUNNER] Using minimal history for /compact command');
87
+ history = [];
88
+ } else {
89
+ history = await buildHistoryMessages(db, opts.sessionId);
90
+ }
83
91
  historyTimer.end({ messages: history.length });
84
92
 
93
+ // Fetch session to get context summary for compaction
94
+ const sessionRows = await db
95
+ .select()
96
+ .from(sessions)
97
+ .where(eq(sessions.id, opts.sessionId))
98
+ .limit(1);
99
+ const contextSummary = sessionRows[0]?.contextSummary ?? undefined;
100
+ if (contextSummary) {
101
+ debugLog(
102
+ `[RUNNER] Using context summary from compaction (${contextSummary.length} chars)`,
103
+ );
104
+ }
105
+
85
106
  // FIX: For OAuth, we need to check if this is the first ASSISTANT message
86
107
  // The user message is already in history by this point, so history.length will be > 0
87
108
  // We need to add additionalSystemMessages on the first assistant turn
@@ -127,6 +148,7 @@ async function runAssistant(opts: RunOpts) {
127
148
  spoofPrompt: undefined,
128
149
  includeProjectTree: isFirstMessage,
129
150
  userContext: opts.userContext,
151
+ contextSummary,
130
152
  });
131
153
  oauthFullPromptComponents = fullPrompt.components;
132
154
 
@@ -157,6 +179,7 @@ async function runAssistant(opts: RunOpts) {
157
179
  spoofPrompt: undefined,
158
180
  includeProjectTree: isFirstMessage,
159
181
  userContext: opts.userContext,
182
+ contextSummary,
160
183
  });
161
184
  system = composed.prompt;
162
185
  systemComponents = composed.components;
@@ -169,6 +192,23 @@ async function runAssistant(opts: RunOpts) {
169
192
  })}`,
170
193
  );
171
194
 
195
+ // Inject compaction prompt if this is a /compact command
196
+ if (opts.isCompactCommand && opts.compactionContext) {
197
+ debugLog('[RUNNER] Injecting compaction context for /compact command');
198
+ const compactPrompt = getCompactionSystemPrompt();
199
+ // Add compaction instructions as system message
200
+ // Don't modify `system` directly as it may contain OAuth spoof prompt
201
+ additionalSystemMessages.push({
202
+ role: 'system',
203
+ content: compactPrompt,
204
+ });
205
+ // Add the conversation context as a USER message (Anthropic requires at least one user message)
206
+ additionalSystemMessages.push({
207
+ role: 'user',
208
+ content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${opts.compactionContext}\n</conversation-to-summarize>`,
209
+ });
210
+ }
211
+
172
212
  const toolsTimer = time('runner:discoverTools');
173
213
  const allTools = await discoverProjectTools(cfg.projectRoot);
174
214
  toolsTimer.end({ count: allTools.length });
@@ -286,7 +326,13 @@ async function runAssistant(opts: RunOpts) {
286
326
  updateMessageTokensIncremental,
287
327
  );
288
328
 
289
- const onError = createErrorHandler(opts, db, getStepIndex, sharedCtx);
329
+ const onError = createErrorHandler(
330
+ opts,
331
+ db,
332
+ getStepIndex,
333
+ sharedCtx,
334
+ runSessionLoop,
335
+ );
290
336
 
291
337
  const onAbort = createAbortHandler(opts, db, getStepIndex, sharedCtx);
292
338
 
@@ -491,6 +537,67 @@ async function runAssistant(opts: RunOpts) {
491
537
  } catch (err) {
492
538
  unsubscribeFinish();
493
539
  const payload = toErrorPayload(err);
540
+
541
+ // Check if this is a "prompt too long" error and auto-compact
542
+ const errorMessage = err instanceof Error ? err.message : String(err);
543
+ const errorCode = (err as { code?: string })?.code ?? '';
544
+ const responseBody = (err as { responseBody?: string })?.responseBody ?? '';
545
+ const apiErrorType = (err as { apiErrorType?: string })?.apiErrorType ?? '';
546
+ const combinedError = `${errorMessage} ${responseBody}`.toLowerCase();
547
+ debugLog(`[RUNNER] Error caught - message: ${errorMessage.slice(0, 100)}`);
548
+ debugLog(
549
+ `[RUNNER] Error caught - code: ${errorCode}, apiErrorType: ${apiErrorType}`,
550
+ );
551
+ debugLog(
552
+ `[RUNNER] Error caught - responseBody: ${responseBody.slice(0, 200)}`,
553
+ );
554
+ const isPromptTooLong =
555
+ combinedError.includes('prompt is too long') ||
556
+ combinedError.includes('maximum context length') ||
557
+ combinedError.includes('too many tokens') ||
558
+ combinedError.includes('context_length_exceeded') ||
559
+ combinedError.includes('request too large') ||
560
+ combinedError.includes('exceeds the model') ||
561
+ combinedError.includes('input is too long') ||
562
+ errorCode === 'context_length_exceeded' ||
563
+ apiErrorType === 'invalid_request_error';
564
+ debugLog(
565
+ `[RUNNER] isPromptTooLong: ${isPromptTooLong}, isCompactCommand: ${opts.isCompactCommand}`,
566
+ );
567
+
568
+ if (isPromptTooLong && !opts.isCompactCommand) {
569
+ debugLog(
570
+ '[RUNNER] Prompt too long - auto-compacting and will retry on next user message',
571
+ );
572
+ try {
573
+ const pruneResult = await pruneSession(db, opts.sessionId);
574
+ debugLog(
575
+ `[RUNNER] Auto-pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
576
+ );
577
+
578
+ // Publish an error event (name: 'ContextOverflow') telling the user to retry their message
579
+ publish({
580
+ type: 'error',
581
+ sessionId: opts.sessionId,
582
+ payload: {
583
+ ...payload,
584
+ message: `Context too large (${errorMessage.match(/\d+/)?.[0] || 'many'} tokens). Auto-compacted old tool results. Please retry your message.`,
585
+ name: 'ContextOverflow',
586
+ },
587
+ });
588
+
589
+ // Complete the message as failed
590
+ try {
591
+ await completeAssistantMessage({}, opts, db);
592
+ } catch {}
593
+ return;
594
+ } catch (pruneErr) {
595
+ debugLog(
596
+ `[RUNNER] Auto-prune failed: ${pruneErr instanceof Error ? pruneErr.message : String(pruneErr)}`,
597
+ );
598
+ }
599
+ }
600
+
494
601
  debugLog(`[RUNNER] Error during stream: ${payload.message}`);
495
602
  debugLog(
496
603
  `[RUNNER] Error stack: ${err instanceof Error ? err.stack : 'no stack'}`,
@@ -11,6 +11,8 @@ export type RunOpts = {
11
11
  userContext?: string;
12
12
  reasoning?: boolean;
13
13
  abortSignal?: AbortSignal;
14
+ isCompactCommand?: boolean;
15
+ compactionContext?: string;
14
16
  };
15
17
 
16
18
  type RunnerState = { queue: RunOpts[]; running: boolean };
@@ -13,8 +13,11 @@ import {
13
13
  isOverflow,
14
14
  getModelLimits,
15
15
  type TokenUsage,
16
+ markSessionCompacted,
17
+ performAutoCompaction,
16
18
  } from './compaction.ts';
17
19
  import { debugLog } from './debug.ts';
20
+ import { enqueueAssistantRun } from './session-queue.ts';
18
21
 
19
22
  type StepFinishEvent = {
20
23
  usage?: UsageData;
@@ -131,14 +134,143 @@ export function createErrorHandler(
131
134
  db: Awaited<ReturnType<typeof getDb>>,
132
135
  getStepIndex: () => number,
133
136
  sharedCtx: ToolAdapterContext,
137
+ retryCallback?: (sessionId: string) => Promise<void>,
134
138
  ) {
135
139
  return async (err: unknown) => {
136
140
  const errorPayload = toErrorPayload(err);
137
141
  const isApiError = APICallError.isInstance(err);
138
142
  const stepIndex = getStepIndex();
139
143
 
144
+ // Check if this is a prompt-too-long error and auto-compact
145
+ // Handle nested error structures from AI SDK
146
+ const errObj = err as Record<string, unknown>;
147
+ const nestedError = (errObj?.error as Record<string, unknown>)?.error as
148
+ | Record<string, unknown>
149
+ | undefined;
150
+ const errorCode =
151
+ (errObj?.code as string) ?? (nestedError?.code as string) ?? '';
152
+ const errorType =
153
+ (errObj?.apiErrorType as string) ?? (nestedError?.type as string) ?? '';
154
+ const fullErrorStr = JSON.stringify(err).toLowerCase();
155
+
156
+ const isPromptTooLong =
157
+ fullErrorStr.includes('prompt is too long') ||
158
+ fullErrorStr.includes('maximum context length') ||
159
+ fullErrorStr.includes('too many tokens') ||
160
+ fullErrorStr.includes('context_length_exceeded') ||
161
+ fullErrorStr.includes('request too large') ||
162
+ fullErrorStr.includes('exceeds the model') ||
163
+ fullErrorStr.includes('context window') ||
164
+ fullErrorStr.includes('input is too long') ||
165
+ errorCode === 'context_length_exceeded' ||
166
+ errorType === 'invalid_request_error';
167
+
168
+ debugLog(
169
+ `[stream-handlers] isPromptTooLong: ${isPromptTooLong}, errorCode: ${errorCode}, errorType: ${errorType}`,
170
+ );
171
+
172
+ if (isPromptTooLong && !opts.isCompactCommand) {
173
+ debugLog(
174
+ '[stream-handlers] Prompt too long detected, auto-compacting...',
175
+ );
176
+ let compactionSucceeded = false;
177
+ try {
178
+ // Stream the compaction summary with proper publish function
179
+ const compactResult = await performAutoCompaction(
180
+ db,
181
+ opts.sessionId,
182
+ opts.assistantMessageId,
183
+ publish,
184
+ opts.provider,
185
+ opts.model,
186
+ );
187
+ if (compactResult.success) {
188
+ debugLog(
189
+ `[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
190
+ );
191
+ compactionSucceeded = true;
192
+ } else {
193
+ debugLog(
194
+ `[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
195
+ );
196
+ // Fall back to simple prune
197
+ const pruneResult = await pruneSession(db, opts.sessionId);
198
+ debugLog(
199
+ `[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
200
+ );
201
+ compactionSucceeded = pruneResult.pruned > 0;
202
+ }
203
+ } catch (compactErr) {
204
+ debugLog(
205
+ `[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
206
+ );
207
+ }
208
+
209
+ // If compaction succeeded, complete this message and trigger retry
210
+ if (compactionSucceeded) {
211
+ // Mark this compaction message as completed
212
+ await db
213
+ .update(messages)
214
+ .set({
215
+ status: 'completed',
216
+ })
217
+ .where(eq(messages.id, opts.assistantMessageId));
218
+
219
+ // Publish completion event for the compaction message
220
+ publish({
221
+ type: 'message.completed',
222
+ sessionId: opts.sessionId,
223
+ payload: {
224
+ id: opts.assistantMessageId,
225
+ autoCompacted: true,
226
+ },
227
+ });
228
+
229
+ // Trigger retry - create a new assistant message and enqueue the run
230
+ if (retryCallback) {
231
+ debugLog('[stream-handlers] Triggering retry after compaction...');
232
+ const newAssistantMessageId = crypto.randomUUID();
233
+ await db.insert(messages).values({
234
+ id: newAssistantMessageId,
235
+ sessionId: opts.sessionId,
236
+ role: 'assistant',
237
+ status: 'pending',
238
+ agent: opts.agent,
239
+ provider: opts.provider,
240
+ model: opts.model,
241
+ createdAt: Date.now(),
242
+ });
243
+
244
+ publish({
245
+ type: 'message.created',
246
+ sessionId: opts.sessionId,
247
+ payload: { id: newAssistantMessageId, role: 'assistant' },
248
+ });
249
+
250
+ // Enqueue the retry with the new assistant message
251
+ enqueueAssistantRun(
252
+ {
253
+ ...opts,
254
+ assistantMessageId: newAssistantMessageId,
255
+ },
256
+ retryCallback,
257
+ );
258
+ } else {
259
+ debugLog(
260
+ '[stream-handlers] No retryCallback provided, cannot auto-retry',
261
+ );
262
+ }
263
+
264
+ return; // Don't show error, compaction and retry handled it
265
+ }
266
+ }
267
+
140
268
  // Create error part for UI display
141
269
  const errorPartId = crypto.randomUUID();
270
+ const displayMessage =
271
+ isPromptTooLong && !opts.isCompactCommand
272
+ ? `${errorPayload.message}. Context auto-compacted - please retry your message.`
273
+ : errorPayload.message;
142
274
  await db.insert(messageParts).values({
143
275
  id: errorPartId,
144
276
  messageId: opts.assistantMessageId,
@@ -146,7 +278,7 @@ export function createErrorHandler(
146
278
  stepIndex,
147
279
  type: 'error',
148
280
  content: JSON.stringify({
149
- message: errorPayload.message,
281
+ message: displayMessage,
150
282
  type: errorPayload.type,
151
283
  details: errorPayload.details,
152
284
  isAborted: false,
@@ -163,11 +295,12 @@ export function createErrorHandler(
163
295
  .update(messages)
164
296
  .set({
165
297
  status: 'error',
166
- error: errorPayload.message,
298
+ error: displayMessage,
167
299
  errorType: errorPayload.type,
168
300
  errorDetails: JSON.stringify({
169
301
  ...errorPayload.details,
170
302
  isApiError,
303
+ autoCompacted: isPromptTooLong && !opts.isCompactCommand,
171
304
  }),
172
305
  isAborted: false,
173
306
  })
@@ -180,10 +313,11 @@ export function createErrorHandler(
180
313
  payload: {
181
314
  messageId: opts.assistantMessageId,
182
315
  partId: errorPartId,
183
- error: errorPayload.message,
316
+ error: displayMessage,
184
317
  errorType: errorPayload.type,
185
318
  details: errorPayload.details,
186
319
  isAborted: false,
320
+ autoCompacted: isPromptTooLong && !opts.isCompactCommand,
187
321
  },
188
322
  });
189
323
  };
@@ -273,6 +407,43 @@ export function createFinishHandler(
273
407
  await completeAssistantMessageFn(fin, opts, db);
274
408
  } catch {}
275
409
 
410
+ // If this was a /compact command, mark old parts as compacted
411
+ // Only mark as compacted if the response was successful and has content
412
+ if (opts.isCompactCommand && fin.finishReason !== 'error') {
413
+ // Verify the assistant actually generated text content (the summary)
414
+ const assistantParts = await db
415
+ .select()
416
+ .from(messageParts)
417
+ .where(eq(messageParts.messageId, opts.assistantMessageId));
418
+ const hasTextContent = assistantParts.some(
419
+ (p) => p.type === 'text' && p.content && p.content !== '{"text":""}',
420
+ );
421
+
422
+ if (!hasTextContent) {
423
+ debugLog(
424
+ '[stream-handlers] /compact finished but no summary generated, skipping compaction marking',
425
+ );
426
+ } else {
427
+ try {
428
+ debugLog(
429
+ `[stream-handlers] /compact complete, marking session compacted`,
430
+ );
431
+ const result = await markSessionCompacted(
432
+ db,
433
+ opts.sessionId,
434
+ opts.assistantMessageId,
435
+ );
436
+ debugLog(
437
+ `[stream-handlers] Compacted ${result.compacted} parts, saved ~${result.saved} tokens`,
438
+ );
439
+ } catch (err) {
440
+ debugLog(
441
+ `[stream-handlers] Compaction failed: ${err instanceof Error ? err.message : String(err)}`,
442
+ );
443
+ }
444
+ }
445
+ }
446
+
276
447
  // Use session totals from DB for accurate cost calculation
277
448
  const sessRows = await db
278
449
  .select()