@agi-cli/server 0.1.119 → 0.1.121

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/package.json +3 -3
  2. package/src/index.ts +9 -5
  3. package/src/openapi/paths/git.ts +4 -0
  4. package/src/routes/ask.ts +13 -14
  5. package/src/routes/branch.ts +106 -0
  6. package/src/routes/config/agents.ts +1 -1
  7. package/src/routes/config/cwd.ts +1 -1
  8. package/src/routes/config/main.ts +1 -1
  9. package/src/routes/config/models.ts +32 -4
  10. package/src/routes/config/providers.ts +1 -1
  11. package/src/routes/config/utils.ts +14 -1
  12. package/src/routes/files.ts +1 -1
  13. package/src/routes/git/commit.ts +23 -6
  14. package/src/routes/git/schemas.ts +1 -0
  15. package/src/routes/session-files.ts +1 -1
  16. package/src/routes/session-messages.ts +2 -2
  17. package/src/routes/sessions.ts +8 -6
  18. package/src/runtime/agent/registry.ts +333 -0
  19. package/src/runtime/agent/runner-reasoning.ts +108 -0
  20. package/src/runtime/agent/runner-setup.ts +265 -0
  21. package/src/runtime/agent/runner.ts +356 -0
  22. package/src/runtime/agent-registry.ts +6 -333
  23. package/src/runtime/{ask-service.ts → ask/service.ts} +5 -5
  24. package/src/runtime/{debug.ts → debug/index.ts} +1 -1
  25. package/src/runtime/{api-error.ts → errors/api-error.ts} +2 -2
  26. package/src/runtime/message/compaction-auto.ts +137 -0
  27. package/src/runtime/message/compaction-context.ts +64 -0
  28. package/src/runtime/message/compaction-detect.ts +19 -0
  29. package/src/runtime/message/compaction-limits.ts +58 -0
  30. package/src/runtime/message/compaction-mark.ts +115 -0
  31. package/src/runtime/message/compaction-prune.ts +75 -0
  32. package/src/runtime/message/compaction.ts +23 -0
  33. package/src/runtime/{history-builder.ts → message/history-builder.ts} +2 -2
  34. package/src/runtime/{message-service.ts → message/service.ts} +8 -14
  35. package/src/runtime/{history → message}/tool-history-tracker.ts +1 -1
  36. package/src/runtime/{prompt.ts → prompt/builder.ts} +1 -1
  37. package/src/runtime/{provider.ts → provider/anthropic.ts} +4 -219
  38. package/src/runtime/provider/google.ts +12 -0
  39. package/src/runtime/provider/index.ts +44 -0
  40. package/src/runtime/provider/openai.ts +26 -0
  41. package/src/runtime/provider/opencode.ts +61 -0
  42. package/src/runtime/provider/openrouter.ts +11 -0
  43. package/src/runtime/provider/solforge.ts +22 -0
  44. package/src/runtime/provider/zai.ts +53 -0
  45. package/src/runtime/session/branch.ts +277 -0
  46. package/src/runtime/{db-operations.ts → session/db-operations.ts} +1 -1
  47. package/src/runtime/{session-manager.ts → session/manager.ts} +1 -1
  48. package/src/runtime/{session-queue.ts → session/queue.ts} +2 -2
  49. package/src/runtime/stream/abort-handler.ts +65 -0
  50. package/src/runtime/stream/error-handler.ts +200 -0
  51. package/src/runtime/stream/finish-handler.ts +123 -0
  52. package/src/runtime/stream/handlers.ts +5 -0
  53. package/src/runtime/stream/step-finish.ts +93 -0
  54. package/src/runtime/stream/types.ts +17 -0
  55. package/src/runtime/{tool-context.ts → tools/context.ts} +1 -1
  56. package/src/runtime/{tool-context-setup.ts → tools/setup.ts} +3 -3
  57. package/src/runtime/{token-utils.ts → utils/token.ts} +2 -2
  58. package/src/tools/adapter.ts +4 -4
  59. package/src/runtime/compaction.ts +0 -536
  60. package/src/runtime/runner.ts +0 -654
  61. package/src/runtime/stream-handlers.ts +0 -508
  62. /package/src/runtime/{cache-optimizer.ts → context/cache-optimizer.ts} +0 -0
  63. /package/src/runtime/{environment.ts → context/environment.ts} +0 -0
  64. /package/src/runtime/{context-optimizer.ts → context/optimizer.ts} +0 -0
  65. /package/src/runtime/{debug-state.ts → debug/state.ts} +0 -0
  66. /package/src/runtime/{error-handling.ts → errors/handling.ts} +0 -0
  67. /package/src/runtime/{history-truncator.ts → message/history-truncator.ts} +0 -0
  68. /package/src/runtime/{provider-selection.ts → provider/selection.ts} +0 -0
  69. /package/src/runtime/{tool-mapping.ts → tools/mapping.ts} +0 -0
  70. /package/src/runtime/{cwd.ts → utils/cwd.ts} +0 -0
@@ -0,0 +1,93 @@
1
+ import type { getDb } from '@agi-cli/database';
2
+ import { messageParts } from '@agi-cli/database/schema';
3
+ import { eq } from 'drizzle-orm';
4
+ import { publish } from '../../events/bus.ts';
5
+ import type { RunOpts } from '../session/queue.ts';
6
+ import type { ToolAdapterContext } from '../../tools/adapter.ts';
7
+ import type { UsageData, ProviderMetadata } from '../session/db-operations.ts';
8
+ import type { StepFinishEvent } from './types.ts';
9
+
10
/** Signature shared by the two incremental token-accounting callbacks. */
type IncrementalTokenUpdater = (
  usage: UsageData,
  providerMetadata: ProviderMetadata | undefined,
  opts: RunOpts,
  db: Awaited<ReturnType<typeof getDb>>,
) => Promise<void>;

/**
 * Builds the callback invoked when a single step of a run finishes.
 *
 * The returned handler performs four independent, best-effort stages; an
 * exception in one stage is swallowed so the remaining stages still run:
 *  1. stamp `completedAt` on the current message part (if there is one),
 *  2. feed the step's usage to the session and message token updaters,
 *  3. publish a `finish-step` event and, when usage is present, a `usage` event,
 *  4. advance the step index, copy it onto `sharedCtx.stepIndex`, clear the
 *     current part id and call `updateAccumulated('')`.
 *
 * @param opts - Run options; `opts.sessionId` tags the published events.
 * @param db - Database handle used for the part update and passed to the updaters.
 * @param getStepIndex - Returns the index of the step that is finishing.
 * @param incrementStepIndex - Advances the step counter and returns the new index.
 * @param getCurrentPartId - Returns the id of the part being written, or `null`.
 * @param updateCurrentPartId - Setter for the current part id.
 * @param updateAccumulated - Setter for the accumulated text.
 * @param sharedCtx - Tool adapter context whose `stepIndex` is kept in sync.
 * @param updateSessionTokensIncrementalFn - Session-level usage updater.
 * @param updateMessageTokensIncrementalFn - Message-level usage updater.
 * @returns An async handler taking the step-finish event.
 */
export function createStepFinishHandler(
  opts: RunOpts,
  db: Awaited<ReturnType<typeof getDb>>,
  getStepIndex: () => number,
  incrementStepIndex: () => number,
  getCurrentPartId: () => string | null,
  updateCurrentPartId: (id: string | null) => void,
  updateAccumulated: (text: string) => void,
  sharedCtx: ToolAdapterContext,
  updateSessionTokensIncrementalFn: IncrementalTokenUpdater,
  updateMessageTokensIncrementalFn: IncrementalTokenUpdater,
) {
  return async (step: StepFinishEvent) => {
    // Snapshot everything up front: the counters are mutated at the end.
    const completedAt = Date.now();
    const partId = getCurrentPartId();
    const stepIndex = getStepIndex();

    // Stage 1: close the part that was being written.
    if (partId) {
      try {
        await db
          .update(messageParts)
          .set({ completedAt })
          .where(eq(messageParts.id, partId));
      } catch {
        // best-effort
      }
    }

    // Stage 2: session totals first, then message totals; each may fail alone.
    if (step.usage) {
      const updaters = [
        updateSessionTokensIncrementalFn,
        updateMessageTokensIncrementalFn,
      ];
      for (const applyUsage of updaters) {
        try {
          await applyUsage(
            step.usage,
            step.experimental_providerMetadata,
            opts,
            db,
          );
        } catch {
          // best-effort
        }
      }
    }

    // Stage 3: notify listeners.
    try {
      publish({
        type: 'finish-step',
        sessionId: opts.sessionId,
        payload: {
          stepIndex,
          usage: step.usage,
          finishReason: step.finishReason,
          response: step.response,
        },
      });
      if (step.usage) {
        publish({
          type: 'usage',
          sessionId: opts.sessionId,
          payload: { stepIndex, ...step.usage },
        });
      }
    } catch {
      // best-effort
    }

    // Stage 4: reset per-step state for the next step.
    try {
      sharedCtx.stepIndex = incrementStepIndex();
      updateCurrentPartId(null);
      updateAccumulated('');
    } catch {
      // best-effort
    }
  };
}
@@ -0,0 +1,17 @@
1
+ import type { UsageData, ProviderMetadata } from '../session/db-operations.ts';
2
+
3
/**
 * Event received when one step of a run finishes.
 * Every field is optional.
 */
export type StepFinishEvent = {
  /** Token usage reported for the step. */
  usage?: UsageData;
  /** Reason the step ended, as a free-form string. */
  finishReason?: string;
  /** Raw response object; intentionally untyped. */
  response?: unknown;
  /** Provider-specific metadata accompanying the usage figures. */
  experimental_providerMetadata?: ProviderMetadata;
};

/**
 * Event describing the end of a run.
 * `usage` is restricted to the input/output/total token counters.
 */
export type FinishEvent = {
  usage?: Pick<UsageData, 'inputTokens' | 'outputTokens' | 'totalTokens'>;
  finishReason?: string;
};

/**
 * Event describing an aborted run.
 * NOTE(review): `steps` is presumably the list of steps recorded before the
 * abort — confirm against the abort handler.
 */
export type AbortEvent = {
  steps: unknown[];
};
@@ -1,7 +1,7 @@
1
1
  import { eq } from 'drizzle-orm';
2
2
  import type { DB } from '@agi-cli/database';
3
3
  import { messageParts } from '@agi-cli/database/schema';
4
- import { publish } from '../events/bus.ts';
4
+ import { publish } from '../../events/bus.ts';
5
5
 
6
6
  export type StepExecutionState = {
7
7
  chain: Promise<void>;
@@ -1,7 +1,7 @@
1
1
  import type { getDb } from '@agi-cli/database';
2
- import { time } from './debug.ts';
3
- import type { ToolAdapterContext } from '../tools/adapter.ts';
4
- import type { RunOpts } from './session-queue.ts';
2
+ import { time } from '../debug/index.ts';
3
+ import type { ToolAdapterContext } from '../../tools/adapter.ts';
4
+ import type { RunOpts } from '../session/queue.ts';
5
5
 
6
6
  export type RunnerToolContext = ToolAdapterContext & { stepIndex: number };
7
7
 
@@ -1,6 +1,6 @@
1
1
  import { catalog } from '@agi-cli/sdk';
2
- import { debugLog } from './debug.ts';
3
- import type { ProviderName } from './provider.ts';
2
+ import { debugLog } from '../debug/index.ts';
3
+ import type { ProviderName } from '../provider/index.ts';
4
4
 
5
5
  /**
6
6
  * Gets the maximum output tokens allowed for a given provider/model combination.
@@ -3,18 +3,18 @@ import { messageParts, sessions } from '@agi-cli/database/schema';
3
3
  import { eq } from 'drizzle-orm';
4
4
  import { publish } from '../events/bus.ts';
5
5
  import type { DiscoveredTool } from '@agi-cli/sdk';
6
- import { getCwd, setCwd, joinRelative } from '../runtime/cwd.ts';
6
+ import { getCwd, setCwd, joinRelative } from '../runtime/utils/cwd.ts';
7
7
  import type {
8
8
  ToolAdapterContext,
9
9
  StepExecutionState,
10
- } from '../runtime/tool-context.ts';
10
+ } from '../runtime/tools/context.ts';
11
11
  import { isToolError } from '@agi-cli/sdk/tools/error';
12
12
  import {
13
13
  toClaudeCodeName,
14
14
  requiresClaudeCodeNaming,
15
- } from '../runtime/tool-mapping.ts';
15
+ } from '../runtime/tools/mapping.ts';
16
16
 
17
- export type { ToolAdapterContext } from '../runtime/tool-context.ts';
17
+ export type { ToolAdapterContext } from '../runtime/tools/context.ts';
18
18
 
19
19
  type ToolExecuteSignature = Tool['execute'] extends (
20
20
  input: infer Input,
@@ -1,536 +0,0 @@
1
- /**
2
- * Context compaction module for managing token usage.
3
- *
4
- * This module implements intelligent context management:
5
- * 1. Detects /compact command and builds summarization context
6
- * 2. After LLM responds with summary, marks old parts as compacted
7
- * 3. History builder skips compacted parts entirely
8
- *
9
- * Flow:
10
- * - User sends "/compact" → stored as regular user message
11
- * - Runner detects command, builds context for LLM to summarize
12
- * - LLM streams summary response naturally
13
- * - On completion, markSessionCompacted() marks old tool_call/tool_result parts
14
- * - Future history builds skip compacted parts
15
- */
16
-
17
- import type { getDb } from '@agi-cli/database';
18
- import { messages, messageParts } from '@agi-cli/database/schema';
19
- import { eq, desc, asc, and, lt } from 'drizzle-orm';
20
- import { debugLog } from './debug.ts';
21
- import { streamText } from 'ai';
22
- import { resolveModel } from './provider.ts';
23
- import { loadConfig } from '@agi-cli/sdk';
24
-
25
// Token thresholds
/** Estimated tokens of the most recent tool content that compaction leaves untouched. */
export const PRUNE_PROTECT = 40_000;

/** Tool names whose parts must never be compacted. */
const PROTECTED_TOOLS = ['skill'];

/**
 * Rough token estimate assuming about four characters per token.
 *
 * @param text - Text to measure; a falsy value counts as empty.
 * @returns The character count divided by four, rounded to the nearest integer.
 */
export function estimateTokens(text: string): number {
  const charCount = text ? text.length : 0;
  return Math.max(0, Math.round(charCount / 4));
}
35
-
36
/** Token counters for one model call; only `input` and `output` are required. */
export interface TokenUsage {
  /** Tokens sent to the model. */
  input: number;
  /** Tokens produced by the model. */
  output: number;
  /** Tokens served from the provider's cache, when reported. */
  cacheRead?: number;
  /** Tokens written to the provider's cache, when reported. */
  cacheWrite?: number;
  /** Reasoning tokens, when reported. */
  reasoning?: number;
}

/** Size limits of a model, in tokens. */
export interface ModelLimits {
  /** Total context window. */
  context: number;
  /** Maximum output size. */
  output: number;
}
48
-
49
/**
 * Tells whether a message consists solely of the `/compact` command.
 * Surrounding whitespace and letter case are ignored.
 *
 * @param content - Raw message text.
 * @returns `true` only when the normalized text is exactly `/compact`.
 */
export function isCompactCommand(content: string): boolean {
  return content.trim().toLowerCase() === '/compact';
}
56
-
57
/**
 * Renders one message part as a single transcript line for the summarizer.
 * Returns `null` for parts that contribute nothing (unknown type, missing
 * payload, or content that is not usable JSON).
 */
function renderPartForCompaction(
  role: string,
  partType: string,
  rawContent: string | null | undefined,
): string | null {
  try {
    const content = JSON.parse(rawContent ?? '{}');

    if (partType === 'text' && content.text) {
      // Allow more room for plain text than for tool payloads.
      return `[${role.toUpperCase()}]: ${content.text}`.slice(0, 3000);
    }

    if (partType === 'tool_call' && content.name) {
      // Tool name plus a bounded view of its arguments (paths, commands, ...).
      const argsStr =
        typeof content.args === 'object'
          ? JSON.stringify(content.args).slice(0, 500)
          : '';
      return `[TOOL ${content.name}]: ${argsStr}`;
    }

    if (partType === 'tool_result' && content.result !== null) {
      // Enough of the result for the model to understand what happened.
      const resultStr =
        typeof content.result === 'string'
          ? content.result.slice(0, 1500)
          : JSON.stringify(content.result ?? '').slice(0, 1500);
      return `[RESULT]: ${resultStr}`;
    }
  } catch {
    // Malformed part content is skipped.
  }
  return null;
}

/**
 * Build the transcript the LLM is asked to summarize.
 *
 * Text, tool-call and tool-result parts that are not already compacted are
 * rendered one per line, each truncated to a fixed length. The output is in
 * chronological order. When the character budget is exceeded, the OLDEST
 * messages are the ones dropped and a truncation marker is placed at the top,
 * so the most recent context always reaches the summarizer.
 *
 * @param db - Database handle.
 * @param sessionId - Session whose messages are rendered.
 * @param contextTokenLimit - Max tokens for the transcript (~4 chars per
 *   token); defaults to 60k characters (~15k tokens) when omitted or 0.
 * @returns The newline-joined transcript; empty when nothing qualifies.
 */
export async function buildCompactionContext(
  db: Awaited<ReturnType<typeof getDb>>,
  sessionId: string,
  contextTokenLimit?: number,
): Promise<string> {
  const allMessages = await db
    .select()
    .from(messages)
    .where(eq(messages.sessionId, sessionId))
    .orderBy(asc(messages.createdAt));

  // Use provided limit or default to 60k chars (~15k tokens)
  const maxChars = contextTokenLimit ? contextTokenLimit * 4 : 60000;

  // Walk newest → oldest so that the budget is spent on recent history.
  const keptNewestFirst: string[][] = [];
  let totalChars = 0;
  let truncated = false;

  for (const msg of [...allMessages].reverse()) {
    if (totalChars > maxChars) {
      truncated = true;
      break;
    }

    const parts = await db
      .select()
      .from(messageParts)
      .where(eq(messageParts.messageId, msg.id))
      .orderBy(asc(messageParts.index));

    const messageLines: string[] = [];
    for (const part of parts) {
      if (part.compactedAt) continue; // Skip already compacted

      const line = renderPartForCompaction(msg.role, part.type, part.content);
      if (line === null) continue;

      messageLines.push(line);
      // Charge what is actually emitted, not the untruncated length.
      totalChars += line.length;
    }
    keptNewestFirst.push(messageLines);
  }

  // Restore chronological order for the final transcript.
  const lines: string[] = [];
  if (truncated) lines.push('[...earlier content truncated...]');
  for (const messageLines of keptNewestFirst.reverse()) {
    lines.push(...messageLines);
  }

  return lines.join('\n');
}
127
-
128
/**
 * Returns the system-prompt text that instructs the model to write a
 * compaction summary. The text begins and ends with a newline.
 */
export function getCompactionSystemPrompt(): string {
  const promptLines = [
    '',
    'The user has requested to compact the conversation. Generate a comprehensive summary that captures:',
    '',
    '1. **Main Goals**: What was the user trying to accomplish?',
    '2. **Key Actions**: What files were created, modified, or deleted?',
    '3. **Important Decisions**: What approaches or solutions were chosen and why?',
    '4. **Current State**: What is done and what might be pending?',
    '5. **Critical Context**: Any gotchas, errors encountered, or important details for continuing.',
    '',
    'Format your response as a clear, structured summary. Start with "📦 **Context Compacted**" header.',
    'Keep under 2000 characters but be thorough. This summary will replace detailed tool history.',
    '',
  ];
  return promptLines.join('\n');
}
145
-
146
/**
 * Flags old `tool_call` / `tool_result` parts of a session as compacted.
 * Intended to run once the compaction summary has been produced.
 *
 * Messages created before the compact message are walked newest → oldest.
 * A part is left alone when:
 *  - fewer than two user messages have been encountered so far on that walk
 *    (keeps the most recent turns intact),
 *  - its tool name is in `PROTECTED_TOOLS`,
 *  - it already carries `compactedAt`,
 *  - the running token estimate has not yet exceeded `PRUNE_PROTECT`.
 *
 * @param db - Database handle.
 * @param sessionId - Session to compact.
 * @param compactMessageId - Message whose `createdAt` is the cutoff.
 * @returns Number of parts selected for compaction and their estimated tokens.
 */
export async function markSessionCompacted(
  db: Awaited<ReturnType<typeof getDb>>,
  sessionId: string,
  compactMessageId: string,
): Promise<{ compacted: number; saved: number }> {
  debugLog(`[compaction] Marking session ${sessionId} as compacted`);

  // The compact message supplies the cutoff timestamp.
  const [cutoffMessage] = await db
    .select()
    .from(messages)
    .where(eq(messages.id, compactMessageId))
    .limit(1);

  if (!cutoffMessage) {
    debugLog('[compaction] Compact message not found');
    return { compacted: 0, saved: 0 };
  }

  const olderMessages = await db
    .select()
    .from(messages)
    .where(
      and(
        eq(messages.sessionId, sessionId),
        lt(messages.createdAt, cutoffMessage.createdAt),
      ),
    )
    .orderBy(desc(messages.createdAt));

  const partIdsToCompact: string[] = [];
  let runningTokens = 0;
  let savedTokens = 0;
  let userTurnsSeen = 0;

  for (const msg of olderMessages) {
    if (msg.role === 'user') {
      userTurnsSeen += 1;
    }
    // Everything newer than the second user message stays untouched.
    if (userTurnsSeen < 2) continue;

    const parts = await db
      .select()
      .from(messageParts)
      .where(eq(messageParts.messageId, msg.id))
      .orderBy(desc(messageParts.index));

    for (const part of parts) {
      const isToolPart =
        part.type === 'tool_call' || part.type === 'tool_result';
      if (!isToolPart) continue;
      if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) continue;
      if (part.compactedAt) continue;

      let parsed: { result?: unknown; args?: unknown };
      try {
        parsed = JSON.parse(part.content ?? '{}');
      } catch {
        continue;
      }

      // Results are measured by their payload, calls by their arguments.
      let measuredText: string;
      if (part.type === 'tool_result') {
        measuredText =
          typeof parsed.result === 'string'
            ? parsed.result
            : JSON.stringify(parsed.result ?? '');
      } else {
        measuredText = JSON.stringify(parsed.args ?? '');
      }

      const estimate = estimateTokens(measuredText);
      runningTokens += estimate;

      // Only content beyond the protected budget is compacted.
      if (runningTokens > PRUNE_PROTECT) {
        savedTokens += estimate;
        partIdsToCompact.push(part.id);
      }
    }
  }

  debugLog(
    `[compaction] Found ${partIdsToCompact.length} parts to compact, saving ~${savedTokens} tokens`,
  );

  if (partIdsToCompact.length > 0) {
    const compactedAt = Date.now();

    for (const partId of partIdsToCompact) {
      try {
        await db
          .update(messageParts)
          .set({ compactedAt })
          .where(eq(messageParts.id, partId));
      } catch (err) {
        debugLog(
          `[compaction] Failed to mark part ${partId}: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }

    debugLog(
      `[compaction] Marked ${partIdsToCompact.length} parts as compacted`,
    );
  }

  return { compacted: partIdsToCompact.length, saved: savedTokens };
}
274
-
275
/**
 * Legacy prune function - marks old tool results as compacted.
 * Used for automatic overflow-triggered compaction.
 *
 * All messages of the session are walked newest → oldest. Only `tool_result`
 * parts are considered. A part is left alone while fewer than two user
 * messages have been encountered, when its tool is in `PROTECTED_TOOLS`,
 * when it is already compacted, or while the running token estimate is still
 * within `PRUNE_PROTECT`.
 *
 * Failures to update an individual part are logged and do not abort the
 * remaining updates.
 *
 * @param db - Database handle.
 * @param sessionId - Session to prune.
 * @returns Number of parts selected for pruning and their estimated tokens.
 */
export async function pruneSession(
  db: Awaited<ReturnType<typeof getDb>>,
  sessionId: string,
): Promise<{ pruned: number; saved: number }> {
  debugLog(`[compaction] Auto-pruning session ${sessionId}`);

  const allMessages = await db
    .select()
    .from(messages)
    .where(eq(messages.sessionId, sessionId))
    .orderBy(desc(messages.createdAt));

  let totalTokens = 0;
  let prunedTokens = 0;
  const toPrune: Array<{ id: string }> = [];
  let turns = 0;

  for (const msg of allMessages) {
    if (msg.role === 'user') turns++;
    // Keep everything newer than the second user message.
    if (turns < 2) continue;

    const parts = await db
      .select()
      .from(messageParts)
      .where(eq(messageParts.messageId, msg.id))
      .orderBy(desc(messageParts.index));

    for (const part of parts) {
      if (part.type !== 'tool_result') continue;
      if (part.toolName && PROTECTED_TOOLS.includes(part.toolName)) continue;
      if (part.compactedAt) continue;

      let content: { result?: unknown };
      try {
        content = JSON.parse(part.content ?? '{}');
      } catch {
        continue;
      }

      const estimate = estimateTokens(
        typeof content.result === 'string'
          ? content.result
          : JSON.stringify(content.result ?? ''),
      );
      totalTokens += estimate;

      // Only results beyond the protected budget are pruned.
      if (totalTokens > PRUNE_PROTECT) {
        prunedTokens += estimate;
        toPrune.push({ id: part.id });
      }
    }
  }

  if (toPrune.length > 0) {
    const compactedAt = Date.now();
    for (const part of toPrune) {
      try {
        await db
          .update(messageParts)
          .set({ compactedAt })
          .where(eq(messageParts.id, part.id));
      } catch (err) {
        // Keep going, but leave a trace instead of failing silently.
        debugLog(
          `[compaction] Failed to mark part ${part.id}: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
    }
  }

  return { pruned: toPrune.length, saved: prunedTokens };
}
346
-
347
/**
 * Decides whether the reported token usage no longer fits the model.
 *
 * Usage is `input + cacheRead + output`; the usable window is the context
 * size minus the reserved output size. A context size of 0 means "unknown"
 * and never overflows.
 *
 * @param tokens - Token counters of the last call.
 * @param limits - Context and output limits of the model.
 * @returns `true` when usage strictly exceeds the usable window.
 */
export function isOverflow(tokens: TokenUsage, limits: ModelLimits): boolean {
  if (limits.context === 0) {
    return false;
  }

  const cached = tokens.cacheRead ?? 0;
  const used = tokens.input + cached + tokens.output;
  return used > limits.context - limits.output;
}
358
-
359
/**
 * Get model limits from a built-in table of known models.
 *
 * Lookup order:
 *  1. exact match on the model id,
 *  2. the longest table key contained in the model id (so a dated or suffixed
 *     id such as `gpt-4o-mini-2024-07-18` resolves to its most specific entry),
 *  3. the first table key that contains the model id (abbreviated ids such as
 *     `claude-sonnet-4`).
 *
 * An empty model id never matches.
 *
 * @param _provider - Currently unused.
 * @param model - Model identifier to look up.
 * @returns The limits for the model, or `null` when it is unknown.
 */
export function getModelLimits(
  _provider: string,
  model: string,
): ModelLimits | null {
  const defaults: Record<string, ModelLimits> = {
    'claude-sonnet-4-20250514': { context: 200000, output: 16000 },
    'claude-3-5-sonnet-20241022': { context: 200000, output: 8192 },
    'claude-3-5-haiku-20241022': { context: 200000, output: 8192 },
    'gpt-4o': { context: 128000, output: 16384 },
    'gpt-4o-mini': { context: 128000, output: 16384 },
    o1: { context: 200000, output: 100000 },
    'o3-mini': { context: 200000, output: 100000 },
    'gemini-2.0-flash': { context: 1000000, output: 8192 },
    'gemini-1.5-pro': { context: 2000000, output: 8192 },
  };

  // '' is a substring of every key and would otherwise match the first entry.
  if (!model) return null;

  // Own-property check: a plain index would also find inherited members
  // such as `constructor` or `toString`.
  if (Object.prototype.hasOwnProperty.call(defaults, model)) {
    return defaults[model] ?? null;
  }

  const entries = Object.entries(defaults);

  // Prefer the most specific known key embedded in the requested id.
  let bestKey = '';
  let bestLimits: ModelLimits | null = null;
  for (const [key, limits] of entries) {
    if (model.includes(key) && key.length > bestKey.length) {
      bestKey = key;
      bestLimits = limits;
    }
  }
  if (bestLimits) return bestLimits;

  // Fall back to treating the requested id as a fragment of a known key.
  for (const [key, limits] of entries) {
    if (key.includes(model)) return limits;
  }

  return null;
}
386
-
387
/**
 * Reports whether a part has been compacted, i.e. whether it carries a
 * truthy `compactedAt` value.
 */
export function isCompacted(part: { compactedAt?: number | null }): boolean {
  return Boolean(part.compactedAt);
}

/** Placeholder text standing in for compacted content. */
export const COMPACTED_PLACEHOLDER = '[Compacted]';
395
-
396
/**
 * Perform auto-compaction when context overflows.
 *
 * Steps:
 *  1. Build a transcript sized to 50% of the model's context window
 *     (minimum and fallback: 15k tokens).
 *  2. Insert an empty text part on the assistant message, stream a summary
 *     into it (publishing `message.part.delta` events), then persist the text
 *     and stamp `completedAt`.
 *  3. Mark old parts as compacted, using the assistant message as the cutoff.
 *
 * The summary part is finalized even when streaming fails midway, so the
 * stored part matches the deltas that were already published instead of
 * staying empty and open.
 *
 * Never throws: every failure is reported through the returned object.
 *
 * @param db - Database handle.
 * @param sessionId - Session being compacted.
 * @param assistantMessageId - Message that receives the summary part.
 * @param publishFn - Event sink for streaming deltas.
 * @param provider - Provider of the session's model.
 * @param modelId - Id of the session's model; also used for the summary.
 * @returns `success` plus the summary and cutoff message id, or an `error`.
 */
export async function performAutoCompaction(
  db: Awaited<ReturnType<typeof getDb>>,
  sessionId: string,
  assistantMessageId: string,
  publishFn: (event: {
    type: string;
    sessionId: string;
    payload: Record<string, unknown>;
  }) => void,
  provider: string,
  modelId: string,
): Promise<{
  success: boolean;
  summary?: string;
  error?: string;
  compactMessageId?: string;
}> {
  debugLog(`[compaction] Starting auto-compaction for session ${sessionId}`);

  try {
    // 1. Get model limits and build compaction context
    const limits = getModelLimits(provider, modelId);
    // Use 50% of context window for compaction, minimum 15k tokens
    const contextTokenLimit = limits
      ? Math.max(Math.floor(limits.context * 0.5), 15000)
      : 15000;
    debugLog(
      `[compaction] Model ${modelId} context limit: ${limits?.context ?? 'unknown'}, using ${contextTokenLimit} tokens for compaction`,
    );

    const context = await buildCompactionContext(
      db,
      sessionId,
      contextTokenLimit,
    );
    if (!context || context.length < 100) {
      debugLog('[compaction] Not enough context to compact');
      return { success: false, error: 'Not enough context to compact' };
    }

    // 2. Stream the compaction summary

    // Use the session's model for consistency
    const cfg = await loadConfig();
    debugLog(
      `[compaction] Using session model ${provider}/${modelId} for auto-compaction`,
    );
    const model = await resolveModel(
      provider as Parameters<typeof resolveModel>[0],
      modelId,
      cfg,
    );

    // Create the summary part only after the model resolved successfully,
    // so a bad provider/model does not leave an empty part behind.
    const compactPartId = crypto.randomUUID();
    const now = Date.now();

    await db.insert(messageParts).values({
      id: compactPartId,
      messageId: assistantMessageId,
      index: 0,
      stepIndex: 0,
      type: 'text',
      content: JSON.stringify({ text: '' }),
      agent: 'system',
      provider: provider,
      model: modelId,
      startedAt: now,
    });

    const prompt = getCompactionSystemPrompt();
    const result = streamText({
      model,
      system: `${prompt}\n\nIMPORTANT: Generate a comprehensive summary. This will replace the detailed conversation history.`,
      messages: [
        {
          role: 'user',
          content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${context}\n</conversation-to-summarize>`,
        },
      ],
      maxTokens: 2000,
    });

    // Stream the summary, remembering (not yet rethrowing) any failure so the
    // part can be finalized with whatever text was produced.
    let summary = '';
    let streamFailed = false;
    let streamError: unknown;
    try {
      for await (const chunk of result.textStream) {
        summary += chunk;

        // Publish delta event so UI updates in real-time
        publishFn({
          type: 'message.part.delta',
          sessionId,
          payload: {
            messageId: assistantMessageId,
            partId: compactPartId,
            stepIndex: 0,
            type: 'text',
            delta: chunk,
          },
        });
      }
    } catch (err) {
      streamFailed = true;
      streamError = err;
    }

    // Update the part with final (or partial) content
    try {
      await db
        .update(messageParts)
        .set({
          content: JSON.stringify({ text: summary }),
          completedAt: Date.now(),
        })
        .where(eq(messageParts.id, compactPartId));
    } catch (persistErr) {
      // On the success path a persistence failure is the error to report;
      // after a stream failure the stream error takes precedence.
      if (!streamFailed) throw persistErr;
      debugLog(
        `[compaction] Failed to finalize summary part ${compactPartId}: ${persistErr instanceof Error ? persistErr.message : String(persistErr)}`,
      );
    }

    if (streamFailed) throw streamError;

    if (!summary || summary.length < 50) {
      debugLog('[compaction] Failed to generate summary');
      return { success: false, error: 'Failed to generate summary' };
    }

    debugLog(`[compaction] Generated summary: ${summary.slice(0, 100)}...`);

    // 3. Mark old parts as compacted (using the assistant message as the cutoff)
    const compactResult = await markSessionCompacted(
      db,
      sessionId,
      assistantMessageId,
    );
    debugLog(
      `[compaction] Marked ${compactResult.compacted} parts as compacted, saved ~${compactResult.saved} tokens`,
    );

    return { success: true, summary, compactMessageId: assistantMessageId };
  } catch (err) {
    const errorMsg = err instanceof Error ? err.message : String(err);
    debugLog(`[compaction] Auto-compaction failed: ${errorMsg}`);
    return { success: false, error: errorMsg };
  }
}