@agi-cli/server 0.1.112 → 0.1.114
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/index.ts +4 -0
- package/src/routes/session-files.ts +387 -0
- package/src/runtime/compaction.ts +396 -114
- package/src/runtime/history-builder.ts +7 -7
- package/src/runtime/message-service.ts +52 -9
- package/src/runtime/prompt.ts +14 -0
- package/src/runtime/runner.ts +110 -3
- package/src/runtime/session-queue.ts +2 -0
- package/src/runtime/stream-handlers.ts +174 -3
|
package/src/runtime/message-service.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { generateText } from 'ai';
|
|
1
|
+
import { generateText, streamText } from 'ai';
|
|
2
2
|
import { eq } from 'drizzle-orm';
|
|
3
3
|
import type { AGIConfig } from '@agi-cli/sdk';
|
|
4
4
|
import type { DB } from '@agi-cli/database';
|
|
@@ -9,6 +9,7 @@ import { runSessionLoop } from './runner.ts';
|
|
|
9
9
|
import { resolveModel } from './provider.ts';
|
|
10
10
|
import { getFastModel, type ProviderId } from '@agi-cli/sdk';
|
|
11
11
|
import { debugLog } from './debug.ts';
|
|
12
|
+
import { isCompactCommand, buildCompactionContext } from './compaction.ts';
|
|
12
13
|
|
|
13
14
|
type SessionRow = typeof sessions.$inferSelect;
|
|
14
15
|
|
|
@@ -119,6 +120,28 @@ export async function dispatchAssistantMessage(
|
|
|
119
120
|
`[MESSAGE_SERVICE] Enqueuing assistant run with userContext: ${userContext ? `${userContext.substring(0, 50)}...` : 'NONE'}`,
|
|
120
121
|
);
|
|
121
122
|
|
|
123
|
+
// Detect /compact command and build context with model-aware limits
|
|
124
|
+
const isCompact = isCompactCommand(content);
|
|
125
|
+
let compactionContext: string | undefined;
|
|
126
|
+
|
|
127
|
+
if (isCompact) {
|
|
128
|
+
debugLog('[MESSAGE_SERVICE] Detected /compact command, building context');
|
|
129
|
+
const { getModelLimits } = await import('./compaction.ts');
|
|
130
|
+
const limits = getModelLimits(provider, model);
|
|
131
|
+
// Use 50% of context window for compaction, minimum 15k tokens
|
|
132
|
+
const contextTokenLimit = limits
|
|
133
|
+
? Math.max(Math.floor(limits.context * 0.5), 15000)
|
|
134
|
+
: 15000;
|
|
135
|
+
compactionContext = await buildCompactionContext(
|
|
136
|
+
db,
|
|
137
|
+
sessionId,
|
|
138
|
+
contextTokenLimit,
|
|
139
|
+
);
|
|
140
|
+
debugLog(
|
|
141
|
+
`[message-service] /compact context length: ${compactionContext.length}, limit: ${contextTokenLimit} tokens`,
|
|
142
|
+
);
|
|
143
|
+
}
|
|
144
|
+
|
|
122
145
|
enqueueAssistantRun(
|
|
123
146
|
{
|
|
124
147
|
sessionId,
|
|
@@ -130,6 +153,8 @@ export async function dispatchAssistantMessage(
|
|
|
130
153
|
oneShot: Boolean(oneShot),
|
|
131
154
|
userContext,
|
|
132
155
|
reasoning,
|
|
156
|
+
isCompactCommand: isCompact,
|
|
157
|
+
compactionContext,
|
|
133
158
|
},
|
|
134
159
|
runSessionLoop,
|
|
135
160
|
);
|
|
@@ -240,7 +265,11 @@ async function generateSessionTitle(args: {
|
|
|
240
265
|
|
|
241
266
|
// Use a smaller, faster model for title generation
|
|
242
267
|
// Look up the cheapest/fastest model from the catalog for this provider
|
|
243
|
-
|
|
268
|
+
// For OpenAI OAuth, use codex-mini as it works with ChatGPT backend
|
|
269
|
+
const titleModel =
|
|
270
|
+
needsSpoof && provider === 'openai'
|
|
271
|
+
? 'gpt-5.1-codex-mini'
|
|
272
|
+
: (getFastModel(provider) ?? modelName);
|
|
244
273
|
debugLog(`[TITLE_GEN] Using title model: ${titleModel}`);
|
|
245
274
|
const model = await resolveModel(provider, titleModel, cfg);
|
|
246
275
|
|
|
@@ -291,15 +320,29 @@ async function generateSessionTitle(args: {
|
|
|
291
320
|
);
|
|
292
321
|
}
|
|
293
322
|
|
|
294
|
-
debugLog('[TITLE_GEN] Calling generateText...');
|
|
295
323
|
let modelTitle = '';
|
|
296
324
|
try {
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
325
|
+
// ChatGPT backend requires streaming - use streamText for OAuth
|
|
326
|
+
if (needsSpoof) {
|
|
327
|
+
debugLog('[TITLE_GEN] Using streamText for OAuth...');
|
|
328
|
+
const result = streamText({
|
|
329
|
+
model,
|
|
330
|
+
system,
|
|
331
|
+
messages: messagesArray,
|
|
332
|
+
});
|
|
333
|
+
for await (const chunk of result.textStream) {
|
|
334
|
+
modelTitle += chunk;
|
|
335
|
+
}
|
|
336
|
+
modelTitle = modelTitle.trim();
|
|
337
|
+
} else {
|
|
338
|
+
debugLog('[TITLE_GEN] Using generateText...');
|
|
339
|
+
const out = await generateText({
|
|
340
|
+
model,
|
|
341
|
+
system,
|
|
342
|
+
messages: messagesArray,
|
|
343
|
+
});
|
|
344
|
+
modelTitle = (out?.text || '').trim();
|
|
345
|
+
}
|
|
303
346
|
|
|
304
347
|
debugLog('[TITLE_GEN] Raw response from model:');
|
|
305
348
|
debugLog(`[TITLE_GEN] "${modelTitle}"`);
|
package/src/runtime/prompt.ts
CHANGED
|
@@ -28,6 +28,7 @@ export async function composeSystemPrompt(options: {
|
|
|
28
28
|
includeEnvironment?: boolean;
|
|
29
29
|
includeProjectTree?: boolean;
|
|
30
30
|
userContext?: string;
|
|
31
|
+
contextSummary?: string;
|
|
31
32
|
}): Promise<ComposedSystemPrompt> {
|
|
32
33
|
const components: string[] = [];
|
|
33
34
|
if (options.spoofPrompt) {
|
|
@@ -105,6 +106,19 @@ export async function composeSystemPrompt(options: {
|
|
|
105
106
|
components.push('user-context');
|
|
106
107
|
}
|
|
107
108
|
|
|
109
|
+
// Add compacted conversation summary if present
|
|
110
|
+
if (options.contextSummary?.trim()) {
|
|
111
|
+
const summaryBlock = [
|
|
112
|
+
'<compacted-conversation-summary>',
|
|
113
|
+
'The conversation was compacted to save context. Here is a summary of the previous context:',
|
|
114
|
+
'',
|
|
115
|
+
options.contextSummary.trim(),
|
|
116
|
+
'</compacted-conversation-summary>',
|
|
117
|
+
].join('\n');
|
|
118
|
+
parts.push(summaryBlock);
|
|
119
|
+
components.push('context-summary');
|
|
120
|
+
}
|
|
121
|
+
|
|
108
122
|
// Add terminal context if available
|
|
109
123
|
const terminalManager = getTerminalManager();
|
|
110
124
|
if (terminalManager) {
|
package/src/runtime/runner.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { hasToolCall, streamText } from 'ai';
|
|
2
2
|
import { loadConfig } from '@agi-cli/sdk';
|
|
3
3
|
import { getDb } from '@agi-cli/database';
|
|
4
|
-
import { messageParts } from '@agi-cli/database/schema';
|
|
4
|
+
import { messageParts, sessions } from '@agi-cli/database/schema';
|
|
5
5
|
import { eq } from 'drizzle-orm';
|
|
6
6
|
import { resolveModel } from './provider.ts';
|
|
7
7
|
import { resolveAgentConfig } from './agent-registry.ts';
|
|
@@ -32,6 +32,7 @@ import {
|
|
|
32
32
|
createAbortHandler,
|
|
33
33
|
createFinishHandler,
|
|
34
34
|
} from './stream-handlers.ts';
|
|
35
|
+
import { getCompactionSystemPrompt, pruneSession } from './compaction.ts';
|
|
35
36
|
|
|
36
37
|
export { enqueueAssistantRun, abortSession } from './session-queue.ts';
|
|
37
38
|
export { getRunnerState } from './session-queue.ts';
|
|
@@ -78,10 +79,30 @@ async function runAssistant(opts: RunOpts) {
|
|
|
78
79
|
|
|
79
80
|
const agentPrompt = agentCfg.prompt || '';
|
|
80
81
|
|
|
82
|
+
// For /compact command, use minimal history - the compaction context has everything needed
|
|
81
83
|
const historyTimer = time('runner:buildHistory');
|
|
82
|
-
|
|
84
|
+
let history: Awaited<ReturnType<typeof buildHistoryMessages>>;
|
|
85
|
+
if (opts.isCompactCommand && opts.compactionContext) {
|
|
86
|
+
debugLog('[RUNNER] Using minimal history for /compact command');
|
|
87
|
+
history = [];
|
|
88
|
+
} else {
|
|
89
|
+
history = await buildHistoryMessages(db, opts.sessionId);
|
|
90
|
+
}
|
|
83
91
|
historyTimer.end({ messages: history.length });
|
|
84
92
|
|
|
93
|
+
// Fetch session to get context summary for compaction
|
|
94
|
+
const sessionRows = await db
|
|
95
|
+
.select()
|
|
96
|
+
.from(sessions)
|
|
97
|
+
.where(eq(sessions.id, opts.sessionId))
|
|
98
|
+
.limit(1);
|
|
99
|
+
const contextSummary = sessionRows[0]?.contextSummary ?? undefined;
|
|
100
|
+
if (contextSummary) {
|
|
101
|
+
debugLog(
|
|
102
|
+
`[RUNNER] Using context summary from compaction (${contextSummary.length} chars)`,
|
|
103
|
+
);
|
|
104
|
+
}
|
|
105
|
+
|
|
85
106
|
// FIX: For OAuth, we need to check if this is the first ASSISTANT message
|
|
86
107
|
// The user message is already in history by this point, so history.length will be > 0
|
|
87
108
|
// We need to add additionalSystemMessages on the first assistant turn
|
|
@@ -127,6 +148,7 @@ async function runAssistant(opts: RunOpts) {
|
|
|
127
148
|
spoofPrompt: undefined,
|
|
128
149
|
includeProjectTree: isFirstMessage,
|
|
129
150
|
userContext: opts.userContext,
|
|
151
|
+
contextSummary,
|
|
130
152
|
});
|
|
131
153
|
oauthFullPromptComponents = fullPrompt.components;
|
|
132
154
|
|
|
@@ -157,6 +179,7 @@ async function runAssistant(opts: RunOpts) {
|
|
|
157
179
|
spoofPrompt: undefined,
|
|
158
180
|
includeProjectTree: isFirstMessage,
|
|
159
181
|
userContext: opts.userContext,
|
|
182
|
+
contextSummary,
|
|
160
183
|
});
|
|
161
184
|
system = composed.prompt;
|
|
162
185
|
systemComponents = composed.components;
|
|
@@ -169,6 +192,23 @@ async function runAssistant(opts: RunOpts) {
|
|
|
169
192
|
})}`,
|
|
170
193
|
);
|
|
171
194
|
|
|
195
|
+
// Inject compaction prompt if this is a /compact command
|
|
196
|
+
if (opts.isCompactCommand && opts.compactionContext) {
|
|
197
|
+
debugLog('[RUNNER] Injecting compaction context for /compact command');
|
|
198
|
+
const compactPrompt = getCompactionSystemPrompt();
|
|
199
|
+
// Add compaction instructions as system message
|
|
200
|
+
// Don't modify `system` directly as it may contain OAuth spoof prompt
|
|
201
|
+
additionalSystemMessages.push({
|
|
202
|
+
role: 'system',
|
|
203
|
+
content: compactPrompt,
|
|
204
|
+
});
|
|
205
|
+
// Add the conversation context as a USER message (Anthropic requires at least one user message)
|
|
206
|
+
additionalSystemMessages.push({
|
|
207
|
+
role: 'user',
|
|
208
|
+
content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${opts.compactionContext}\n</conversation-to-summarize>`,
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
|
|
172
212
|
const toolsTimer = time('runner:discoverTools');
|
|
173
213
|
const allTools = await discoverProjectTools(cfg.projectRoot);
|
|
174
214
|
toolsTimer.end({ count: allTools.length });
|
|
@@ -286,7 +326,13 @@ async function runAssistant(opts: RunOpts) {
|
|
|
286
326
|
updateMessageTokensIncremental,
|
|
287
327
|
);
|
|
288
328
|
|
|
289
|
-
const onError = createErrorHandler(
|
|
329
|
+
const onError = createErrorHandler(
|
|
330
|
+
opts,
|
|
331
|
+
db,
|
|
332
|
+
getStepIndex,
|
|
333
|
+
sharedCtx,
|
|
334
|
+
runSessionLoop,
|
|
335
|
+
);
|
|
290
336
|
|
|
291
337
|
const onAbort = createAbortHandler(opts, db, getStepIndex, sharedCtx);
|
|
292
338
|
|
|
@@ -491,6 +537,67 @@ async function runAssistant(opts: RunOpts) {
|
|
|
491
537
|
} catch (err) {
|
|
492
538
|
unsubscribeFinish();
|
|
493
539
|
const payload = toErrorPayload(err);
|
|
540
|
+
|
|
541
|
+
// Check if this is a "prompt too long" error and auto-compact
|
|
542
|
+
const errorMessage = err instanceof Error ? err.message : String(err);
|
|
543
|
+
const errorCode = (err as { code?: string })?.code ?? '';
|
|
544
|
+
const responseBody = (err as { responseBody?: string })?.responseBody ?? '';
|
|
545
|
+
const apiErrorType = (err as { apiErrorType?: string })?.apiErrorType ?? '';
|
|
546
|
+
const combinedError = `${errorMessage} ${responseBody}`.toLowerCase();
|
|
547
|
+
debugLog(`[RUNNER] Error caught - message: ${errorMessage.slice(0, 100)}`);
|
|
548
|
+
debugLog(
|
|
549
|
+
`[RUNNER] Error caught - code: ${errorCode}, apiErrorType: ${apiErrorType}`,
|
|
550
|
+
);
|
|
551
|
+
debugLog(
|
|
552
|
+
`[RUNNER] Error caught - responseBody: ${responseBody.slice(0, 200)}`,
|
|
553
|
+
);
|
|
554
|
+
const isPromptTooLong =
|
|
555
|
+
combinedError.includes('prompt is too long') ||
|
|
556
|
+
combinedError.includes('maximum context length') ||
|
|
557
|
+
combinedError.includes('too many tokens') ||
|
|
558
|
+
combinedError.includes('context_length_exceeded') ||
|
|
559
|
+
combinedError.includes('request too large') ||
|
|
560
|
+
combinedError.includes('exceeds the model') ||
|
|
561
|
+
combinedError.includes('input is too long') ||
|
|
562
|
+
errorCode === 'context_length_exceeded' ||
|
|
563
|
+
apiErrorType === 'invalid_request_error';
|
|
564
|
+
debugLog(
|
|
565
|
+
`[RUNNER] isPromptTooLong: ${isPromptTooLong}, isCompactCommand: ${opts.isCompactCommand}`,
|
|
566
|
+
);
|
|
567
|
+
|
|
568
|
+
if (isPromptTooLong && !opts.isCompactCommand) {
|
|
569
|
+
debugLog(
|
|
570
|
+
'[RUNNER] Prompt too long - auto-compacting and will retry on next user message',
|
|
571
|
+
);
|
|
572
|
+
try {
|
|
573
|
+
const pruneResult = await pruneSession(db, opts.sessionId);
|
|
574
|
+
debugLog(
|
|
575
|
+
`[RUNNER] Auto-pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
|
|
576
|
+
);
|
|
577
|
+
|
|
578
|
+
// Publish a system message to inform the user
|
|
579
|
+
publish({
|
|
580
|
+
type: 'error',
|
|
581
|
+
sessionId: opts.sessionId,
|
|
582
|
+
payload: {
|
|
583
|
+
...payload,
|
|
584
|
+
message: `Context too large (${errorMessage.match(/\d+/)?.[0] || 'many'} tokens). Auto-compacted old tool results. Please retry your message.`,
|
|
585
|
+
name: 'ContextOverflow',
|
|
586
|
+
},
|
|
587
|
+
});
|
|
588
|
+
|
|
589
|
+
// Complete the message as failed
|
|
590
|
+
try {
|
|
591
|
+
await completeAssistantMessage({}, opts, db);
|
|
592
|
+
} catch {}
|
|
593
|
+
return;
|
|
594
|
+
} catch (pruneErr) {
|
|
595
|
+
debugLog(
|
|
596
|
+
`[RUNNER] Auto-prune failed: ${pruneErr instanceof Error ? pruneErr.message : String(pruneErr)}`,
|
|
597
|
+
);
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
494
601
|
debugLog(`[RUNNER] Error during stream: ${payload.message}`);
|
|
495
602
|
debugLog(
|
|
496
603
|
`[RUNNER] Error stack: ${err instanceof Error ? err.stack : 'no stack'}`,
|
|
package/src/runtime/stream-handlers.ts
CHANGED
|
@@ -13,8 +13,11 @@ import {
|
|
|
13
13
|
isOverflow,
|
|
14
14
|
getModelLimits,
|
|
15
15
|
type TokenUsage,
|
|
16
|
+
markSessionCompacted,
|
|
17
|
+
performAutoCompaction,
|
|
16
18
|
} from './compaction.ts';
|
|
17
19
|
import { debugLog } from './debug.ts';
|
|
20
|
+
import { enqueueAssistantRun } from './session-queue.ts';
|
|
18
21
|
|
|
19
22
|
type StepFinishEvent = {
|
|
20
23
|
usage?: UsageData;
|
|
@@ -131,14 +134,143 @@ export function createErrorHandler(
|
|
|
131
134
|
db: Awaited<ReturnType<typeof getDb>>,
|
|
132
135
|
getStepIndex: () => number,
|
|
133
136
|
sharedCtx: ToolAdapterContext,
|
|
137
|
+
retryCallback?: (sessionId: string) => Promise<void>,
|
|
134
138
|
) {
|
|
135
139
|
return async (err: unknown) => {
|
|
136
140
|
const errorPayload = toErrorPayload(err);
|
|
137
141
|
const isApiError = APICallError.isInstance(err);
|
|
138
142
|
const stepIndex = getStepIndex();
|
|
139
143
|
|
|
144
|
+
// Check if this is a prompt-too-long error and auto-compact
|
|
145
|
+
// Handle nested error structures from AI SDK
|
|
146
|
+
const errObj = err as Record<string, unknown>;
|
|
147
|
+
const nestedError = (errObj?.error as Record<string, unknown>)?.error as
|
|
148
|
+
| Record<string, unknown>
|
|
149
|
+
| undefined;
|
|
150
|
+
const errorCode =
|
|
151
|
+
(errObj?.code as string) ?? (nestedError?.code as string) ?? '';
|
|
152
|
+
const errorType =
|
|
153
|
+
(errObj?.apiErrorType as string) ?? (nestedError?.type as string) ?? '';
|
|
154
|
+
const fullErrorStr = JSON.stringify(err).toLowerCase();
|
|
155
|
+
|
|
156
|
+
const isPromptTooLong =
|
|
157
|
+
fullErrorStr.includes('prompt is too long') ||
|
|
158
|
+
fullErrorStr.includes('maximum context length') ||
|
|
159
|
+
fullErrorStr.includes('too many tokens') ||
|
|
160
|
+
fullErrorStr.includes('context_length_exceeded') ||
|
|
161
|
+
fullErrorStr.includes('request too large') ||
|
|
162
|
+
fullErrorStr.includes('exceeds the model') ||
|
|
163
|
+
fullErrorStr.includes('context window') ||
|
|
164
|
+
fullErrorStr.includes('input is too long') ||
|
|
165
|
+
errorCode === 'context_length_exceeded' ||
|
|
166
|
+
errorType === 'invalid_request_error';
|
|
167
|
+
|
|
168
|
+
debugLog(
|
|
169
|
+
`[stream-handlers] isPromptTooLong: ${isPromptTooLong}, errorCode: ${errorCode}, errorType: ${errorType}`,
|
|
170
|
+
);
|
|
171
|
+
|
|
172
|
+
if (isPromptTooLong && !opts.isCompactCommand) {
|
|
173
|
+
debugLog(
|
|
174
|
+
'[stream-handlers] Prompt too long detected, auto-compacting...',
|
|
175
|
+
);
|
|
176
|
+
let compactionSucceeded = false;
|
|
177
|
+
try {
|
|
178
|
+
// Stream the compaction summary with proper publish function
|
|
179
|
+
const compactResult = await performAutoCompaction(
|
|
180
|
+
db,
|
|
181
|
+
opts.sessionId,
|
|
182
|
+
opts.assistantMessageId,
|
|
183
|
+
publish,
|
|
184
|
+
opts.provider,
|
|
185
|
+
opts.model,
|
|
186
|
+
);
|
|
187
|
+
if (compactResult.success) {
|
|
188
|
+
debugLog(
|
|
189
|
+
`[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
|
|
190
|
+
);
|
|
191
|
+
compactionSucceeded = true;
|
|
192
|
+
} else {
|
|
193
|
+
debugLog(
|
|
194
|
+
`[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
|
|
195
|
+
);
|
|
196
|
+
// Fall back to simple prune
|
|
197
|
+
const pruneResult = await pruneSession(db, opts.sessionId);
|
|
198
|
+
debugLog(
|
|
199
|
+
`[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
|
|
200
|
+
);
|
|
201
|
+
compactionSucceeded = pruneResult.pruned > 0;
|
|
202
|
+
}
|
|
203
|
+
} catch (compactErr) {
|
|
204
|
+
debugLog(
|
|
205
|
+
`[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
|
|
206
|
+
);
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// If compaction succeeded, complete this message and trigger retry
|
|
210
|
+
if (compactionSucceeded) {
|
|
211
|
+
// Mark this compaction message as completed
|
|
212
|
+
await db
|
|
213
|
+
.update(messages)
|
|
214
|
+
.set({
|
|
215
|
+
status: 'completed',
|
|
216
|
+
})
|
|
217
|
+
.where(eq(messages.id, opts.assistantMessageId));
|
|
218
|
+
|
|
219
|
+
// Publish completion event for the compaction message
|
|
220
|
+
publish({
|
|
221
|
+
type: 'message.completed',
|
|
222
|
+
sessionId: opts.sessionId,
|
|
223
|
+
payload: {
|
|
224
|
+
id: opts.assistantMessageId,
|
|
225
|
+
autoCompacted: true,
|
|
226
|
+
},
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
// Trigger retry - create a new assistant message and enqueue the run
|
|
230
|
+
if (retryCallback) {
|
|
231
|
+
debugLog('[stream-handlers] Triggering retry after compaction...');
|
|
232
|
+
const newAssistantMessageId = crypto.randomUUID();
|
|
233
|
+
await db.insert(messages).values({
|
|
234
|
+
id: newAssistantMessageId,
|
|
235
|
+
sessionId: opts.sessionId,
|
|
236
|
+
role: 'assistant',
|
|
237
|
+
status: 'pending',
|
|
238
|
+
agent: opts.agent,
|
|
239
|
+
provider: opts.provider,
|
|
240
|
+
model: opts.model,
|
|
241
|
+
createdAt: Date.now(),
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
publish({
|
|
245
|
+
type: 'message.created',
|
|
246
|
+
sessionId: opts.sessionId,
|
|
247
|
+
payload: { id: newAssistantMessageId, role: 'assistant' },
|
|
248
|
+
});
|
|
249
|
+
|
|
250
|
+
// Enqueue the retry with the new assistant message
|
|
251
|
+
enqueueAssistantRun(
|
|
252
|
+
{
|
|
253
|
+
...opts,
|
|
254
|
+
assistantMessageId: newAssistantMessageId,
|
|
255
|
+
},
|
|
256
|
+
retryCallback,
|
|
257
|
+
);
|
|
258
|
+
} else {
|
|
259
|
+
debugLog(
|
|
260
|
+
'[stream-handlers] No retryCallback provided, cannot auto-retry',
|
|
261
|
+
);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
return; // Don't show error, compaction and retry handled it
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
140
268
|
// Create error part for UI display
|
|
141
269
|
const errorPartId = crypto.randomUUID();
|
|
270
|
+
const displayMessage =
|
|
271
|
+
isPromptTooLong && !opts.isCompactCommand
|
|
272
|
+
? `${errorPayload.message}. Context auto-compacted - please retry your message.`
|
|
273
|
+
: errorPayload.message;
|
|
142
274
|
await db.insert(messageParts).values({
|
|
143
275
|
id: errorPartId,
|
|
144
276
|
messageId: opts.assistantMessageId,
|
|
@@ -146,7 +278,7 @@ export function createErrorHandler(
|
|
|
146
278
|
stepIndex,
|
|
147
279
|
type: 'error',
|
|
148
280
|
content: JSON.stringify({
|
|
149
|
-
message:
|
|
281
|
+
message: displayMessage,
|
|
150
282
|
type: errorPayload.type,
|
|
151
283
|
details: errorPayload.details,
|
|
152
284
|
isAborted: false,
|
|
@@ -163,11 +295,12 @@ export function createErrorHandler(
|
|
|
163
295
|
.update(messages)
|
|
164
296
|
.set({
|
|
165
297
|
status: 'error',
|
|
166
|
-
error:
|
|
298
|
+
error: displayMessage,
|
|
167
299
|
errorType: errorPayload.type,
|
|
168
300
|
errorDetails: JSON.stringify({
|
|
169
301
|
...errorPayload.details,
|
|
170
302
|
isApiError,
|
|
303
|
+
autoCompacted: isPromptTooLong && !opts.isCompactCommand,
|
|
171
304
|
}),
|
|
172
305
|
isAborted: false,
|
|
173
306
|
})
|
|
@@ -180,10 +313,11 @@ export function createErrorHandler(
|
|
|
180
313
|
payload: {
|
|
181
314
|
messageId: opts.assistantMessageId,
|
|
182
315
|
partId: errorPartId,
|
|
183
|
-
error:
|
|
316
|
+
error: displayMessage,
|
|
184
317
|
errorType: errorPayload.type,
|
|
185
318
|
details: errorPayload.details,
|
|
186
319
|
isAborted: false,
|
|
320
|
+
autoCompacted: isPromptTooLong && !opts.isCompactCommand,
|
|
187
321
|
},
|
|
188
322
|
});
|
|
189
323
|
};
|
|
@@ -273,6 +407,43 @@ export function createFinishHandler(
|
|
|
273
407
|
await completeAssistantMessageFn(fin, opts, db);
|
|
274
408
|
} catch {}
|
|
275
409
|
|
|
410
|
+
// If this was a /compact command, mark old parts as compacted
|
|
411
|
+
// Only mark as compacted if the response was successful and has content
|
|
412
|
+
if (opts.isCompactCommand && fin.finishReason !== 'error') {
|
|
413
|
+
// Verify the assistant actually generated text content (the summary)
|
|
414
|
+
const assistantParts = await db
|
|
415
|
+
.select()
|
|
416
|
+
.from(messageParts)
|
|
417
|
+
.where(eq(messageParts.messageId, opts.assistantMessageId));
|
|
418
|
+
const hasTextContent = assistantParts.some(
|
|
419
|
+
(p) => p.type === 'text' && p.content && p.content !== '{"text":""}',
|
|
420
|
+
);
|
|
421
|
+
|
|
422
|
+
if (!hasTextContent) {
|
|
423
|
+
debugLog(
|
|
424
|
+
'[stream-handlers] /compact finished but no summary generated, skipping compaction marking',
|
|
425
|
+
);
|
|
426
|
+
} else {
|
|
427
|
+
try {
|
|
428
|
+
debugLog(
|
|
429
|
+
`[stream-handlers] /compact complete, marking session compacted`,
|
|
430
|
+
);
|
|
431
|
+
const result = await markSessionCompacted(
|
|
432
|
+
db,
|
|
433
|
+
opts.sessionId,
|
|
434
|
+
opts.assistantMessageId,
|
|
435
|
+
);
|
|
436
|
+
debugLog(
|
|
437
|
+
`[stream-handlers] Compacted ${result.compacted} parts, saved ~${result.saved} tokens`,
|
|
438
|
+
);
|
|
439
|
+
} catch (err) {
|
|
440
|
+
debugLog(
|
|
441
|
+
`[stream-handlers] Compaction failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
442
|
+
);
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
|
|
276
447
|
// Use session totals from DB for accurate cost calculation
|
|
277
448
|
const sessRows = await db
|
|
278
449
|
.select()
|