@agi-cli/server 0.1.112 → 0.1.114

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { generateText } from 'ai';
1
+ import { generateText, streamText } from 'ai';
2
2
  import { eq } from 'drizzle-orm';
3
3
  import type { AGIConfig } from '@agi-cli/sdk';
4
4
  import type { DB } from '@agi-cli/database';
@@ -9,6 +9,7 @@ import { runSessionLoop } from './runner.ts';
9
9
  import { resolveModel } from './provider.ts';
10
10
  import { getFastModel, type ProviderId } from '@agi-cli/sdk';
11
11
  import { debugLog } from './debug.ts';
12
+ import { isCompactCommand, buildCompactionContext } from './compaction.ts';
12
13
 
13
14
  type SessionRow = typeof sessions.$inferSelect;
14
15
 
@@ -119,6 +120,28 @@ export async function dispatchAssistantMessage(
119
120
  `[MESSAGE_SERVICE] Enqueuing assistant run with userContext: ${userContext ? `${userContext.substring(0, 50)}...` : 'NONE'}`,
120
121
  );
121
122
 
123
+ // Detect /compact command and build context with model-aware limits
124
+ const isCompact = isCompactCommand(content);
125
+ let compactionContext: string | undefined;
126
+
127
+ if (isCompact) {
128
+ debugLog('[MESSAGE_SERVICE] Detected /compact command, building context');
129
+ const { getModelLimits } = await import('./compaction.ts');
130
+ const limits = getModelLimits(provider, model);
131
+ // Use 50% of context window for compaction, minimum 15k tokens
132
+ const contextTokenLimit = limits
133
+ ? Math.max(Math.floor(limits.context * 0.5), 15000)
134
+ : 15000;
135
+ compactionContext = await buildCompactionContext(
136
+ db,
137
+ sessionId,
138
+ contextTokenLimit,
139
+ );
140
+ debugLog(
141
+ `[message-service] /compact context length: ${compactionContext.length}, limit: ${contextTokenLimit} tokens`,
142
+ );
143
+ }
144
+
122
145
  enqueueAssistantRun(
123
146
  {
124
147
  sessionId,
@@ -130,6 +153,8 @@ export async function dispatchAssistantMessage(
130
153
  oneShot: Boolean(oneShot),
131
154
  userContext,
132
155
  reasoning,
156
+ isCompactCommand: isCompact,
157
+ compactionContext,
133
158
  },
134
159
  runSessionLoop,
135
160
  );
@@ -240,7 +265,11 @@ async function generateSessionTitle(args: {
240
265
 
241
266
  // Use a smaller, faster model for title generation
242
267
  // Look up the cheapest/fastest model from the catalog for this provider
243
- const titleModel = getFastModel(provider) ?? modelName;
268
+ // For OpenAI OAuth, use codex-mini as it works with ChatGPT backend
269
+ const titleModel =
270
+ needsSpoof && provider === 'openai'
271
+ ? 'gpt-5.1-codex-mini'
272
+ : (getFastModel(provider) ?? modelName);
244
273
  debugLog(`[TITLE_GEN] Using title model: ${titleModel}`);
245
274
  const model = await resolveModel(provider, titleModel, cfg);
246
275
 
@@ -291,15 +320,29 @@ async function generateSessionTitle(args: {
291
320
  );
292
321
  }
293
322
 
294
- debugLog('[TITLE_GEN] Calling generateText...');
295
323
  let modelTitle = '';
296
324
  try {
297
- const out = await generateText({
298
- model,
299
- system,
300
- messages: messagesArray,
301
- });
302
- modelTitle = (out?.text || '').trim();
325
+ // ChatGPT backend requires streaming - use streamText for OAuth
326
+ if (needsSpoof) {
327
+ debugLog('[TITLE_GEN] Using streamText for OAuth...');
328
+ const result = streamText({
329
+ model,
330
+ system,
331
+ messages: messagesArray,
332
+ });
333
+ for await (const chunk of result.textStream) {
334
+ modelTitle += chunk;
335
+ }
336
+ modelTitle = modelTitle.trim();
337
+ } else {
338
+ debugLog('[TITLE_GEN] Using generateText...');
339
+ const out = await generateText({
340
+ model,
341
+ system,
342
+ messages: messagesArray,
343
+ });
344
+ modelTitle = (out?.text || '').trim();
345
+ }
303
346
 
304
347
  debugLog('[TITLE_GEN] Raw response from model:');
305
348
  debugLog(`[TITLE_GEN] "${modelTitle}"`);
@@ -28,6 +28,7 @@ export async function composeSystemPrompt(options: {
28
28
  includeEnvironment?: boolean;
29
29
  includeProjectTree?: boolean;
30
30
  userContext?: string;
31
+ contextSummary?: string;
31
32
  }): Promise<ComposedSystemPrompt> {
32
33
  const components: string[] = [];
33
34
  if (options.spoofPrompt) {
@@ -105,6 +106,19 @@ export async function composeSystemPrompt(options: {
105
106
  components.push('user-context');
106
107
  }
107
108
 
109
+ // Add compacted conversation summary if present
110
+ if (options.contextSummary?.trim()) {
111
+ const summaryBlock = [
112
+ '<compacted-conversation-summary>',
113
+ 'The conversation was compacted to save context. Here is a summary of the previous context:',
114
+ '',
115
+ options.contextSummary.trim(),
116
+ '</compacted-conversation-summary>',
117
+ ].join('\n');
118
+ parts.push(summaryBlock);
119
+ components.push('context-summary');
120
+ }
121
+
108
122
  // Add terminal context if available
109
123
  const terminalManager = getTerminalManager();
110
124
  if (terminalManager) {
@@ -1,7 +1,7 @@
1
1
  import { hasToolCall, streamText } from 'ai';
2
2
  import { loadConfig } from '@agi-cli/sdk';
3
3
  import { getDb } from '@agi-cli/database';
4
- import { messageParts } from '@agi-cli/database/schema';
4
+ import { messageParts, sessions } from '@agi-cli/database/schema';
5
5
  import { eq } from 'drizzle-orm';
6
6
  import { resolveModel } from './provider.ts';
7
7
  import { resolveAgentConfig } from './agent-registry.ts';
@@ -32,6 +32,7 @@ import {
32
32
  createAbortHandler,
33
33
  createFinishHandler,
34
34
  } from './stream-handlers.ts';
35
+ import { getCompactionSystemPrompt, pruneSession } from './compaction.ts';
35
36
 
36
37
  export { enqueueAssistantRun, abortSession } from './session-queue.ts';
37
38
  export { getRunnerState } from './session-queue.ts';
@@ -78,10 +79,30 @@ async function runAssistant(opts: RunOpts) {
78
79
 
79
80
  const agentPrompt = agentCfg.prompt || '';
80
81
 
82
+ // For /compact command, use minimal history - the compaction context has everything needed
81
83
  const historyTimer = time('runner:buildHistory');
82
- const history = await buildHistoryMessages(db, opts.sessionId);
84
+ let history: Awaited<ReturnType<typeof buildHistoryMessages>>;
85
+ if (opts.isCompactCommand && opts.compactionContext) {
86
+ debugLog('[RUNNER] Using minimal history for /compact command');
87
+ history = [];
88
+ } else {
89
+ history = await buildHistoryMessages(db, opts.sessionId);
90
+ }
83
91
  historyTimer.end({ messages: history.length });
84
92
 
93
+ // Fetch session to get context summary for compaction
94
+ const sessionRows = await db
95
+ .select()
96
+ .from(sessions)
97
+ .where(eq(sessions.id, opts.sessionId))
98
+ .limit(1);
99
+ const contextSummary = sessionRows[0]?.contextSummary ?? undefined;
100
+ if (contextSummary) {
101
+ debugLog(
102
+ `[RUNNER] Using context summary from compaction (${contextSummary.length} chars)`,
103
+ );
104
+ }
105
+
85
106
  // FIX: For OAuth, we need to check if this is the first ASSISTANT message
86
107
  // The user message is already in history by this point, so history.length will be > 0
87
108
  // We need to add additionalSystemMessages on the first assistant turn
@@ -127,6 +148,7 @@ async function runAssistant(opts: RunOpts) {
127
148
  spoofPrompt: undefined,
128
149
  includeProjectTree: isFirstMessage,
129
150
  userContext: opts.userContext,
151
+ contextSummary,
130
152
  });
131
153
  oauthFullPromptComponents = fullPrompt.components;
132
154
 
@@ -157,6 +179,7 @@ async function runAssistant(opts: RunOpts) {
157
179
  spoofPrompt: undefined,
158
180
  includeProjectTree: isFirstMessage,
159
181
  userContext: opts.userContext,
182
+ contextSummary,
160
183
  });
161
184
  system = composed.prompt;
162
185
  systemComponents = composed.components;
@@ -169,6 +192,23 @@ async function runAssistant(opts: RunOpts) {
169
192
  })}`,
170
193
  );
171
194
 
195
+ // Inject compaction prompt if this is a /compact command
196
+ if (opts.isCompactCommand && opts.compactionContext) {
197
+ debugLog('[RUNNER] Injecting compaction context for /compact command');
198
+ const compactPrompt = getCompactionSystemPrompt();
199
+ // Add compaction instructions as system message
200
+ // Don't modify `system` directly as it may contain OAuth spoof prompt
201
+ additionalSystemMessages.push({
202
+ role: 'system',
203
+ content: compactPrompt,
204
+ });
205
+ // Add the conversation context as a USER message (Anthropic requires at least one user message)
206
+ additionalSystemMessages.push({
207
+ role: 'user',
208
+ content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${opts.compactionContext}\n</conversation-to-summarize>`,
209
+ });
210
+ }
211
+
172
212
  const toolsTimer = time('runner:discoverTools');
173
213
  const allTools = await discoverProjectTools(cfg.projectRoot);
174
214
  toolsTimer.end({ count: allTools.length });
@@ -286,7 +326,13 @@ async function runAssistant(opts: RunOpts) {
286
326
  updateMessageTokensIncremental,
287
327
  );
288
328
 
289
- const onError = createErrorHandler(opts, db, getStepIndex, sharedCtx);
329
+ const onError = createErrorHandler(
330
+ opts,
331
+ db,
332
+ getStepIndex,
333
+ sharedCtx,
334
+ runSessionLoop,
335
+ );
290
336
 
291
337
  const onAbort = createAbortHandler(opts, db, getStepIndex, sharedCtx);
292
338
 
@@ -491,6 +537,67 @@ async function runAssistant(opts: RunOpts) {
491
537
  } catch (err) {
492
538
  unsubscribeFinish();
493
539
  const payload = toErrorPayload(err);
540
+
541
+ // Check if this is a "prompt too long" error and auto-compact
542
+ const errorMessage = err instanceof Error ? err.message : String(err);
543
+ const errorCode = (err as { code?: string })?.code ?? '';
544
+ const responseBody = (err as { responseBody?: string })?.responseBody ?? '';
545
+ const apiErrorType = (err as { apiErrorType?: string })?.apiErrorType ?? '';
546
+ const combinedError = `${errorMessage} ${responseBody}`.toLowerCase();
547
+ debugLog(`[RUNNER] Error caught - message: ${errorMessage.slice(0, 100)}`);
548
+ debugLog(
549
+ `[RUNNER] Error caught - code: ${errorCode}, apiErrorType: ${apiErrorType}`,
550
+ );
551
+ debugLog(
552
+ `[RUNNER] Error caught - responseBody: ${responseBody.slice(0, 200)}`,
553
+ );
554
+ const isPromptTooLong =
555
+ combinedError.includes('prompt is too long') ||
556
+ combinedError.includes('maximum context length') ||
557
+ combinedError.includes('too many tokens') ||
558
+ combinedError.includes('context_length_exceeded') ||
559
+ combinedError.includes('request too large') ||
560
+ combinedError.includes('exceeds the model') ||
561
+ combinedError.includes('input is too long') ||
562
+ errorCode === 'context_length_exceeded' ||
563
+ apiErrorType === 'invalid_request_error';
564
+ debugLog(
565
+ `[RUNNER] isPromptTooLong: ${isPromptTooLong}, isCompactCommand: ${opts.isCompactCommand}`,
566
+ );
567
+
568
+ if (isPromptTooLong && !opts.isCompactCommand) {
569
+ debugLog(
570
+ '[RUNNER] Prompt too long - auto-compacting and will retry on next user message',
571
+ );
572
+ try {
573
+ const pruneResult = await pruneSession(db, opts.sessionId);
574
+ debugLog(
575
+ `[RUNNER] Auto-pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
576
+ );
577
+
578
+ // Publish an error event (name: ContextOverflow) asking the user to retry
579
+ publish({
580
+ type: 'error',
581
+ sessionId: opts.sessionId,
582
+ payload: {
583
+ ...payload,
584
+ message: `Context too large (${errorMessage.match(/\d+/)?.[0] || 'many'} tokens). Auto-compacted old tool results. Please retry your message.`,
585
+ name: 'ContextOverflow',
586
+ },
587
+ });
588
+
589
+ // Complete the message as failed
590
+ try {
591
+ await completeAssistantMessage({}, opts, db);
592
+ } catch {}
593
+ return;
594
+ } catch (pruneErr) {
595
+ debugLog(
596
+ `[RUNNER] Auto-prune failed: ${pruneErr instanceof Error ? pruneErr.message : String(pruneErr)}`,
597
+ );
598
+ }
599
+ }
600
+
494
601
  debugLog(`[RUNNER] Error during stream: ${payload.message}`);
495
602
  debugLog(
496
603
  `[RUNNER] Error stack: ${err instanceof Error ? err.stack : 'no stack'}`,
@@ -11,6 +11,8 @@ export type RunOpts = {
11
11
  userContext?: string;
12
12
  reasoning?: boolean;
13
13
  abortSignal?: AbortSignal;
14
+ isCompactCommand?: boolean;
15
+ compactionContext?: string;
14
16
  };
15
17
 
16
18
  type RunnerState = { queue: RunOpts[]; running: boolean };
@@ -13,8 +13,11 @@ import {
13
13
  isOverflow,
14
14
  getModelLimits,
15
15
  type TokenUsage,
16
+ markSessionCompacted,
17
+ performAutoCompaction,
16
18
  } from './compaction.ts';
17
19
  import { debugLog } from './debug.ts';
20
+ import { enqueueAssistantRun } from './session-queue.ts';
18
21
 
19
22
  type StepFinishEvent = {
20
23
  usage?: UsageData;
@@ -131,14 +134,143 @@ export function createErrorHandler(
131
134
  db: Awaited<ReturnType<typeof getDb>>,
132
135
  getStepIndex: () => number,
133
136
  sharedCtx: ToolAdapterContext,
137
+ retryCallback?: (sessionId: string) => Promise<void>,
134
138
  ) {
135
139
  return async (err: unknown) => {
136
140
  const errorPayload = toErrorPayload(err);
137
141
  const isApiError = APICallError.isInstance(err);
138
142
  const stepIndex = getStepIndex();
139
143
 
144
+ // Check if this is a prompt-too-long error and auto-compact
145
+ // Handle nested error structures from AI SDK
146
+ const errObj = err as Record<string, unknown>;
147
+ const nestedError = (errObj?.error as Record<string, unknown>)?.error as
148
+ | Record<string, unknown>
149
+ | undefined;
150
+ const errorCode =
151
+ (errObj?.code as string) ?? (nestedError?.code as string) ?? '';
152
+ const errorType =
153
+ (errObj?.apiErrorType as string) ?? (nestedError?.type as string) ?? '';
154
+ const fullErrorStr = JSON.stringify(err).toLowerCase();
155
+
156
+ const isPromptTooLong =
157
+ fullErrorStr.includes('prompt is too long') ||
158
+ fullErrorStr.includes('maximum context length') ||
159
+ fullErrorStr.includes('too many tokens') ||
160
+ fullErrorStr.includes('context_length_exceeded') ||
161
+ fullErrorStr.includes('request too large') ||
162
+ fullErrorStr.includes('exceeds the model') ||
163
+ fullErrorStr.includes('context window') ||
164
+ fullErrorStr.includes('input is too long') ||
165
+ errorCode === 'context_length_exceeded' ||
166
+ errorType === 'invalid_request_error';
167
+
168
+ debugLog(
169
+ `[stream-handlers] isPromptTooLong: ${isPromptTooLong}, errorCode: ${errorCode}, errorType: ${errorType}`,
170
+ );
171
+
172
+ if (isPromptTooLong && !opts.isCompactCommand) {
173
+ debugLog(
174
+ '[stream-handlers] Prompt too long detected, auto-compacting...',
175
+ );
176
+ let compactionSucceeded = false;
177
+ try {
178
+ // Stream the compaction summary with proper publish function
179
+ const compactResult = await performAutoCompaction(
180
+ db,
181
+ opts.sessionId,
182
+ opts.assistantMessageId,
183
+ publish,
184
+ opts.provider,
185
+ opts.model,
186
+ );
187
+ if (compactResult.success) {
188
+ debugLog(
189
+ `[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
190
+ );
191
+ compactionSucceeded = true;
192
+ } else {
193
+ debugLog(
194
+ `[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
195
+ );
196
+ // Fall back to simple prune
197
+ const pruneResult = await pruneSession(db, opts.sessionId);
198
+ debugLog(
199
+ `[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
200
+ );
201
+ compactionSucceeded = pruneResult.pruned > 0;
202
+ }
203
+ } catch (compactErr) {
204
+ debugLog(
205
+ `[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
206
+ );
207
+ }
208
+
209
+ // If compaction succeeded, complete this message and trigger retry
210
+ if (compactionSucceeded) {
211
+ // Mark this compaction message as completed
212
+ await db
213
+ .update(messages)
214
+ .set({
215
+ status: 'completed',
216
+ })
217
+ .where(eq(messages.id, opts.assistantMessageId));
218
+
219
+ // Publish completion event for the compaction message
220
+ publish({
221
+ type: 'message.completed',
222
+ sessionId: opts.sessionId,
223
+ payload: {
224
+ id: opts.assistantMessageId,
225
+ autoCompacted: true,
226
+ },
227
+ });
228
+
229
+ // Trigger retry - create a new assistant message and enqueue the run
230
+ if (retryCallback) {
231
+ debugLog('[stream-handlers] Triggering retry after compaction...');
232
+ const newAssistantMessageId = crypto.randomUUID();
233
+ await db.insert(messages).values({
234
+ id: newAssistantMessageId,
235
+ sessionId: opts.sessionId,
236
+ role: 'assistant',
237
+ status: 'pending',
238
+ agent: opts.agent,
239
+ provider: opts.provider,
240
+ model: opts.model,
241
+ createdAt: Date.now(),
242
+ });
243
+
244
+ publish({
245
+ type: 'message.created',
246
+ sessionId: opts.sessionId,
247
+ payload: { id: newAssistantMessageId, role: 'assistant' },
248
+ });
249
+
250
+ // Enqueue the retry with the new assistant message
251
+ enqueueAssistantRun(
252
+ {
253
+ ...opts,
254
+ assistantMessageId: newAssistantMessageId,
255
+ },
256
+ retryCallback,
257
+ );
258
+ } else {
259
+ debugLog(
260
+ '[stream-handlers] No retryCallback provided, cannot auto-retry',
261
+ );
262
+ }
263
+
264
+ return; // Don't show error, compaction and retry handled it
265
+ }
266
+ }
267
+
140
268
  // Create error part for UI display
141
269
  const errorPartId = crypto.randomUUID();
270
+ const displayMessage =
271
+ isPromptTooLong && !opts.isCompactCommand
272
+ ? `${errorPayload.message}. Context auto-compacted - please retry your message.`
273
+ : errorPayload.message;
142
274
  await db.insert(messageParts).values({
143
275
  id: errorPartId,
144
276
  messageId: opts.assistantMessageId,
@@ -146,7 +278,7 @@ export function createErrorHandler(
146
278
  stepIndex,
147
279
  type: 'error',
148
280
  content: JSON.stringify({
149
- message: errorPayload.message,
281
+ message: displayMessage,
150
282
  type: errorPayload.type,
151
283
  details: errorPayload.details,
152
284
  isAborted: false,
@@ -163,11 +295,12 @@ export function createErrorHandler(
163
295
  .update(messages)
164
296
  .set({
165
297
  status: 'error',
166
- error: errorPayload.message,
298
+ error: displayMessage,
167
299
  errorType: errorPayload.type,
168
300
  errorDetails: JSON.stringify({
169
301
  ...errorPayload.details,
170
302
  isApiError,
303
+ autoCompacted: isPromptTooLong && !opts.isCompactCommand,
171
304
  }),
172
305
  isAborted: false,
173
306
  })
@@ -180,10 +313,11 @@ export function createErrorHandler(
180
313
  payload: {
181
314
  messageId: opts.assistantMessageId,
182
315
  partId: errorPartId,
183
- error: errorPayload.message,
316
+ error: displayMessage,
184
317
  errorType: errorPayload.type,
185
318
  details: errorPayload.details,
186
319
  isAborted: false,
320
+ autoCompacted: isPromptTooLong && !opts.isCompactCommand,
187
321
  },
188
322
  });
189
323
  };
@@ -273,6 +407,43 @@ export function createFinishHandler(
273
407
  await completeAssistantMessageFn(fin, opts, db);
274
408
  } catch {}
275
409
 
410
+ // If this was a /compact command, mark old parts as compacted
411
+ // Only mark as compacted if the response was successful and has content
412
+ if (opts.isCompactCommand && fin.finishReason !== 'error') {
413
+ // Verify the assistant actually generated text content (the summary)
414
+ const assistantParts = await db
415
+ .select()
416
+ .from(messageParts)
417
+ .where(eq(messageParts.messageId, opts.assistantMessageId));
418
+ const hasTextContent = assistantParts.some(
419
+ (p) => p.type === 'text' && p.content && p.content !== '{"text":""}',
420
+ );
421
+
422
+ if (!hasTextContent) {
423
+ debugLog(
424
+ '[stream-handlers] /compact finished but no summary generated, skipping compaction marking',
425
+ );
426
+ } else {
427
+ try {
428
+ debugLog(
429
+ `[stream-handlers] /compact complete, marking session compacted`,
430
+ );
431
+ const result = await markSessionCompacted(
432
+ db,
433
+ opts.sessionId,
434
+ opts.assistantMessageId,
435
+ );
436
+ debugLog(
437
+ `[stream-handlers] Compacted ${result.compacted} parts, saved ~${result.saved} tokens`,
438
+ );
439
+ } catch (err) {
440
+ debugLog(
441
+ `[stream-handlers] Compaction failed: ${err instanceof Error ? err.message : String(err)}`,
442
+ );
443
+ }
444
+ }
445
+ }
446
+
276
447
  // Use session totals from DB for accurate cost calculation
277
448
  const sessRows = await db
278
449
  .select()