npm - @agi-cli/server - Versions diffs - 0.1.112 → 0.1.114 - Mend

@agi-cli/server 0.1.112 → 0.1.114

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/package.json +3 -3
package/src/index.ts +4 -0
package/src/routes/session-files.ts +387 -0
package/src/runtime/compaction.ts +396 -114
package/src/runtime/history-builder.ts +7 -7
package/src/runtime/message-service.ts +52 -9
package/src/runtime/prompt.ts +14 -0
package/src/runtime/runner.ts +110 -3
package/src/runtime/session-queue.ts +2 -0
package/src/runtime/stream-handlers.ts +174 -3

package/src/runtime/message-service.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { generateText } from 'ai';
+import { generateText, streamText } from 'ai';
 import { eq } from 'drizzle-orm';
 import type { AGIConfig } from '@agi-cli/sdk';
 import type { DB } from '@agi-cli/database';
@@ -9,6 +9,7 @@ import { runSessionLoop } from './runner.ts';
 import { resolveModel } from './provider.ts';
 import { getFastModel, type ProviderId } from '@agi-cli/sdk';
 import { debugLog } from './debug.ts';
+import { isCompactCommand, buildCompactionContext } from './compaction.ts';
 type SessionRow = typeof sessions.$inferSelect;
@@ -119,6 +120,28 @@ export async function dispatchAssistantMessage(
 		`[MESSAGE_SERVICE] Enqueuing assistant run with userContext: ${userContext ? `${userContext.substring(0, 50)}...` : 'NONE'}`,
 	);
+	// Detect /compact command and build context with model-aware limits
+	const isCompact = isCompactCommand(content);
+	let compactionContext: string | undefined;
+	if (isCompact) {
+		debugLog('[MESSAGE_SERVICE] Detected /compact command, building context');
+		const { getModelLimits } = await import('./compaction.ts');
+		const limits = getModelLimits(provider, model);
+		// Use 50% of context window for compaction, minimum 15k tokens
+		const contextTokenLimit = limits
+			? Math.max(Math.floor(limits.context * 0.5), 15000)
+			: 15000;
+		compactionContext = await buildCompactionContext(
+			db,
+			sessionId,
+			contextTokenLimit,
+		);
+		debugLog(
+			`[message-service] /compact context length: ${compactionContext.length}, limit: ${contextTokenLimit} tokens`,
+		);
+	}
 	enqueueAssistantRun(
 		{
 			sessionId,
@@ -130,6 +153,8 @@ export async function dispatchAssistantMessage(
 			oneShot: Boolean(oneShot),
 			userContext,
 			reasoning,
+			isCompactCommand: isCompact,
+			compactionContext,
 		},
 		runSessionLoop,
 	);
@@ -240,7 +265,11 @@ async function generateSessionTitle(args: {
 		// Use a smaller, faster model for title generation
 		// Look up the cheapest/fastest model from the catalog for this provider
-		const titleModel = getFastModel(provider) ?? modelName;
+		// For OpenAI OAuth, use codex-mini as it works with ChatGPT backend
+		const titleModel =
+			needsSpoof && provider === 'openai'
+				? 'gpt-5.1-codex-mini'
+				: (getFastModel(provider) ?? modelName);
 		debugLog(`[TITLE_GEN] Using title model: ${titleModel}`);
 		const model = await resolveModel(provider, titleModel, cfg);
@@ -291,15 +320,29 @@ async function generateSessionTitle(args: {
 			);
 		}
-		debugLog('[TITLE_GEN] Calling generateText...');
 		let modelTitle = '';
 		try {
-			const out = await generateText({
-				model,
-				system,
-				messages: messagesArray,
-			});
-			modelTitle = (out?.text || '').trim();
+			// ChatGPT backend requires streaming - use streamText for OAuth
+			if (needsSpoof) {
+				debugLog('[TITLE_GEN] Using streamText for OAuth...');
+				const result = streamText({
+					model,
+					system,
+					messages: messagesArray,
+				});
+				for await (const chunk of result.textStream) {
+					modelTitle += chunk;
+				}
+				modelTitle = modelTitle.trim();
+			} else {
+				debugLog('[TITLE_GEN] Using generateText...');
+				const out = await generateText({
+					model,
+					system,
+					messages: messagesArray,
+				});
+				modelTitle = (out?.text || '').trim();
+			}
 			debugLog('[TITLE_GEN] Raw response from model:');
 			debugLog(`[TITLE_GEN] "${modelTitle}"`);

package/src/runtime/prompt.ts CHANGED Viewed

@@ -28,6 +28,7 @@ export async function composeSystemPrompt(options: {
 	includeEnvironment?: boolean;
 	includeProjectTree?: boolean;
 	userContext?: string;
+	contextSummary?: string;
 }): Promise<ComposedSystemPrompt> {
 	const components: string[] = [];
 	if (options.spoofPrompt) {
@@ -105,6 +106,19 @@ export async function composeSystemPrompt(options: {
 		components.push('user-context');
 	}
+	// Add compacted conversation summary if present
+	if (options.contextSummary?.trim()) {
+		const summaryBlock = [
+			'<compacted-conversation-summary>',
+			'The conversation was compacted to save context. Here is a summary of the previous context:',
+			'',
+			options.contextSummary.trim(),
+			'</compacted-conversation-summary>',
+		].join('\n');
+		parts.push(summaryBlock);
+		components.push('context-summary');
+	}
 	// Add terminal context if available
 	const terminalManager = getTerminalManager();
 	if (terminalManager) {

package/src/runtime/runner.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import { hasToolCall, streamText } from 'ai';
 import { loadConfig } from '@agi-cli/sdk';
 import { getDb } from '@agi-cli/database';
-import { messageParts } from '@agi-cli/database/schema';
+import { messageParts, sessions } from '@agi-cli/database/schema';
 import { eq } from 'drizzle-orm';
 import { resolveModel } from './provider.ts';
 import { resolveAgentConfig } from './agent-registry.ts';
@@ -32,6 +32,7 @@ import {
 	createAbortHandler,
 	createFinishHandler,
 } from './stream-handlers.ts';
+import { getCompactionSystemPrompt, pruneSession } from './compaction.ts';
 export { enqueueAssistantRun, abortSession } from './session-queue.ts';
 export { getRunnerState } from './session-queue.ts';
@@ -78,10 +79,30 @@ async function runAssistant(opts: RunOpts) {
 	const agentPrompt = agentCfg.prompt || '';
+	// For /compact command, use minimal history - the compaction context has everything needed
 	const historyTimer = time('runner:buildHistory');
-	const history = await buildHistoryMessages(db, opts.sessionId);
+	let history: Awaited<ReturnType<typeof buildHistoryMessages>>;
+	if (opts.isCompactCommand && opts.compactionContext) {
+		debugLog('[RUNNER] Using minimal history for /compact command');
+		history = [];
+	} else {
+		history = await buildHistoryMessages(db, opts.sessionId);
+	}
 	historyTimer.end({ messages: history.length });
+	// Fetch session to get context summary for compaction
+	const sessionRows = await db
+		.select()
+		.from(sessions)
+		.where(eq(sessions.id, opts.sessionId))
+		.limit(1);
+	const contextSummary = sessionRows[0]?.contextSummary ?? undefined;
+	if (contextSummary) {
+		debugLog(
+			`[RUNNER] Using context summary from compaction (${contextSummary.length} chars)`,
+		);
+	}
 	// FIX: For OAuth, we need to check if this is the first ASSISTANT message
 	// The user message is already in history by this point, so history.length will be > 0
 	// We need to add additionalSystemMessages on the first assistant turn
@@ -127,6 +148,7 @@ async function runAssistant(opts: RunOpts) {
 			spoofPrompt: undefined,
 			includeProjectTree: isFirstMessage,
 			userContext: opts.userContext,
+			contextSummary,
 		});
 		oauthFullPromptComponents = fullPrompt.components;
@@ -157,6 +179,7 @@ async function runAssistant(opts: RunOpts) {
 			spoofPrompt: undefined,
 			includeProjectTree: isFirstMessage,
 			userContext: opts.userContext,
+			contextSummary,
 		});
 		system = composed.prompt;
 		systemComponents = composed.components;
@@ -169,6 +192,23 @@ async function runAssistant(opts: RunOpts) {
 		})}`,
 	);
+	// Inject compaction prompt if this is a /compact command
+	if (opts.isCompactCommand && opts.compactionContext) {
+		debugLog('[RUNNER] Injecting compaction context for /compact command');
+		const compactPrompt = getCompactionSystemPrompt();
+		// Add compaction instructions as system message
+		// Don't modify `system` directly as it may contain OAuth spoof prompt
+		additionalSystemMessages.push({
+			role: 'system',
+			content: compactPrompt,
+		});
+		// Add the conversation context as a USER message (Anthropic requires at least one user message)
+		additionalSystemMessages.push({
+			role: 'user',
+			content: `Please summarize this conversation:\n\n<conversation-to-summarize>\n${opts.compactionContext}\n</conversation-to-summarize>`,
+		});
+	}
 	const toolsTimer = time('runner:discoverTools');
 	const allTools = await discoverProjectTools(cfg.projectRoot);
 	toolsTimer.end({ count: allTools.length });
@@ -286,7 +326,13 @@ async function runAssistant(opts: RunOpts) {
 		updateMessageTokensIncremental,
 	);
-	const onError = createErrorHandler(opts, db, getStepIndex, sharedCtx);
+	const onError = createErrorHandler(
+		opts,
+		db,
+		getStepIndex,
+		sharedCtx,
+		runSessionLoop,
+	);
 	const onAbort = createAbortHandler(opts, db, getStepIndex, sharedCtx);
@@ -491,6 +537,67 @@ async function runAssistant(opts: RunOpts) {
 	} catch (err) {
 		unsubscribeFinish();
 		const payload = toErrorPayload(err);
+		// Check if this is a "prompt too long" error and auto-compact
+		const errorMessage = err instanceof Error ? err.message : String(err);
+		const errorCode = (err as { code?: string })?.code ?? '';
+		const responseBody = (err as { responseBody?: string })?.responseBody ?? '';
+		const apiErrorType = (err as { apiErrorType?: string })?.apiErrorType ?? '';
+		const combinedError = `${errorMessage} ${responseBody}`.toLowerCase();
+		debugLog(`[RUNNER] Error caught - message: ${errorMessage.slice(0, 100)}`);
+		debugLog(
+			`[RUNNER] Error caught - code: ${errorCode}, apiErrorType: ${apiErrorType}`,
+		);
+		debugLog(
+			`[RUNNER] Error caught - responseBody: ${responseBody.slice(0, 200)}`,
+		);
+		const isPromptTooLong =
+			combinedError.includes('prompt is too long') ||
+			combinedError.includes('maximum context length') ||
+			combinedError.includes('too many tokens') ||
+			combinedError.includes('context_length_exceeded') ||
+			combinedError.includes('request too large') ||
+			combinedError.includes('exceeds the model') ||
+			combinedError.includes('input is too long') ||
+			errorCode === 'context_length_exceeded' ||
+			apiErrorType === 'invalid_request_error';
+		debugLog(
+			`[RUNNER] isPromptTooLong: ${isPromptTooLong}, isCompactCommand: ${opts.isCompactCommand}`,
+		);
+		if (isPromptTooLong && !opts.isCompactCommand) {
+			debugLog(
+				'[RUNNER] Prompt too long - auto-compacting and will retry on next user message',
+			);
+			try {
+				const pruneResult = await pruneSession(db, opts.sessionId);
+				debugLog(
+					`[RUNNER] Auto-pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
+				);
+				// Publish a system message to inform the user
+				publish({
+					type: 'error',
+					sessionId: opts.sessionId,
+					payload: {
+						...payload,
+						message: `Context too large (${errorMessage.match(/\d+/)?.[0] || 'many'} tokens). Auto-compacted old tool results. Please retry your message.`,
+						name: 'ContextOverflow',
+					},
+				});
+				// Complete the message as failed
+				try {
+					await completeAssistantMessage({}, opts, db);
+				} catch {}
+				return;
+			} catch (pruneErr) {
+				debugLog(
+					`[RUNNER] Auto-prune failed: ${pruneErr instanceof Error ? pruneErr.message : String(pruneErr)}`,
+				);
+			}
+		}
 		debugLog(`[RUNNER] Error during stream: ${payload.message}`);
 		debugLog(
 			`[RUNNER] Error stack: ${err instanceof Error ? err.stack : 'no stack'}`,

package/src/runtime/session-queue.ts CHANGED Viewed

@@ -11,6 +11,8 @@ export type RunOpts = {
 	userContext?: string;
 	reasoning?: boolean;
 	abortSignal?: AbortSignal;
+	isCompactCommand?: boolean;
+	compactionContext?: string;
 };
 type RunnerState = { queue: RunOpts[]; running: boolean };

package/src/runtime/stream-handlers.ts CHANGED Viewed

@@ -13,8 +13,11 @@ import {
 	isOverflow,
 	getModelLimits,
 	type TokenUsage,
+	markSessionCompacted,
+	performAutoCompaction,
 } from './compaction.ts';
 import { debugLog } from './debug.ts';
+import { enqueueAssistantRun } from './session-queue.ts';
 type StepFinishEvent = {
 	usage?: UsageData;
@@ -131,14 +134,143 @@ export function createErrorHandler(
 	db: Awaited<ReturnType<typeof getDb>>,
 	getStepIndex: () => number,
 	sharedCtx: ToolAdapterContext,
+	retryCallback?: (sessionId: string) => Promise<void>,
 ) {
 	return async (err: unknown) => {
 		const errorPayload = toErrorPayload(err);
 		const isApiError = APICallError.isInstance(err);
 		const stepIndex = getStepIndex();
+		// Check if this is a prompt-too-long error and auto-compact
+		// Handle nested error structures from AI SDK
+		const errObj = err as Record<string, unknown>;
+		const nestedError = (errObj?.error as Record<string, unknown>)?.error as
+			| Record<string, unknown>
+			| undefined;
+		const errorCode =
+			(errObj?.code as string) ?? (nestedError?.code as string) ?? '';
+		const errorType =
+			(errObj?.apiErrorType as string) ?? (nestedError?.type as string) ?? '';
+		const fullErrorStr = JSON.stringify(err).toLowerCase();
+		const isPromptTooLong =
+			fullErrorStr.includes('prompt is too long') ||
+			fullErrorStr.includes('maximum context length') ||
+			fullErrorStr.includes('too many tokens') ||
+			fullErrorStr.includes('context_length_exceeded') ||
+			fullErrorStr.includes('request too large') ||
+			fullErrorStr.includes('exceeds the model') ||
+			fullErrorStr.includes('context window') ||
+			fullErrorStr.includes('input is too long') ||
+			errorCode === 'context_length_exceeded' ||
+			errorType === 'invalid_request_error';
+		debugLog(
+			`[stream-handlers] isPromptTooLong: ${isPromptTooLong}, errorCode: ${errorCode}, errorType: ${errorType}`,
+		);
+		if (isPromptTooLong && !opts.isCompactCommand) {
+			debugLog(
+				'[stream-handlers] Prompt too long detected, auto-compacting...',
+			);
+			let compactionSucceeded = false;
+			try {
+				// Stream the compaction summary with proper publish function
+				const compactResult = await performAutoCompaction(
+					db,
+					opts.sessionId,
+					opts.assistantMessageId,
+					publish,
+					opts.provider,
+					opts.model,
+				);
+				if (compactResult.success) {
+					debugLog(
+						`[stream-handlers] Auto-compaction succeeded: ${compactResult.summary?.slice(0, 100)}...`,
+					);
+					compactionSucceeded = true;
+				} else {
+					debugLog(
+						`[stream-handlers] Auto-compaction failed: ${compactResult.error}, falling back to prune`,
+					);
+					// Fall back to simple prune
+					const pruneResult = await pruneSession(db, opts.sessionId);
+					debugLog(
+						`[stream-handlers] Fallback pruned ${pruneResult.pruned} parts, saved ~${pruneResult.saved} tokens`,
+					);
+					compactionSucceeded = pruneResult.pruned > 0;
+				}
+			} catch (compactErr) {
+				debugLog(
+					`[stream-handlers] Auto-compact error: ${compactErr instanceof Error ? compactErr.message : String(compactErr)}`,
+				);
+			}
+			// If compaction succeeded, complete this message and trigger retry
+			if (compactionSucceeded) {
+				// Mark this compaction message as completed
+				await db
+					.update(messages)
+					.set({
+						status: 'completed',
+					})
+					.where(eq(messages.id, opts.assistantMessageId));
+				// Publish completion event for the compaction message
+				publish({
+					type: 'message.completed',
+					sessionId: opts.sessionId,
+					payload: {
+						id: opts.assistantMessageId,
+						autoCompacted: true,
+					},
+				});
+				// Trigger retry - create a new assistant message and enqueue the run
+				if (retryCallback) {
+					debugLog('[stream-handlers] Triggering retry after compaction...');
+					const newAssistantMessageId = crypto.randomUUID();
+					await db.insert(messages).values({
+						id: newAssistantMessageId,
+						sessionId: opts.sessionId,
+						role: 'assistant',
+						status: 'pending',
+						agent: opts.agent,
+						provider: opts.provider,
+						model: opts.model,
+						createdAt: Date.now(),
+					});
+					publish({
+						type: 'message.created',
+						sessionId: opts.sessionId,
+						payload: { id: newAssistantMessageId, role: 'assistant' },
+					});
+					// Enqueue the retry with the new assistant message
+					enqueueAssistantRun(
+						{
+							...opts,
+							assistantMessageId: newAssistantMessageId,
+						},
+						retryCallback,
+					);
+				} else {
+					debugLog(
+						'[stream-handlers] No retryCallback provided, cannot auto-retry',
+					);
+				}
+				return; // Don't show error, compaction and retry handled it
+			}
+		}
 		// Create error part for UI display
 		const errorPartId = crypto.randomUUID();
+		const displayMessage =
+			isPromptTooLong && !opts.isCompactCommand
+				? `${errorPayload.message}. Context auto-compacted - please retry your message.`
+				: errorPayload.message;
 		await db.insert(messageParts).values({
 			id: errorPartId,
 			messageId: opts.assistantMessageId,
@@ -146,7 +278,7 @@ export function createErrorHandler(
 			stepIndex,
 			type: 'error',
 			content: JSON.stringify({
-				message: errorPayload.message,
+				message: displayMessage,
 				type: errorPayload.type,
 				details: errorPayload.details,
 				isAborted: false,
@@ -163,11 +295,12 @@ export function createErrorHandler(
 			.update(messages)
 			.set({
 				status: 'error',
-				error: errorPayload.message,
+				error: displayMessage,
 				errorType: errorPayload.type,
 				errorDetails: JSON.stringify({
 					...errorPayload.details,
 					isApiError,
+					autoCompacted: isPromptTooLong && !opts.isCompactCommand,
 				}),
 				isAborted: false,
 			})
@@ -180,10 +313,11 @@ export function createErrorHandler(
 			payload: {
 				messageId: opts.assistantMessageId,
 				partId: errorPartId,
-				error: errorPayload.message,
+				error: displayMessage,
 				errorType: errorPayload.type,
 				details: errorPayload.details,
 				isAborted: false,
+				autoCompacted: isPromptTooLong && !opts.isCompactCommand,
 			},
 		});
 	};
@@ -273,6 +407,43 @@ export function createFinishHandler(
 			await completeAssistantMessageFn(fin, opts, db);
 		} catch {}
+		// If this was a /compact command, mark old parts as compacted
+		// Only mark as compacted if the response was successful and has content
+		if (opts.isCompactCommand && fin.finishReason !== 'error') {
+			// Verify the assistant actually generated text content (the summary)
+			const assistantParts = await db
+				.select()
+				.from(messageParts)
+				.where(eq(messageParts.messageId, opts.assistantMessageId));
+			const hasTextContent = assistantParts.some(
+				(p) => p.type === 'text' && p.content && p.content !== '{"text":""}',
+			);
+			if (!hasTextContent) {
+				debugLog(
+					'[stream-handlers] /compact finished but no summary generated, skipping compaction marking',
+				);
+			} else {
+				try {
+					debugLog(
+						`[stream-handlers] /compact complete, marking session compacted`,
+					);
+					const result = await markSessionCompacted(
+						db,
+						opts.sessionId,
+						opts.assistantMessageId,
+					);
+					debugLog(
+						`[stream-handlers] Compacted ${result.compacted} parts, saved ~${result.saved} tokens`,
+					);
+				} catch (err) {
+					debugLog(
+						`[stream-handlers] Compaction failed: ${err instanceof Error ? err.message : String(err)}`,
+					);
+				}
+			}
+		}
 		// Use session totals from DB for accurate cost calculation
 		const sessRows = await db
 			.select()