@agi-cli/server 0.1.61 → 0.1.63
This diff compares publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/package.json +3 -3
- package/src/openapi/spec.ts +47 -0
- package/src/routes/git.ts +514 -426
- package/src/runtime/cache-optimizer.ts +51 -29
- package/src/runtime/db-operations.ts +48 -43
- package/src/runtime/runner.ts +248 -99
- package/src/runtime/stream-handlers.ts +209 -175

package/src/runtime/cache-optimizer.ts
CHANGED

@@ -1,23 +1,5 @@
 import type { ModelMessage } from 'ai';
 
-type SystemMessage =
-  | string
-  | Array<{
-      type: 'text';
-      text: string;
-      cache_control?: { type: 'ephemeral' };
-    }>;
-
-interface ContentPart {
-  type: string;
-  [key: string]: unknown;
-  providerOptions?: {
-    anthropic?: {
-      cacheControl?: { type: 'ephemeral' };
-    };
-  };
-}
-
 /**
  * Adds cache control to messages for prompt caching optimization.
  * Anthropic supports caching for system messages, tools, and long context.
@@ -27,7 +9,13 @@ export function addCacheControl(
   system: string | undefined,
   messages: ModelMessage[],
 ): {
-  system?: SystemMessage;
+  system?:
+    | string
+    | Array<{
+        type: 'text';
+        text: string;
+        cache_control?: { type: 'ephemeral' };
+      }>;
   messages: ModelMessage[];
 } {
   // Only Anthropic supports prompt caching currently
@@ -36,7 +24,7 @@ export function addCacheControl(
   }
 
   // Convert system to cacheable format if it's long enough
-  let cachedSystem:
+  let cachedSystem: any = system;
   if (system && system.length > 1024) {
     // Anthropic requires 1024+ tokens for Claude Sonnet/Opus
     cachedSystem = [
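Past the 1024-character threshold, the system string is rewrapped into Anthropic's cacheable block form. Based on the SystemMessage shape removed above, this branch plausibly builds something like the following (illustrative sketch, not the package's exact code):

    const cachedSystem = [
      {
        type: 'text',
        text: system,
        // ephemeral: cached server-side and reused across subsequent requests
        cache_control: { type: 'ephemeral' },
      },
    ];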
@@ -73,21 +61,55 @@ export function addCacheControl(
         // Add cache control to the last content part of that message
         const lastPart = targetMsg.content[targetMsg.content.length - 1];
         if (lastPart && typeof lastPart === 'object' && 'type' in lastPart) {
-          (lastPart as ContentPart).providerOptions = {
+          (lastPart as any).providerOptions = {
             anthropic: { cacheControl: { type: 'ephemeral' } },
           };
         }
       }
     }
 
-    return {
-
-
-
+    return { system: cachedSystem, messages: cachedMessages };
+  }
+
+  return { system: cachedSystem, messages };
+}
+
+/**
+ * Truncates old messages to reduce context size while keeping recent context.
+ * Strategy: Keep system message + last N messages
+ */
+export function truncateHistory(
+  messages: ModelMessage[],
+  maxMessages = 20,
+): ModelMessage[] {
+  if (messages.length <= maxMessages) {
+    return messages;
+  }
+
+  // Keep the most recent messages
+  return messages.slice(-maxMessages);
+}
+
+/**
+ * Estimates token count (rough approximation: ~4 chars per token)
+ */
+export function estimateTokens(text: string): number {
+  return Math.ceil(text.length / 4);
+}
+
+/**
+ * Summarizes tool results if they're too long
+ */
+export function summarizeToolResult(result: unknown, maxLength = 5000): string {
+  const str = typeof result === 'string' ? result : JSON.stringify(result);
+
+  if (str.length <= maxLength) {
+    return str;
   }
 
-
-
-
-
+  // Truncate and add indicator
+  return (
+    str.slice(0, maxLength) +
+    `\n\n[... truncated ${str.length - maxLength} characters]`
+  );
 }
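The three helpers added above are self-contained and easy to reuse elsewhere; an illustrative usage sketch (input values hypothetical):

    import { truncateHistory, estimateTokens, summarizeToolResult } from './cache-optimizer.ts';

    const recent = truncateHistory(history, 20);          // keep only the last 20 messages
    const approx = estimateTokens(systemPrompt);          // ~4 characters per token heuristic
    const compact = summarizeToolResult(bigToolOutput);   // cap long tool output at 5000 chars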
package/src/runtime/db-operations.ts
CHANGED

@@ -11,20 +11,13 @@ type UsageData = {
   reasoningTokens?: number;
 };
 
-interface ProviderMetadata {
-  openai?: {
-    cachedPromptTokens?: number;
-  };
-  [key: string]: unknown;
-}
-
 /**
  * Updates session token counts incrementally after each step.
  * Note: onStepFinish.usage is CUMULATIVE per message, so we compute DELTA and add to session.
  */
 export async function updateSessionTokensIncremental(
   usage: UsageData,
-  providerMetadata: ProviderMetadata | undefined,
+  providerMetadata: Record<string, any> | undefined,
   opts: RunOpts,
   db: Awaited<ReturnType<typeof getDb>>,
 ) {
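With the ProviderMetadata interface gone, callers pass the raw provider metadata straight through, and the OpenAI cached-token field is read defensively. Illustrative access, using the same field path this file reads further down:

    const cached = providerMetadata?.openai?.cachedPromptTokens != null
      ? Number(providerMetadata.openai.cachedPromptTokens)
      : 0; // fall back when the provider reports no cache usage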
@@ -136,7 +129,7 @@ export async function updateSessionTokens(
  */
 export async function updateMessageTokensIncremental(
   usage: UsageData,
-  providerMetadata: ProviderMetadata | undefined,
+  providerMetadata: Record<string, any> | undefined,
   opts: RunOpts,
   db: Awaited<ReturnType<typeof getDb>>,
 ) {
@@ -155,74 +148,86 @@ export async function updateMessageTokensIncremental(
     const priorReasoning = Number(msg.reasoningTokens ?? 0);
 
     // Treat usage as cumulative per-message - REPLACE not ADD
-    const
+    const cumPrompt =
       usage.inputTokens != null ? Number(usage.inputTokens) : priorPrompt;
-    const
+    const cumCompletion =
       usage.outputTokens != null ? Number(usage.outputTokens) : priorCompletion;
-    const
+    const cumReasoning =
       usage.reasoningTokens != null
         ? Number(usage.reasoningTokens)
        : priorReasoning;
 
-    const
+    const cumCached =
       usage.cachedInputTokens != null
        ? Number(usage.cachedInputTokens)
        : providerMetadata?.openai?.cachedPromptTokens != null
          ? Number(providerMetadata.openai.cachedPromptTokens)
          : priorCached;
 
+    const cumTotal =
+      usage.totalTokens != null
+        ? Number(usage.totalTokens)
+        : cumPrompt + cumCompletion + cumReasoning;
+
     await db
       .update(messages)
       .set({
-        promptTokens:
-        completionTokens:
-
-
+        promptTokens: cumPrompt,
+        completionTokens: cumCompletion,
+        totalTokens: cumTotal,
+        cachedInputTokens: cumCached,
+        reasoningTokens: cumReasoning,
       })
       .where(eq(messages.id, opts.assistantMessageId));
   }
 }
 
 /**
- *
- *
+ * Marks an assistant message as complete.
+ * Token usage is tracked incrementally via updateMessageTokensIncremental().
  */
 export async function completeAssistantMessage(
-
+  fin: {
     usage?: {
       inputTokens?: number;
       outputTokens?: number;
+      totalTokens?: number;
     };
   },
   opts: RunOpts,
   db: Awaited<ReturnType<typeof getDb>>,
 ) {
-
-
-  .
+  // Only mark as complete - tokens are already tracked incrementally
+  await db
+    .update(messages)
+    .set({
+      status: 'complete',
+      completedAt: Date.now(),
+    })
     .where(eq(messages.id, opts.assistantMessageId));
-
-  if (msgRow.length > 0) {
-    await db
-      .update(messages)
-      .set({
-        finishedAt: new Date(),
-      })
-      .where(eq(messages.id, opts.assistantMessageId));
-  }
 }
 
-
-
-
-
-
-    toolArgs?: unknown;
-    toolResult?: unknown;
-    textContent?: string | null;
-    stepIndex?: number | null;
-  },
+/**
+ * Removes empty text parts from an assistant message.
+ */
+export async function cleanupEmptyTextParts(
+  opts: RunOpts,
   db: Awaited<ReturnType<typeof getDb>>,
 ) {
-  await db
+  const parts = await db
+    .select()
+    .from(messageParts)
+    .where(eq(messageParts.messageId, opts.assistantMessageId));
+
+  for (const p of parts) {
+    if (p.type === 'text') {
+      let t = '';
+      try {
+        t = JSON.parse(p.content || '{}')?.text || '';
+      } catch {}
+      if (!t || t.length === 0) {
+        await db.delete(messageParts).where(eq(messageParts.id, p.id));
+      }
+    }
+  }
 }
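As the comment above notes, onStepFinish reports usage cumulatively per message, so the session-level counters advance by the per-step delta while the message row stores the cumulative value. A minimal sketch of that bookkeeping (variable and field names illustrative):

    // prior* = values already stored on the message row; cum* = latest cumulative usage
    const deltaPrompt = Math.max(0, cumPrompt - priorPrompt);
    const deltaCompletion = Math.max(0, cumCompletion - priorCompletion);

    // session counters accumulate deltas; the message row is overwritten with cum* values
    sessionTotals.promptTokens += deltaPrompt;
    sessionTotals.completionTokens += deltaCompletion;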
package/src/runtime/runner.ts
CHANGED

@@ -1,4 +1,4 @@
-import { streamText } from 'ai';
+import { hasToolCall, streamText } from 'ai';
 import { loadConfig } from '@agi-cli/sdk';
 import { getDb } from '@agi-cli/database';
 import { messageParts } from '@agi-cli/database/schema';
@@ -7,8 +7,11 @@ import { resolveModel } from './provider.ts';
 import { resolveAgentConfig } from './agent-registry.ts';
 import { composeSystemPrompt } from './prompt.ts';
 import { discoverProjectTools } from '@agi-cli/sdk';
-import {
+import { adaptTools } from '../tools/adapter.ts';
+import { publish, subscribe } from '../events/bus.ts';
+import { debugLog, time } from './debug.ts';
 import { buildHistoryMessages } from './history-builder.ts';
+import { toErrorPayload } from './error-handling.ts';
 import { getMaxOutputTokens } from './token-utils.ts';
 import {
   type RunOpts,
@@ -19,11 +22,15 @@
   dequeueJob,
   cleanupSession,
 } from './session-queue.ts';
-import {
+import {
+  setupToolContext,
+  type RunnerToolContext,
+} from './tool-context-setup.ts';
 import {
   updateSessionTokensIncremental,
   updateMessageTokensIncremental,
   completeAssistantMessage,
+  cleanupEmptyTextParts,
 } from './db-operations.ts';
 import {
   createStepFinishHandler,
@@ -31,38 +38,175 @@
   createAbortHandler,
   createFinishHandler,
 } from './stream-handlers.ts';
-import { addCacheControl } from './cache-optimizer.ts';
-import { optimizeContext } from './context-optimizer.ts';
-import { truncateHistory } from './history-truncator.ts';
 
 /**
- *
+ * Enqueues an assistant run for processing.
+ */
+export function enqueueAssistantRun(opts: Omit<RunOpts, 'abortSignal'>) {
+  enqueueRun(opts, processQueue);
+}
+
+/**
+ * Aborts an active session.
+ */
+export function abortSession(sessionId: string) {
+  abortSessionQueue(sessionId);
+}
+
+/**
+ * Processes the queue of assistant runs for a session.
+ */
+async function processQueue(sessionId: string) {
+  const state = getRunnerState(sessionId);
+  if (!state) return;
+  if (state.running) return;
+  setRunning(sessionId, true);
+
+  while (state.queue.length > 0) {
+    const job = dequeueJob(sessionId);
+    if (!job) break;
+    try {
+      await runAssistant(job);
+    } catch (_err) {
+      // Swallow to keep the loop alive; event published by runner
+    }
+  }
+
+  setRunning(sessionId, false);
+  cleanupSession(sessionId);
+}
+
+/**
+ * Ensures the finish tool is called if not already observed.
+ */
+async function ensureFinishToolCalled(
+  finishObserved: boolean,
+  toolset: ReturnType<typeof adaptTools>,
+  sharedCtx: RunnerToolContext,
+  stepIndex: number,
+) {
+  if (finishObserved || !toolset?.finish?.execute) return;
+
+  const finishInput = {} as const;
+  const callOptions = { input: finishInput } as const;
+
+  sharedCtx.stepIndex = stepIndex;
+
+  try {
+    await toolset.finish.onInputStart?.(callOptions as never);
+  } catch {}
+
+  try {
+    await toolset.finish.onInputAvailable?.(callOptions as never);
+  } catch {}
+
+  await toolset.finish.execute(finishInput, {} as never);
+}
+
+/**
+ * Main function to run the assistant for a given request.
  */
-
-  const
-  const
-  const
-
-
-
-  const
-
-
-
-
-  const
-
-
-
-  const
-
-
-
-
+async function runAssistant(opts: RunOpts) {
+  const cfgTimer = time('runner:loadConfig+db');
+  const cfg = await loadConfig(opts.projectRoot);
+  const db = await getDb(cfg.projectRoot);
+  cfgTimer.end();
+
+  const agentTimer = time('runner:resolveAgentConfig');
+  const agentCfg = await resolveAgentConfig(cfg.projectRoot, opts.agent);
+  agentTimer.end({ agent: opts.agent });
+
+  const agentPrompt = agentCfg.prompt || '';
+
+  const historyTimer = time('runner:buildHistory');
+  const history = await buildHistoryMessages(db, opts.sessionId);
+  historyTimer.end({ messages: history.length });
+
+  const isFirstMessage = history.length === 0;
+
+  const systemTimer = time('runner:composeSystemPrompt');
+  const { getAuth } = await import('@agi-cli/sdk');
+  const { getProviderSpoofPrompt } = await import('./prompt.ts');
+  const auth = await getAuth(opts.provider, cfg.projectRoot);
+  const needsSpoof = auth?.type === 'oauth';
+  const spoofPrompt = needsSpoof
+    ? getProviderSpoofPrompt(opts.provider)
+    : undefined;
+
+  let system: string;
+  let additionalSystemMessages: Array<{ role: 'system'; content: string }> = [];
+
+  if (spoofPrompt) {
+    system = spoofPrompt;
+    const fullPrompt = await composeSystemPrompt({
+      provider: opts.provider,
+      model: opts.model,
+      projectRoot: cfg.projectRoot,
+      agentPrompt,
+      oneShot: opts.oneShot,
+      spoofPrompt: undefined,
+      includeProjectTree: isFirstMessage,
+    });
+    additionalSystemMessages = [{ role: 'system', content: fullPrompt }];
+  } else {
+    system = await composeSystemPrompt({
+      provider: opts.provider,
+      model: opts.model,
+      projectRoot: cfg.projectRoot,
+      agentPrompt,
+      oneShot: opts.oneShot,
+      spoofPrompt: undefined,
+      includeProjectTree: isFirstMessage,
+    });
+  }
+  systemTimer.end();
+  debugLog('[system] composed prompt (provider+base+agent):');
+  debugLog(system);
+
+  const toolsTimer = time('runner:discoverTools');
+  const allTools = await discoverProjectTools(cfg.projectRoot);
+  toolsTimer.end({ count: allTools.length });
+  const allowedNames = new Set([
+    ...(agentCfg.tools || []),
+    'finish',
+    'progress_update',
+  ]);
+  const gated = allTools.filter((t) => allowedNames.has(t.name));
+  const messagesWithSystemInstructions = [
+    ...(isFirstMessage ? additionalSystemMessages : []),
+    ...history,
+  ];
+
+  const { sharedCtx, firstToolTimer, firstToolSeen } = await setupToolContext(
+    opts,
+    db,
+  );
+  const toolset = adaptTools(gated, sharedCtx, opts.provider);
+
+  const modelTimer = time('runner:resolveModel');
+  const model = await resolveModel(opts.provider, opts.model, cfg);
+  modelTimer.end();
+
+  const maxOutputTokens = getMaxOutputTokens(opts.provider, opts.model);
+
+  let currentPartId = opts.assistantPartId;
   let accumulated = '';
-
+  let stepIndex = 0;
+
+  let finishObserved = false;
+  const unsubscribeFinish = subscribe(opts.sessionId, (evt) => {
+    if (evt.type !== 'tool.result') return;
+    try {
+      const name = (evt.payload as { name?: string } | undefined)?.name;
+      if (name === 'finish') finishObserved = true;
+    } catch {}
+  });
 
-
+  const streamStartTimer = time('runner:first-delta');
+  let firstDeltaSeen = false;
+  debugLog(`[streamText] Calling with maxOutputTokens: ${maxOutputTokens}`);
+
+  // State management helpers
   const getCurrentPartId = () => currentPartId;
   const getStepIndex = () => stepIndex;
   const updateCurrentPartId = (id: string) => {
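processQueue drains a session's queue one job at a time behind the running flag, so concurrent enqueues for the same session serialize instead of racing. Illustrative call pattern (option values hypothetical):

    // Both jobs land in the same session queue; the second run starts only
    // after the first finishes, because processQueue loops until the queue is empty.
    enqueueAssistantRun({ ...baseOpts, sessionId: 'sess-1', agent: 'build' });
    enqueueAssistantRun({ ...baseOpts, sessionId: 'sess-1', agent: 'review' });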
@@ -71,10 +215,12 @@ export async function runAssistant(opts: RunOpts) {
   const updateAccumulated = (text: string) => {
     accumulated = text;
   };
-  const
-
+  const incrementStepIndex = () => {
+    stepIndex += 1;
+    return stepIndex;
+  };
 
-  //
+  // Create stream handlers
   const onStepFinish = createStepFinishHandler(
     opts,
     db,
@@ -88,102 +234,105 @@ export async function runAssistant(opts: RunOpts) {
     updateMessageTokensIncremental,
   );
 
+  const onError = createErrorHandler(opts, db, getStepIndex, sharedCtx);
+
+  const onAbort = createAbortHandler(opts, db, getStepIndex, sharedCtx);
+
   const onFinish = createFinishHandler(
     opts,
     db,
+    () => ensureFinishToolCalled(finishObserved, toolset, sharedCtx, stepIndex),
     completeAssistantMessage,
-    getAccumulated,
-    abortController,
   );
 
-
-  const
+  // Apply optimizations: deduplication, pruning, cache control, and truncation
+  const { addCacheControl, truncateHistory } = await import(
+    './cache-optimizer.ts'
+  );
+  const { optimizeContext } = await import('./context-optimizer.ts');
 
-  //
-  const contextOptimized = optimizeContext(
+  // 1. Optimize context (deduplicate file reads, prune old tool results)
+  const contextOptimized = optimizeContext(messagesWithSystemInstructions, {
     deduplicateFiles: true,
     maxToolResults: 30,
   });
 
-  // Truncate history
+  // 2. Truncate history
   const truncatedMessages = truncateHistory(contextOptimized, 20);
 
-  // Add cache control
+  // 3. Add cache control
   const { system: cachedSystem, messages: optimizedMessages } = addCacheControl(
-    opts.provider,
+    opts.provider as any,
     system,
     truncatedMessages,
   );
 
   try {
-
-    const result =
+    // @ts-expect-error this is fine 🔥
+    const result = streamText({
       model,
-
+      tools: toolset,
+      ...(cachedSystem ? { system: cachedSystem } : {}),
       messages: optimizedMessages,
-
-
-
-      temperature: agentConfig.temperature ?? 0.7,
-      abortSignal: abortController.signal,
+      ...(maxOutputTokens ? { maxOutputTokens } : {}),
+      abortSignal: opts.abortSignal,
+      stopWhen: hasToolCall('finish'),
       onStepFinish,
+      onError,
+      onAbort,
       onFinish,
-      experimental_continueSteps: true,
     });
 
-    // Process the stream
     for await (const delta of result.textStream) {
-      if (
-
-
-
-        await db
-          .update(messageParts)
-          .set({ content: accumulated })
-          .where(eq(messageParts.id, currentPartId));
+      if (!delta) continue;
+      if (!firstDeltaSeen) {
+        firstDeltaSeen = true;
+        streamStartTimer.end();
       }
-
-      publish(
+      accumulated += delta;
+      publish({
+        type: 'message.part.delta',
         sessionId: opts.sessionId,
-
-
-
-
-
+        payload: {
+          messageId: opts.assistantMessageId,
+          partId: currentPartId,
+          stepIndex,
+          delta,
+        },
       });
+      await db
+        .update(messageParts)
+        .set({ content: JSON.stringify({ text: accumulated }) })
+        .where(eq(messageParts.id, currentPartId));
     }
-  } catch (
-
+  } catch (error) {
+    const errorPayload = toErrorPayload(error);
+    await db
+      .update(messageParts)
+      .set({
+        content: JSON.stringify({
+          text: accumulated,
+          error: errorPayload.message,
+        }),
+      })
+      .where(eq(messageParts.messageId, opts.assistantMessageId));
+    publish({
+      type: 'error',
+      sessionId: opts.sessionId,
+      payload: {
+        messageId: opts.assistantMessageId,
+        error: errorPayload.message,
+        details: errorPayload.details,
+      },
+    });
+    throw error;
   } finally {
-
-
+    if (!firstToolSeen()) firstToolTimer.end({ skipped: true });
+    try {
+      unsubscribeFinish();
+    } catch {}
+    try {
+      await cleanupEmptyTextParts(opts, db);
+    } catch {}
   }
 }
-
-/**
- * Enqueues an assistant run
- */
-export async function enqueueAssistantRun(opts: RunOpts) {
-  return enqueueRun(opts);
-}
-
-/**
- * Aborts a running session
- */
-export async function abortSession(sessionId: number) {
-  return abortSessionQueue(sessionId);
-}
-
-/**
- * Gets the current runner state for a session
- */
-export function getSessionState(sessionId: number) {
-  return getRunnerState(sessionId);
-}
-
-/**
- * Cleanup session resources
- */
-export function cleanupSessionResources(sessionId: number) {
-  return cleanupSession(sessionId);
-}
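Replacing experimental_continueSteps with stopWhen: hasToolCall('finish') ties loop termination to an explicit tool call from the model. A self-contained sketch of that stop condition in AI SDK v5 terms (the model instance and prompt are placeholders, and the finish tool here is a minimal stand-in for the package's own):

    import { streamText, hasToolCall, tool } from 'ai';
    import { z } from 'zod';

    const result = streamText({
      model, // any AI SDK language model instance
      tools: {
        finish: tool({
          description: 'Signal that the task is complete',
          inputSchema: z.object({}),
          execute: async () => 'done',
        }),
      },
      // keep looping through tool-call steps until the model calls `finish`
      stopWhen: hasToolCall('finish'),
      prompt: 'Do the task, then call finish.',
    });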