npm - @agi-cli/server - Versions diffs - 0.1.59 → 0.1.61 - Mend

@agi-cli/server 0.1.59 → 0.1.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/package.json +3 -3
package/src/openapi/spec.ts +641 -0
package/src/runtime/cache-optimizer.ts +29 -51
package/src/runtime/context-optimizer.ts +20 -6
package/src/runtime/db-operations.ts +43 -48
package/src/runtime/history-truncator.ts +26 -0
package/src/runtime/runner.ts +99 -248
package/src/runtime/stream-handlers.ts +175 -209

package/src/runtime/cache-optimizer.ts CHANGED

@@ -1,5 +1,23 @@
 import type { ModelMessage } from 'ai';
+type SystemMessage =
+	| string
+	| Array<{
+			type: 'text';
+			text: string;
+			cache_control?: { type: 'ephemeral' };
+	  }>;
+interface ContentPart {
+	type: string;
+	[key: string]: unknown;
+	providerOptions?: {
+		anthropic?: {
+			cacheControl?: { type: 'ephemeral' };
+		};
+	};
+}
 /**
  * Adds cache control to messages for prompt caching optimization.
  * Anthropic supports caching for system messages, tools, and long context.
@@ -9,13 +27,7 @@ export function addCacheControl(
 	system: string | undefined,
 	messages: ModelMessage[],
 ): {
-	system?:
-		| string
-		| Array<{
-				type: 'text';
-				text: string;
-				cache_control?: { type: 'ephemeral' };
-		  }>;
+	system?: SystemMessage;
 	messages: ModelMessage[];
 } {
 	// Only Anthropic supports prompt caching currently
@@ -24,7 +36,7 @@ export function addCacheControl(
 	}
 	// Convert system to cacheable format if it's long enough
-	let cachedSystem: any = system;
+	let cachedSystem: SystemMessage | undefined = system;
 	if (system && system.length > 1024) {
 		// Anthropic requires 1024+ tokens for Claude Sonnet/Opus
 		cachedSystem = [
@@ -61,55 +73,21 @@ export function addCacheControl(
 				// Add cache control to the last content part of that message
 				const lastPart = targetMsg.content[targetMsg.content.length - 1];
 				if (lastPart && typeof lastPart === 'object' && 'type' in lastPart) {
-					(lastPart as any).providerOptions = {
+					(lastPart as ContentPart).providerOptions = {
 						anthropic: { cacheControl: { type: 'ephemeral' } },
 					};
 				}
 			}
 		}
-		return { system: cachedSystem, messages: cachedMessages };
-	}
-	return { system: cachedSystem, messages };
-}
-/**
- * Truncates old messages to reduce context size while keeping recent context.
- * Strategy: Keep system message + last N messages
- */
-export function truncateHistory(
-	messages: ModelMessage[],
-	maxMessages = 20,
-): ModelMessage[] {
-	if (messages.length <= maxMessages) {
-		return messages;
-	}
-	// Keep the most recent messages
-	return messages.slice(-maxMessages);
-}
-/**
- * Estimates token count (rough approximation: ~4 chars per token)
- */
-export function estimateTokens(text: string): number {
-	return Math.ceil(text.length / 4);
-}
-/**
- * Summarizes tool results if they're too long
- */
-export function summarizeToolResult(result: unknown, maxLength = 5000): string {
-	const str = typeof result === 'string' ? result : JSON.stringify(result);
-	if (str.length <= maxLength) {
-		return str;
+		return {
+			system: cachedSystem,
+			messages: cachedMessages,
+		};
 	}
-	// Truncate and add indicator
-	return (
-		str.slice(0, maxLength) +
-		`\n\n[... truncated ${str.length - maxLength} characters]`
-	);
+	return {
+		system: cachedSystem,
+		messages,
+	};
 }

package/src/runtime/context-optimizer.ts CHANGED

@@ -10,6 +10,17 @@ interface FileRead {
 	path: string;
 }
+interface ToolPart {
+	type: string;
+	input?: {
+		path?: string;
+		filePattern?: string;
+		pattern?: string;
+	};
+	output?: unknown;
+	[key: string]: unknown;
+}
 /**
  * Deduplicates file read results, keeping only the latest version of each file.
  *
@@ -38,7 +49,8 @@ export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
 			if (!['read', 'grep', 'glob'].includes(toolName)) return;
 			// Extract file path from input
-			const input = (part as any).input;
+			const toolPart = part as ToolPart;
+			const input = toolPart.input;
 			if (!input) return;
 			const path = input.path || input.filePattern || input.pattern;
@@ -49,8 +61,8 @@ export function deduplicateFileReads(messages: ModelMessage[]): ModelMessage[] {
 				fileReads.set(path, []);
 			}
 			fileReads
-				.get(path)!
-				.push({ messageIndex: msgIdx, partIndex: partIdx, path });
+				.get(path)
+				?.push({ messageIndex: msgIdx, partIndex: partIdx, path });
 		});
 	});
@@ -112,7 +124,8 @@ export function pruneToolResults(
 			if (!toolType.startsWith('tool-')) return;
 			// Check if this has output
-			const hasOutput = (part as any).output !== undefined;
+			const toolPart = part as ToolPart;
+			const hasOutput = toolPart.output !== undefined;
 			if (!hasOutput) return;
 			toolResults.push({ messageIndex: msgIdx, partIndex: partIdx });
@@ -142,11 +155,12 @@ export function pruneToolResults(
 			if (!part || typeof part !== 'object') return part;
 			if (!('type' in part)) return part;
-			const toolType = (part as any).type as string;
+			const toolPart = part as ToolPart;
+			const toolType = toolPart.type;
 			if (!toolType.startsWith('tool-')) return part;
 			const key = `${msgIdx}-${partIdx}`;
-			const hasOutput = (part as any).output !== undefined;
+			const hasOutput = toolPart.output !== undefined;
 			// If this tool result should be pruned, remove its output
 			if (hasOutput && !toKeep.has(key)) {

package/src/runtime/db-operations.ts CHANGED

@@ -11,13 +11,20 @@ type UsageData = {
 	reasoningTokens?: number;
 };
+interface ProviderMetadata {
+	openai?: {
+		cachedPromptTokens?: number;
+	};
+	[key: string]: unknown;
+}
 /**
  * Updates session token counts incrementally after each step.
  * Note: onStepFinish.usage is CUMULATIVE per message, so we compute DELTA and add to session.
  */
 export async function updateSessionTokensIncremental(
 	usage: UsageData,
-	providerMetadata: Record<string, any> | undefined,
+	providerMetadata: ProviderMetadata | undefined,
 	opts: RunOpts,
 	db: Awaited<ReturnType<typeof getDb>>,
 ) {
@@ -129,7 +136,7 @@ export async function updateSessionTokens(
  */
 export async function updateMessageTokensIncremental(
 	usage: UsageData,
-	providerMetadata: Record<string, any> | undefined,
+	providerMetadata: ProviderMetadata | undefined,
 	opts: RunOpts,
 	db: Awaited<ReturnType<typeof getDb>>,
 ) {
@@ -148,86 +155,74 @@ export async function updateMessageTokensIncremental(
 		const priorReasoning = Number(msg.reasoningTokens ?? 0);
 		// Treat usage as cumulative per-message - REPLACE not ADD
-		const cumPrompt =
+		const nextPrompt =
 			usage.inputTokens != null ? Number(usage.inputTokens) : priorPrompt;
-		const cumCompletion =
+		const nextCompletion =
 			usage.outputTokens != null ? Number(usage.outputTokens) : priorCompletion;
-		const cumReasoning =
+		const nextReasoning =
 			usage.reasoningTokens != null
 				? Number(usage.reasoningTokens)
 				: priorReasoning;
-		const cumCached =
+		const nextCached =
 			usage.cachedInputTokens != null
 				? Number(usage.cachedInputTokens)
 				: providerMetadata?.openai?.cachedPromptTokens != null
 					? Number(providerMetadata.openai.cachedPromptTokens)
 					: priorCached;
-		const cumTotal =
-			usage.totalTokens != null
-				? Number(usage.totalTokens)
-				: cumPrompt + cumCompletion + cumReasoning;
 		await db
 			.update(messages)
 			.set({
-				promptTokens: cumPrompt,
-				completionTokens: cumCompletion,
-				totalTokens: cumTotal,
-				cachedInputTokens: cumCached,
-				reasoningTokens: cumReasoning,
+				promptTokens: nextPrompt,
+				completionTokens: nextCompletion,
+				cachedInputTokens: nextCached,
+				reasoningTokens: nextReasoning,
 			})
 			.where(eq(messages.id, opts.assistantMessageId));
 	}
 }
 /**
- * Marks an assistant message as complete.
- * Token usage is tracked incrementally via updateMessageTokensIncremental().
+ * Completes the assistant message after the run finishes.
+ * Used to finalize timing but NOT tokens, which are already incremental.
  */
 export async function completeAssistantMessage(
-	fin: {
+	_fin: {
 		usage?: {
 			inputTokens?: number;
 			outputTokens?: number;
-			totalTokens?: number;
 		};
 	},
 	opts: RunOpts,
 	db: Awaited<ReturnType<typeof getDb>>,
 ) {
-	// Only mark as complete - tokens are already tracked incrementally
-	await db
-		.update(messages)
-		.set({
-			status: 'complete',
-			completedAt: Date.now(),
-		})
+	const msgRow = await db
+		.select()
+		.from(messages)
 		.where(eq(messages.id, opts.assistantMessageId));
+	if (msgRow.length > 0) {
+		await db
+			.update(messages)
+			.set({
+				finishedAt: new Date(),
+			})
+			.where(eq(messages.id, opts.assistantMessageId));
+	}
 }
-/**
- * Removes empty text parts from an assistant message.
- */
-export async function cleanupEmptyTextParts(
-	opts: RunOpts,
+export async function createMessagePart(
+	partData: {
+		messageId: number;
+		contentType: 'text' | 'tool' | 'other';
+		toolName?: string | null;
+		toolArgs?: unknown;
+		toolResult?: unknown;
+		textContent?: string | null;
+		stepIndex?: number | null;
+	},
 	db: Awaited<ReturnType<typeof getDb>>,
 ) {
-	const parts = await db
-		.select()
-		.from(messageParts)
-		.where(eq(messageParts.messageId, opts.assistantMessageId));
-	for (const p of parts) {
-		if (p.type === 'text') {
-			let t = '';
-			try {
-				t = JSON.parse(p.content || '{}')?.text || '';
-			} catch {}
-			if (!t || t.length === 0) {
-				await db.delete(messageParts).where(eq(messageParts.id, p.id));
-			}
-		}
-	}
+	await db.insert(messageParts).values(partData);
 }

package/src/runtime/history-truncator.ts ADDED

@@ -0,0 +1,26 @@
+import type { ModelMessage } from 'ai';
+/**
+ * Truncates conversation history to keep only the most recent messages.
+ * This helps manage context window size and improves performance.
+ *
+ * Strategy:
+ * - Keep only the last N messages
+ * - Preserve message pairs (assistant + user responses) when possible
+ * - Always keep at least the system message if present
+ */
+export function truncateHistory(
+	messages: ModelMessage[],
+	maxMessages: number,
+): ModelMessage[] {
+	if (messages.length <= maxMessages) {
+		return messages;
+	}
+	// Calculate how many messages to keep
+	const keepCount = Math.min(maxMessages, messages.length);
+	const startIndex = messages.length - keepCount;
+	// Return the most recent messages
+	return messages.slice(startIndex);
+}