@townco/agent 0.1.122 → 0.1.123

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/dist/acp-server/adapter.d.ts +1 -0
  2. package/dist/acp-server/adapter.js +133 -11
  3. package/dist/runner/agent-runner.d.ts +7 -0
  4. package/dist/runner/hooks/executor.js +1 -1
  5. package/dist/runner/hooks/predefined/context-validator.d.ts +1 -1
  6. package/dist/runner/hooks/predefined/context-validator.js +2 -2
  7. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.d.ts +1 -1
  8. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +3 -3
  9. package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +2 -2
  10. package/dist/runner/hooks/predefined/document-context-extractor/index.js +5 -5
  11. package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +2 -2
  12. package/dist/runner/hooks/predefined/tool-response-compactor.js +9 -9
  13. package/dist/runner/langchain/index.js +301 -9
  14. package/dist/runner/langchain/otel-callbacks.d.ts +5 -0
  15. package/dist/runner/langchain/otel-callbacks.js +8 -0
  16. package/dist/runner/langchain/tools/artifacts.d.ts +68 -0
  17. package/dist/runner/langchain/tools/artifacts.js +474 -0
  18. package/dist/runner/langchain/tools/conversation_search.d.ts +22 -0
  19. package/dist/runner/langchain/tools/conversation_search.js +137 -0
  20. package/dist/runner/langchain/tools/document_extract.js +1 -1
  21. package/dist/runner/langchain/tools/generate_image.d.ts +47 -0
  22. package/dist/runner/langchain/tools/generate_image.js +175 -0
  23. package/dist/runner/langchain/tools/port-utils.d.ts +8 -0
  24. package/dist/runner/langchain/tools/port-utils.js +35 -0
  25. package/dist/tsconfig.tsbuildinfo +1 -1
  26. package/dist/utils/context-size-calculator.d.ts +1 -1
  27. package/dist/utils/context-size-calculator.js +9 -14
  28. package/dist/utils/token-counter.d.ts +9 -7
  29. package/dist/utils/token-counter.js +30 -11
  30. package/dist/utils/tool-overhead-calculator.d.ts +2 -2
  31. package/dist/utils/tool-overhead-calculator.js +5 -4
  32. package/package.json +8 -7
@@ -59,6 +59,7 @@ export declare class AgentAcpAdapter implements acp.Agent {
  private agentUiConfig;
  private currentToolOverheadTokens;
  private currentMcpOverheadTokens;
+ private currentSystemPromptTokens;
  constructor(agent: AgentRunner, connection: acp.AgentSideConnection, agentDir?: string, agentName?: string);
  /**
  * Extract tool metadata from the agent definition for exposing to clients.
@@ -5,7 +5,7 @@ import { getModelContextWindow, HookExecutor, loadHookCallback, } from "../runne
  import { getToolGroupChildren } from "../runner/langchain/index.js";
  import { telemetry } from "../telemetry/index.js";
  import { calculateContextSize, } from "../utils/context-size-calculator.js";
- import { countToolResultTokens } from "../utils/token-counter.js";
+ import { countTokens, countToolResultTokens } from "../utils/token-counter.js";
  import { SessionStorage, } from "./session-storage.js";
  const logger = createLogger("adapter");
  /**
@@ -137,6 +137,7 @@ export class AgentAcpAdapter {
  agentUiConfig;
  currentToolOverheadTokens = 0; // Track tool overhead for current turn
  currentMcpOverheadTokens = 0; // Track MCP overhead for current turn
+ currentSystemPromptTokens = 0; // Track actual system prompt tokens after all injections
  constructor(agent, connection, agentDir, agentName) {
  this.connection = connection;
  this.sessions = new Map();
@@ -1080,6 +1081,7 @@ export class AgentAcpAdapter {
  // Reset tool overhead for new turn (will be set by harness)
  this.currentToolOverheadTokens = 0;
  this.currentMcpOverheadTokens = 0;
+ this.currentSystemPromptTokens = 0;
  // Generate a unique messageId for this assistant response
  const messageId = Math.random().toString(36).substring(2);
  // Convert prompt content blocks to session storage format
@@ -1163,10 +1165,20 @@ export class AgentAcpAdapter {
  contextMessages.push(entry.message);
  }
  }
- // Calculate context size - only estimated values
- const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+ // Calculate context size - use actual system prompt tokens from harness if available
+ const context_size = await calculateContextSize(contextMessages, undefined, // Don't use base prompt - we'll add actual tokens below
+ this.currentToolOverheadTokens, // Include tool overhead
  this.currentMcpOverheadTokens, // Include MCP overhead
  getModelContextWindow(this.agent.definition.model));
+ // Add actual system prompt tokens (includes all injections)
+ // If harness hasn't sent it yet, use base prompt as fallback
+ const systemPromptTokens = this.currentSystemPromptTokens > 0
+ ? this.currentSystemPromptTokens
+ : this.agent.definition.systemPrompt
+ ? await countTokens(this.agent.definition.systemPrompt)
+ : 0;
+ context_size.systemPromptTokens = systemPromptTokens;
+ context_size.totalEstimated += systemPromptTokens;
  const contextSnapshot = createContextSnapshot(session.messages.length - 1, // Exclude the newly added user message (it will be passed separately via prompt)
  new Date().toISOString(), previousContext, context_size);
  session.context.push(contextSnapshot);
@@ -1211,6 +1223,7 @@ export class AgentAcpAdapter {
  flushLogBuffer(true);
  };
  // Helper to save cancelled message to session
+ const sessionForTurn = session;
  const saveCancelledMessage = async () => {
  if (this.noSession)
  return;
@@ -1242,14 +1255,15 @@ export class AgentAcpAdapter {
  timestamp: new Date().toISOString(),
  };
  // Check if we already have a partial assistant message
- const lastMessage = session.messages[session.messages.length - 1];
+ const lastMessage = sessionForTurn.messages[sessionForTurn.messages.length - 1];
  if (lastMessage && lastMessage.role === "assistant") {
- session.messages[session.messages.length - 1] = cancelledMessage;
+ sessionForTurn.messages[sessionForTurn.messages.length - 1] =
+ cancelledMessage;
  }
  else {
- session.messages.push(cancelledMessage);
+ sessionForTurn.messages.push(cancelledMessage);
  }
- await this.saveSessionToDisk(params.sessionId, session);
+ await this.saveSessionToDisk(params.sessionId, sessionForTurn);
  logger.info("Saved cancelled message to session", {
  sessionId: params.sessionId,
  contentBlocks: contentBlocks.length,
@@ -1338,6 +1352,8 @@ export class AgentAcpAdapter {
  const generator = this.agent.invoke(invokeParams);
  // Track the invocation span for parenting hook spans
  let invocationSpan = null;
+ // Track whether we've updated the initial context snapshot with overhead info
+ let initialSnapshotUpdated = false;
  // Manually iterate to capture the return value
  let iterResult = await generator.next();
  while (!iterResult.done) {
@@ -1373,6 +1389,90 @@ export class AgentAcpAdapter {
  iterResult = await generator.next();
  continue;
  }
+ // Capture system prompt overhead info if provided by harness
+ if ("sessionUpdate" in msg &&
+ msg.sessionUpdate === "system_prompt_overhead") {
+ const overheadInfo = msg;
+ this.currentSystemPromptTokens = overheadInfo.systemPromptTokens;
+ logger.debug("Received system prompt overhead from harness", {
+ systemPromptTokens: this.currentSystemPromptTokens,
+ });
+ // Update the initial context snapshot with actual overhead values
+ // This happens after both tool_overhead_info and system_prompt_overhead are received
+ if (!initialSnapshotUpdated && session.context.length > 0) {
+ const initialSnapshot = session.context[session.context.length - 1];
+ if (initialSnapshot?.context_size) {
+ // Calculate overhead delta
+ const oldSystemPromptTokens = initialSnapshot.context_size.systemPromptTokens;
+ const oldToolOverhead = initialSnapshot.context_size.toolOverheadTokens ?? 0;
+ const oldMcpOverhead = initialSnapshot.context_size.mcpOverheadTokens ?? 0;
+ // Update all overhead fields
+ initialSnapshot.context_size.systemPromptTokens =
+ this.currentSystemPromptTokens;
+ initialSnapshot.context_size.toolOverheadTokens =
+ this.currentToolOverheadTokens;
+ initialSnapshot.context_size.mcpOverheadTokens =
+ this.currentMcpOverheadTokens;
+ // Recalculate total
+ const oldTotal = initialSnapshot.context_size.totalEstimated;
+ const overheadDelta = this.currentSystemPromptTokens -
+ oldSystemPromptTokens +
+ (this.currentToolOverheadTokens - oldToolOverhead) +
+ (this.currentMcpOverheadTokens - oldMcpOverhead);
+ initialSnapshot.context_size.totalEstimated =
+ oldTotal + overheadDelta;
+ logger.debug("Updated initial context snapshot with overhead", {
+ systemPromptTokens: this.currentSystemPromptTokens,
+ toolOverheadTokens: this.currentToolOverheadTokens,
+ mcpOverheadTokens: this.currentMcpOverheadTokens,
+ oldTotal,
+ newTotal: initialSnapshot.context_size.totalEstimated,
+ overheadDelta,
+ });
+ // Save updated snapshot
+ await this.saveSessionToDisk(params.sessionId, session);
+ initialSnapshotUpdated = true;
+ }
+ }
+ // Don't send this update to client, it's internal metadata
+ iterResult = await generator.next();
+ continue;
+ }
+ // Capture actual token usage from API and compare with estimates
+ if ("sessionUpdate" in msg &&
+ msg.sessionUpdate === "actual_token_usage") {
+ const actualUsage = msg;
+ const totalActual = actualUsage.inputTokens + actualUsage.outputTokens;
+ // Get the most recent context entry's estimated total
+ const lastContext = session.context.length > 0
+ ? session.context[session.context.length - 1]
+ : null;
+ const estimatedTotal = lastContext?.context_size?.totalEstimated ?? 0;
+ // Calculate discrepancy
+ const discrepancy = totalActual - estimatedTotal;
+ const discrepancyPercent = estimatedTotal > 0
+ ? ((discrepancy / totalActual) * 100).toFixed(1)
+ : "N/A";
+ logger.warn("Token usage comparison (Actual vs Estimated)", {
+ sessionId: params.sessionId,
+ actual: {
+ inputTokens: actualUsage.inputTokens,
+ outputTokens: actualUsage.outputTokens,
+ total: totalActual,
+ },
+ estimated: {
+ total: estimatedTotal,
+ breakdown: lastContext?.context_size,
+ },
+ discrepancy: {
+ tokens: discrepancy,
+ percent: discrepancyPercent,
+ },
+ });
+ // Don't send this update to client, it's internal metadata
+ iterResult = await generator.next();
+ continue;
+ }
  // Extract and accumulate token usage from message chunks
  if ("sessionUpdate" in msg &&
  msg.sessionUpdate === "agent_message_chunk" &&
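A minimal arithmetic sketch of the discrepancy calculation added above, using made-up numbers purely for illustration (they are not taken from this release):

    // Hypothetical figures; mirrors the math in the actual_token_usage branch above.
    const inputTokens = 9200;                         // actual input tokens reported by the API
    const outputTokens = 800;                         // actual output tokens reported by the API
    const totalActual = inputTokens + outputTokens;   // 10000
    const estimatedTotal = 9400;                      // totalEstimated from the last context snapshot
    const discrepancy = totalActual - estimatedTotal; // 600
    // Note the percentage is taken against the actual total, not the estimate.
    const discrepancyPercent = estimatedTotal > 0
        ? ((discrepancy / totalActual) * 100).toFixed(1)  // "6.0"
        : "N/A";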
@@ -1592,6 +1692,18 @@ export class AgentAcpAdapter {
  toolCallBlock._meta.originalTokens =
  compactionMeta.originalTokens;
  toolCallBlock._meta.finalTokens = compactionMeta.finalTokens;
+ // If the runner already saved the original content to artifacts, persist that path.
+ if (typeof compactionMeta.originalContentPath === "string" &&
+ compactionMeta.originalContentPath.length > 0) {
+ toolCallBlock._meta.originalContentPath =
+ compactionMeta.originalContentPath;
+ }
+ // Persist a short preview if provided (useful when original file isn't available)
+ if (typeof compactionMeta.originalContentPreview === "string" &&
+ compactionMeta.originalContentPreview.length > 0) {
+ toolCallBlock._meta.originalContentPreview =
+ compactionMeta.originalContentPreview;
+ }
  }
  if (compactionMeta.originalContent &&
  actuallyCompacted &&
@@ -1697,7 +1809,7 @@ export class AgentAcpAdapter {
  },
  });
  }
- const outputTokens = countToolResultTokens(rawOutput);
+ const outputTokens = await countToolResultTokens(rawOutput);
  // Create notification callback to stream hook events in real-time
  const sendHookNotification = (notification) => {
  this.connection.sessionUpdate({
@@ -1942,7 +2054,7 @@ export class AgentAcpAdapter {
  }
  }
  // Calculate context size - tool result is now in the message, but hasn't been sent to LLM yet
- const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+ const context_size = await calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
  this.currentMcpOverheadTokens, // Include MCP overhead
  getModelContextWindow(this.agent.definition.model));
  // Create snapshot with a pointer to the partial message (not a full copy!)
@@ -2161,10 +2273,20 @@ export class AgentAcpAdapter {
  contextMessages.push(entry.message);
  }
  }
- // Calculate context size - only estimated values
- const context_size = calculateContextSize(contextMessages, this.agent.definition.systemPrompt ?? undefined, this.currentToolOverheadTokens, // Include tool overhead
+ // Calculate context size - use actual system prompt tokens from harness if available
+ const context_size = await calculateContextSize(contextMessages, undefined, // Don't use base prompt - we'll add actual tokens below
+ this.currentToolOverheadTokens, // Include tool overhead
  this.currentMcpOverheadTokens, // Include MCP overhead
  getModelContextWindow(this.agent.definition.model));
+ // Add actual system prompt tokens (includes all injections)
+ // If harness hasn't sent it yet, use base prompt as fallback
+ const systemPromptTokens = this.currentSystemPromptTokens > 0
+ ? this.currentSystemPromptTokens
+ : this.agent.definition.systemPrompt
+ ? await countTokens(this.agent.definition.systemPrompt)
+ : 0;
+ context_size.systemPromptTokens = systemPromptTokens;
+ context_size.totalEstimated += systemPromptTokens;
  const contextSnapshot = createContextSnapshot(session.messages.length, new Date().toISOString(), previousContext, context_size);
  session.context.push(contextSnapshot);
  await this.saveSessionToDisk(params.sessionId, session);
@@ -162,6 +162,13 @@ export type ExtendedSessionUpdate = (SessionNotification["update"] & {
  sessionUpdate: "tool_overhead_info";
  toolOverheadTokens: number;
  mcpOverheadTokens: number;
+ } | {
+ sessionUpdate: "system_prompt_overhead";
+ systemPromptTokens: number;
+ } | {
+ sessionUpdate: "actual_token_usage";
+ inputTokens: number;
+ outputTokens: number;
  } | {
  sessionUpdate: "__invocation_span";
  invocationSpan: Span;
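For orientation, a small TypeScript sketch of how a consumer could narrow the two new session update variants; the local type below only mirrors the union members shown in this hunk and is not the package's exported ExtendedSessionUpdate type:

    // Assumed local mirror of the two new variants, with field names as in the diff above.
    type OverheadUpdate =
        | { sessionUpdate: "system_prompt_overhead"; systemPromptTokens: number }
        | { sessionUpdate: "actual_token_usage"; inputTokens: number; outputTokens: number };

    function handleOverheadUpdate(msg: OverheadUpdate): void {
        if (msg.sessionUpdate === "system_prompt_overhead") {
            console.log("system prompt tokens:", msg.systemPromptTokens);
        } else {
            console.log("actual total tokens:", msg.inputTokens + msg.outputTokens);
        }
    }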
@@ -231,7 +231,7 @@ export class HookExecutor {
  if (result.metadata?.modifiedOutput) {
  const newOutput = result.metadata.modifiedOutput;
  const newOutputTokens = result.metadata.finalTokens ??
- countToolResultTokens(newOutput);
+ (await countToolResultTokens(newOutput));
  currentOutput = newOutput;
  currentToolResponse = {
  ...currentToolResponse,
@@ -43,7 +43,7 @@ export declare function validateContextFits(contentTokens: number, currentContex
  * @param bufferPercent - Safety buffer as a percentage (default 10%)
  * @returns Validation result indicating if prompt fits
  */
- export declare function validatePromptFits(prompt: string, modelName: string, bufferPercent?: number): ValidationResult;
+ export declare function validatePromptFits(prompt: string, modelName: string, bufferPercent?: number): Promise<ValidationResult>;
  /**
  * Checks if an error is a context overflow error from the Anthropic API.
  *
@@ -49,8 +49,8 @@ export function validateContextFits(contentTokens, currentContextTokens, modelCo
  * @param bufferPercent - Safety buffer as a percentage (default 10%)
  * @returns Validation result indicating if prompt fits
  */
- export function validatePromptFits(prompt, modelName, bufferPercent = DEFAULT_BUFFER_PERCENT) {
- const promptTokens = countTokens(prompt);
+ export async function validatePromptFits(prompt, modelName, bufferPercent = DEFAULT_BUFFER_PERCENT) {
+ const promptTokens = await countTokens(prompt);
  const modelContextWindow = getModelContextWindow(modelName);
  return validateContextFits(promptTokens, 0, // No existing context for a fresh prompt
  modelContextWindow, bufferPercent);
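Because validatePromptFits (like countTokens) now returns a promise, existing call sites need an await; a minimal before/after sketch, assuming an async caller that branches on isValid the way the predefined hooks in this release do:

    // 0.1.122: const validation = validatePromptFits(prompt, modelName, 0.1);
    // 0.1.123: the function is async, so the result must be awaited.
    const validation = await validatePromptFits(prompt, modelName, 0.1);
    if (!validation.isValid) {
        // skip the chunk or compact further, as the predefined hooks above do
    }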
@@ -24,7 +24,7 @@ export declare function calculateMaxIterations(documentTokens: number, chunkSize
  * The overlap helps maintain context continuity at chunk boundaries,
  * ensuring the LLM doesn't miss information that spans boundaries.
  */
- export declare function createChunks(content: string, chunkSizeTokens: number, overlapTokens?: number): ChunkInfo[];
+ export declare function createChunks(content: string, chunkSizeTokens: number, overlapTokens?: number): Promise<ChunkInfo[]>;
  /**
  * Get summary statistics about chunks
  */
@@ -42,9 +42,9 @@ export function calculateMaxIterations(documentTokens, chunkSizeTokens) {
  * The overlap helps maintain context continuity at chunk boundaries,
  * ensuring the LLM doesn't miss information that spans boundaries.
  */
- export function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
+ export async function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
  const chunks = [];
- const totalTokens = countTokens(content);
+ const totalTokens = await countTokens(content);
  // If content fits in a single chunk, return it as-is
  if (totalTokens <= chunkSizeTokens) {
  return [
@@ -87,7 +87,7 @@ export function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
  }
  // Extract chunk content
  const chunkContent = content.slice(currentOffset, endOffset);
- const chunkTokens = countTokens(chunkContent);
+ const chunkTokens = await countTokens(chunkContent);
  chunks.push({
  index: chunkIndex,
  startOffset: currentOffset,
@@ -84,8 +84,8 @@ async function extractFromChunk(chunk, keyRequirements, totalChunks, config) {
  try {
  const prompt = buildExtractionPrompt(chunk.content, keyRequirements, chunk.index, totalChunks, chunk.relevanceScore ?? 5);
  // Pre-flight validation: ensure prompt fits in model context
- const systemPromptTokens = countTokens(EXTRACTION_SYSTEM_PROMPT);
- const promptTokens = countTokens(prompt);
+ const systemPromptTokens = await countTokens(EXTRACTION_SYSTEM_PROMPT);
+ const promptTokens = await countTokens(prompt);
  const validation = validateContextFits(promptTokens, systemPromptTokens, config.modelContextSize, 0.1);
  if (!validation.isValid) {
  logger.warn("Extraction prompt too large for model context, skipping chunk", {
@@ -75,7 +75,7 @@ Provide a concise list (3-5 bullet points) of the most important elements to ext
  * Perform final compaction of merged extractions if still too large
  */
  async function compactFinalResult(mergedContent, keyFacts, keyRequirements, targetTokens) {
- const currentTokens = countTokens(mergedContent);
+ const currentTokens = await countTokens(mergedContent);
  // If already under target, return as structured result
  if (currentTokens <= targetTokens) {
  return {
@@ -154,7 +154,7 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
  const startTime = Date.now();
  // Convert output to string for processing
  const outputString = JSON.stringify(rawOutput, null, 2);
- const originalTokens = countToolResultTokens(rawOutput);
+ const originalTokens = await countToolResultTokens(rawOutput);
  logger.info("Starting document context extraction", {
  toolName,
  toolCallId,
@@ -174,7 +174,7 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
  };
  // Calculate chunk size and create chunks
  const chunkSizeTokens = calculateChunkSize(config);
- const chunks = createChunks(outputString, chunkSizeTokens, config.chunkOverlapTokens);
+ const chunks = await createChunks(outputString, chunkSizeTokens, config.chunkOverlapTokens);
  const chunkStats = getChunkStats(chunks);
  // Update max iterations based on actual chunk count
  config.maxIterations = calculateMaxIterations(originalTokens, chunkSizeTokens);
@@ -263,12 +263,12 @@ export async function extractDocumentContext(rawOutput, toolName, toolCallId, to
  }
  const { content: mergedContent, keyFacts } = mergeExtractions(extractions, scoredChunks);
  logger.info("Extractions merged", {
- mergedContentTokens: countTokens(mergedContent),
+ mergedContentTokens: await countTokens(mergedContent),
  keyFactsCount: keyFacts.length,
  });
  // Step 5: Final compaction if needed
  const result = await compactFinalResult(mergedContent, keyFacts, keyRequirements, targetTokens);
- const finalTokens = countToolResultTokens(result);
+ const finalTokens = await countToolResultTokens(result);
  // Mark state as complete
  state = updateStatePhase(state, "complete");
  if (storage) {
@@ -85,8 +85,8 @@ async function scoreChunk(chunk, keyRequirements, totalChunks, config) {
  try {
  const prompt = buildScoringPrompt(chunk.content, keyRequirements, chunk.index, totalChunks);
  // Pre-flight validation: ensure prompt fits in model context
- const systemPromptTokens = countTokens(SCORING_SYSTEM_PROMPT);
- const promptTokens = countTokens(prompt);
+ const systemPromptTokens = await countTokens(SCORING_SYSTEM_PROMPT);
+ const promptTokens = await countTokens(prompt);
  const validation = validateContextFits(promptTokens, systemPromptTokens, config.modelContextSize, 0.1);
  if (!validation.isValid) {
  logger.warn("Scoring prompt too large for model context, skipping chunk", {
@@ -185,7 +185,7 @@ export const toolResponseCompactor = async (ctx) => {
  })
  .join("\n\n");
  const compacted = await compactWithLLM(rawOutput, toolName, toolInput, conversationContext, targetSize);
- const finalTokens = countToolResultTokens(compacted);
+ const finalTokens = await countToolResultTokens(compacted);
  // Verify compaction stayed within boundaries
  if (finalTokens > targetSize) {
  // Compaction exceeded target - log warning but accept the result
@@ -248,7 +248,7 @@ Based on the tool input and conversation context, what key information is the us

  Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
  // Pre-flight validation: ensure analysis prompt fits in compaction model's context
- const analysisValidation = validatePromptFits(analysisPrompt, COMPACTION_MODEL, 0.1);
+ const analysisValidation = await validatePromptFits(analysisPrompt, COMPACTION_MODEL, 0.1);
  if (!analysisValidation.isValid) {
  logger.warn("Analysis prompt too large for compaction model, using default requirements", {
  promptTokens: analysisValidation.totalTokens,
@@ -296,7 +296,7 @@ Provide a concise list (3-5 bullet points) of the most important elements to ext
  });
  // Step 2: Recursively compact until we meet the target
  let currentData = rawOutput;
- let currentTokens = countToolResultTokens(rawOutput);
+ let currentTokens = await countToolResultTokens(rawOutput);
  const maxAttempts = 4;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
  const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
@@ -348,7 +348,7 @@ Your task: Further compact this data by:
  Return ONLY valid JSON (no explanation text).`;
  }
  // Pre-flight validation: ensure compaction prompt fits in compaction model's context
- const compactionValidation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+ const compactionValidation = await validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
  if (!compactionValidation.isValid) {
  logger.warn("Compaction prompt too large for LLM, cannot compact further", {
  attempt: attempt + 1,
@@ -411,7 +411,7 @@ Return ONLY valid JSON (no explanation text).`;
  ];
  const jsonText = jsonMatch[1] || responseText;
  const compacted = JSON.parse(jsonText.trim());
- const compactedTokens = countToolResultTokens(compacted);
+ const compactedTokens = await countToolResultTokens(compacted);
  logger.info(`LLM compaction attempt ${attempt + 1}/${maxAttempts}`, {
  currentTokens,
  compactedTokens,
@@ -422,7 +422,7 @@ Return ONLY valid JSON (no explanation text).`;
  if (compactedTokens <= targetTokens) {
  logger.info("LLM compaction succeeded", {
  attempts: attempt + 1,
- originalTokens: countToolResultTokens(rawOutput),
+ originalTokens: await countToolResultTokens(rawOutput),
  finalTokens: compactedTokens,
  targetTokens,
  });
@@ -456,7 +456,7 @@ Return ONLY valid JSON (no explanation text).`;
  */
  async function compactWithLLMInternal(rawOutput, keyRequirements, targetTokens) {
  let currentData = rawOutput;
- let currentTokens = countToolResultTokens(rawOutput);
+ let currentTokens = await countToolResultTokens(rawOutput);
  const maxAttempts = 4;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
  const reductionNeeded = Math.round(((currentTokens - targetTokens) / currentTokens) * 100);
@@ -480,7 +480,7 @@ Your task: Create a compacted version that:

  Return ONLY valid JSON (no explanation text).`;
  // Pre-flight validation
- const validation = validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
+ const validation = await validatePromptFits(compactionPrompt, COMPACTION_MODEL, 0.1);
  if (!validation.isValid) {
  logger.warn("Internal compaction prompt too large", {
  attempt: attempt + 1,
@@ -516,7 +516,7 @@ Return ONLY valid JSON (no explanation text).`;
  ];
  const jsonText = jsonMatch[1] || responseText;
  const compacted = JSON.parse(jsonText.trim());
- const compactedTokens = countToolResultTokens(compacted);
+ const compactedTokens = await countToolResultTokens(compacted);
  if (compactedTokens <= targetTokens ||
  compactedTokens <= targetTokens * 1.05) {
  return compacted;