npm - @dexto/core - Versions diffs - 1.5.3 → 1.5.4 - Mend

@dexto/core 1.5.3 → 1.5.4

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (92)

package/dist/agent/DextoAgent.cjs +284 -1
package/dist/agent/DextoAgent.d.ts +114 -0
package/dist/agent/DextoAgent.d.ts.map +1 -1
package/dist/agent/DextoAgent.js +275 -1
package/dist/agent/schemas.d.ts +51 -21
package/dist/agent/schemas.d.ts.map +1 -1
package/dist/context/compaction/overflow.cjs +6 -10
package/dist/context/compaction/overflow.d.ts +14 -11
package/dist/context/compaction/overflow.d.ts.map +1 -1
package/dist/context/compaction/overflow.js +6 -10
package/dist/context/compaction/providers/reactive-overflow-provider.cjs +15 -0
package/dist/context/compaction/providers/reactive-overflow-provider.d.ts +15 -0
package/dist/context/compaction/providers/reactive-overflow-provider.d.ts.map +1 -1
package/dist/context/compaction/providers/reactive-overflow-provider.js +15 -0
package/dist/context/compaction/schemas.cjs +22 -2
package/dist/context/compaction/schemas.d.ts +45 -0
package/dist/context/compaction/schemas.d.ts.map +1 -1
package/dist/context/compaction/schemas.js +22 -2
package/dist/context/compaction/strategies/reactive-overflow.cjs +166 -26
package/dist/context/compaction/strategies/reactive-overflow.d.ts +21 -0
package/dist/context/compaction/strategies/reactive-overflow.d.ts.map +1 -1
package/dist/context/compaction/strategies/reactive-overflow.js +166 -26
package/dist/context/manager.cjs +278 -31
package/dist/context/manager.d.ts +192 -5
package/dist/context/manager.d.ts.map +1 -1
package/dist/context/manager.js +285 -32
package/dist/context/types.d.ts +6 -0
package/dist/context/types.d.ts.map +1 -1
package/dist/context/utils.cjs +77 -11
package/dist/context/utils.d.ts +86 -8
package/dist/context/utils.d.ts.map +1 -1
package/dist/context/utils.js +71 -11
package/dist/events/index.cjs +4 -0
package/dist/events/index.d.ts +41 -7
package/dist/events/index.d.ts.map +1 -1
package/dist/events/index.js +4 -0
package/dist/llm/executor/stream-processor.cjs +19 -1
package/dist/llm/executor/stream-processor.d.ts +3 -0
package/dist/llm/executor/stream-processor.d.ts.map +1 -1
package/dist/llm/executor/stream-processor.js +19 -1
package/dist/llm/executor/turn-executor.cjs +219 -30
package/dist/llm/executor/turn-executor.d.ts +62 -10
package/dist/llm/executor/turn-executor.d.ts.map +1 -1
package/dist/llm/executor/turn-executor.js +219 -30
package/dist/llm/executor/types.d.ts +28 -0
package/dist/llm/executor/types.d.ts.map +1 -1
package/dist/llm/formatters/vercel.cjs +36 -28
package/dist/llm/formatters/vercel.d.ts.map +1 -1
package/dist/llm/formatters/vercel.js +36 -28
package/dist/llm/services/factory.cjs +3 -2
package/dist/llm/services/factory.d.ts +3 -1
package/dist/llm/services/factory.d.ts.map +1 -1
package/dist/llm/services/factory.js +3 -2
package/dist/llm/services/vercel.cjs +34 -6
package/dist/llm/services/vercel.d.ts +23 -3
package/dist/llm/services/vercel.d.ts.map +1 -1
package/dist/llm/services/vercel.js +34 -6
package/dist/session/chat-session.cjs +20 -11
package/dist/session/chat-session.d.ts +9 -4
package/dist/session/chat-session.d.ts.map +1 -1
package/dist/session/chat-session.js +20 -11
package/dist/session/compaction-service.cjs +139 -0
package/dist/session/compaction-service.d.ts +81 -0
package/dist/session/compaction-service.d.ts.map +1 -0
package/dist/session/compaction-service.js +106 -0
package/dist/session/session-manager.cjs +146 -0
package/dist/session/session-manager.d.ts +50 -0
package/dist/session/session-manager.d.ts.map +1 -1
package/dist/session/session-manager.js +146 -0
package/dist/session/title-generator.cjs +2 -2
package/dist/session/title-generator.js +2 -2
package/dist/systemPrompt/in-built-prompts.cjs +36 -0
package/dist/systemPrompt/in-built-prompts.d.ts +18 -1
package/dist/systemPrompt/in-built-prompts.d.ts.map +1 -1
package/dist/systemPrompt/in-built-prompts.js +25 -0
package/dist/systemPrompt/manager.cjs +22 -0
package/dist/systemPrompt/manager.d.ts +10 -0
package/dist/systemPrompt/manager.d.ts.map +1 -1
package/dist/systemPrompt/manager.js +22 -0
package/dist/systemPrompt/registry.cjs +2 -1
package/dist/systemPrompt/registry.d.ts +1 -1
package/dist/systemPrompt/registry.d.ts.map +1 -1
package/dist/systemPrompt/registry.js +2 -1
package/dist/systemPrompt/schemas.cjs +7 -0
package/dist/systemPrompt/schemas.d.ts +13 -13
package/dist/systemPrompt/schemas.d.ts.map +1 -1
package/dist/systemPrompt/schemas.js +7 -0
package/dist/utils/index.cjs +3 -1
package/dist/utils/index.d.ts +1 -0
package/dist/utils/index.d.ts.map +1 -1
package/dist/utils/index.js +1 -0
package/package.json +1 -1

package/dist/context/compaction/strategies/reactive-overflow.js CHANGED

@@ -4,15 +4,36 @@ import { isAssistantMessage, isToolMessage } from "../../types.js";
 const DEFAULT_OPTIONS = {
   preserveLastNTurns: 2,
   maxSummaryTokens: 2e3,
-  summaryPrompt: `You are a conversation summarizer. Summarize the following conversation history concisely, focusing on:
-- What tasks were attempted and their outcomes
-- Current state and context the assistant needs to remember
-- Any important decisions or information discovered
-- What the user was trying to accomplish
+  summaryPrompt: `You are a conversation summarizer creating a structured summary for session continuation.
-Be concise but preserve essential context. Output only the summary, no preamble.
+Analyze the conversation and produce a summary in the following XML format:
-Conversation:
+<session_compaction>
+  <conversation_history>
+    A concise summary of what happened in the conversation:
+    - Tasks attempted and their outcomes (success/failure/in-progress)
+    - Important decisions made
+    - Key information discovered (file paths, configurations, errors encountered)
+    - Tools used and their results
+  </conversation_history>
+  <current_task>
+    The most recent task or instruction the user requested that may still be in progress.
+    Be specific - include the exact request and current status.
+  </current_task>
+  <important_context>
+    Critical state that must be preserved:
+    - File paths being worked on
+    - Variable values or configurations
+    - Error messages that need addressing
+    - Any pending actions or next steps
+  </important_context>
+</session_compaction>
+IMPORTANT: The assistant will continue working based on this summary. Ensure the current_task section clearly states what needs to be done next.
+Conversation to summarize:
 {conversation}`
 };
 class ReactiveOverflowStrategy {
@@ -41,15 +62,71 @@ class ReactiveOverflowStrategy {
       this.logger.debug("ReactiveOverflowStrategy: History too short, skipping compaction");
       return [];
     }
+    let existingSummaryIndex = -1;
+    for (let i = history.length - 1; i >= 0; i--) {
+      const msg = history[i];
+      if (msg?.metadata?.isSummary === true || msg?.metadata?.isSessionSummary === true) {
+        existingSummaryIndex = i;
+        break;
+      }
+    }
+    if (existingSummaryIndex !== -1) {
+      const messagesAfterSummary = history.slice(existingSummaryIndex + 1);
+      if (messagesAfterSummary.length <= 4) {
+        this.logger.debug(
+          `ReactiveOverflowStrategy: Only ${messagesAfterSummary.length} messages after existing summary, skipping re-compaction`
+        );
+        return [];
+      }
+      this.logger.info(
+        `ReactiveOverflowStrategy: Found existing summary at index ${existingSummaryIndex}, working with ${messagesAfterSummary.length} messages after it`
+      );
+      return this.compactSubset(messagesAfterSummary, history);
+    }
     const { toSummarize, toKeep } = this.splitHistory(history);
     if (toSummarize.length === 0) {
       this.logger.debug("ReactiveOverflowStrategy: No messages to summarize");
       return [];
     }
+    const currentTaskMessage = this.findCurrentTaskMessage(history);
     this.logger.info(
       `ReactiveOverflowStrategy: Summarizing ${toSummarize.length} messages, keeping ${toKeep.length}`
     );
-    const summary = await this.generateSummary(toSummarize);
+    const summary = await this.generateSummary(toSummarize, currentTaskMessage);
+    const summaryMessage = {
+      role: "assistant",
+      content: [{ type: "text", text: summary }],
+      timestamp: Date.now(),
+      metadata: {
+        isSummary: true,
+        summarizedAt: Date.now(),
+        originalMessageCount: toSummarize.length,
+        originalFirstTimestamp: toSummarize[0]?.timestamp,
+        originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp
+      }
+    };
+    return [summaryMessage];
+  }
+  /**
+   * Handle re-compaction when there's already a summary in history.
+   * Only summarizes messages AFTER the existing summary, preventing
+   * cascading summaries of summaries.
+   *
+   * @param messagesAfterSummary Messages after the existing summary
+   * @param fullHistory The complete history (for current task detection)
+   * @returns Array with single summary message, or empty if nothing to summarize
+   */
+  async compactSubset(messagesAfterSummary, fullHistory) {
+    const { toSummarize, toKeep } = this.splitHistory(messagesAfterSummary);
+    if (toSummarize.length === 0) {
+      this.logger.debug("ReactiveOverflowStrategy: No messages to summarize in subset");
+      return [];
+    }
+    const currentTaskMessage = this.findCurrentTaskMessage(fullHistory);
+    this.logger.info(
+      `ReactiveOverflowStrategy (re-compact): Summarizing ${toSummarize.length} messages after existing summary, keeping ${toKeep.length}`
+    );
+    const summary = await this.generateSummary(toSummarize, currentTaskMessage);
     const summaryMessage = {
       role: "assistant",
       content: [{ type: "text", text: summary }],
@@ -57,16 +134,43 @@ class ReactiveOverflowStrategy {
       metadata: {
         isSummary: true,
         summarizedAt: Date.now(),
-        summarizedMessageCount: toSummarize.length,
+        originalMessageCount: toSummarize.length,
+        isRecompaction: true,
+        // Mark that this is a re-compaction
         originalFirstTimestamp: toSummarize[0]?.timestamp,
         originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp
       }
     };
     return [summaryMessage];
   }
+  /**
+   * Find the most recent user message that represents the current task.
+   * This helps preserve context about what the user is currently asking for.
+   */
+  findCurrentTaskMessage(history) {
+    for (let i = history.length - 1; i >= 0; i--) {
+      const msg = history[i];
+      if (msg?.role === "user") {
+        if (typeof msg.content === "string") {
+          return msg.content;
+        } else if (Array.isArray(msg.content)) {
+          const textParts = msg.content.filter(
+            (part) => part.type === "text"
+          ).map((part) => part.text).join("\n");
+          if (textParts.length > 0) {
+            return textParts;
+          }
+        }
+      }
+    }
+    return null;
+  }
   /**
    * Split history into messages to summarize and messages to keep.
    * Keeps the last N turns (user + assistant pairs) intact.
+   *
+   * For long agentic conversations with many tool calls, this also ensures
+   * we don't try to keep too many messages even within preserved turns.
    */
   splitHistory(history) {
     const turnsToKeep = this.options.preserveLastNTurns;
@@ -81,20 +185,25 @@ class ReactiveOverflowStrategy {
     }
     if (userMessageIndices.length > 0) {
       const splitIndex = userMessageIndices[0];
-      if (splitIndex !== void 0) {
-        if (splitIndex === 0) {
-          return {
-            toSummarize: [],
-            toKeep: history
-          };
-        }
+      if (splitIndex !== void 0 && splitIndex > 0) {
         return {
           toSummarize: history.slice(0, splitIndex),
           toKeep: history.slice(splitIndex)
         };
       }
     }
-    const keepCount = Math.min(4, history.length);
+    const minKeep = 3;
+    const maxKeepPercent = 0.2;
+    const keepCount = Math.max(minKeep, Math.floor(history.length * maxKeepPercent));
+    if (keepCount >= history.length) {
+      return {
+        toSummarize: [],
+        toKeep: history
+      };
+    }
+    this.logger.debug(
+      `splitHistory: Using fallback - keeping last ${keepCount} of ${history.length} messages`
+    );
     return {
       toSummarize: history.slice(0, -keepCount),
       toKeep: history.slice(-keepCount)
@@ -102,21 +211,36 @@ class ReactiveOverflowStrategy {
   }
   /**
    * Generate an LLM summary of the messages.
+   *
+   * @param messages Messages to summarize
+   * @param currentTask The most recent user message (current task context)
    */
-  async generateSummary(messages) {
+  async generateSummary(messages, currentTask) {
     const formattedConversation = this.formatMessagesForSummary(messages);
-    const prompt = this.options.summaryPrompt.replace("{conversation}", formattedConversation);
+    let conversationWithContext = formattedConversation;
+    if (currentTask) {
+      conversationWithContext += `
+--- CURRENT TASK (most recent user request) ---
+${currentTask}`;
+    }
+    const prompt = this.options.summaryPrompt.replace(
+      "{conversation}",
+      conversationWithContext
+    );
     try {
       const result = await generateText({
         model: this.model,
         prompt,
         maxOutputTokens: this.options.maxSummaryTokens
       });
-      return `[Previous conversation summary]
+      return `[Session Compaction Summary]
 ${result.text}`;
     } catch (error) {
-      this.logger.error("ReactiveOverflowStrategy: Failed to generate summary", { error });
-      return this.createFallbackSummary(messages);
+      this.logger.error(
+        `ReactiveOverflowStrategy: Failed to generate summary - ${error instanceof Error ? error.message : String(error)}`
+      );
+      return this.createFallbackSummary(messages, currentTask);
     }
   }
   /**
@@ -152,7 +276,7 @@ ${result.text}`;
   /**
    * Create a fallback summary if LLM call fails.
    */
-  createFallbackSummary(messages) {
+  createFallbackSummary(messages, currentTask) {
     const userMessages = messages.filter((m) => m.role === "user");
     const assistantWithTools = messages.filter(
       (m) => isAssistantMessage(m) && !!m.toolCalls && m.toolCalls.length > 0
@@ -168,9 +292,25 @@ ${result.text}`;
         assistantWithTools.flatMap((m) => m.toolCalls.map((tc) => tc.function.name))
       )
     ].join(", ");
-    return `[Previous conversation summary - fallback]
-User discussed: ${userTopics || "various topics"}
-Tools used: ${toolsUsed || "none"}`;
+    let fallback = `[Session Compaction Summary - Fallback]
+<session_compaction>
+  <conversation_history>
+    User discussed: ${userTopics || "various topics"}
+    Tools used: ${toolsUsed || "none"}
+    Messages summarized: ${messages.length}
+  </conversation_history>`;
+    if (currentTask) {
+      fallback += `
+  <current_task>
+    ${currentTask.slice(0, 500)}${currentTask.length > 500 ? "..." : ""}
+  </current_task>`;
+    }
+    fallback += `
+  <important_context>
+    Note: This is a fallback summary due to LLM error. Context may be incomplete.
+  </important_context>
+</session_compaction>`;
+    return fallback;
   }
 }
 export {

package/dist/context/manager.cjs CHANGED

@@ -54,6 +54,23 @@ class ContextManager {
    * Maximum number of tokens allowed in the conversation (if specified)
    */
   maxInputTokens;
+  /**
+   * Last known actual input token count from the LLM API response.
+   * Updated after each LLM call. Used by /context for accurate reporting.
+   */
+  lastActualInputTokens = null;
+  /**
+   * Last known actual output token count from the LLM API response.
+   * Updated after each LLM call. Used in the context estimation formula:
+   * estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
+   */
+  lastActualOutputTokens = null;
+  /**
+   * Message count at the time of the last LLM call.
+   * Used to identify which messages are "new" since the last call.
+   * Messages after this index are estimated with length/4 heuristic.
+   */
+  lastCallMessageCount = null;
   historyProvider;
   sessionId;
   /**
@@ -152,6 +169,119 @@ class ContextManager {
   getMaxInputTokens() {
     return this.maxInputTokens;
   }
+  /**
+   * Returns the last known actual input token count from the LLM API.
+   * Returns null if no LLM call has been made yet.
+   */
+  getLastActualInputTokens() {
+    return this.lastActualInputTokens;
+  }
+  /**
+   * Updates the last known actual input token count.
+   * Called after each LLM response with the actual usage from the API.
+   */
+  setLastActualInputTokens(tokens) {
+    this.lastActualInputTokens = tokens;
+    this.logger.debug(`Updated lastActualInputTokens: ${tokens}`);
+  }
+  /**
+   * Returns the last known actual output token count from the LLM API.
+   * Returns null if no LLM call has been made yet.
+   */
+  getLastActualOutputTokens() {
+    return this.lastActualOutputTokens;
+  }
+  /**
+   * Updates the last known actual output token count.
+   * Called after each LLM response with the actual usage from the API.
+   */
+  setLastActualOutputTokens(tokens) {
+    this.lastActualOutputTokens = tokens;
+    this.logger.debug(`Updated lastActualOutputTokens: ${tokens}`);
+  }
+  /**
+   * Returns the message count at the time of the last LLM call.
+   * Returns null if no LLM call has been made yet.
+   */
+  getLastCallMessageCount() {
+    return this.lastCallMessageCount;
+  }
+  /**
+   * Records the current message count after an LLM call completes.
+   * This marks the boundary for "new messages" calculation.
+   */
+  async recordLastCallMessageCount() {
+    const history = await this.historyProvider.getHistory();
+    this.lastCallMessageCount = history.length;
+    this.logger.debug(`Recorded lastCallMessageCount: ${this.lastCallMessageCount}`);
+  }
+  /**
+   * Resets the actual token tracking state.
+   * Called after compaction since the context has fundamentally changed.
+   */
+  resetActualTokenTracking() {
+    this.lastActualInputTokens = null;
+    this.lastActualOutputTokens = null;
+    this.lastCallMessageCount = null;
+    this.logger.debug("Reset actual token tracking state (after compaction)");
+  }
+  // ============= HISTORY PREPARATION =============
+  /**
+   * Placeholder text used when tool outputs are pruned.
+   * Shared constant to ensure consistency between preparation and estimation.
+   */
+  static PRUNED_TOOL_PLACEHOLDER = "[Old tool result content cleared]";
+  /**
+   * Prepares conversation history for LLM consumption.
+   * This is the single source of truth for history transformation logic.
+   *
+   * Transformations applied:
+   * 1. filterCompacted - Remove pre-summary messages (messages before the most recent summary)
+   * 2. Transform pruned tool messages - Replace compactedAt messages with placeholder text
+   *
+   * Used by both:
+   * - getFormattedMessagesForLLM() - For actual LLM calls
+   * - getContextTokenEstimate() - For /context command estimation
+   *
+   * @returns Prepared history and statistics about the transformations
+   */
+  async prepareHistory() {
+    const fullHistory = await this.historyProvider.getHistory();
+    const originalCount = fullHistory.length;
+    let history = (0, import_utils.filterCompacted)(fullHistory);
+    const filteredCount = history.length;
+    if (filteredCount < originalCount) {
+      this.logger.debug(
+        `prepareHistory: filterCompacted reduced from ${originalCount} to ${filteredCount} messages`
+      );
+    }
+    let prunedToolCount = 0;
+    history = history.map((msg) => {
+      if (msg.role === "tool" && msg.compactedAt) {
+        prunedToolCount++;
+        return {
+          ...msg,
+          content: [
+            { type: "text", text: ContextManager.PRUNED_TOOL_PLACEHOLDER }
+          ]
+        };
+      }
+      return msg;
+    });
+    if (prunedToolCount > 0) {
+      this.logger.debug(
+        `prepareHistory: Transformed ${prunedToolCount} pruned tool messages to placeholders`
+      );
+    }
+    return {
+      preparedHistory: history,
+      stats: {
+        originalCount,
+        filteredCount,
+        prunedToolCount
+      }
+    };
+  }
   /**
    * Assembles and returns the current system prompt by invoking the SystemPromptManager.
    */
@@ -200,6 +330,7 @@ ${prompt}`);
       }
     };
     await this.addMessage(clearMarker);
+    this.resetActualTokenTracking();
     this.logger.debug(`Context cleared for session: ${this.sessionId}`);
   }
   /**
@@ -571,51 +702,166 @@ ${prompt}`);
   /**
    * Gets the conversation ready for LLM consumption with proper flow:
    * 1. Get system prompt
-   * 2. Get history and filter (exclude pre-summary messages)
-   * 3. Format messages
-   * This method implements the correct ordering to avoid circular dependencies.
+   * 2. Prepare history (filter + transform pruned messages)
+   * 3. Format messages for LLM API
    *
    * @param contributorContext The DynamicContributorContext for system prompt contributors and formatting
    * @param llmContext The llmContext for the formatter to decide which messages to include based on the model's capabilities
-   * @returns Object containing formatted messages and system prompt
+   * @returns Object containing formatted messages, system prompt, and prepared history
    */
-  async getFormattedMessagesWithCompression(contributorContext, llmContext) {
+  async getFormattedMessagesForLLM(contributorContext, llmContext) {
     const systemPrompt = await this.getSystemPrompt(contributorContext);
-    const fullHistory = await this.historyProvider.getHistory();
-    let history = (0, import_utils.filterCompacted)(fullHistory);
-    if (history.length < fullHistory.length) {
-      this.logger.debug(
-        `filterCompacted: Reduced history from ${fullHistory.length} to ${history.length} messages (summary present)`
-      );
-    }
-    const compactedCount = history.filter((m) => m.role === "tool" && m.compactedAt).length;
-    if (compactedCount > 0) {
-      history = history.map((msg) => {
-        if (msg.role === "tool" && msg.compactedAt) {
-          return {
-            ...msg,
-            content: [
-              { type: "text", text: "[Old tool result content cleared]" }
-            ]
-          };
-        }
-        return msg;
-      });
-      this.logger.debug(
-        `Transformed ${compactedCount} compacted tool messages to placeholders`
-      );
-    }
+    const { preparedHistory } = await this.prepareHistory();
     const formattedMessages = await this.getFormattedMessages(
       contributorContext,
       llmContext,
       systemPrompt,
-      history
+      preparedHistory
     );
     return {
       formattedMessages,
-      systemPrompt
+      systemPrompt,
+      preparedHistory
+    };
+  }
+  /**
+   * Estimates context token usage for the /context command and compaction decisions.
+   * Uses the same prepareHistory() logic as getFormattedMessagesForLLM() to ensure consistency.
+   *
+   * When actuals are available from previous LLM calls:
+   *   estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
+   *
+   * This formula is more accurate because:
+   * - lastInputTokens: exactly what the API processed (ground truth)
+   * - lastOutputTokens: exactly what the LLM returned (ground truth)
+   * - newMessagesEstimate: only estimate the delta (tool results, new user messages)
+   *
+   * When no LLM call has been made yet (or after compaction), falls back to pure estimation.
+   *
+   * @param contributorContext Context for building the system prompt
+   * @param tools Tool definitions to include in the estimate
+   * @returns Token estimates with breakdown and comparison to actual (if available)
+   */
+  async getContextTokenEstimate(contributorContext, tools) {
+    const systemPrompt = await this.getSystemPrompt(contributorContext);
+    const { preparedHistory, stats } = await this.prepareHistory();
+    const lastInput = this.lastActualInputTokens;
+    const lastOutput = this.lastActualOutputTokens;
+    const lastMsgCount = this.lastCallMessageCount;
+    const currentHistory = await this.historyProvider.getHistory();
+    const pureEstimate = (0, import_utils.estimateContextTokens)(systemPrompt, preparedHistory, tools);
+    let total;
+    let calculationBasis;
+    if (lastInput !== null && lastOutput !== null && lastMsgCount !== null) {
+      const newMessages = currentHistory.slice(lastMsgCount);
+      const newMessagesEstimate = (0, import_utils.estimateMessagesTokens)(newMessages);
+      total = lastInput + lastOutput + newMessagesEstimate;
+      calculationBasis = {
+        method: "actuals",
+        lastInputTokens: lastInput,
+        lastOutputTokens: lastOutput,
+        newMessagesEstimate
+      };
+      this.logger.info(
+        `Context estimate (actuals-based): lastInput=${lastInput}, lastOutput=${lastOutput}, newMsgs=${newMessagesEstimate} (${newMessages.length} messages), total=${total}`
+      );
+    } else {
+      total = pureEstimate.total;
+      calculationBasis = {
+        method: "estimate"
+      };
+      this.logger.debug(
+        `Context estimate (pure estimate): total=${total} (no actuals available yet)`
+      );
+    }
+    const systemPromptTokens = pureEstimate.breakdown.systemPrompt;
+    const toolsTokens = pureEstimate.breakdown.tools;
+    const messagesDisplay = Math.max(0, total - systemPromptTokens - toolsTokens.total);
+    if (lastInput !== null) {
+      const pureTotal = pureEstimate.total;
+      const diff = pureTotal - lastInput;
+      const diffPercent = lastInput > 0 ? (diff / lastInput * 100).toFixed(1) : "0.0";
+      this.logger.info(
+        `Context token calibration: pureEstimate=${pureTotal}, lastActual=${lastInput}, diff=${diff} (${diffPercent}%)`
+      );
+    }
+    return {
+      estimated: total,
+      actual: lastInput,
+      breakdown: {
+        systemPrompt: systemPromptTokens,
+        tools: toolsTokens,
+        messages: messagesDisplay
+      },
+      stats: {
+        originalMessageCount: stats.originalCount,
+        filteredMessageCount: stats.filteredCount,
+        prunedToolCount: stats.prunedToolCount
+      },
+      calculationBasis
     };
   }
+  /**
+   * Estimates the next input token count using actual token data from the previous LLM call.
+   * This is a lightweight version for compaction pre-checks that only returns the total.
+   *
+   * ## Formula (when actuals are available):
+   *   estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
+   *
+   * ## Why this formula works:
+   *
+   * Consider two consecutive LLM calls:
+   *
+   * ```
+   * Call N:
+   *   Input sent: system + tools + [user1]           = lastInput tokens
+   *   Output received: assistant response            = lastOutput tokens
+   *
+   * Call N+1:
+   *   Input will be: system + tools + [user1, assistant1, user2, ...]
+   *                ≈ lastInput + assistant1_as_input + new_messages
+   *                ≈ lastInput + lastOutput + newMessagesEstimate
+   * ```
+   *
+   * The assistant's response (lastOutput) becomes part of the next input as conversation
+   * history. Text tokenizes similarly whether sent as input or received as output.
+   *
+   * ## No double-counting:
+   *
+   * The assistant message is added to history DURING streaming (before this method runs),
+   * and recordLastCallMessageCount() captures the count INCLUDING that message.
+   * Therefore, newMessages = history.slice(lastMsgCount) EXCLUDES the assistant message,
+   * so lastOutput and newMessages don't overlap.
+   *
+   * ## Pruning caveat:
+   *
+   * If tool output pruning occurs between calls, lastInput may be stale (higher than
+   * actual). This causes OVERESTIMATION, which is SAFE - we'd trigger compaction
+   * earlier rather than risk context overflow.
+   *
+   * @param systemPrompt The system prompt string
+   * @param preparedHistory Message history AFTER filterCompacted and pruning
+   * @param tools Tool definitions
+   * @returns Estimated total input tokens for the next LLM call
+   */
+  async getEstimatedNextInputTokens(systemPrompt, preparedHistory, tools) {
+    const lastInput = this.lastActualInputTokens;
+    const lastOutput = this.lastActualOutputTokens;
+    const lastMsgCount = this.lastCallMessageCount;
+    const currentHistory = await this.historyProvider.getHistory();
+    if (lastInput !== null && lastOutput !== null && lastMsgCount !== null) {
+      const newMessages = currentHistory.slice(lastMsgCount);
+      const newMessagesEstimate = (0, import_utils.estimateMessagesTokens)(newMessages);
+      const total = lastInput + lastOutput + newMessagesEstimate;
+      this.logger.debug(
+        `Estimated next input (actuals-based): ${lastInput} + ${lastOutput} + ${newMessagesEstimate} = ${total}`
+      );
+      return total;
+    }
+    const pureEstimate = (0, import_utils.estimateContextTokens)(systemPrompt, preparedHistory, tools);
+    this.logger.debug(`Estimated next input (pure estimate): ${pureEstimate.total}`);
+    return pureEstimate.total;
+  }
   /**
    * Gets the system prompt formatted for the target LLM provider
    * Some providers handle system prompts differently
@@ -632,6 +878,7 @@ ${prompt}`);
    */
   async resetConversation() {
     await this.historyProvider.clearHistory();
+    this.resetActualTokenTracking();
     this.logger.debug(
       `ContextManager: Conversation history cleared for session ${this.sessionId}`
     );