@vybestack/llxprt-code-core 0.1.23 → 0.2.2-nightly.250908.7b895396

This diff shows the published contents of two package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (153)
  1. package/README.md +21 -17
  2. package/dist/src/adapters/IStreamAdapter.d.ts +3 -3
  3. package/dist/src/auth/oauth-errors.d.ts +173 -0
  4. package/dist/src/auth/oauth-errors.js +461 -0
  5. package/dist/src/auth/oauth-errors.js.map +1 -0
  6. package/dist/src/auth/precedence.d.ts +1 -5
  7. package/dist/src/auth/precedence.js +28 -48
  8. package/dist/src/auth/precedence.js.map +1 -1
  9. package/dist/src/auth/token-store.js +2 -2
  10. package/dist/src/auth/token-store.js.map +1 -1
  11. package/dist/src/auth/types.d.ts +4 -4
  12. package/dist/src/code_assist/codeAssist.js +19 -6
  13. package/dist/src/code_assist/codeAssist.js.map +1 -1
  14. package/dist/src/code_assist/oauth2.d.ts +7 -0
  15. package/dist/src/code_assist/oauth2.js +82 -32
  16. package/dist/src/code_assist/oauth2.js.map +1 -1
  17. package/dist/src/code_assist/server.js +15 -4
  18. package/dist/src/code_assist/server.js.map +1 -1
  19. package/dist/src/code_assist/setup.js +9 -0
  20. package/dist/src/code_assist/setup.js.map +1 -1
  21. package/dist/src/config/index.d.ts +7 -0
  22. package/dist/src/config/index.js +8 -0
  23. package/dist/src/config/index.js.map +1 -0
  24. package/dist/src/core/client.d.ts +15 -20
  25. package/dist/src/core/client.js +98 -124
  26. package/dist/src/core/client.js.map +1 -1
  27. package/dist/src/core/compression-config.d.ts +10 -0
  28. package/dist/src/core/compression-config.js +17 -0
  29. package/dist/src/core/compression-config.js.map +1 -0
  30. package/dist/src/core/coreToolScheduler.js +50 -15
  31. package/dist/src/core/coreToolScheduler.js.map +1 -1
  32. package/dist/src/core/geminiChat.d.ts +68 -9
  33. package/dist/src/core/geminiChat.js +940 -405
  34. package/dist/src/core/geminiChat.js.map +1 -1
  35. package/dist/src/core/nonInteractiveToolExecutor.js +70 -19
  36. package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
  37. package/dist/src/core/prompts.js +35 -25
  38. package/dist/src/core/prompts.js.map +1 -1
  39. package/dist/src/core/turn.d.ts +1 -0
  40. package/dist/src/core/turn.js +8 -6
  41. package/dist/src/core/turn.js.map +1 -1
  42. package/dist/src/ide/ide-client.d.ts +1 -1
  43. package/dist/src/ide/ide-client.js +12 -6
  44. package/dist/src/ide/ide-client.js.map +1 -1
  45. package/dist/src/index.d.ts +4 -2
  46. package/dist/src/index.js +5 -2
  47. package/dist/src/index.js.map +1 -1
  48. package/dist/src/prompt-config/TemplateEngine.js +17 -0
  49. package/dist/src/prompt-config/TemplateEngine.js.map +1 -1
  50. package/dist/src/prompt-config/defaults/core-defaults.js +39 -32
  51. package/dist/src/prompt-config/defaults/core-defaults.js.map +1 -1
  52. package/dist/src/prompt-config/defaults/core.md +2 -0
  53. package/dist/src/prompt-config/defaults/provider-defaults.js +34 -27
  54. package/dist/src/prompt-config/defaults/provider-defaults.js.map +1 -1
  55. package/dist/src/prompt-config/defaults/providers/gemini/core.md +270 -0
  56. package/dist/src/prompt-config/defaults/providers/gemini/models/gemini-2.5-flash/core.md +12 -0
  57. package/dist/src/prompt-config/defaults/providers/gemini/models/gemini-2.5-flash/gemini-2-5-flash/core.md +12 -0
  58. package/dist/src/prompt-config/types.d.ts +2 -0
  59. package/dist/src/providers/BaseProvider.d.ts +39 -13
  60. package/dist/src/providers/BaseProvider.js +102 -28
  61. package/dist/src/providers/BaseProvider.js.map +1 -1
  62. package/dist/src/providers/IProvider.d.ts +17 -3
  63. package/dist/src/providers/LoggingProviderWrapper.d.ts +10 -3
  64. package/dist/src/providers/LoggingProviderWrapper.js +33 -27
  65. package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
  66. package/dist/src/providers/ProviderContentGenerator.d.ts +2 -2
  67. package/dist/src/providers/ProviderContentGenerator.js +9 -6
  68. package/dist/src/providers/ProviderContentGenerator.js.map +1 -1
  69. package/dist/src/providers/ProviderManager.d.ts +4 -0
  70. package/dist/src/providers/ProviderManager.js +6 -0
  71. package/dist/src/providers/ProviderManager.js.map +1 -1
  72. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +34 -21
  73. package/dist/src/providers/anthropic/AnthropicProvider.js +505 -492
  74. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  75. package/dist/src/providers/gemini/GeminiProvider.d.ts +23 -9
  76. package/dist/src/providers/gemini/GeminiProvider.js +344 -515
  77. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  78. package/dist/src/providers/openai/ConversationCache.d.ts +3 -3
  79. package/dist/src/providers/openai/IChatGenerateParams.d.ts +9 -4
  80. package/dist/src/providers/openai/OpenAIProvider.d.ts +46 -96
  81. package/dist/src/providers/openai/OpenAIProvider.js +580 -1392
  82. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  83. package/dist/src/providers/openai/buildResponsesRequest.d.ts +3 -3
  84. package/dist/src/providers/openai/buildResponsesRequest.js +67 -37
  85. package/dist/src/providers/openai/buildResponsesRequest.js.map +1 -1
  86. package/dist/src/providers/openai/estimateRemoteTokens.d.ts +2 -2
  87. package/dist/src/providers/openai/estimateRemoteTokens.js +21 -8
  88. package/dist/src/providers/openai/estimateRemoteTokens.js.map +1 -1
  89. package/dist/src/providers/openai/parseResponsesStream.d.ts +6 -2
  90. package/dist/src/providers/openai/parseResponsesStream.js +99 -391
  91. package/dist/src/providers/openai/parseResponsesStream.js.map +1 -1
  92. package/dist/src/providers/openai/syntheticToolResponses.d.ts +5 -5
  93. package/dist/src/providers/openai/syntheticToolResponses.js +102 -91
  94. package/dist/src/providers/openai/syntheticToolResponses.js.map +1 -1
  95. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +89 -0
  96. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +451 -0
  97. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -0
  98. package/dist/src/providers/openai-responses/index.d.ts +1 -0
  99. package/dist/src/providers/openai-responses/index.js +2 -0
  100. package/dist/src/providers/openai-responses/index.js.map +1 -0
  101. package/dist/src/providers/tokenizers/OpenAITokenizer.js +3 -3
  102. package/dist/src/providers/tokenizers/OpenAITokenizer.js.map +1 -1
  103. package/dist/src/providers/types.d.ts +1 -1
  104. package/dist/src/services/ClipboardService.d.ts +19 -0
  105. package/dist/src/services/ClipboardService.js +66 -0
  106. package/dist/src/services/ClipboardService.js.map +1 -0
  107. package/dist/src/services/history/ContentConverters.d.ts +43 -0
  108. package/dist/src/services/history/ContentConverters.js +325 -0
  109. package/dist/src/services/history/ContentConverters.js.map +1 -0
  110. package/dist/src/{providers/IMessage.d.ts → services/history/HistoryEvents.d.ts} +16 -22
  111. package/dist/src/{providers/IMessage.js → services/history/HistoryEvents.js} +1 -1
  112. package/dist/src/services/history/HistoryEvents.js.map +1 -0
  113. package/dist/src/services/history/HistoryService.d.ts +220 -0
  114. package/dist/src/services/history/HistoryService.js +673 -0
  115. package/dist/src/services/history/HistoryService.js.map +1 -0
  116. package/dist/src/services/history/IContent.d.ts +183 -0
  117. package/dist/src/services/history/IContent.js +104 -0
  118. package/dist/src/services/history/IContent.js.map +1 -0
  119. package/dist/src/services/index.d.ts +1 -0
  120. package/dist/src/services/index.js +1 -0
  121. package/dist/src/services/index.js.map +1 -1
  122. package/dist/src/settings/SettingsService.js.map +1 -1
  123. package/dist/src/telemetry/types.d.ts +16 -4
  124. package/dist/src/telemetry/types.js.map +1 -1
  125. package/dist/src/tools/IToolFormatter.d.ts +2 -2
  126. package/dist/src/tools/ToolFormatter.d.ts +42 -4
  127. package/dist/src/tools/ToolFormatter.js +151 -64
  128. package/dist/src/tools/ToolFormatter.js.map +1 -1
  129. package/dist/src/tools/doubleEscapeUtils.d.ts +57 -0
  130. package/dist/src/tools/doubleEscapeUtils.js +241 -0
  131. package/dist/src/tools/doubleEscapeUtils.js.map +1 -0
  132. package/dist/src/tools/read-file.d.ts +6 -1
  133. package/dist/src/tools/read-file.js +25 -11
  134. package/dist/src/tools/read-file.js.map +1 -1
  135. package/dist/src/tools/todo-schemas.d.ts +4 -4
  136. package/dist/src/tools/tool-registry.d.ts +8 -1
  137. package/dist/src/tools/tool-registry.js +79 -23
  138. package/dist/src/tools/tool-registry.js.map +1 -1
  139. package/dist/src/tools/tools.js +13 -0
  140. package/dist/src/tools/tools.js.map +1 -1
  141. package/dist/src/tools/write-file.d.ts +6 -1
  142. package/dist/src/tools/write-file.js +48 -26
  143. package/dist/src/tools/write-file.js.map +1 -1
  144. package/dist/src/types/modelParams.d.ts +12 -0
  145. package/dist/src/utils/bfsFileSearch.js +2 -6
  146. package/dist/src/utils/bfsFileSearch.js.map +1 -1
  147. package/dist/src/utils/schemaValidator.js +16 -1
  148. package/dist/src/utils/schemaValidator.js.map +1 -1
  149. package/package.json +8 -7
  150. package/dist/src/providers/IMessage.js.map +0 -1
  151. package/dist/src/providers/adapters/GeminiCompatibleWrapper.d.ts +0 -69
  152. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js +0 -577
  153. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js.map +0 -1
@@ -9,12 +9,17 @@ import { createUserContent, } from '@google/genai';
  import { retryWithBackoff } from '../utils/retry.js';
  import { isFunctionResponse } from '../utils/messageInspectors.js';
  import { AuthType } from './contentGenerator.js';
- import { estimateTokens } from '../utils/toolOutputLimiter.js';
+ import { HistoryService } from '../services/history/HistoryService.js';
+ import { ContentConverters } from '../services/history/ContentConverters.js';
+ // import { estimateTokens } from '../utils/toolOutputLimiter.js'; // Unused after retry stream refactor
  import { logApiRequest, logApiResponse, logApiError, } from '../telemetry/loggers.js';
  import { ApiErrorEvent, ApiRequestEvent, ApiResponseEvent, } from '../telemetry/types.js';
  import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
  import { hasCycleInSchema } from '../tools/tools.js';
  import { isStructuredError } from '../utils/quotaErrorDetection.js';
+ import { DebugLogger } from '../debug/index.js';
+ import { getCompressionPrompt } from './prompts.js';
+ import { COMPRESSION_TOKEN_THRESHOLD, COMPRESSION_PRESERVE_THRESHOLD, } from './compression-config.js';
  /**
  * Custom createUserContent function that properly handles function response arrays.
  * This fixes the issue where multiple function responses are incorrectly nested.
@@ -54,9 +59,6 @@ function createUserContentWithFunctionResponseFix(message) {
  }
  else if (Array.isArray(item)) {
  // Nested array case - flatten it
- if (process.env.DEBUG) {
- console.log('[DEBUG] createUserContentWithFunctionResponseFix - flattening nested array:', JSON.stringify(item, null, 2));
- }
  for (const subItem of item) {
  parts.push(subItem);
  }
@@ -82,6 +84,10 @@ function createUserContentWithFunctionResponseFix(message) {
  }
  return result;
  }
+ const INVALID_CONTENT_RETRY_OPTIONS = {
+ maxAttempts: 3, // 1 initial call + 2 retries
+ initialDelayMs: 500,
+ };
  /**
  * Returns true if the response is valid, false otherwise.
  */
@@ -155,14 +161,20 @@ function extractCuratedHistory(comprehensiveHistory) {
  if (isValid) {
  curatedHistory.push(...modelOutput);
  }
- else {
- // Remove the last user input when model content is invalid.
- curatedHistory.pop();
- }
  }
  }
  return curatedHistory;
  }
+ /**
+ * Custom error to signal that a stream completed without valid content,
+ * which should trigger a retry.
+ */
+ export class EmptyStreamError extends Error {
+ constructor(message) {
+ super(message);
+ this.name = 'EmptyStreamError';
+ }
+ }
  /**
  * Chat session that enables sending messages to the model with previous
  * conversation context.
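The EmptyStreamError class introduced above pairs with the INVALID_CONTENT_RETRY_OPTIONS constant from the previous hunk: the rewritten sendMessageStream further down retries the whole stream when this error is thrown. A minimal standalone sketch of that control flow, assuming only the names defined in this diff (makeStream is a hypothetical stand-in for the provider call):

class EmptyStreamError extends Error {}
const INVALID_CONTENT_RETRY_OPTIONS = { maxAttempts: 3, initialDelayMs: 500 };
async function* streamWithRetry(makeStream) {
  let lastError = new Error('Request failed after all retries.');
  for (let attempt = 0; attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; attempt++) {
    try {
      for await (const chunk of await makeStream()) {
        yield chunk; // chunks already yielded reach the caller even if the attempt later fails
      }
      lastError = null;
      break;
    }
    catch (error) {
      lastError = error;
      // Only empty/invalid streams are retried, with a linearly growing delay:
      // 500 ms after the first failure, 1000 ms after the second.
      if (error instanceof EmptyStreamError &&
          attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
        await new Promise((res) => setTimeout(res, INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs * (attempt + 1)));
        continue;
      }
      break;
    }
  }
  if (lastError) {
    throw lastError;
  }
}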
@@ -172,18 +184,58 @@ function extractCuratedHistory(comprehensiveHistory) {
  */
  export class GeminiChat {
  config;
- contentGenerator;
  generationConfig;
- history;
  // A promise to represent the current state of the message being sent to the
  // model.
  sendPromise = Promise.resolve();
- constructor(config, contentGenerator, generationConfig = {}, history = []) {
+ // A promise to represent any ongoing compression operation
+ compressionPromise = null;
+ historyService;
+ logger = new DebugLogger('llxprt:gemini:chat');
+ // Cache the compression threshold to avoid recalculating
+ cachedCompressionThreshold = null;
+ constructor(config, contentGenerator, generationConfig = {}, initialHistory = [], historyService) {
  this.config = config;
- this.contentGenerator = contentGenerator;
  this.generationConfig = generationConfig;
- this.history = history;
- validateHistory(history);
+ validateHistory(initialHistory);
+ // Use provided HistoryService or create a new one
+ this.historyService = historyService || new HistoryService();
+ this.logger.debug('GeminiChat initialized:', {
+ model: this.config.getModel(),
+ initialHistoryLength: initialHistory.length,
+ hasHistoryService: !!historyService,
+ });
+ // Convert and add initial history if provided
+ if (initialHistory.length > 0) {
+ const currentModel = this.config.getModel();
+ this.logger.debug('Adding initial history to service:', {
+ count: initialHistory.length,
+ });
+ const idGen = this.historyService.getIdGeneratorCallback();
+ for (const content of initialHistory) {
+ const matcher = this.makePositionMatcher();
+ this.historyService.add(ContentConverters.toIContent(content, idGen, matcher), currentModel);
+ }
+ }
+ }
+ /**
+ * Create a position-based matcher for Gemini tool responses.
+ * It returns the next unmatched tool call from the current history.
+ */
+ makePositionMatcher() {
+ const queue = this.historyService
+ .findUnmatchedToolCalls()
+ .map((b) => ({ historyId: b.id, toolName: b.name }));
+ // Return undefined if there are no unmatched tool calls
+ if (queue.length === 0) {
+ return undefined;
+ }
+ // Return a function that always returns a valid value (never undefined)
+ return () => {
+ const result = queue.shift();
+ // If queue is empty, return a fallback value
+ return result || { historyId: '', toolName: undefined };
+ };
  }
  _getRequestTextFromContents(contents) {
  return JSON.stringify(contents);
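makePositionMatcher above exists because Gemini functionResponse parts carry no provider call ID, so tool responses are paired with pending tool calls purely by position. A self-contained sketch of the queue behavior it returns (the two entries are illustrative data, not real history):

const unmatchedToolCalls = [
  { historyId: 'hist_tool_1', toolName: 'read_file' },
  { historyId: 'hist_tool_2', toolName: 'write_file' },
];
const queue = [...unmatchedToolCalls];
// Each incoming functionResponse consumes the oldest unmatched call;
// the empty-string fallback mirrors the code above.
const matcher = () => queue.shift() || { historyId: '', toolName: undefined };
console.log(matcher()); // { historyId: 'hist_tool_1', toolName: 'read_file' }
console.log(matcher()); // { historyId: 'hist_tool_2', toolName: 'write_file' }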
@@ -239,6 +291,13 @@ export class GeminiChat {
  setSystemInstruction(sysInstr) {
  this.generationConfig.systemInstruction = sysInstr;
  }
+ /**
+ * Get the underlying HistoryService instance
+ * @returns The HistoryService managing conversation history
+ */
+ getHistoryService() {
+ return this.historyService;
+ }
  /**
  * Sends a message to the model and returns the response.
  *
@@ -261,24 +320,79 @@
  */
  async sendMessage(params, prompt_id) {
  await this.sendPromise;
+ // Check compression - first check if already compressing, then check if needed
+ if (this.compressionPromise) {
+ this.logger.debug('Waiting for ongoing compression to complete');
+ await this.compressionPromise;
+ }
+ else if (this.shouldCompress()) {
+ // Only check shouldCompress if not already compressing
+ this.logger.debug('Triggering compression before message send');
+ this.compressionPromise = this.performCompression(prompt_id);
+ await this.compressionPromise;
+ this.compressionPromise = null;
+ }
  const userContent = createUserContentWithFunctionResponseFix(params.message);
- const requestContents = this.getHistory(true).concat(userContent);
- this._logApiRequest(requestContents, this.config.getModel(), prompt_id);
+ // DO NOT add user content to history yet - use send-then-commit pattern
+ // Get the active provider
+ const provider = this.getActiveProvider();
+ if (!provider) {
+ throw new Error('No active provider configured');
+ }
+ // Check if provider supports IContent interface
+ if (!this.providerSupportsIContent(provider)) {
+ throw new Error(`Provider ${provider.name} does not support IContent interface`);
+ }
+ // Get curated history WITHOUT the new user message
+ const currentHistory = this.historyService.getCuratedForProvider();
+ // Convert user content to IContent
+ const idGen = this.historyService.getIdGeneratorCallback();
+ const matcher = this.makePositionMatcher();
+ const userIContent = ContentConverters.toIContent(userContent, idGen, matcher);
+ // Build request with history + new message
+ const iContents = [...currentHistory, userIContent];
+ this._logApiRequest(ContentConverters.toGeminiContents(iContents), this.config.getModel(), prompt_id);
  const startTime = Date.now();
  let response;
  try {
- const apiCall = () => {
+ const apiCall = async () => {
  const modelToUse = this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
  // Prevent Flash model calls immediately after quota error
  if (this.config.getQuotaErrorOccurred() &&
  modelToUse === DEFAULT_GEMINI_FLASH_MODEL) {
  throw new Error('Please submit a new query to continue with the Flash model.');
  }
- return this.contentGenerator.generateContent({
- model: modelToUse,
- contents: requestContents,
- config: { ...this.generationConfig, ...params.config },
- }, prompt_id);
+ // Get tools in the format the provider expects
+ const tools = this.generationConfig.tools;
+ // Debug log what tools we're passing to the provider
+ this.logger.debug(() => `[GeminiChat] Passing tools to provider.generateChatCompletion:`, {
+ hasTools: !!tools,
+ toolsLength: tools?.length,
+ toolsType: typeof tools,
+ isArray: Array.isArray(tools),
+ firstTool: tools?.[0],
+ toolNames: Array.isArray(tools)
+ ? tools.map((t) => {
+ const toolObj = t;
+ return (toolObj.functionDeclarations?.[0]?.name ||
+ toolObj.name ||
+ 'unknown');
+ })
+ : 'not-an-array',
+ providerName: provider.name,
+ });
+ // Call the provider directly with IContent
+ const streamResponse = provider.generateChatCompletion(iContents, tools);
+ // Collect all chunks from the stream
+ let lastResponse;
+ for await (const iContent of streamResponse) {
+ lastResponse = iContent;
+ }
+ if (!lastResponse) {
+ throw new Error('No response from provider');
+ }
+ // Convert the final IContent to GenerateContentResponse
+ return this.convertIContentToResponse(lastResponse);
  };
  response = await retryWithBackoff(apiCall, {
  shouldRetry: (error) => {
@@ -300,18 +414,49 @@
  await this._logApiResponse(durationMs, prompt_id, response.usageMetadata, JSON.stringify(response));
  this.sendPromise = (async () => {
  const outputContent = response.candidates?.[0]?.content;
- // Because the AFC input contains the entire curated chat history in
- // addition to the new user input, we need to truncate the AFC history
- // to deduplicate the existing chat history.
+ // Send-then-commit: Now that we have a successful response, add both user and model messages
+ const currentModel = this.config.getModel();
+ // Handle AFC history or regular history
  const fullAutomaticFunctionCallingHistory = response.automaticFunctionCallingHistory;
- const index = this.getHistory(true).length;
- let automaticFunctionCallingHistory = [];
- if (fullAutomaticFunctionCallingHistory != null) {
- automaticFunctionCallingHistory =
- fullAutomaticFunctionCallingHistory.slice(index) ?? [];
+ if (fullAutomaticFunctionCallingHistory &&
+ fullAutomaticFunctionCallingHistory.length > 0) {
+ // AFC case: Add the AFC history which includes the user input
+ const curatedHistory = this.historyService.getCurated();
+ const index = ContentConverters.toGeminiContents(curatedHistory).length;
+ const automaticFunctionCallingHistory = fullAutomaticFunctionCallingHistory.slice(index) ?? [];
+ for (const content of automaticFunctionCallingHistory) {
+ const idGen = this.historyService.getIdGeneratorCallback();
+ const matcher = this.makePositionMatcher();
+ this.historyService.add(ContentConverters.toIContent(content, idGen, matcher), currentModel);
+ }
+ }
+ else {
+ // Regular case: Add user content first
+ const idGen = this.historyService.getIdGeneratorCallback();
+ const matcher = this.makePositionMatcher();
+ this.historyService.add(ContentConverters.toIContent(userContent, idGen, matcher), currentModel);
+ }
+ // Add model response if we have one (but filter out pure thinking responses)
+ if (outputContent) {
+ // Check if this is pure thinking content that should be filtered
+ if (!this.isThoughtContent(outputContent)) {
+ // Not pure thinking, add it
+ const idGen = this.historyService.getIdGeneratorCallback();
+ this.historyService.add(ContentConverters.toIContent(outputContent, idGen), currentModel);
+ }
+ // If it's pure thinking content, don't add it to history
+ }
+ else if (response.candidates && response.candidates.length > 0) {
+ // We have candidates but no content - add empty model response
+ // This handles the case where the model returns empty content
+ if (!fullAutomaticFunctionCallingHistory ||
+ fullAutomaticFunctionCallingHistory.length === 0) {
+ const emptyModelContent = { role: 'model', parts: [] };
+ const idGen = this.historyService.getIdGeneratorCallback();
+ this.historyService.add(ContentConverters.toIContent(emptyModelContent, idGen), currentModel);
+ }
  }
- const modelOutput = outputContent ? [outputContent] : [];
- this.recordHistory(userContent, modelOutput, automaticFunctionCallingHistory);
+ // If no candidates at all, don't add anything (error case)
  })();
  await this.sendPromise.catch(() => {
  // Resets sendPromise to avoid subsequent calls failing
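The rewritten sendMessage above follows what its comments call a send-then-commit pattern: the request is built from committed history plus the uncommitted user turn, and HistoryService is only mutated after the provider call succeeds, so a failed request leaves no orphaned user message or tool call behind. A condensed sketch of that ordering, with simplified names and the stream collapsed to its final chunk as sendMessage does:

async function sendThenCommit(historyService, provider, userTurn, model) {
  // 1. Build the request without committing the new turn.
  const request = [...historyService.getCuratedForProvider(), userTurn];
  // 2. Any throw here aborts before history is touched.
  let reply;
  for await (const chunk of provider.generateChatCompletion(request)) {
    reply = chunk;
  }
  // 3. Commit the user turn and the model reply together, only on success.
  historyService.add(userTurn, model);
  historyService.add(reply, model);
  return reply;
}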
@@ -350,189 +495,170 @@
  * ```
  */
  async sendMessageStream(params, prompt_id) {
- if (process.env.DEBUG) {
- console.log('DEBUG [geminiChat]: ===== SEND MESSAGE STREAM START =====');
- console.log('DEBUG [geminiChat]: Model from config:', this.config.getModel());
- console.log('DEBUG [geminiChat]: Params:', JSON.stringify(params, null, 2));
- console.log('DEBUG [geminiChat]: Message type:', typeof params.message);
- console.log('DEBUG [geminiChat]: Message content:', JSON.stringify(params.message, null, 2));
- }
- if (process.env.DEBUG) {
- console.log('DEBUG: GeminiChat.sendMessageStream called');
- console.log('DEBUG: GeminiChat.sendMessageStream params:', JSON.stringify(params, null, 2));
- console.log('DEBUG: GeminiChat.sendMessageStream params.message type:', typeof params.message);
- console.log('DEBUG: GeminiChat.sendMessageStream params.message:', JSON.stringify(params.message, null, 2));
- }
+ this.logger.debug(() => 'DEBUG [geminiChat]: ===== SEND MESSAGE STREAM START =====');
+ this.logger.debug(() => `DEBUG [geminiChat]: Model from config: ${this.config.getModel()}`);
+ this.logger.debug(() => `DEBUG [geminiChat]: Params: ${JSON.stringify(params, null, 2)}`);
+ this.logger.debug(() => `DEBUG [geminiChat]: Message type: ${typeof params.message}`);
+ this.logger.debug(() => `DEBUG [geminiChat]: Message content: ${JSON.stringify(params.message, null, 2)}`);
+ this.logger.debug(() => 'DEBUG: GeminiChat.sendMessageStream called');
+ this.logger.debug(() => `DEBUG: GeminiChat.sendMessageStream params: ${JSON.stringify(params, null, 2)}`);
+ this.logger.debug(() => `DEBUG: GeminiChat.sendMessageStream params.message type: ${typeof params.message}`);
+ this.logger.debug(() => `DEBUG: GeminiChat.sendMessageStream params.message: ${JSON.stringify(params.message, null, 2)}`);
  await this.sendPromise;
- const userContent = createUserContentWithFunctionResponseFix(params.message);
- // Debug: Check if this is a function response submission
- if (Array.isArray(params.message)) {
- let functionResponseCount = 0;
- params.message.forEach((part) => {
- if (part && typeof part === 'object' && 'functionResponse' in part) {
- functionResponseCount++;
- }
- });
- if (functionResponseCount > 0) {
- if (process.env.DEBUG) {
- console.log(`[DEBUG geminiChat] Sending ${functionResponseCount} function response(s) in array`);
- }
- }
+ // Check compression - first check if already compressing, then check if needed
+ if (this.compressionPromise) {
+ this.logger.debug('Waiting for ongoing compression to complete');
+ await this.compressionPromise;
+ }
+ else if (this.shouldCompress()) {
+ // Only check shouldCompress if not already compressing
+ this.logger.debug('Triggering compression before message send in stream');
+ this.compressionPromise = this.performCompression(prompt_id);
+ await this.compressionPromise;
+ this.compressionPromise = null;
  }
- if (process.env.DEBUG) {
- console.log('DEBUG [geminiChat]: Created userContent:', JSON.stringify(userContent, null, 2));
- }
- if (process.env.DEBUG) {
- console.log('DEBUG: GeminiChat.sendMessageStream userContent:', JSON.stringify(userContent, null, 2));
- }
- const requestContents = this.getHistory(true).concat(userContent);
- // Apply max-prompt-tokens limit if configured
- const ephemeralSettings = this.config.getEphemeralSettings();
- const maxPromptTokens = ephemeralSettings['max-prompt-tokens'];
- if (maxPromptTokens) {
- // Estimate tokens in the full request
- const fullPromptText = JSON.stringify(requestContents);
- const estimatedTokens = estimateTokens(fullPromptText);
- if (estimatedTokens > maxPromptTokens) {
- console.warn(`WARNING: Prompt size (${estimatedTokens} tokens) exceeds max-prompt-tokens limit (${maxPromptTokens}). Trimming...`);
- // Add a warning message to the request that will be visible to the LLM
- const warningMessage = {
+ // Check if this is a paired tool call/response array
+ let userContent;
+ // Quick check for paired tool call/response
+ const messageArray = Array.isArray(params.message) ? params.message : null;
+ const isPairedToolResponse = messageArray &&
+ messageArray.length === 2 &&
+ messageArray[0] &&
+ typeof messageArray[0] === 'object' &&
+ 'functionCall' in messageArray[0] &&
+ messageArray[1] &&
+ typeof messageArray[1] === 'object' &&
+ 'functionResponse' in messageArray[1];
+ if (isPairedToolResponse && messageArray) {
+ // This is a paired tool call/response from the executor
+ // Create separate Content objects with correct roles
+ userContent = [
+ {
+ role: 'model',
+ parts: [messageArray[0]],
+ },
+ {
  role: 'user',
- parts: [
- {
- text: `WARNING: SYSTEM WARNING: The original prompt exceeded the ${maxPromptTokens} token limit (estimated ${estimatedTokens} tokens). Some conversation history and tool outputs have been truncated to fit. This may affect context continuity. Please be aware that some information from earlier in the conversation or from tool outputs may be missing.`,
- },
- ],
- };
- // Strategy: Keep the most recent messages and the current user message
- // Remove older messages and truncate tool outputs in the middle
- const trimmedContents = this.trimPromptContents(requestContents, maxPromptTokens);
- // Add the warning as the first message so LLM knows about the truncation
- trimmedContents.unshift(warningMessage);
- // Log the trimming action with more detail
- const trimmedTokens = estimateTokens(JSON.stringify(trimmedContents));
- console.log(`INFO: TRIMMED: Trimmed prompt from ${estimatedTokens} to ~${trimmedTokens} tokens`);
- // Count function calls in original vs trimmed
- let originalFunctionCalls = 0;
- let trimmedFunctionCalls = 0;
- requestContents.forEach((c) => c.parts?.forEach((p) => {
- if ('functionCall' in p)
- originalFunctionCalls++;
- }));
- trimmedContents.forEach((c) => c.parts?.forEach((p) => {
- if ('functionCall' in p)
- trimmedFunctionCalls++;
- }));
- if (originalFunctionCalls !== trimmedFunctionCalls) {
- console.warn(`WARNING: Trimming removed ${originalFunctionCalls - trimmedFunctionCalls} function calls (${originalFunctionCalls} -> ${trimmedFunctionCalls})`);
- }
- // Use trimmed contents instead
- requestContents.length = 0;
- requestContents.push(...trimmedContents);
- }
+ parts: [messageArray[1]],
+ },
+ ];
  }
- // Debug: Log the last few messages to see the function call/response pattern
- if (process.env.DEBUG && requestContents.length > 2) {
- const recentContents = requestContents.slice(-3);
- console.log('[DEBUG geminiChat] Recent conversation turns:');
- recentContents.forEach((content, idx) => {
- let summary = ` ${idx}: role=${content.role}, parts=${content.parts?.length || 0}`;
- content.parts?.forEach((part) => {
- if ('functionCall' in part && part.functionCall) {
- summary += ` [functionCall: ${part.functionCall.name}]`;
- }
- else if ('functionResponse' in part && part.functionResponse) {
- summary += ` [functionResponse: ${part.functionResponse.name}]`;
+ else {
+ userContent = createUserContentWithFunctionResponseFix(params.message);
+ }
+ // DO NOT add anything to history here - wait until after successful send!
+ // Tool responses will be handled in recordHistory after the model responds
+ let streamDoneResolver;
+ const streamDonePromise = new Promise((resolve) => {
+ streamDoneResolver = resolve;
+ });
+ this.sendPromise = streamDonePromise;
+ // DO NOT add user content to history yet - wait until successful send
+ // This is the send-then-commit pattern to avoid orphaned tool calls
+ return (async function* (instance) {
+ try {
+ let lastError = new Error('Request failed after all retries.');
+ for (let attempt = 0; attempt <= INVALID_CONTENT_RETRY_OPTIONS.maxAttempts; attempt++) {
+ try {
+ const stream = await instance.makeApiCallAndProcessStream(params, prompt_id, userContent);
+ for await (const chunk of stream) {
+ yield chunk;
+ }
+ lastError = null;
+ break;
  }
- else if ('text' in part && part.text) {
- summary += ` [text: ${part.text.substring(0, 50)}...]`;
+ catch (error) {
+ lastError = error;
+ const isContentError = error instanceof EmptyStreamError;
+ if (isContentError) {
+ // Check if we have more attempts left.
+ if (attempt < INVALID_CONTENT_RETRY_OPTIONS.maxAttempts - 1) {
+ await new Promise((res) => setTimeout(res, INVALID_CONTENT_RETRY_OPTIONS.initialDelayMs *
+ (attempt + 1)));
+ continue;
+ }
+ }
+ break;
  }
- });
- console.log(summary);
- });
- }
- if (process.env.DEBUG) {
- console.log('DEBUG: GeminiChat.sendMessageStream requestContents:', JSON.stringify(requestContents, null, 2));
- }
- this._logApiRequest(requestContents, this.config.getModel(), prompt_id);
- const startTime = Date.now();
- try {
- const apiCall = () => {
- const modelToUse = this.config.getModel();
- const authType = this.config.getContentGeneratorConfig()?.authType;
- // Prevent Flash model calls immediately after quota error (only for Gemini providers)
- if (authType !== AuthType.USE_PROVIDER &&
- this.config.getQuotaErrorOccurred() &&
- modelToUse === DEFAULT_GEMINI_FLASH_MODEL) {
- throw new Error('Please submit a new query to continue with the Flash model.');
- }
- if (process.env.DEBUG) {
- console.log('DEBUG [geminiChat]: About to call generateContentStream with:');
- console.log('DEBUG [geminiChat]: - Model:', modelToUse);
- console.log('DEBUG [geminiChat]: - Contents:', JSON.stringify(requestContents, null, 2));
- console.log('DEBUG [geminiChat]: - Config:', JSON.stringify({ ...this.generationConfig, ...params.config }, null, 2));
- console.log('DEBUG [geminiChat]: - Tools in generationConfig:', JSON.stringify(this.generationConfig.tools, null, 2));
- console.log('DEBUG [geminiChat]: - Tools in params.config:', JSON.stringify(params.config?.tools, null, 2));
  }
- // Check if this is a model-specific issue
- const isFlashModel = modelToUse && modelToUse.includes('flash');
- if (process.env.DEBUG) {
- console.log('DEBUG [geminiChat]: - Is Flash model:', isFlashModel);
+ if (lastError) {
+ // With send-then-commit pattern, we don't add to history until success,
+ // so there's nothing to remove on failure
+ throw lastError;
  }
- // Extract systemInstruction from generationConfig if it exists
- const { systemInstruction, ...restGenerationConfig } = this.generationConfig;
- // Create properly typed request parameters
- const mergedConfig = {
- ...restGenerationConfig,
- ...params.config,
- };
- // Add systemInstruction to the config if it exists
- if (systemInstruction) {
- mergedConfig.systemInstruction = systemInstruction;
- }
- const requestParams = {
- model: modelToUse,
- contents: requestContents,
- config: mergedConfig,
- };
- return this.contentGenerator.generateContentStream(requestParams, prompt_id);
- };
- // Note: Retrying streams can be complex. If generateContentStream itself doesn't handle retries
- // for transient issues internally before yielding the async generator, this retry will re-initiate
- // the stream. For simple 429/500 errors on initial call, this is fine.
- // If errors occur mid-stream, this setup won't resume the stream; it will restart it.
- const streamResponse = await retryWithBackoff(apiCall, {
- shouldRetry: (error) => {
- // Check for known error messages and codes.
- if (error instanceof Error && error.message) {
- if (isSchemaDepthError(error.message))
- return false;
- if (error.message.includes('429'))
- return true;
- if (error.message.match(/5\d{2}/))
- return true;
- }
- return false; // Don't retry other errors by default
- },
- onPersistent429: async (authType, error) => await this.handleFlashFallback(authType, error),
- authType: this.config.getContentGeneratorConfig()?.authType,
- });
- // Resolve the internal tracking of send completion promise - `sendPromise`
- // for both success and failure response. The actual failure is still
- // propagated by the `await streamResponse`.
- this.sendPromise = Promise.resolve(streamResponse)
- .then(() => undefined)
- .catch(() => undefined);
- const result = this.processStreamResponse(streamResponse, userContent, startTime, prompt_id);
- return result;
+ }
+ finally {
+ streamDoneResolver();
+ }
+ })(this);
+ }
+ async makeApiCallAndProcessStream(_params, _prompt_id, userContent) {
+ // Get the active provider
+ const provider = this.getActiveProvider();
+ if (!provider) {
+ throw new Error('No active provider configured');
  }
- catch (error) {
- const durationMs = Date.now() - startTime;
- this._logApiError(durationMs, error, prompt_id);
- this.sendPromise = Promise.resolve();
- await this.maybeIncludeSchemaDepthContext(error);
- throw error;
+ // Check if provider supports IContent interface
+ if (!this.providerSupportsIContent(provider)) {
+ throw new Error(`Provider ${provider.name} does not support IContent interface`);
  }
+ const apiCall = async () => {
+ const modelToUse = this.config.getModel();
+ const authType = this.config.getContentGeneratorConfig()?.authType;
+ // Prevent Flash model calls immediately after quota error (only for Gemini providers)
+ if (authType !== AuthType.USE_PROVIDER &&
+ this.config.getQuotaErrorOccurred() &&
+ modelToUse === DEFAULT_GEMINI_FLASH_MODEL) {
+ throw new Error('Please submit a new query to continue with the Flash model.');
+ }
+ // Convert user content to IContent first so we can check if it's a tool response
+ const idGen = this.historyService.getIdGeneratorCallback();
+ const matcher = this.makePositionMatcher();
+ let requestContents;
+ if (Array.isArray(userContent)) {
+ // This is a paired tool call/response - convert each separately
+ const userIContents = userContent.map((content) => ContentConverters.toIContent(content, idGen, matcher));
+ // Get curated history WITHOUT the new user message (since we haven't added it yet)
+ const currentHistory = this.historyService.getCuratedForProvider();
+ // Build request with history + new messages (but don't commit to history yet)
+ requestContents = [...currentHistory, ...userIContents];
+ }
+ else {
+ const userIContent = ContentConverters.toIContent(userContent, idGen, matcher);
+ // Get curated history WITHOUT the new user message (since we haven't added it yet)
+ const currentHistory = this.historyService.getCuratedForProvider();
+ // Build request with history + new message (but don't commit to history yet)
+ requestContents = [...currentHistory, userIContent];
+ }
+ // DEBUG: Check for malformed entries
+ this.logger.debug(() => `[DEBUG] geminiChat IContent request (history + new message): ${JSON.stringify(requestContents, null, 2)}`);
+ // Get tools in the format the provider expects
+ const tools = this.generationConfig.tools;
+ // Call the provider directly with IContent
+ const streamResponse = provider.generateChatCompletion(requestContents, tools);
+ // Convert the IContent stream to GenerateContentResponse stream
+ return (async function* (instance) {
+ for await (const iContent of streamResponse) {
+ yield instance.convertIContentToResponse(iContent);
+ }
+ })(this);
+ };
+ const streamResponse = await retryWithBackoff(apiCall, {
+ shouldRetry: (error) => {
+ if (error instanceof Error && error.message) {
+ if (isSchemaDepthError(error.message))
+ return false;
+ if (error.message.includes('429'))
+ return true;
+ if (error.message.match(/5\d{2}/))
+ return true;
+ }
+ return false;
+ },
+ onPersistent429: async (authType, error) => await this.handleFlashFallback(authType, error),
+ authType: this.config.getContentGeneratorConfig()?.authType,
+ });
+ return this.processStreamResponse(streamResponse, userContent);
  }
  /**
  * Returns the chat history.
@@ -558,33 +684,220 @@
  * chat session.
  */
  getHistory(curated = false) {
- const history = curated
- ? extractCuratedHistory(this.history)
- : this.history;
+ // Get history from HistoryService in IContent format
+ const iContents = curated
+ ? this.historyService.getCurated()
+ : this.historyService.getAll();
+ // Convert to Gemini Content format
+ const contents = ContentConverters.toGeminiContents(iContents);
  // Deep copy the history to avoid mutating the history outside of the
  // chat session.
- return structuredClone(history);
+ return structuredClone(contents);
  }
  /**
  * Clears the chat history.
  */
  clearHistory() {
- this.history = [];
+ this.historyService.clear();
  }
  /**
  * Adds a new entry to the chat history.
- *
- * @param content - The content to add to the history.
  */
  addHistory(content) {
- this.history.push(content);
+ this.historyService.add(ContentConverters.toIContent(content), this.config.getModel());
  }
  setHistory(history) {
- this.history = history;
+ this.historyService.clear();
+ const currentModel = this.config.getModel();
+ for (const content of history) {
+ this.historyService.add(ContentConverters.toIContent(content), currentModel);
+ }
  }
  setTools(tools) {
  this.generationConfig.tools = tools;
  }
+ /**
+ * Check if compression is needed based on token count
+ */
+ shouldCompress() {
+ // Calculate compression threshold only if not cached
+ if (this.cachedCompressionThreshold === null) {
+ const threshold = this.config.getEphemeralSetting('compression-threshold') ?? COMPRESSION_TOKEN_THRESHOLD;
+ const contextLimit = this.config.getEphemeralSetting('context-limit') ?? 60000; // Default context limit
+ this.cachedCompressionThreshold = threshold * contextLimit;
+ this.logger.debug('Calculated compression threshold:', {
+ threshold,
+ contextLimit,
+ compressionThreshold: this.cachedCompressionThreshold,
+ });
+ }
+ const currentTokens = this.historyService.getTotalTokens();
+ const shouldCompress = currentTokens >= this.cachedCompressionThreshold;
+ if (shouldCompress) {
+ this.logger.debug('Compression needed:', {
+ currentTokens,
+ threshold: this.cachedCompressionThreshold,
+ });
+ }
+ return shouldCompress;
+ }
+ /**
+ * Perform compression of chat history
+ * Made public to allow manual compression triggering
+ */
+ async performCompression(prompt_id) {
+ this.logger.debug('Starting compression');
+ // Reset cached threshold after compression in case settings changed
+ this.cachedCompressionThreshold = null;
+ // Lock history service
+ this.historyService.startCompression();
+ try {
+ // Get compression split
+ const { toCompress, toKeep } = this.getCompressionSplit();
+ if (toCompress.length === 0) {
+ this.logger.debug('Nothing to compress');
+ return;
+ }
+ // Perform direct compression API call
+ const summary = await this.directCompressionCall(toCompress, prompt_id);
+ // Apply compression atomically
+ this.applyCompression(summary, toKeep);
+ this.logger.debug('Compression completed successfully');
+ }
+ catch (error) {
+ this.logger.error('Compression failed:', error);
+ throw error;
+ }
+ finally {
+ // Always unlock
+ this.historyService.endCompression();
+ }
+ }
+ /**
+ * Get the split point for compression
+ */
+ getCompressionSplit() {
+ const curated = this.historyService.getCurated();
+ // Calculate split point (keep last 30%)
+ const preserveThreshold = this.config.getEphemeralSetting('compression-preserve-threshold') ?? COMPRESSION_PRESERVE_THRESHOLD;
+ let splitIndex = Math.floor(curated.length * (1 - preserveThreshold));
+ // Adjust for tool call boundaries
+ splitIndex = this.adjustForToolCallBoundary(curated, splitIndex);
+ // Never compress if too few messages
+ if (splitIndex < 4) {
+ return { toCompress: [], toKeep: curated };
+ }
+ return {
+ toCompress: curated.slice(0, splitIndex),
+ toKeep: curated.slice(splitIndex),
+ };
+ }
+ /**
+ * Adjust compression boundary to not split tool call/response pairs
+ */
+ adjustForToolCallBoundary(history, index) {
+ // Don't split tool responses from their calls
+ while (index < history.length && history[index].speaker === 'tool') {
+ index++;
+ }
+ // Check if previous message has unmatched tool calls
+ if (index > 0) {
+ const prev = history[index - 1];
+ if (prev.speaker === 'ai') {
+ const toolCalls = prev.blocks.filter((b) => b.type === 'tool_call');
+ if (toolCalls.length > 0) {
+ // Check if there are matching tool responses in the kept portion
+ const keptHistory = history.slice(index);
+ const hasMatchingResponses = toolCalls.every((call) => {
+ const toolCall = call;
+ return keptHistory.some((msg) => msg.speaker === 'tool' &&
+ msg.blocks.some((b) => b.type === 'tool_response' &&
+ b.callId === toolCall.id));
+ });
+ if (!hasMatchingResponses) {
+ // Include the AI message with unmatched calls in the compression
+ return index - 1;
+ }
+ }
+ }
+ }
+ return index;
+ }
+ /**
+ * Direct API call for compression, bypassing normal message flow
+ */
+ async directCompressionCall(historyToCompress, _prompt_id) {
+ const provider = this.getActiveProvider();
+ if (!provider || !this.providerSupportsIContent(provider)) {
+ throw new Error('Provider does not support compression');
+ }
+ // Build compression request with system prompt and user history
+ const compressionRequest = [
+ // Add system instruction as the first message
+ {
+ speaker: 'human',
+ blocks: [
+ {
+ type: 'text',
+ text: getCompressionPrompt(),
+ },
+ ],
+ },
+ // Add the history to compress
+ ...historyToCompress,
+ // Add the trigger instruction
+ {
+ speaker: 'human',
+ blocks: [
+ {
+ type: 'text',
+ text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
+ },
+ ],
+ },
+ ];
+ // Direct provider call without tools for compression
+ const stream = provider.generateChatCompletion(compressionRequest, undefined);
+ // Collect response
+ let summary = '';
+ for await (const chunk of stream) {
+ if (chunk.blocks) {
+ for (const block of chunk.blocks) {
+ if (block.type === 'text') {
+ summary += block.text;
+ }
+ }
+ }
+ }
+ return summary;
+ }
+ /**
+ * Apply compression results to history
+ */
+ applyCompression(summary, toKeep) {
+ // Clear and rebuild history atomically
+ this.historyService.clear();
+ const currentModel = this.config.getModel();
+ // Add compressed summary as user message
+ this.historyService.add({
+ speaker: 'human',
+ blocks: [{ type: 'text', text: summary }],
+ }, currentModel);
+ // Add acknowledgment from AI
+ this.historyService.add({
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: 'Got it. Thanks for the additional context!',
+ },
+ ],
+ }, currentModel);
+ // Add back the kept messages
+ for (const content of toKeep) {
+ this.historyService.add(content, currentModel);
+ }
+ }
  getFinalUsageMetadata(chunks) {
  const lastChunkWithMetadata = chunks
  .slice()
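The shouldCompress method above computes its cutoff once as ratio times context limit and caches the product until the next compression. A worked example of that arithmetic; the 0.7 ratio is assumed for illustration only, since the real COMPRESSION_TOKEN_THRESHOLD default lives in the new compression-config.js and is not shown in this diff:

const threshold = 0.7;        // 'compression-threshold' ephemeral setting, or the default
const contextLimit = 60000;   // 'context-limit' ephemeral setting, or the 60000 fallback above
const compressionThreshold = threshold * contextLimit; // 42000 tokens, cached after first use
const currentTokens = 45000;  // as reported by historyService.getTotalTokens()
console.log(currentTokens >= compressionThreshold); // true -> compression runs before the send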
@@ -592,112 +905,134 @@ export class GeminiChat {
592
905
  .find((chunk) => chunk.usageMetadata);
593
906
  return lastChunkWithMetadata?.usageMetadata;
594
907
  }
595
- async *processStreamResponse(streamResponse, inputContent, startTime, prompt_id) {
596
- const outputContent = [];
597
- const chunks = [];
598
- let errorOccurred = false;
599
- try {
600
- for await (const chunk of streamResponse) {
601
- if (isValidResponse(chunk)) {
602
- chunks.push(chunk);
603
- const content = chunk.candidates?.[0]?.content;
604
- if (content !== undefined) {
605
- if (this.isThoughtContent(content)) {
606
- yield chunk;
607
- continue;
908
+ async *processStreamResponse(streamResponse, userInput) {
909
+ const modelResponseParts = [];
910
+ let hasReceivedValidContent = false;
911
+ let hasReceivedAnyChunk = false;
912
+ let invalidChunkCount = 0;
913
+ let totalChunkCount = 0;
914
+ for await (const chunk of streamResponse) {
915
+ hasReceivedAnyChunk = true;
916
+ totalChunkCount++;
917
+ if (isValidResponse(chunk)) {
918
+ const content = chunk.candidates?.[0]?.content;
919
+ if (content) {
920
+ // Check if this chunk has meaningful content (text or function calls)
921
+ if (content.parts && content.parts.length > 0) {
922
+ const hasMeaningfulContent = content.parts.some((part) => part.text ||
923
+ 'functionCall' in part ||
924
+ 'functionResponse' in part);
925
+ if (hasMeaningfulContent) {
926
+ hasReceivedValidContent = true;
608
927
  }
609
- outputContent.push(content);
928
+ }
929
+ // Filter out thought parts from being added to history.
930
+ if (!this.isThoughtContent(content) && content.parts) {
931
+ modelResponseParts.push(...content.parts);
610
932
  }
611
933
  }
612
- yield chunk;
613
934
  }
935
+ else {
936
+ invalidChunkCount++;
937
+ }
938
+ yield chunk; // Yield every chunk to the UI immediately.
614
939
  }
615
- catch (error) {
616
- errorOccurred = true;
617
- const durationMs = Date.now() - startTime;
618
- this._logApiError(durationMs, error, prompt_id);
619
- throw error;
620
- }
621
- if (!errorOccurred) {
622
- const durationMs = Date.now() - startTime;
623
- const allParts = [];
624
- for (const content of outputContent) {
625
- if (content.parts) {
626
- allParts.push(...content.parts);
627
- }
940
+ // Now that the stream is finished, make a decision.
941
+ // Only throw an error if:
942
+ // 1. We received no chunks at all, OR
943
+ // 2. We received chunks but NONE had valid content (all were invalid or empty)
944
+ // This allows models like Qwen to send empty chunks at the end of a stream
945
+ // as long as they sent valid content earlier.
946
+ if (!hasReceivedAnyChunk ||
947
+ (!hasReceivedValidContent && totalChunkCount > 0)) {
948
+ // Only throw if this looks like a genuinely empty/invalid stream
949
+ // Not just a stream that ended with some invalid chunks
950
+ if (invalidChunkCount === totalChunkCount ||
951
+ modelResponseParts.length === 0) {
952
+ throw new EmptyStreamError('Model stream was invalid or completed without valid content.');
628
953
  }
629
- await this._logApiResponse(durationMs, prompt_id, this.getFinalUsageMetadata(chunks), JSON.stringify(chunks));
630
954
  }
631
- this.recordHistory(inputContent, outputContent);
955
+ // Use recordHistory to correctly save the conversation turn.
956
+ const modelOutput = [
957
+ { role: 'model', parts: modelResponseParts },
958
+ ];
959
+ this.recordHistory(userInput, modelOutput);
632
960
  }
633
961
  recordHistory(userInput, modelOutput, automaticFunctionCallingHistory) {
634
- const nonThoughtModelOutput = modelOutput.filter((content) => !this.isThoughtContent(content));
635
- let outputContents = [];
636
- if (nonThoughtModelOutput.length > 0 &&
637
- nonThoughtModelOutput.every((content) => content.role !== undefined)) {
638
- outputContents = nonThoughtModelOutput;
639
- }
640
- else if (nonThoughtModelOutput.length === 0 && modelOutput.length > 0) {
641
- // This case handles when the model returns only a thought.
642
- // We don't want to add an empty model response in this case.
643
- }
644
- else {
645
- // When not a function response appends an empty content when model returns empty response, so that the
646
- // history is always alternating between user and model.
647
- // Workaround for: https://b.corp.google.com/issues/420354090
648
- if (!isFunctionResponse(userInput)) {
649
- outputContents.push({
650
- role: 'model',
651
- parts: [],
652
- });
653
- }
654
- }
962
+ const newHistoryEntries = [];
963
+ // Part 1: Handle the user's part of the turn.
655
964
  if (automaticFunctionCallingHistory &&
656
965
  automaticFunctionCallingHistory.length > 0) {
657
- this.history.push(...extractCuratedHistory(automaticFunctionCallingHistory));
966
+ const curatedAfc = extractCuratedHistory(automaticFunctionCallingHistory);
967
+ for (const content of curatedAfc) {
968
+ newHistoryEntries.push(ContentConverters.toIContent(content));
969
+ }
658
970
  }
659
971
  else {
660
- this.history.push(userInput);
661
- }
662
- // Consolidate adjacent model roles in outputContents
663
- const consolidatedOutputContents = [];
664
-        for (const content of outputContents) {
-            if (this.isThoughtContent(content)) {
-                continue;
-            }
-            const lastContent = consolidatedOutputContents[consolidatedOutputContents.length - 1];
-            if (this.isTextContent(lastContent) && this.isTextContent(content)) {
-                // If both current and last are text, combine their text into the lastContent's first part
-                // and append any other parts from the current content.
-                lastContent.parts[0].text += content.parts[0].text || '';
-                if (content.parts.length > 1) {
-                    lastContent.parts.push(...content.parts.slice(1));
+        // Handle both single Content and Content[] (for paired tool call/response)
+        const idGen = this.historyService.getIdGeneratorCallback();
+        const matcher = this.makePositionMatcher();
+        if (Array.isArray(userInput)) {
+            // This is a paired tool call/response from the executor
+            // Add each part to history
+            for (const content of userInput) {
+                const userIContent = ContentConverters.toIContent(content, idGen, matcher);
+                newHistoryEntries.push(userIContent);
             }
         }
         else {
-            consolidatedOutputContents.push(content);
+            // Normal user message
+            const userIContent = ContentConverters.toIContent(userInput, idGen, matcher);
+            newHistoryEntries.push(userIContent);
         }
     }
-        if (consolidatedOutputContents.length > 0) {
-            const lastHistoryEntry = this.history[this.history.length - 1];
-            const canMergeWithLastHistory = !automaticFunctionCallingHistory ||
-                automaticFunctionCallingHistory.length === 0;
-            if (canMergeWithLastHistory &&
-                this.isTextContent(lastHistoryEntry) &&
-                this.isTextContent(consolidatedOutputContents[0])) {
-                // If both current and last are text, combine their text into the lastHistoryEntry's first part
-                // and append any other parts from the current content.
-                lastHistoryEntry.parts[0].text +=
-                    consolidatedOutputContents[0].parts[0].text || '';
-                if (consolidatedOutputContents[0].parts.length > 1) {
-                    lastHistoryEntry.parts.push(...consolidatedOutputContents[0].parts.slice(1));
+        // Part 2: Handle the model's part of the turn, filtering out thoughts.
+        const nonThoughtModelOutput = modelOutput.filter((content) => !this.isThoughtContent(content));
+        let outputContents = [];
+        if (nonThoughtModelOutput.length > 0) {
+            outputContents = nonThoughtModelOutput;
+        }
+        else if (modelOutput.length === 0 &&
+            !Array.isArray(userInput) &&
+            !isFunctionResponse(userInput) &&
+            !automaticFunctionCallingHistory) {
+            // Add an empty model response if the model truly returned nothing.
+            outputContents.push({ role: 'model', parts: [] });
+        }
+        // Part 3: Consolidate the parts of this turn's model response.
+        const consolidatedOutputContents = [];
+        if (outputContents.length > 0) {
+            for (const content of outputContents) {
+                const lastContent = consolidatedOutputContents[consolidatedOutputContents.length - 1];
+                if (this.hasTextContent(lastContent) && this.hasTextContent(content)) {
+                    lastContent.parts[0].text += content.parts[0].text || '';
+                    if (content.parts.length > 1) {
+                        lastContent.parts.push(...content.parts.slice(1));
+                    }
+                }
+                else {
+                    consolidatedOutputContents.push(content);
                 }
-            consolidatedOutputContents.shift(); // Remove the first element as it's merged
             }
-        this.history.push(...consolidatedOutputContents);
+        }
+        // Part 4: Add the new turn (user and model parts) to the history service.
+        const currentModel = this.config.getModel();
+        for (const entry of newHistoryEntries) {
+            this.historyService.add(entry, currentModel);
+        }
+        for (const content of consolidatedOutputContents) {
+            // Check if this contains tool calls
+            const hasToolCalls = content.parts?.some((part) => part && typeof part === 'object' && 'functionCall' in part);
+            if (!hasToolCalls) {
+                // Only add non-tool-call responses to history immediately
+                // Tool calls will be added when the executor returns with the response
+                this.historyService.add(ContentConverters.toIContent(content), currentModel);
+            }
+            // Tool calls are NOT added here - they'll come back from the executor
+            // along with their responses and be added together
         }
     }
-    isTextContent(content) {
+    hasTextContent(content) {
         return !!(content &&
             content.role === 'model' &&
             content.parts &&
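
Note: the rewritten history flow above stages user input as IContent entries, filters thought content, consolidates adjacent model text, and defers tool-call contents until the executor returns with their responses. A minimal standalone sketch of the "Part 3" consolidation step, using simplified stand-ins for the @google/genai Content/Part types (names here are illustrative, not this package's API):

// Merge adjacent model text contents into a single entry, as in "Part 3" above.
interface Part { text?: string }
interface Content { role: string; parts: Part[] }

// Guard mirroring hasTextContent(): a model content whose first part is text.
function hasText(c: Content | undefined): c is Content {
  return !!(c && c.role === 'model' && c.parts.length > 0 && typeof c.parts[0].text === 'string');
}

function consolidate(outputs: Content[]): Content[] {
  const merged: Content[] = [];
  for (const content of outputs) {
    const last = merged[merged.length - 1];
    if (hasText(last) && hasText(content)) {
      // Fold this text into the previous entry instead of adding a new one.
      last.parts[0].text = (last.parts[0].text ?? '') + (content.parts[0].text ?? '');
      if (content.parts.length > 1) last.parts.push(...content.parts.slice(1));
    } else {
      merged.push(content); // non-text entries (e.g. function calls) pass through
    }
  }
  return merged;
}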
@@ -717,120 +1052,137 @@ export class GeminiChat {
      * Trim prompt contents to fit within token limit
      * Strategy: Keep the most recent user message, trim older history and tool outputs
      */
-    trimPromptContents(contents, maxTokens) {
-        if (contents.length === 0)
-            return contents;
-        // Always keep the last message (current user input)
-        const lastMessage = contents[contents.length - 1];
-        const result = [];
-        // Reserve tokens for the last message and warning
-        const lastMessageTokens = estimateTokens(JSON.stringify(lastMessage));
-        const warningTokens = 200; // Reserve for warning message
-        let remainingTokens = maxTokens - lastMessageTokens - warningTokens;
-        if (remainingTokens <= 0) {
-            // Even the last message is too big, truncate it
-            return [this.truncateContent(lastMessage, maxTokens - warningTokens)];
-        }
-        // Add messages from most recent to oldest, stopping when we hit the limit
-        for (let i = contents.length - 2; i >= 0; i--) {
-            const content = contents[i];
-            const contentTokens = estimateTokens(JSON.stringify(content));
-            if (contentTokens <= remainingTokens) {
-                result.unshift(content);
-                remainingTokens -= contentTokens;
-            }
-            else if (remainingTokens > 100) {
-                // Try to truncate this content to fit
-                const truncated = this.truncateContent(content, remainingTokens);
-                // Only add if we actually got some content back
-                if (truncated.parts && truncated.parts.length > 0) {
-                    result.unshift(truncated);
-                }
-                break;
-            }
-            else {
-                // No room left, stop
-                break;
-            }
-        }
-        // Add the last message
-        result.push(lastMessage);
-        return result;
-    }
+    // private _trimPromptContents(
+    //   contents: Content[],
+    //   maxTokens: number,
+    // ): Content[] {
+    //   if (contents.length === 0) return contents;
+    //
+    //   // Always keep the last message (current user input)
+    //   const lastMessage = contents[contents.length - 1];
+    //   const result: Content[] = [];
+    //
+    //   // Reserve tokens for the last message and warning
+    //   const lastMessageTokens = estimateTokens(JSON.stringify(lastMessage));
+    //   const warningTokens = 200; // Reserve for warning message
+    //   let remainingTokens = maxTokens - lastMessageTokens - warningTokens;
+    //
+    //   if (remainingTokens <= 0) {
+    //     // Even the last message is too big, truncate it
+    //     return [this._truncateContent(lastMessage, maxTokens - warningTokens)];
+    //   }
+    //
+    //   // Add messages from most recent to oldest, stopping when we hit the limit
+    //   for (let i = contents.length - 2; i >= 0; i--) {
+    //     const content = contents[i];
+    //     const contentTokens = estimateTokens(JSON.stringify(content));
+    //
+    //     if (contentTokens <= remainingTokens) {
+    //       result.unshift(content);
+    //       remainingTokens -= contentTokens;
+    //     } else if (remainingTokens > 100) {
+    //       // Try to truncate this content to fit
+    //       const truncated = this._truncateContent(content, remainingTokens);
+    //       // Only add if we actually got some content back
+    //       if (truncated.parts && truncated.parts.length > 0) {
+    //         result.unshift(truncated);
+    //       }
+    //       break;
+    //     } else {
+    //       // No room left, stop
+    //       break;
+    //     }
+    //   }
+    //
+    //   // Add the last message
+    //   result.push(lastMessage);
+    //
+    //   return result;
+    // }
+    //
     /**
      * Truncate a single content to fit within token limit
      */
-    truncateContent(content, maxTokens) {
-        if (!content.parts || content.parts.length === 0) {
-            return content;
-        }
-        const truncatedParts = [];
-        let currentTokens = 0;
-        for (const part of content.parts) {
-            if ('text' in part && part.text) {
-                const partTokens = estimateTokens(part.text);
-                if (currentTokens + partTokens <= maxTokens) {
-                    truncatedParts.push(part);
-                    currentTokens += partTokens;
-                }
-                else {
-                    // Truncate this part
-                    const remainingTokens = maxTokens - currentTokens;
-                    if (remainingTokens > 10) {
-                        const remainingChars = remainingTokens * 4;
-                        truncatedParts.push({
-                            text: part.text.substring(0, remainingChars) +
-                                '\n[...content truncated due to token limit...]',
-                        });
-                    }
-                    break;
-                }
-            }
-            else {
-                // Non-text parts (function calls, responses, etc) - NEVER truncate these
-                // Either include them fully or skip them entirely to avoid breaking JSON
-                const partTokens = estimateTokens(JSON.stringify(part));
-                if (currentTokens + partTokens <= maxTokens) {
-                    truncatedParts.push(part);
-                    currentTokens += partTokens;
-                }
-                else {
-                    // Skip this part entirely - DO NOT truncate function calls/responses
-                    // Log what we're skipping for debugging
-                    if (process.env.DEBUG || process.env.VERBOSE) {
-                        let skipInfo = 'unknown part';
-                        if ('functionCall' in part) {
-                            const funcPart = part;
-                            skipInfo = `functionCall: ${funcPart.functionCall?.name || 'unnamed'}`;
-                        }
-                        else if ('functionResponse' in part) {
-                            const respPart = part;
-                            skipInfo = `functionResponse: ${respPart.functionResponse?.name || 'unnamed'}`;
-                        }
-                        console.warn(`INFO: Skipping ${skipInfo} due to token limit (needs ${partTokens} tokens, only ${maxTokens - currentTokens} available)`);
-                    }
-                    // Add a marker that content was omitted
-                    if (truncatedParts.length > 0 &&
-                        !truncatedParts.some((p) => 'text' in p &&
-                            p.text?.includes('[...function calls omitted due to token limit...]'))) {
-                        truncatedParts.push({
-                            text: '[...function calls omitted due to token limit...]',
-                        });
-                    }
-                    break;
-                }
-            }
-        }
-        return {
-            role: content.role,
-            parts: truncatedParts,
-        };
-    }
+    // private _truncateContent(content: Content, maxTokens: number): Content {
+    //   if (!content.parts || content.parts.length === 0) {
+    //     return content;
+    //   }
+    //
+    //   const truncatedParts: Part[] = [];
+    //   let currentTokens = 0;
+    //
+    //   for (const part of content.parts) {
+    //     if ('text' in part && part.text) {
+    //       const partTokens = estimateTokens(part.text);
+    //       if (currentTokens + partTokens <= maxTokens) {
+    //         truncatedParts.push(part);
+    //         currentTokens += partTokens;
+    //       } else {
+    //         // Truncate this part
+    //         const remainingTokens = maxTokens - currentTokens;
+    //         if (remainingTokens > 10) {
+    //           const remainingChars = remainingTokens * 4;
+    //           truncatedParts.push({
+    //             text:
+    //               part.text.substring(0, remainingChars) +
+    //               '\n[...content truncated due to token limit...]',
+    //           });
+    //         }
+    //         break;
+    //       }
+    //     } else {
+    //       // Non-text parts (function calls, responses, etc) - NEVER truncate these
+    //       // Either include them fully or skip them entirely to avoid breaking JSON
+    //       const partTokens = estimateTokens(JSON.stringify(part));
+    //       if (currentTokens + partTokens <= maxTokens) {
+    //         truncatedParts.push(part);
+    //         currentTokens += partTokens;
+    //       } else {
+    //         // Skip this part entirely - DO NOT truncate function calls/responses
+    //         // Log what we're skipping for debugging
+    //         if (process.env.DEBUG || process.env.VERBOSE) {
+    //           let skipInfo = 'unknown part';
+    //           if ('functionCall' in part) {
+    //             const funcPart = part as { functionCall?: { name?: string } };
+    //             skipInfo = `functionCall: ${funcPart.functionCall?.name || 'unnamed'}`;
+    //           } else if ('functionResponse' in part) {
+    //             const respPart = part as { functionResponse?: { name?: string } };
+    //             skipInfo = `functionResponse: ${respPart.functionResponse?.name || 'unnamed'}`;
+    //           }
+    //           console.warn(
+    //             `INFO: Skipping ${skipInfo} due to token limit (needs ${partTokens} tokens, only ${maxTokens - currentTokens} available)`,
+    //           );
+    //         }
+    //         // Add a marker that content was omitted
+    //         if (
+    //           truncatedParts.length > 0 &&
+    //           !truncatedParts.some(
+    //             (p) =>
+    //               'text' in p &&
+    //               p.text?.includes(
+    //                 '[...function calls omitted due to token limit...]',
+    //               ),
+    //           )
+    //         ) {
+    //           truncatedParts.push({
+    //             text: '[...function calls omitted due to token limit...]',
+    //           });
+    //         }
+    //         break;
+    //       }
+    //     }
+    //   }
+    //
+    //   return {
+    //     role: content.role,
+    //     parts: truncatedParts,
+    //   };
+    // }
     async maybeIncludeSchemaDepthContext(error) {
         // Check for potentially problematic cyclic tools with cyclic schemas
         // and include a recommendation to remove potentially problematic tools.
         if (isStructuredError(error) && isSchemaDepthError(error.message)) {
-            const tools = (await this.config.getToolRegistry()).getAllTools();
+            const tools = this.config.getToolRegistry().getAllTools();
             const cyclicSchemaTools = [];
             for (const tool of tools) {
                 if ((tool.schema.parametersJsonSchema &&
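
Note: the trimming helpers are retired in 0.2.x (kept above only as commented-out TypeScript). For reference, the strategy they implemented was a newest-first token budget; a self-contained sketch follows, assuming the rough 4-characters-per-token estimate suggested by the `remainingTokens * 4` conversion in the original code:

// Keep the newest message; walk backwards adding older entries until the
// estimated budget runs out, so older history is dropped first.
const estimateTokens = (s: string): number => Math.ceil(s.length / 4); // ~4 chars/token heuristic

function trimToBudget<T>(history: T[], maxTokens: number): T[] {
  if (history.length === 0) return history;
  const last = history[history.length - 1]; // always keep the current user input
  let remaining = maxTokens - estimateTokens(JSON.stringify(last));
  const kept: T[] = [];
  for (let i = history.length - 2; i >= 0 && remaining > 0; i--) {
    const cost = estimateTokens(JSON.stringify(history[i]));
    if (cost > remaining) break; // the original tried truncation here; this sketch just stops
    kept.unshift(history[i]);
    remaining -= cost;
  }
  kept.push(last);
  return kept;
}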
@@ -847,6 +1199,189 @@ export class GeminiChat {
             }
         }
     }
+    /**
+     * Convert PartListUnion (user input) to IContent format for provider/history
+     */
+    convertPartListUnionToIContent(input) {
+        const blocks = [];
+        if (typeof input === 'string') {
+            // Simple string input from user
+            return {
+                speaker: 'human',
+                blocks: [{ type: 'text', text: input }],
+            };
+        }
+        // Handle Part or Part[]
+        const parts = Array.isArray(input) ? input : [input];
+        // Check if all parts are function responses (tool responses)
+        const allFunctionResponses = parts.every((part) => part && typeof part === 'object' && 'functionResponse' in part);
+        if (allFunctionResponses) {
+            // Tool responses - speaker is 'tool'
+            for (const part of parts) {
+                if (typeof part === 'object' &&
+                    'functionResponse' in part &&
+                    part.functionResponse) {
+                    blocks.push({
+                        type: 'tool_response',
+                        callId: part.functionResponse.id || '',
+                        toolName: part.functionResponse.name || '',
+                        result: part.functionResponse.response || {},
+                        error: undefined,
+                    });
+                }
+            }
+            return {
+                speaker: 'tool',
+                blocks,
+            };
+        }
+        // Mixed content or function calls - must be from AI
+        let hasAIContent = false;
+        for (const part of parts) {
+            if (typeof part === 'string') {
+                blocks.push({ type: 'text', text: part });
+            }
+            else if ('text' in part && part.text !== undefined) {
+                blocks.push({ type: 'text', text: part.text });
+            }
+            else if ('functionCall' in part && part.functionCall) {
+                hasAIContent = true; // Function calls only come from AI
+                blocks.push({
+                    type: 'tool_call',
+                    id: part.functionCall.id || '',
+                    name: part.functionCall.name || '',
+                    parameters: part.functionCall.args || {},
+                });
+            }
+            else if ('functionResponse' in part && part.functionResponse) {
+                // Single function response in mixed content
+                blocks.push({
+                    type: 'tool_response',
+                    callId: part.functionResponse.id || '',
+                    toolName: part.functionResponse.name || '',
+                    result: part.functionResponse.response || {},
+                    error: undefined,
+                });
+            }
+        }
+        // If we have function calls, it's AI content; otherwise assume human
+        return {
+            speaker: hasAIContent ? 'ai' : 'human',
+            blocks,
+        };
+    }
+    /**
+     * Convert IContent (from provider) to GenerateContentResponse for SDK compatibility
+     */
+    convertIContentToResponse(input) {
+        // Convert IContent blocks to Gemini Parts
+        const parts = [];
+        for (const block of input.blocks) {
+            switch (block.type) {
+                case 'text':
+                    parts.push({ text: block.text });
+                    break;
+                case 'tool_call': {
+                    const toolCall = block;
+                    parts.push({
+                        functionCall: {
+                            id: toolCall.id,
+                            name: toolCall.name,
+                            args: toolCall.parameters,
+                        },
+                    });
+                    break;
+                }
+                case 'tool_response': {
+                    const toolResponse = block;
+                    parts.push({
+                        functionResponse: {
+                            id: toolResponse.callId,
+                            name: toolResponse.toolName,
+                            response: toolResponse.result,
+                        },
+                    });
+                    break;
+                }
+                case 'thinking':
+                    // Include thinking blocks as thought parts
+                    parts.push({
+                        thought: true,
+                        text: block.thought,
+                    });
+                    break;
+                default:
+                    // Skip unsupported block types
+                    break;
+            }
+        }
+        // Build the response structure
+        const response = {
+            candidates: [
+                {
+                    content: {
+                        role: 'model',
+                        parts,
+                    },
+                },
+            ],
+            // These are required properties that must be present
+            get text() {
+                return parts.find((p) => 'text' in p)?.text || '';
+            },
+            functionCalls: parts
+                .filter((p) => 'functionCall' in p)
+                .map((p) => p.functionCall),
+            executableCode: undefined,
+            codeExecutionResult: undefined,
+            // data property will be added below
+        };
+        // Add data property that returns self-reference
+        // Make it non-enumerable to avoid circular reference in JSON.stringify
+        Object.defineProperty(response, 'data', {
+            get() {
+                return response;
+            },
+            enumerable: false, // Changed from true to false
+            configurable: true,
+        });
+        // Add usage metadata if present
+        if (input.metadata?.usage) {
+            response.usageMetadata = {
+                promptTokenCount: input.metadata.usage.promptTokens || 0,
+                candidatesTokenCount: input.metadata.usage.completionTokens || 0,
+                totalTokenCount: input.metadata.usage.totalTokens || 0,
+            };
+        }
+        return response;
+    }
+    /**
+     * Get the active provider from the ProviderManager via Config
+     */
+    getActiveProvider() {
+        const providerManager = this.config.getProviderManager();
+        if (!providerManager) {
+            return undefined;
+        }
+        try {
+            return providerManager.getActiveProvider();
+        }
+        catch {
+            // No active provider set
+            return undefined;
+        }
+    }
+    /**
+     * Check if a provider supports the IContent interface
+     */
+    providerSupportsIContent(provider) {
+        if (!provider) {
+            return false;
+        }
+        // Check if the provider has the IContent method
+        return (typeof provider
+            .generateChatCompletion === 'function');
+    }
 }
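
Note: in convertIContentToResponse above, the `data` property on the synthesized GenerateContentResponse is a non-enumerable self-referencing getter, which is what keeps JSON.stringify from hitting a circular reference while still letting SDK-style callers read `response.data`. A standalone sketch of that trick (the object shape here is invented for illustration):

// Self-referential `data` getter that stays out of JSON serialization.
const response: Record<string, unknown> = { candidates: [] };
Object.defineProperty(response, 'data', {
  get() { return response; }, // callers can still read response.data
  enumerable: false,          // skipped by JSON.stringify, so no circular error
  configurable: true,
});
console.log(JSON.stringify(response));                            // -> {"candidates":[]}
console.log((response as { data?: unknown }).data === response);  // -> true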
 /** Visible for Testing */
 export function isSchemaDepthError(errorMessage) {