npm - @nqminds/mcp-client - Versions diffs - 1.0.9 → 1.0.12 - Mend

@nqminds/mcp-client 1.0.9 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/dist/MCPChat.d.ts.map +1 -1
package/dist/MCPChat.js +13 -1
package/dist/api-helpers.d.ts.map +1 -1
package/dist/api-helpers.js +17 -0
package/dist/openai-client.d.ts +148 -11
package/dist/openai-client.d.ts.map +1 -1
package/dist/openai-client.js +607 -184
package/dist/styles/MCPChat.css +12 -0
package/dist/types.d.ts +3 -1
package/dist/types.d.ts.map +1 -1
package/package.json +1 -1

package/dist/MCPChat.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,~~qBAifd~~"}
1	+ {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBA6fd"}

package/dist/MCPChat.js CHANGED Viewed

@@ -167,6 +167,16 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
                                         return updated;
                                     });
                                 }
+                                else if (parsed.type === "usage") {
+                                    setMessages((prev) => {
+                                        const updated = [...prev];
+                                        const lastIndex = updated.length - 1;
+                                        if (lastIndex >= 0) {
+                                            updated[lastIndex] = { ...updated[lastIndex], tokenInfo: parsed.message };
+                                        }
+                                        return updated;
+                                    });
+                                }
                                 else if (parsed.type === "error") {
                                     throw new Error(parsed.message || "Stream error");
                                 }
@@ -315,7 +325,9 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
                         React.createElement("div", { className: "mcp-chat-message-bubble" },
                             msg.role === "assistant" ? (React.createElement("div", { className: "mcp-chat-message-content markdown-content" },
                                 React.createElement(ReactMarkdown, { remarkPlugins: [remarkGfm] }, msg.content))) : (React.createElement("div", { className: "mcp-chat-message-content" }, msg.content)),
-                            React.createElement("div", { className: "mcp-chat-message-timestamp" }, msg.timestamp.toLocaleTimeString()))))),
+                            React.createElement("div", { className: "mcp-chat-message-timestamp" },
+                                msg.timestamp.toLocaleTimeString(),
+                                msg.role === "assistant" && msg.tokenInfo && (React.createElement("span", { className: "mcp-chat-token-info" }, msg.tokenInfo))))))),
                     isLoading && (React.createElement("div", { className: "mcp-chat-message mcp-chat-message-assistant" },
                         React.createElement("div", { className: "mcp-chat-thinking" },
                             React.createElement("div", { className: "mcp-chat-thinking-title" },

package/dist/api-helpers.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,~~uBAiG~~/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
1	+ {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAmH/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}

package/dist/api-helpers.js CHANGED Viewed

@@ -47,6 +47,23 @@ export function createMCPChatHandler(config) {
                         sendEvent("thinking", { message: thinkingMessage });
                     }, abortController.signal, // Pass abort signal to enable cancellation
                     bypassSystemPrompt);
+                    // Emit token usage summary for debugging
+                    const usage = client.getUsage();
+                    if (usage.inputTokens > 0 || usage.outputTokens > 0) {
+                        const parts = [
+                            `in: ${usage.inputTokens.toLocaleString()}`,
+                            `out: ${usage.outputTokens.toLocaleString()}`,
+                            `total: ${usage.totalTokens.toLocaleString()}`,
+                        ];
+                        if (usage.cachedTokens > 0) {
+                            const cachedPct = Math.round((usage.cachedTokens / usage.inputTokens) * 100);
+                            parts.push(`cached: ${usage.cachedTokens.toLocaleString()} (${cachedPct}%)`);
+                        }
+                        if (usage.compactedTurns > 0) {
+                            parts.push(`compacted: ${usage.compactedTurns} turn${usage.compactedTurns !== 1 ? "s" : ""}`);
+                        }
+                        sendEvent("usage", { message: parts.join(" | ") });
+                    }
                     // Check if aborted before streaming response
                     if (abortController.signal.aborted) {
                         return;

package/dist/openai-client.d.ts CHANGED Viewed

@@ -1,6 +1,20 @@
 /**
  * OpenAI-powered MCP Client
- * Uses ChatGPT to intelligently interact with MCP tools
+ *
+ * Compaction strategy:
+ * - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
+ * - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
+ *   compact the oldest cold portion and keep the newest portion verbatim.
+ * - Feed the returned compaction object back into future requests.
+ *
+ * Notes:
+ * - This is written to align with the OpenAI Responses API shape:
+ *   - response usage fields
+ *   - previous_response_id
+ *   - input token counting
+ *   - response compaction
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
+ *   by SDK version. The logic here is the important part.
  */
 export interface MCPClientConfig {
     openaiApiKey: string;
@@ -8,30 +22,153 @@ export interface MCPClientConfig {
     openaiModel?: string;
     clientName?: string;
     clientVersion?: string;
+    /**
+     * Trigger compaction when the last measured input tokens reaches this threshold.
+     * Example policy from your suggestion:
+     * - compact when last measured input >= 200k
+     */
+    compactTriggerInputTokens?: number;
+    /**
+     * Keep roughly this many of the most recent input tokens uncompacted.
+     * Example policy from your suggestion:
+     * - retain last ~100k uncompacted
+     */
+    hotContextTargetInputTokens?: number;
+    /**
+     * Guardrail for unusually large tool outputs stored in history.
+     */
+    maxToolOutputChars?: number;
+}
+interface UsageStats {
+    inputTokens: number;
+    outputTokens: number;
+    totalTokens: number;
+    cachedTokens: number;
+    reasoningTokens: number;
+    compactedTurns: number;
 }
 export declare class MCPClientOpenAI {
     private client;
     private openai;
     private transport;
+    /**
+     * Instructions are sent using the Responses API `instructions` field,
+     * not inserted as a fake message inside the rolling conversation items.
+     */
+    private instructions;
+    /**
+     * Rolling uncompacted conversation items.
+     * This contains the most recent "hot" context only.
+     */
     private conversationHistory;
-    private lastCompaction;
+    /**
+     * Opaque compaction object returned by OpenAI.
+     * This represents older "cold" context that has been compacted.
+     */
+    private compaction;
+    /**
+     * Last measured input tokens from a real Responses API call.
+     */
+    private lastInputTokens;
+    /**
+     * Latest usage snapshot for logging/inspection.
+     */
+    private lastUsage;
+    /**
+     * Per-turn accumulator — reset at the start of each processQuery / processRawQuery call.
+     * Tracks the max input tokens seen (largest context = most representative) and
+     * the sum of output tokens across all API calls in the turn.
+     */
+    private turnStats;
     private config;
     constructor(config: MCPClientConfig);
-    private compactConversation;
+    connect(): Promise<void>;
+    cleanup(): Promise<void>;
+    clearHistory(): void;
+    getUsage(): UsageStats;
     /**
-     * Fetches the system prompt from the MCP server's registered "system-prompt" prompt
-     * and prepends it to conversationHistory. Cached — only runs once per session.
-     * Direct Prompt (bypass mode) skips this entirely.
+     * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
+     * Cached per client session.
      */
     private ensureSystemPrompt;
-    connect(): Promise<void>;
+    /**
+     * Build request input:
+     * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
+     */
+    private buildInput;
+    /**
+     * Count input tokens before making a request.
+     * Falls back to a simple rough estimate if the SDK method is unavailable.
+     */
+    private countInputTokens;
+    /**
+     * Very rough fallback estimator.
+     * Only used if token counting endpoint is unavailable in the SDK version in use.
+     */
+    private roughEstimateInputTokens;
+    /**
+     * Reset the per-turn accumulator at the start of each processQuery / processRawQuery.
+     */
+    private startTurn;
+    /**
+     * Normalize usage from Responses API and accumulate into the per-turn stats.
+     *
+     * With previous_response_id chaining there are multiple API calls per user turn:
+     *   - inputTokens: use max across calls (the call with the largest context is most representative)
+     *   - cachedTokens: taken from the same call that has the max input tokens (keeps % ≤ 100%)
+     *   - outputTokens: sum across calls (each call generates separately-billed tokens)
+     *   - reasoningTokens: sum across calls
+     */
+    private captureUsage;
+    /**
+     * Compact oversized tool outputs before storing them in rolling history.
+     */
+    private compactToolResult;
+    private makeUserMessage;
+    private makeFunctionOutput;
+    /**
+     * We treat a "turn" boundary as:
+     * - starts at a user message
+     * - ends right before the next user message, or end of array
+     *
+     * This lets us compact or trim in coherent chunks instead of arbitrary items.
+     */
+    private getTurnBoundaries;
+    /**
+     * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
+     * Older turns become compaction candidates.
+     */
+    private splitColdAndHotHistory;
+    /**
+     * Incrementally update compaction using the cold slice only.
+     */
+    private compactColdHistory;
+    /**
+     * Proactively compact when the history has grown past the trigger.
+     * Keeps the newest hot window uncompacted and compacts the older cold window.
+     */
+    private maybeCompactHistory;
+    /**
+     * Keep history from growing pathologically in item count even before token limits.
+     * Uses turn-aware trimming, not arbitrary item slicing.
+     */
+    private enforceHardHistoryLimitByTurns;
+    /**
+     * Build MCP tool list for OpenAI Responses API.
+     */
+    private buildTools;
+    /**
+     * Create a response against the current full context.
+     */
+    private createResponse;
+    /**
+     * Main query method with rolling compaction.
+     */
     processQuery(query: string, onThinking?: (message: string) => void, abortSignal?: AbortSignal, bypassSystemPrompt?: boolean): Promise<string>;
     /**
-     * Sends a raw query directly to the model — no system prompt, no conversation history.
-     * Used by the Direct Prompt dev tool to test prompts verbatim.
+     * Raw mode: no cached instructions, no rolling history, no compaction state.
      */
     private processRawQuery;
-    clearHistory(): void;
-    cleanup(): Promise<void>;
 }
+export {};
 //# sourceMappingURL=openai-client.d.ts.map

package/dist/openai-client.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA~~;;;GAGG~~;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;~~AAED~~,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;~~IACxC~~,OAAO,CAAC,mBAAmB,~~CAAsB~~;~~IACjD~~,OAAO,CAAC,~~cAAc~~,~~CAAa~~;~~IACnC~~,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;~~YAsCrB~~,~~mBAAmB~~;~~IAoBjC;;;;OAIG~~;YACW,kBAAkB;~~IA4B1B~~,OAAO,~~IAAI~~,OAAO,CAAC,~~IAAI~~,CAAC;~~IAIxB~~,YAAY,CAAC,KAAK,EAAE,MAAM,~~EAAE~~,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,~~EAAE~~,WAAW,CAAC,EAAE,WAAW,~~EAAE~~,kBAAkB,UAAQ,~~GAAG~~,OAAO,CAAC,MAAM,CAAC;~~IAuNjJ;;;OAGG~~;YACW,eAAe;~~IA4E7B,YAAY,IAAI,IAAI;IAOd,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B~~"}
1	+ {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,UAAU,UAAU;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAiBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IAExC;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAuB;IAE3C;;;OAGG;IACH,OAAO,CAAC,mBAAmB,CAA2B;IAEtD;;;OAGG;IACH,OAAO,CAAC,UAAU,CAGhB;IAEF;;OAEG;IACH,OAAO,CAAC,eAAe,CAAK;IAE5B;;OAEG;IACH,OAAO,CAAC,SAAS,CAOf;IAEF;;;;OAIG;IACH,OAAO,CAAC,SAAS,CAKf;IAEF,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;IAoC7B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,YAAY,IAAI,IAAI;IAkBpB,QAAQ,IAAI,UAAU;IAItB;;;OAGG;YACW,kBAAkB;IA0BhC;;;OAGG;IACH,OAAO,CAAC,UAAU;IAalB;;;OAGG;YACW,gBAAgB;IAuB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,OAAO,CAAC,SAAS;IAIjB;;;;;;;;OAQG;IACH,OAAO,CAAC,YAAY;IAmCpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA8EzB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IA2CzB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;OAEG;YACW,kBAAkB;IA4ChC;;;OAGG;YACW,mBAAmB;IAoBjC;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAatC;;OAEG;YACW,UAAU;IAiBxB;;OAEG;YACW,cAAc;IAoB5B;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EACtC,WAAW,CAAC,EAAE,WAAW,EACzB,kBAAkB,UAAQ,GACzB,OAAO,CAAC,MAAM,CAAC;IA8OlB;;OAEG;YACW,eAAe;CAyG9B"}

package/dist/openai-client.js CHANGED Viewed

@@ -1,31 +1,89 @@
 /**
  * OpenAI-powered MCP Client
- * Uses ChatGPT to intelligently interact with MCP tools
+ *
+ * Compaction strategy:
+ * - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
+ * - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
+ *   compact the oldest cold portion and keep the newest portion verbatim.
+ * - Feed the returned compaction object back into future requests.
+ *
+ * Notes:
+ * - This is written to align with the OpenAI Responses API shape:
+ *   - response usage fields
+ *   - previous_response_id
+ *   - input token counting
+ *   - response compaction
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
+ *   by SDK version. The logic here is the important part.
  */
 import { Client } from "@modelcontextprotocol/sdk/client/index.js";
 import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
 import OpenAI from "openai";
 export class MCPClientOpenAI {
     constructor(config) {
+        /**
+         * Instructions are sent using the Responses API `instructions` field,
+         * not inserted as a fake message inside the rolling conversation items.
+         */
+        this.instructions = null;
+        /**
+         * Rolling uncompacted conversation items.
+         * This contains the most recent "hot" context only.
+         */
         this.conversationHistory = [];
-        this.lastCompaction = 0;
+        /**
+         * Opaque compaction object returned by OpenAI.
+         * This represents older "cold" context that has been compacted.
+         */
+        this.compaction = {
+            item: null,
+            compactedTurns: 0,
+        };
+        /**
+         * Last measured input tokens from a real Responses API call.
+         */
+        this.lastInputTokens = 0;
+        /**
+         * Latest usage snapshot for logging/inspection.
+         */
+        this.lastUsage = {
+            inputTokens: 0,
+            outputTokens: 0,
+            totalTokens: 0,
+            cachedTokens: 0,
+            reasoningTokens: 0,
+            compactedTurns: 0,
+        };
+        /**
+         * Per-turn accumulator — reset at the start of each processQuery / processRawQuery call.
+         * Tracks the max input tokens seen (largest context = most representative) and
+         * the sum of output tokens across all API calls in the turn.
+         */
+        this.turnStats = {
+            inputTokens: 0,
+            outputTokens: 0,
+            cachedTokens: 0,
+            reasoningTokens: 0,
+        };
         this.config = {
             openaiApiKey: config.openaiApiKey,
             mcpServerCommand: config.mcpServerCommand,
-            openaiModel: config.openaiModel || "chatgpt-5-mini",
+            openaiModel: config.openaiModel || "gpt-5-mini",
             clientName: config.clientName || "mcp-flair-client",
             clientVersion: config.clientVersion || "1.0.0",
+            compactTriggerInputTokens: config.compactTriggerInputTokens ?? 200000,
+            hotContextTargetInputTokens: config.hotContextTargetInputTokens ?? 100000,
+            maxToolOutputChars: config.maxToolOutputChars ?? 20000,
         };
         this.openai = new OpenAI({
             apiKey: this.config.openaiApiKey,
         });
-        // Parse the server command and args
         const serverCmd = this.config.mcpServerCommand.split(" ");
         const command = serverCmd[0];
         const args = serverCmd.slice(1);
         this.transport = new StdioClientTransport({
-            command: command,
-            args: args,
+            command,
+            args,
         });
         this.client = new Client({
             name: this.config.clientName,
@@ -33,177 +91,552 @@ export class MCPClientOpenAI {
         }, {
             capabilities: {},
         });
-        // Initialize conversation with system message
-        // System prompt is fetched from the MCP server on first use (see ensureSystemPrompt)
+    }
+    async connect() { // Connects the MCP client using `this.transport`; resolves once `client.connect` settles.
+        await this.client.connect(this.transport);
+    }
+    async cleanup() { // Closes the MCP client; history, compaction and usage state are left as they are.
+        await this.client.close();
+    }
+    clearHistory() { // Resets rolling history, compaction state and all usage counters to their initial values.
         this.conversationHistory = [];
+        this.compaction = {
+            item: null,
+            compactedTurns: 0,
+        };
+        this.lastInputTokens = 0;
+        this.turnStats = { inputTokens: 0, outputTokens: 0, cachedTokens: 0, reasoningTokens: 0 };
+        this.lastUsage = {
+            inputTokens: 0,
+            outputTokens: 0,
+            totalTokens: 0,
+            cachedTokens: 0,
+            reasoningTokens: 0,
+            compactedTurns: 0,
+        };
+    } // NOTE(review): `this.instructions` is not reset here, so a cached system prompt survives a clear — confirm intended.
-    async compactConversation() {
-        try {
-            const compactionResponse = await this.openai.responses.compact({
-                model: this.config.openaiModel,
-                // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                input: this.conversationHistory,
-            });
-            this.conversationHistory = compactionResponse.output;
-            this.lastCompaction = Date.now();
-        }
-        catch (error) {
-            // Keep system message and last 25 items
-            if (this.conversationHistory.length > 26) {
-                const systemMessage = this.conversationHistory[0];
-                const recentItems = this.conversationHistory.slice(-25);
-                this.conversationHistory = [systemMessage, ...recentItems];
-            }
-        }
+    getUsage() { // Returns a shallow copy of `lastUsage`, with `compactedTurns` taken from the current compaction state.
+        return { ...this.lastUsage, compactedTurns: this.compaction.compactedTurns };
+    }
     /**
-     * Fetches the system prompt from the MCP server's registered "system-prompt" prompt
-     * and prepends it to conversationHistory. Cached — only runs once per session.
-     * Direct Prompt (bypass mode) skips this entirely.
+     * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
+     * Cached per client session.
      */
     async ensureSystemPrompt() {
-        // Already loaded if history starts with a system message
-        if (this.conversationHistory[0]?.role === "system")
+        if (this.instructions)
             return;
         try {
+            // SDK typing may not expose getPrompt.
             // eslint-disable-next-line @typescript-eslint/no-explicit-any
             const result = await this.client.getPrompt({ name: "system-prompt" });
             const parts = [];
-            for (const msg of result.messages) {
+            for (const msg of result.messages ?? []) {
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
                 const c = msg.content;
                 if (typeof c === "string")
                     parts.push(c);
                 else if (c?.text)
                     parts.push(c.text);
             }
-            const text = parts.join("\n\n");
+            const text = parts.join("\n\n").trim();
             if (text) {
-                this.conversationHistory = [
-                    {
-                        type: "message",
-                        role: "system",
-                        content: [{ type: "input_text", text }],
-                    },
-                    ...this.conversationHistory,
-                ];
+                this.instructions = text;
             }
         }
         catch (error) {
             console.error("[MCPClient] Failed to fetch system prompt from MCP server:", error);
+            this.instructions = null;
         }
     }
-    async connect() {
-        await this.client.connect(this.transport);
+    /**
+     * Build request input:
+     * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
+     *
+     * The compaction item is included only when `this.compaction.item` is truthy.
+     * A fresh array is returned each time; `this.conversationHistory` itself is
+     * not modified (its items are copied by reference, not cloned).
+     *
+     * @param newItems Items to append after the stored history. Defaults to an empty list.
+     * @returns The combined list of input items in the order shown above.
+     */
+    buildInput(newItems = []) {
+        const input = [];
+        if (this.compaction.item) {
+            input.push(this.compaction.item);
+        }
+        input.push(...this.conversationHistory);
+        input.push(...newItems);
+        return input;
+    }
+    /**
+     * Count input tokens before making a request.
+     * Falls back to a simple rough estimate if the SDK method is unavailable.
+     *
+     * The count is requested for the configured model with the given input and
+     * the current instructions (sent as `undefined` when none are cached).
+     * The first non-nullish of `input_tokens`, `total_tokens` or `count` on the
+     * result is returned; if none is present, or the call throws for any reason,
+     * the rough estimator is used instead. Errors are not logged or rethrown.
+     *
+     * NOTE(review): `tools: []` is passed, so tool definitions do not appear to be
+     * part of the measured count — confirm whether that under-counts real requests.
+     *
+     * @param input The request input items to measure.
+     * @returns A promise for the token count (measured or estimated).
+     */
+    async countInputTokens(input) {
+        try {
+            // Some SDK versions may expose this as responses.inputTokens.count(...)
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const result = await this.openai.responses.inputTokens.count({
+                model: this.config.openaiModel,
+                input,
+                instructions: this.instructions ?? undefined,
+                tools: [],
+            });
+            // Common guess for returned shape
+            return (result?.input_tokens ??
+                result?.total_tokens ??
+                result?.count ??
+                this.roughEstimateInputTokens(input));
+        }
+        catch {
+            return this.roughEstimateInputTokens(input);
+        }
+    }
+    /**
+     * Very rough fallback estimator.
+     * Only used if token counting endpoint is unavailable in the SDK version in use.
+     *
+     * Serializes `{ instructions, input }` to JSON and returns the character
+     * length divided by four, rounded up. The current instructions are always
+     * part of the serialized text, whatever items are passed in, so estimates
+     * of several small slices each include the instruction length.
+     *
+     * @param input The items to estimate.
+     * @returns The estimated token count (an integer).
+     */
+    roughEstimateInputTokens(input) {
+        const serialized = JSON.stringify({
+            instructions: this.instructions,
+            input,
+        });
+        // Very rough English-ish heuristic.
+        return Math.ceil(serialized.length / 4);
+    }
+    /**
+     * Reset the per-turn accumulator at the start of each processQuery / processRawQuery.
+     *
+     * Replaces `this.turnStats` with a new object whose four counters are all zero.
+     * `this.lastUsage` and `this.lastInputTokens` are not changed here.
+     */
+    startTurn() {
+        this.turnStats = { inputTokens: 0, outputTokens: 0, cachedTokens: 0, reasoningTokens: 0 };
+    }
+    /**
+     * Normalize usage from Responses API and accumulate into the per-turn stats.
+     *
+     * With previous_response_id chaining there are multiple API calls per user turn:
+     *   - inputTokens: use max across calls (the call with the largest context is most representative)
+     *   - cachedTokens: taken from the same call that has the max input tokens (keeps % ≤ 100%)
+     *   - outputTokens: sum across calls (each call generates separately-billed tokens)
+     *   - reasoningTokens: sum across calls
+     *
+     * Any usage field missing from the response is treated as 0, including the
+     * case where the response has no `usage` object at all. After accumulating,
+     * `this.lastUsage` is rebuilt from the turn totals (totalTokens is the turn's
+     * input plus output) and `this.lastInputTokens` is set to this call's own
+     * input token count.
+     *
+     * @param response A Responses API result; only its `usage` property is read.
+     */
+    captureUsage(response) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const usage = response?.usage ?? {};
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const inputDetails = usage?.input_tokens_details ?? {};
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const outputDetails = usage?.output_tokens_details ?? {};
+        const inputTokens = usage.input_tokens ?? 0;
+        const outputTokens = usage.output_tokens ?? 0;
+        const cachedTokens = inputDetails.cached_tokens ?? 0;
+        const reasoningTokens = outputDetails.reasoning_tokens ?? 0;
+        // cachedTokens is only meaningful relative to its own call's inputTokens.
+        // Track it alongside the max-input call so the percentage stays ≤ 100%.
+        // The comparison is `>=`, so on a tie the later call's cached count wins.
+        if (inputTokens >= this.turnStats.inputTokens) {
+            this.turnStats.inputTokens = inputTokens;
+            this.turnStats.cachedTokens = cachedTokens;
+        }
+        this.turnStats.outputTokens += outputTokens;
+        this.turnStats.reasoningTokens += reasoningTokens;
+        this.lastUsage = {
+            inputTokens: this.turnStats.inputTokens,
+            outputTokens: this.turnStats.outputTokens,
+            totalTokens: this.turnStats.inputTokens + this.turnStats.outputTokens,
+            cachedTokens: this.turnStats.cachedTokens,
+            reasoningTokens: this.turnStats.reasoningTokens,
+            compactedTurns: this.compaction.compactedTurns,
+        };
+        // Keep lastInputTokens as the raw per-call value for compaction threshold checks.
+        this.lastInputTokens = inputTokens;
+    }
+    /**
+     * Compact oversized tool outputs before storing them in rolling history.
+     *
+     * The value is pruned recursively and then JSON-serialized:
+     *   - strings longer than `config.maxToolOutputChars` are cut to that length
+     *     and suffixed with a "...[truncated N chars]" marker;
+     *   - arrays keep their first 30 items plus a marker string for the rest;
+     *   - plain objects keep at most 25 fields, with a fixed list of preferred
+     *     keys ordered first, plus a `__truncated_fields__` count of the rest;
+     *   - an object already visited during this call becomes "[circular]".
+     * If serialization throws, `String(value)` is returned instead, truncated
+     * the same way as a long string.
+     *
+     * NOTE(review): the length limit applies to each string separately, so the
+     * serialized result as a whole can be longer than `maxToolOutputChars`.
+     * NOTE(review): `seen` is never cleared while unwinding, so an object that is
+     * referenced twice without forming a cycle is also emitted as "[circular]";
+     * arrays are not added to `seen` at all.
+     * NOTE(review): for an `undefined` input `JSON.stringify` yields `undefined`,
+     * so the return value is not a string in that case — confirm callers cope.
+     *
+     * @param value The raw tool result.
+     * @returns The compacted result as a JSON (or plain) string.
+     */
+    compactToolResult(value) {
+        const seen = new WeakSet();
+        const prune = (v) => {
+            if (v == null)
+                return v;
+            if (typeof v === "string") {
+                if (v.length <= this.config.maxToolOutputChars)
+                    return v;
+                return (v.slice(0, this.config.maxToolOutputChars) +
+                    `\n...[truncated ${v.length - this.config.maxToolOutputChars} chars]`);
+            }
+            if (typeof v !== "object")
+                return v;
+            if (Array.isArray(v)) {
+                const maxItems = 30;
+                const sliced = v.slice(0, maxItems).map(prune);
+                if (v.length > maxItems) {
+                    sliced.push(`...[truncated ${v.length - maxItems} items]`);
+                }
+                return sliced;
+            }
+            if (seen.has(v))
+                return "[circular]";
+            seen.add(v);
+            const obj = v;
+            const out = {};
+            const entries = Object.entries(obj);
+            // Prefer keeping fewer, more informative fields.
+            const preferredFirst = [
+                "title",
+                "name",
+                "id",
+                "url",
+                "summary",
+                "description",
+                "text",
+                "content",
+                "status",
+                "result",
+                "items",
+                "data",
+            ];
+            // Keys outside the preferred list all rank 999, i.e. after every preferred key.
+            const sorted = entries.sort(([a], [b]) => {
+                const ai = preferredFirst.indexOf(a);
+                const bi = preferredFirst.indexOf(b);
+                const av = ai === -1 ? 999 : ai;
+                const bv = bi === -1 ? 999 : bi;
+                return av - bv;
+            });
+            const maxFields = 25;
+            for (const [k, val] of sorted.slice(0, maxFields)) {
+                out[k] = prune(val);
+            }
+            if (entries.length > maxFields) {
+                out.__truncated_fields__ = entries.length - maxFields;
+            }
+            return out;
+        };
+        try {
+            return JSON.stringify(prune(value));
+        }
+        catch {
+            const s = String(value);
+            return s.length <= this.config.maxToolOutputChars
+                ? s
+                : s.slice(0, this.config.maxToolOutputChars) +
+                    `\n...[truncated ${s.length - this.config.maxToolOutputChars} chars]`;
+        }
+    }
+    makeUserMessage(text) { // Wraps `text` as a user-role message item with a single `input_text` content part.
+        return {
+            type: "message",
+            role: "user",
+            content: [{ type: "input_text", text }],
+        };
+    }
+    makeFunctionOutput(callId, output) { // Builds a `function_call_output` item pairing `output` with the given call id.
+        return {
+            type: "function_call_output",
+            call_id: callId,
+            output,
+        };
+    }
+    /**
+     * We treat a "turn" boundary as:
+     * - starts at a user message
+     * - ends right before the next user message, or end of array
+     *
+     * This lets us compact or trim in coherent chunks instead of arbitrary items.
+     *
+     * Each boundary is `{ startIndex, endIndex, estimatedTokens }`, where both
+     * indices are inclusive positions in `items` and the estimate comes from
+     * the rough estimator applied to that slice.
+     *
+     * When at least one user message exists, any items that come before the
+     * first user message are not covered by any boundary. When there is no user
+     * message at all, a single boundary spanning the whole array is returned.
+     * An empty array yields an empty result.
+     *
+     * @param items Conversation items to partition.
+     * @returns Turn boundaries in ascending index order.
+     */
+    getTurnBoundaries(items) {
+        const boundaries = [];
+        let currentStart = -1;
+        for (let i = 0; i < items.length; i++) {
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const item = items[i];
+            const isUserMessage = item?.type === "message" && item?.role === "user";
+            if (isUserMessage) {
+                // A new user message closes the turn that was open, if any.
+                if (currentStart !== -1) {
+                    const slice = items.slice(currentStart, i);
+                    boundaries.push({
+                        startIndex: currentStart,
+                        endIndex: i - 1,
+                        estimatedTokens: this.roughEstimateInputTokens(slice),
+                    });
+                }
+                currentStart = i;
+            }
+        }
+        // Close the final open turn, which runs to the end of the array.
+        if (currentStart !== -1) {
+            const slice = items.slice(currentStart);
+            boundaries.push({
+                startIndex: currentStart,
+                endIndex: items.length - 1,
+                estimatedTokens: this.roughEstimateInputTokens(slice),
+            });
+        }
+        // If there are no user turns, treat all as one chunk.
+        if (boundaries.length === 0 && items.length > 0) {
+            boundaries.push({
+                startIndex: 0,
+                endIndex: items.length - 1,
+                estimatedTokens: this.roughEstimateInputTokens(items),
+            });
+        }
+        return boundaries;
+    }
+    /**
+     * Split history into an older "cold" part (compaction candidates) and a
+     * newer "hot" part that stays uncompacted.
+     *
+     * Turns are accepted newest-first for as long as their summed
+     * `estimatedTokens` does not exceed `config.hotContextTargetInputTokens`.
+     * The newest turn is always kept hot, even when it alone exceeds the budget.
+     * The split always falls on a turn start.
+     *
+     * @param items History items to split.
+     * @returns `{ coldItems, hotItems }`. When no turns are found, `coldItems`
+     *   is empty and `hotItems` is the `items` array itself (not a copy).
+     */
+    splitColdAndHotHistory(items) {
+        const turns = this.getTurnBoundaries(items);
+        if (turns.length === 0) {
+            return { coldItems: [], hotItems: items };
+        }
+        const budget = this.config.hotContextTargetInputTokens;
+        let hotTokens = 0;
+        // Index of the oldest turn that still fits; turns.length means "none accepted yet".
+        let firstHotTurn = turns.length;
+        while (firstHotTurn > 0) {
+            const withOlderTurn = hotTokens + turns[firstHotTurn - 1].estimatedTokens;
+            if (withOlderTurn > budget) {
+                break;
+            }
+            hotTokens = withOlderTurn;
+            firstHotTurn -= 1;
+        }
+        // If even the newest turn is too large, keep at least that turn hot.
+        const pivotTurn = firstHotTurn === turns.length
+            ? turns[turns.length - 1]
+            : turns[firstHotTurn];
+        const splitIndex = pivotTurn.startIndex;
+        return {
+            coldItems: items.slice(0, splitIndex),
+            hotItems: items.slice(splitIndex),
+        };
+    }
+    /**
+     * Fold `coldItems` into the stored compaction item.
+     *
+     * Sends `[existing compaction item (if any), ...coldItems]` to
+     * `this.openai.responses.compact` and, on success, stores the returned item
+     * in `this.compaction.item` and adds the number of turns found in
+     * `coldItems` to `this.compaction.compactedTurns`.
+     *
+     * Any failure (including a response with no usable item) is logged and
+     * swallowed; nothing is rethrown and `this.compaction` is left unchanged.
+     * This method does not itself remove anything from the conversation history.
+     *
+     * NOTE(review): the result is read from `output[0]`, then `compacted`, then
+     * `item`. This assumes the compaction item is the first output element;
+     * confirm against the SDK version in use.
+     *
+     * @param coldItems Older history items to compact; an empty array is a no-op.
+     */
+    async compactColdHistory(coldItems) {
+        if (coldItems.length === 0) {
+            return;
+        }
+        try {
+            // Compact the previous compaction object (when present) together
+            // with the newly cold items, so the update is incremental.
+            const priorCompaction = this.compaction.item;
+            const compactInput = priorCompaction
+                ? [priorCompaction, ...coldItems]
+                : [...coldItems];
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const response = await this.openai.responses.compact({
+                model: this.config.openaiModel,
+                input: compactInput,
+                instructions: this.instructions ?? undefined,
+            });
+            // Different SDK versions may expose the reusable item under different keys.
+            const newItem = response?.output?.[0] ??
+                response?.compacted ??
+                response?.item ??
+                null;
+            if (!newItem) {
+                throw new Error("Compaction response did not include a reusable compaction item");
+            }
+            this.compaction.item = newItem;
+            this.compaction.compactedTurns += this.getTurnBoundaries(coldItems).length;
+            console.error(`[MCPClient] Compacted ${coldItems.length} old items. Total compacted turns: ${this.compaction.compactedTurns}`);
+        }
+        catch (error) {
+            // Best-effort: the failure is only logged. The caller decides what
+            // happens to the cold items afterwards.
+            console.error("[MCPClient] Compaction failed, dropping cold history:", error);
+        }
+    }
+    /**
+     * Apply rolling compaction once the last measured input size has reached
+     * `config.compactTriggerInputTokens`.
+     *
+     * Does nothing while `this.lastInputTokens` is below the trigger, or when
+     * the split yields no cold items. Otherwise the cold items are handed to
+     * `compactColdHistory`, the conversation history is replaced by the hot
+     * items, and `lastInputTokens` is reset to 0. The replacement happens
+     * whether or not compaction succeeded, because `compactColdHistory` does
+     * not report failure.
+     */
+    async maybeCompactHistory() {
+        const belowTrigger = this.lastInputTokens < this.config.compactTriggerInputTokens;
+        if (belowTrigger) {
+            return;
+        }
+        const split = this.splitColdAndHotHistory(this.conversationHistory);
+        if (split.coldItems.length === 0) {
+            return;
+        }
+        await this.compactColdHistory(split.coldItems);
+        // Only the recent window stays in the live history from here on.
+        this.conversationHistory = split.hotItems;
+        this.lastInputTokens = 0;
+        console.error(`[MCPClient] Applied rolling compaction. Kept ${split.hotItems.length} recent items uncompacted.`);
+    }
+    /**
+     * Keep history from growing pathologically in item count even before token
+     * limits are reached. Trimming is turn-aware: the cut always lands on the
+     * start of a turn, never in the middle of one.
+     *
+     * @param maxTurns Maximum number of most-recent turns to keep (default 20).
+     *   Fractions are rounded down and values below 1 are treated as 1, so the
+     *   newest turn is never dropped. A value that is not a finite number
+     *   disables trimming.
+     */
+    enforceHardHistoryLimitByTurns(maxTurns = 20) {
+        const requested = Number(maxTurns);
+        // NaN or Infinity cannot select a turn; leave the history untouched.
+        if (!Number.isFinite(requested))
+            return;
+        // The index arithmetic below needs a whole number >= 1; otherwise
+        // turns[turns.length - limit] is undefined and reading startIndex throws.
+        const turnLimit = Math.max(1, Math.floor(requested));
+        const turns = this.getTurnBoundaries(this.conversationHistory);
+        if (turns.length <= turnLimit)
+            return;
+        // Everything before the oldest kept turn is dropped, so the number of
+        // dropped items equals that turn's start index.
+        const keepFrom = turns[turns.length - turnLimit].startIndex;
+        this.conversationHistory = this.conversationHistory.slice(keepFrom);
+        console.error(`[MCPClient] Hard-trimmed ${keepFrom} old uncompacted items, preserving last ${turnLimit} turns`);
     }
+    /**
+     * Build the tool list sent with each model request.
+     *
+     * The list starts with the `web_search_preview` tool, followed by one
+     * non-strict `function` tool per tool returned by `this.client.listTools()`.
+     * Listed tools named `web_search` or `fetch_webpage` are left out
+     * (presumably because `web_search_preview` covers them; confirm).
+     *
+     * @returns Array of tool definitions.
+     */
+    async buildTools() {
+        const listing = await this.client.listTools();
+        const functionTools = [];
+        for (const tool of listing.tools) {
+            if (tool.name === "web_search" || tool.name === "fetch_webpage") {
+                continue;
+            }
+            functionTools.push({
+                type: "function",
+                name: tool.name,
+                description: tool.description || "",
+                parameters: tool.inputSchema,
+                strict: false,
+            });
+        }
+        return [{ type: "web_search_preview" }, ...functionTools];
+    }
+    /**
+     * Issue one `responses.create` request and record its usage.
+     *
+     * The request uses the configured model and the current instructions, with
+     * `truncation: "disabled"` and `prompt_cache_retention: "24h"`.
+     *
+     * @param params `input` and `tools` are passed through as given;
+     *   `previousResponseId` (optional) is sent as `previous_response_id`.
+     * @returns The response object, after it has been passed to `this.captureUsage`.
+     */
+    async createResponse(params) {
+        const request = {
+            model: this.config.openaiModel,
+            instructions: this.instructions ?? undefined,
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            input: params.input,
+            tools: params.tools,
+            previous_response_id: params.previousResponseId,
+            truncation: "disabled",
+            prompt_cache_retention: "24h",
+        };
+        const response = await this.openai.responses.create(request);
+        this.captureUsage(response);
+        return response;
+    }
+    /**
+     * Main query method with rolling compaction.
+     */
     async processQuery(query, onThinking, abortSignal, bypassSystemPrompt = false) {
-        // Check for cancellation at start
         if (abortSignal?.aborted) {
             throw new Error("Request was cancelled");
         }
-        // Bypass mode: send the raw prompt directly without system message or conversation history
         if (bypassSystemPrompt) {
             return this.processRawQuery(query, onThinking, abortSignal);
         }
-        // Load system prompt from MCP server (no-op after first call)
+        this.startTurn();
         await this.ensureSystemPrompt();
-        // Check if we should compact
-        const shouldCompact = this.conversationHistory.length >= 40 &&
-            (Date.now() - this.lastCompaction > 10 * 60 * 1000);
-        if (shouldCompact) {
-            await this.compactConversation();
+        // Proactive compaction based on last real measured request.
+        await this.maybeCompactHistory();
+        const tools = await this.buildTools();
+        const userMessage = this.makeUserMessage(query);
+        // Optional proactive token counting near/around threshold.
+        const projectedInputTokens = await this.countInputTokens(this.buildInput([userMessage]));
+        if (projectedInputTokens >= this.config.compactTriggerInputTokens) {
+            await this.maybeCompactHistory();
         }
-        // Add user message to conversation history
-        this.conversationHistory.push({
-            type: "message",
-            role: "user",
-            content: [
-                {
-                    type: "input_text",
-                    text: query,
-                }
-            ],
-        });
-        // Get available tools from MCP server
-        const toolsResponse = await this.client.listTools();
-        // Convert MCP tools to OpenAI Responses API format
-        const tools = toolsResponse.tools.map((tool) => ({
-            type: "function",
-            name: tool.name,
-            description: tool.description || "",
-            parameters: tool.inputSchema,
-            strict: false,
-        }));
-        // Multi-turn conversation with tool calling
+        // Add the new user message to rolling history now.
+        this.conversationHistory.push(userMessage);
         let loopCount = 0;
         const maxLoops = 15;
         let finalResponse = "";
         let outOfToolCalls = false;
+        let previousResponseId = undefined;
+        // Carries tool outputs across iterations so previous_response_id chain stays intact.
+        let pendingToolOutputs = null;
         while (loopCount < maxLoops) {
             loopCount++;
-            // Check for cancellation before each API call
             if (abortSignal?.aborted) {
                 throw new Error("Request was cancelled");
             }
-            // Call OpenAI Responses API with error handling
             let response;
             try {
-                response = await this.openai.responses.create({
-                    model: this.config.openaiModel,
-                    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                    input: this.conversationHistory,
-                    tools: outOfToolCalls ? [] : tools,
-                });
+                if (!previousResponseId) {
+                    // First request in this query: send full current context.
+                    response = await this.createResponse({
+                        input: this.buildInput(),
+                        tools: outOfToolCalls ? [] : tools,
+                    });
+                }
+                else {
+                    // Send pending tool outputs to continue the response chain.
+                    response = await this.createResponse({
+                        input: pendingToolOutputs ?? [],
+                        tools: outOfToolCalls ? [] : tools,
+                        previousResponseId,
+                    });
+                    pendingToolOutputs = null;
+                }
             }
             catch (error) {
                 const err = error;
-                // Handle context length exceeded
-                if (err.status === 400 &&
-                    (err.code === 'context_length_exceeded' ||
-                        err.message?.includes('context') ||
-                        err.message?.includes('length'))) {
-                    await this.compactConversation();
-                    response = await this.openai.responses.create({
-                        model: this.config.openaiModel,
-                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                        input: this.conversationHistory,
-                        tools: outOfToolCalls ? [] : tools,
-                    });
+                const message = err.message?.toLowerCase() || "";
+                const contextProblem = err.status === 400 &&
+                    (err.code === "context_length_exceeded" ||
+                        message.includes("context") ||
+                        message.includes("length"));
+                const toolProblem = err.status === 400 &&
+                    (err.code === "response_incomplete" ||
+                        message.includes("incomplete") ||
+                        message.includes("tool"));
+                if (contextProblem) {
+                    await this.maybeCompactHistory();
+                    if (!previousResponseId) {
+                        response = await this.createResponse({
+                            input: this.buildInput(),
+                            tools: outOfToolCalls ? [] : tools,
+                        });
+                    }
+                    else {
+                        response = await this.createResponse({
+                            input: pendingToolOutputs ?? [],
+                            tools: outOfToolCalls ? [] : tools,
+                            previousResponseId,
+                        });
+                        pendingToolOutputs = null;
+                    }
                 }
-                // Handle tool calls exhausted
-                else if (err.status === 400 &&
-                    (err.code === 'response_incomplete' ||
-                        err.message?.includes('incomplete') ||
-                        err.message?.includes('tool'))) {
+                else if (toolProblem) {
                     outOfToolCalls = true;
-                    response = await this.openai.responses.create({
-                        model: this.config.openaiModel,
-                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                        input: this.conversationHistory,
-                        tools: [],
-                    });
+                    if (!previousResponseId) {
+                        response = await this.createResponse({
+                            input: this.buildInput(),
+                            tools: [],
+                        });
+                    }
+                    else {
+                        response = await this.createResponse({
+                            input: pendingToolOutputs ?? [],
+                            tools: [],
+                            previousResponseId,
+                        });
+                        pendingToolOutputs = null;
+                    }
                 }
                 else {
                     throw error;
                 }
             }
-            const output = response.output;
-            // Find function_call items
+            previousResponseId = response.id;
+            const output = response.output ?? [];
+            for (const item of output) {
+                if (item.type === "web_search_call") {
+                    onThinking?.("🔍 web_search_preview");
+                }
+            }
             const functionCalls = output.filter((item) => item.type === "function_call");
-            // Check if AI wants to call tools
             if (functionCalls.length > 0) {
+                // Persist model output items into rolling history.
                 this.conversationHistory.push(...output);
+                const toolOutputsForNextStep = [];
                 for (const functionCall of functionCalls) {
-                    // Check for cancellation before each tool call
                     if (abortSignal?.aborted) {
                         throw new Error("Request was cancelled");
                     }
                     const functionName = functionCall.name;
-                    const functionArgs = typeof functionCall.arguments === 'string'
+                    const functionArgs = typeof functionCall.arguments === "string"
                         ? JSON.parse(functionCall.arguments)
                         : functionCall.arguments;
-                    // Build a descriptive thinking message with key arguments
                     let toolDesc = functionName;
-                    if (functionName === "fetch_webpage" && functionArgs.url) {
+                    if (functionArgs?.url && functionName === "fetch_webpage") {
                         try {
                             toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
                         }
@@ -211,118 +644,113 @@ export class MCPClientOpenAI {
                             toolDesc = `fetch_webpage → ${functionArgs.url}`;
                         }
                     }
-                    else if (functionName === "web_search" && functionArgs.query) {
+                    else if (functionArgs?.query && functionName === "web_search") {
                         toolDesc = `web_search → "${functionArgs.query}"`;
                     }
                     onThinking?.(`🔧 ${toolDesc}`);
                     try {
-                        // Execute the tool via MCP
                         const result = await this.client.callTool({
                             name: functionName,
                             arguments: functionArgs,
                         });
-                        // Add tool result to conversation history
-                        this.conversationHistory.push({
-                            type: "function_call_output",
-                            call_id: functionCall.call_id,
-                            output: JSON.stringify(result.content),
-                        });
+                        const compactOutput = this.compactToolResult(result.content);
+                        const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, compactOutput);
+                        toolOutputsForNextStep.push(toolOutputItem);
+                        this.conversationHistory.push(toolOutputItem);
                     }
                     catch (error) {
-                        this.conversationHistory.push({
-                            type: "function_call_output",
-                            call_id: functionCall.call_id,
-                            output: `Error: ${error instanceof Error ? error.message : String(error)}`,
-                        });
+                        const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`);
+                        toolOutputsForNextStep.push(toolOutputItem);
+                        this.conversationHistory.push(toolOutputItem);
                     }
                 }
+                // Carry tool outputs to the next iteration so the response chain stays intact.
+                pendingToolOutputs = toolOutputsForNextStep;
                 continue;
             }
-            else {
-                // No more tool calls, extract final response
-                for (const item of output) {
-                    if (item.type === "message" && item.role === "assistant") {
-                        for (const contentItem of item.content) {
-                            if (contentItem.type === "output_text") {
-                                finalResponse += contentItem.text;
-                            }
+            for (const item of output) {
+                if (item.type === "message" && item.role === "assistant") {
+                    for (const contentItem of item.content ?? []) {
+                        if (contentItem.type === "output_text") {
+                            finalResponse += contentItem.text;
                         }
                     }
                 }
-                this.conversationHistory.push(...output);
-                break;
             }
+            this.conversationHistory.push(...output);
+            break;
         }
-        // If we hit max loops, make one final request without tools
         if (loopCount >= maxLoops && !finalResponse) {
             try {
                 const finalApiResponse = await this.openai.responses.create({
                     model: this.config.openaiModel,
+                    instructions: this.instructions ?? undefined,
                     // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                    input: this.conversationHistory,
+                    input: this.buildInput(),
                     tools: [],
+                    truncation: "disabled",
+                    prompt_cache_retention: "24h",
                 });
-                const finalOutput = finalApiResponse.output;
-                for (const item of finalOutput) {
+                this.captureUsage(finalApiResponse);
+                for (const item of finalApiResponse.output ?? []) {
                     if (item.type === "message" && item.role === "assistant") {
-                        for (const contentItem of item.content) {
+                        for (const contentItem of item.content ?? []) {
                             if (contentItem.type === "output_text") {
                                 finalResponse += contentItem.text;
                             }
                         }
                     }
                 }
-                this.conversationHistory.push(...finalOutput);
+                this.conversationHistory.push(...(finalApiResponse.output ?? []));
             }
-            catch (error) {
-                finalResponse = "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
+            catch {
+                finalResponse =
+                    "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
             }
         }
-        // Keep conversation history manageable
-        if (this.conversationHistory.length > 50) {
-            const systemMessage = this.conversationHistory[0];
-            const recentItems = this.conversationHistory.slice(-49);
-            this.conversationHistory = [systemMessage, ...recentItems];
-        }
+        // Prevent pathological item growth even when tokens are still OK.
+        this.enforceHardHistoryLimitByTurns(20);
+        // Optional: if this turn caused a large input, compact immediately so the next turn starts cleaner.
+        await this.maybeCompactHistory();
         return finalResponse;
     }
     /**
-     * Sends a raw query directly to the model — no system prompt, no conversation history.
-     * Used by the Direct Prompt dev tool to test prompts verbatim.
+     * Raw mode: no cached instructions, no rolling history, no compaction state.
      */
     async processRawQuery(query, onThinking, abortSignal) {
-        const toolsResponse = await this.client.listTools();
-        const tools = toolsResponse.tools.map((tool) => ({
-            type: "function",
-            name: tool.name,
-            description: tool.description || "",
-            parameters: tool.inputSchema,
-            strict: false,
-        }));
-        // Isolated history — just this message, no system prompt
-        const isolatedHistory = [
-            {
-                type: "message",
-                role: "user",
-                content: [{ type: "input_text", text: query }],
-            },
-        ];
+        this.startTurn();
+        const tools = await this.buildTools();
+        const isolatedHistory = [this.makeUserMessage(query)];
         let loopCount = 0;
         const maxLoops = 15;
         let finalResponse = "";
+        let previousResponseId = undefined;
+        let pendingRawToolOutputs = null;
         while (loopCount < maxLoops) {
             loopCount++;
             if (abortSignal?.aborted)
                 throw new Error("Request was cancelled");
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
             const response = await this.openai.responses.create({
                 model: this.config.openaiModel,
-                input: isolatedHistory,
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                input: (!previousResponseId ? isolatedHistory : (pendingRawToolOutputs ?? [])),
+                previous_response_id: previousResponseId,
                 tools,
+                truncation: "disabled",
             });
-            const output = response.output;
+            pendingRawToolOutputs = null;
+            this.captureUsage(response);
+            previousResponseId = response.id;
+            const output = response.output ?? [];
+            for (const item of output) {
+                if (item.type === "web_search_call") {
+                    onThinking?.("🔍 web_search_preview");
+                }
+            }
             const functionCalls = output.filter((item) => item.type === "function_call");
             if (functionCalls.length > 0) {
-                isolatedHistory.push(...output);
+                const newToolOutputs = [];
                 for (const functionCall of functionCalls) {
                     if (abortSignal?.aborted)
                         throw new Error("Request was cancelled");
@@ -331,7 +759,7 @@ export class MCPClientOpenAI {
                         ? JSON.parse(functionCall.arguments)
                         : functionCall.arguments;
                     let toolDesc = functionName;
-                    if (functionName === "fetch_webpage" && functionArgs.url) {
+                    if (functionName === "fetch_webpage" && functionArgs?.url) {
                         try {
                             toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
                         }
@@ -339,23 +767,27 @@ export class MCPClientOpenAI {
                             toolDesc = `fetch_webpage → ${functionArgs.url}`;
                         }
                     }
-                    else if (functionName === "web_search" && functionArgs.query) {
+                    else if (functionName === "web_search" && functionArgs?.query) {
                         toolDesc = `web_search → "${functionArgs.query}"`;
                     }
                     onThinking?.(`🔧 ${toolDesc}`);
                     try {
-                        const result = await this.client.callTool({ name: functionName, arguments: functionArgs });
-                        isolatedHistory.push({ type: "function_call_output", call_id: functionCall.call_id, output: JSON.stringify(result.content) });
+                        const result = await this.client.callTool({
+                            name: functionName,
+                            arguments: functionArgs,
+                        });
+                        newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, this.compactToolResult(result.content)));
                     }
                     catch (error) {
-                        isolatedHistory.push({ type: "function_call_output", call_id: functionCall.call_id, output: `Error: ${error instanceof Error ? error.message : String(error)}` });
+                        newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`));
                     }
                 }
+                pendingRawToolOutputs = newToolOutputs;
                 continue;
             }
             for (const item of output) {
                 if (item.type === "message" && item.role === "assistant") {
-                    for (const contentItem of item.content) {
+                    for (const contentItem of item.content ?? []) {
                         if (contentItem.type === "output_text")
                             finalResponse += contentItem.text;
                     }
@@ -365,13 +797,4 @@ export class MCPClientOpenAI {
         }
         return finalResponse;
     }
-    clearHistory() {
-        // Keep system message only if it genuinely is a system role message
-        const first = this.conversationHistory[0];
-        const systemMessage = first?.role === "system" ? this.conversationHistory[0] : undefined;
-        this.conversationHistory = systemMessage ? [systemMessage] : [];
-    }
-    async cleanup() {
-        await this.client.close();
-    }
 }

package/dist/styles/MCPChat.css CHANGED Viewed

@@ -446,6 +446,18 @@
   font-size: 12px;
   opacity: 0.6;
   margin-top: 6px;
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  flex-wrap: wrap;
+}
+.mcp-chat-token-info {
+  font-size: 11px;
+  opacity: 0.75;
+  font-family: monospace;
+  border-left: 1px solid currentColor;
+  padding-left: 8px;
 }
 /* ───────────────────────────────────────────────

package/dist/types.d.ts CHANGED Viewed

@@ -8,6 +8,8 @@ export interface Message {
     isStreaming?: boolean;
     /** Hidden messages are sent to the AI but not shown in the chat bubble list */
     hidden?: boolean;
+    /** Token usage info shown in the footer of assistant messages — never fed back to AI */
+    tokenInfo?: string;
 }
 export interface ThinkingStep {
     id: string;
@@ -21,7 +23,7 @@ export interface MCPChatProps {
     className?: string;
 }
 export interface StreamEvent {
-    type: "thinking" | "content" | "done" | "error";
+    type: "thinking" | "content" | "done" | "error" | "usage";
     message?: string;
     chunk?: string;
 }

package/dist/types.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;~~CAClB~~;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;~~IAChD~~,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
1	+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wFAAwF;IACxF,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@nqminds/mcp-client",
-  "version": "1.0.9",
+  "version": "1.0.12",
   "description": "Reusable MCP client component with AI chat interface",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",