npm - @nqminds/mcp-client - Versions diffs - 1.0.8 → 1.0.11 - Mend

@nqminds/mcp-client 1.0.8 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/MCPChat.d.ts.map +1 -1
package/dist/MCPChat.js +17 -4
package/dist/api-helpers.d.ts.map +1 -1
package/dist/api-helpers.js +21 -4
package/dist/openai-client.d.ts +137 -5
package/dist/openai-client.d.ts.map +1 -1
package/dist/openai-client.js +639 -171
package/dist/styles/MCPChat.css +12 -0
package/dist/types.d.ts +3 -1
package/dist/types.d.ts.map +1 -1
package/package.json +1 -1

package/dist/MCPChat.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBAgfd"}
1	+ {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBA6fd"}

package/dist/MCPChat.js CHANGED Viewed

@@ -76,7 +76,7 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
             setMessages((prev) => prev.filter((m) => !m.isStreaming));
         }
     };
-    const sendMessage = useCallback(async (text, hidden = false) => {
+    const sendMessage = useCallback(async (text, hidden = false, bypass = false) => {
         if (!text.trim() || isLoading)
             return;
         setIsLoading(true);
@@ -108,6 +108,7 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
                 body: JSON.stringify({
                     message: userMessage.content,
                     context: companyNumber ? { company_number: companyNumber } : undefined,
+                    bypassSystemPrompt: bypass || undefined,
                 }),
                 signal: abortController.signal,
             });
@@ -166,6 +167,16 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
                                         return updated;
                                     });
                                 }
+                                else if (parsed.type === "usage") {
+                                    setMessages((prev) => {
+                                        const updated = [...prev];
+                                        const lastIndex = updated.length - 1;
+                                        if (lastIndex >= 0) {
+                                            updated[lastIndex] = { ...updated[lastIndex], tokenInfo: parsed.message };
+                                        }
+                                        return updated;
+                                    });
+                                }
                                 else if (parsed.type === "error") {
                                     throw new Error(parsed.message || "Stream error");
                                 }
@@ -231,8 +242,8 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
         if (!directPromptText.trim())
             return;
         setDirectPromptOpen(false);
-        // Send as a visible message so you can see exactly what went to the agent
-        await sendMessage(directPromptText, false);
+        // bypass=true: skips system prompt, sends raw to the model
+        await sendMessage(directPromptText, false, true);
         setDirectPromptText("");
     };
     const toggleTheme = () => {
@@ -314,7 +325,9 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
                         React.createElement("div", { className: "mcp-chat-message-bubble" },
                             msg.role === "assistant" ? (React.createElement("div", { className: "mcp-chat-message-content markdown-content" },
                                 React.createElement(ReactMarkdown, { remarkPlugins: [remarkGfm] }, msg.content))) : (React.createElement("div", { className: "mcp-chat-message-content" }, msg.content)),
-                            React.createElement("div", { className: "mcp-chat-message-timestamp" }, msg.timestamp.toLocaleTimeString()))))),
+                            React.createElement("div", { className: "mcp-chat-message-timestamp" },
+                                msg.timestamp.toLocaleTimeString(),
+                                msg.role === "assistant" && msg.tokenInfo && (React.createElement("span", { className: "mcp-chat-token-info" }, msg.tokenInfo))))))),
                     isLoading && (React.createElement("div", { className: "mcp-chat-message mcp-chat-message-assistant" },
                         React.createElement("div", { className: "mcp-chat-thinking" },
                             React.createElement("div", { className: "mcp-chat-thinking-title" },

package/dist/api-helpers.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAgG/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
1	+ {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAmH/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}

package/dist/api-helpers.js CHANGED Viewed

@@ -9,7 +9,7 @@ const clients = new Map();
  */
 export function createMCPChatHandler(config) {
     return async (request) => {
-        const { message, context, sessionId = "default" } = await request.json();
+        const { message, context, sessionId = "default", bypassSystemPrompt = false } = await request.json();
         // Get or create client for this session
         let client = clients.get(sessionId);
         if (!client) {
@@ -41,12 +41,29 @@ export function createMCPChatHandler(config) {
                     }
                 };
                 try {
-                    sendEvent("thinking", { message: "🤔 Analyzing your question..." });
+                    sendEvent("thinking", { message: bypassSystemPrompt ? "🔧 Sending direct prompt (no system context)…" : "🤔 Analyzing your question..." });
                     // Process the query with thinking callback and abort signal
                     const response = await client.processQuery(context ? `${message}\nContext: ${JSON.stringify(context)}` : message, (thinkingMessage) => {
                         sendEvent("thinking", { message: thinkingMessage });
-                    }, abortController.signal // Pass abort signal to enable cancellation
-                    );
+                    }, abortController.signal, // Pass abort signal to enable cancellation
+                    bypassSystemPrompt);
+                    // Emit token usage summary for debugging
+                    const usage = client.getUsage();
+                    if (usage.inputTokens > 0 || usage.outputTokens > 0) {
+                        const parts = [
+                            `in: ${usage.inputTokens.toLocaleString()}`,
+                            `out: ${usage.outputTokens.toLocaleString()}`,
+                            `total: ${usage.totalTokens.toLocaleString()}`,
+                        ];
+                        if (usage.cachedTokens > 0) {
+                            const cachedPct = Math.round((usage.cachedTokens / usage.inputTokens) * 100);
+                            parts.push(`cached: ${usage.cachedTokens.toLocaleString()} (${cachedPct}%)`);
+                        }
+                        if (usage.compactedTurns > 0) {
+                            parts.push(`compacted: ${usage.compactedTurns} turn${usage.compactedTurns !== 1 ? "s" : ""}`);
+                        }
+                        sendEvent("usage", { message: parts.join(" | ") });
+                    }
                     // Check if aborted before streaming response
                     if (abortController.signal.aborted) {
                         return;

package/dist/openai-client.d.ts CHANGED Viewed

@@ -1,6 +1,20 @@
 /**
  * OpenAI-powered MCP Client
- * Uses ChatGPT to intelligently interact with MCP tools
+ *
+ * Compaction strategy:
+ * - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
+ * - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
+ *   compact the oldest cold portion and keep the newest portion verbatim.
+ * - Feed the returned compaction object back into future requests.
+ *
+ * Notes:
+ * - This is written to align with the OpenAI Responses API shape:
+ *   - response usage fields
+ *   - previous_response_id
+ *   - input token counting
+ *   - response compaction
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
+ *   by SDK version. The logic here is the important part.
  */
 export interface MCPClientConfig {
     openaiApiKey: string;
@@ -8,19 +22,137 @@ export interface MCPClientConfig {
     openaiModel?: string;
     clientName?: string;
     clientVersion?: string;
+    /**
+     * Trigger compaction when the last measured input tokens reaches this threshold.
+     * Example policy from your suggestion:
+     * - compact when last measured input >= 200k
+     */
+    compactTriggerInputTokens?: number;
+    /**
+     * Keep roughly this many of the most recent input tokens uncompacted.
+     * Example policy from your suggestion:
+     * - retain last ~100k uncompacted
+     */
+    hotContextTargetInputTokens?: number;
+    /**
+     * Guardrail for unusually large tool outputs stored in history.
+     */
+    maxToolOutputChars?: number;
+}
+interface UsageStats {
+    inputTokens: number;
+    outputTokens: number;
+    totalTokens: number;
+    cachedTokens: number;
+    reasoningTokens: number;
+    compactedTurns: number;
 }
 export declare class MCPClientOpenAI {
     private client;
     private openai;
     private transport;
+    /**
+     * Instructions are sent using the Responses API `instructions` field,
+     * not inserted as a fake message inside the rolling conversation items.
+     */
+    private instructions;
+    /**
+     * Rolling uncompacted conversation items.
+     * This contains the most recent "hot" context only.
+     */
     private conversationHistory;
-    private lastCompaction;
+    /**
+     * Opaque compaction object returned by OpenAI.
+     * This represents older "cold" context that has been compacted.
+     */
+    private compaction;
+    /**
+     * Last measured input tokens from a real Responses API call.
+     */
+    private lastInputTokens;
+    /**
+     * Latest usage snapshot for logging/inspection.
+     */
+    private lastUsage;
     private config;
     constructor(config: MCPClientConfig);
-    private compactConversation;
     connect(): Promise<void>;
-    processQuery(query: string, onThinking?: (message: string) => void, abortSignal?: AbortSignal): Promise<string>;
-    clearHistory(): void;
     cleanup(): Promise<void>;
+    clearHistory(): void;
+    getUsage(): UsageStats;
+    /**
+     * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
+     * Cached per client session.
+     */
+    private ensureSystemPrompt;
+    /**
+     * Build request input:
+     * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
+     */
+    private buildInput;
+    /**
+     * Count input tokens before making a request.
+     * Falls back to a simple rough estimate if the SDK method is unavailable.
+     */
+    private countInputTokens;
+    /**
+     * Very rough fallback estimator.
+     * Only used if token counting endpoint is unavailable in the SDK version in use.
+     */
+    private roughEstimateInputTokens;
+    /**
+     * Normalize usage from Responses API.
+     */
+    private captureUsage;
+    /**
+     * Compact oversized tool outputs before storing them in rolling history.
+     */
+    private compactToolResult;
+    private makeUserMessage;
+    private makeFunctionOutput;
+    /**
+     * We treat a "turn" boundary as:
+     * - starts at a user message
+     * - ends right before the next user message, or end of array
+     *
+     * This lets us compact or trim in coherent chunks instead of arbitrary items.
+     */
+    private getTurnBoundaries;
+    /**
+     * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
+     * Older turns become compaction candidates.
+     */
+    private splitColdAndHotHistory;
+    /**
+     * Incrementally update compaction using the cold slice only.
+     */
+    private compactColdHistory;
+    /**
+     * Proactively compact when the history has grown past the trigger.
+     * Keeps the newest hot window uncompacted and compacts the older cold window.
+     */
+    private maybeCompactHistory;
+    /**
+     * Keep history from growing pathologically in item count even before token limits.
+     * Uses turn-aware trimming, not arbitrary item slicing.
+     */
+    private enforceHardHistoryLimitByTurns;
+    /**
+     * Build MCP tool list for OpenAI Responses API.
+     */
+    private buildTools;
+    /**
+     * Create a response against the current full context.
+     */
+    private createResponse;
+    /**
+     * Main query method with rolling compaction.
+     */
+    processQuery(query: string, onThinking?: (message: string) => void, abortSignal?: AbortSignal, bypassSystemPrompt?: boolean): Promise<string>;
+    /**
+     * Raw mode: no cached instructions, no rolling history, no compaction state.
+     */
+    private processRawQuery;
 }
+export {};
 //# sourceMappingURL=openai-client.d.ts.map

package/dist/openai-client.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IACxC,OAAO,CAAC,mBAAmB,CAAsB;IACjD,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;YA4ErB,mBAAmB;IAoB3B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EAAE,WAAW,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IA+MrH,YAAY,IAAI,IAAI;IAMd,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
1	+ {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,UAAU,UAAU;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAiBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IAExC;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAuB;IAE3C;;;OAGG;IACH,OAAO,CAAC,mBAAmB,CAA2B;IAEtD;;;OAGG;IACH,OAAO,CAAC,UAAU,CAGhB;IAEF;;OAEG;IACH,OAAO,CAAC,eAAe,CAAK;IAE5B;;OAEG;IACH,OAAO,CAAC,SAAS,CAOf;IAEF,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;IAoC7B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,YAAY,IAAI,IAAI;IAiBpB,QAAQ,IAAI,UAAU;IAItB;;;OAGG;YACW,kBAAkB;IA0BhC;;;OAGG;IACH,OAAO,CAAC,UAAU;IAalB;;;OAGG;YACW,gBAAgB;IAuB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,OAAO,CAAC,YAAY;IAoBpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA8EzB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IA2CzB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;OAEG;YACW,kBAAkB;IA4ChC;;;OAGG;YACW,mBAAmB;IAoBjC;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAatC;;OAEG;YACW,UAAU;IAiBxB;;OAEG;YACW,cAAc;IAoB5B;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EACtC,WAAW,CAAC,EAAE,WAAW,EACzB,kBAAkB,UAAQ,GACzB,OAAO,CAAC,MAAM,CAAC;IA6OlB;;OAEG;YACW,eAAe;CAwG9B"}

package/dist/openai-client.js CHANGED Viewed

@@ -1,31 +1,78 @@
 /**
  * OpenAI-powered MCP Client
- * Uses ChatGPT to intelligently interact with MCP tools
+ *
+ * Compaction strategy:
+ * - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
+ * - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
+ *   compact the oldest cold portion and keep the newest portion verbatim.
+ * - Feed the returned compaction object back into future requests.
+ *
+ * Notes:
+ * - This is written to align with the OpenAI Responses API shape:
+ *   - response usage fields
+ *   - previous_response_id
+ *   - input token counting
+ *   - response compaction
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
+ *   by SDK version. The logic here is the important part.
  */
 import { Client } from "@modelcontextprotocol/sdk/client/index.js";
 import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
 import OpenAI from "openai";
 export class MCPClientOpenAI {
     constructor(config) {
+        /**
+         * Instructions are sent using the Responses API `instructions` field,
+         * not inserted as a fake message inside the rolling conversation items.
+         */
+        this.instructions = null;
+        /**
+         * Rolling uncompacted conversation items.
+         * This contains the most recent "hot" context only.
+         */
         this.conversationHistory = [];
-        this.lastCompaction = 0;
+        /**
+         * Opaque compaction object returned by OpenAI.
+         * This represents older "cold" context that has been compacted.
+         */
+        this.compaction = {
+            item: null,
+            compactedTurns: 0,
+        };
+        /**
+         * Last measured input tokens from a real Responses API call.
+         */
+        this.lastInputTokens = 0;
+        /**
+         * Latest usage snapshot for logging/inspection.
+         */
+        this.lastUsage = {
+            inputTokens: 0,
+            outputTokens: 0,
+            totalTokens: 0,
+            cachedTokens: 0,
+            reasoningTokens: 0,
+            compactedTurns: 0,
+        };
         this.config = {
             openaiApiKey: config.openaiApiKey,
             mcpServerCommand: config.mcpServerCommand,
-            openaiModel: config.openaiModel || "chatgpt-5-mini",
+            openaiModel: config.openaiModel || "gpt-5-mini",
             clientName: config.clientName || "mcp-flair-client",
             clientVersion: config.clientVersion || "1.0.0",
+            compactTriggerInputTokens: config.compactTriggerInputTokens ?? 200000,
+            hotContextTargetInputTokens: config.hotContextTargetInputTokens ?? 100000,
+            maxToolOutputChars: config.maxToolOutputChars ?? 20000,
         };
         this.openai = new OpenAI({
             apiKey: this.config.openaiApiKey,
         });
-        // Parse the server command and args
         const serverCmd = this.config.mcpServerCommand.split(" ");
         const command = serverCmd[0];
         const args = serverCmd.slice(1);
         this.transport = new StdioClientTransport({
-            command: command,
-            args: args,
+            command,
+            args,
         });
         this.client = new Client({
             name: this.config.clientName,
@@ -33,173 +80,525 @@ export class MCPClientOpenAI {
         }, {
             capabilities: {},
         });
-        // Initialize conversation with system message
-        this.conversationHistory = [
-            {
-                type: "message",
-                role: "system",
-                content: [
-                    {
-                        type: "input_text",
-                        text: `You are a helpful assistant with access to Companies House data through specialized tools.
-CRITICAL CONTEXT AWARENESS RULES:
-1. Carefully track ALL entities you mention in your responses (company numbers, names, people, dates, etc.)
-2. When the user refers to "that company," "the person," "those results," or uses similar references, ALWAYS look back at what you just discussed in the immediately preceding messages
-3. If you mentioned specific company numbers, names, or other identifiers, remember them for follow-up questions
-4. Before saying "I don't have a record of X," review your recent responses to check if you did mention it
-5. Maintain awareness of the conversation flow - if you just provided information about something, the user's next question likely refers to it
-RESPONSE FORMATTING RULES:
-- NEVER show raw JSON data to users unless they explicitly ask for "JSON", "raw data", or similar
-- Use rich Markdown formatting — the UI renders it fully (bold, italic, headings, tables, code blocks)
-- Use **bold** for key facts, names, amounts, and important values
-- Use ## and ### headings to organise longer responses into clear sections
-- Use tables whenever comparing multiple entities or showing structured data (e.g. list of officers, financial figures across years, search results) — prefer tables over bullet lists for multi-field data
-- Use bullet lists only for genuinely unordered or enumerable items (e.g. a list of risks, a list of SIC codes) — do NOT default to bullets for everything
-- Convert dates to readable format (e.g., "15 March 2023" instead of "2023-03-15")
-- Format addresses as natural inline text, not as structured fields
-- When showing company officers or PSCs, use a table with columns like Name, Role, Nationality, DOB rather than a bullet per person
-- When showing financial figures, use a table with Year / Metric / Value columns
-- Only include the most relevant information — don't dump all available fields
-- Avoid walls of bullet points; use prose sentences for narrative context and reserve lists/tables for structured data
-When responding:
-- Be concise and direct
-- Use tools to fetch accurate, up-to-date Companies House data
-- Track key identifiers (company numbers, PSC names, etc.) across the conversation
-- If unclear what the user is referring to, check your previous response first before asking for clarification
-- Never expose internal implementation details like "MCP Server" or tool names to users`,
-                    },
-                ],
-            },
-        ];
     }
-    async compactConversation() {
+    async connect() {
+        await this.client.connect(this.transport);
+    }
+    async cleanup() {
+        await this.client.close();
+    }
+    clearHistory() {
+        this.conversationHistory = [];
+        this.compaction = {
+            item: null,
+            compactedTurns: 0,
+        };
+        this.lastInputTokens = 0;
+        this.lastUsage = {
+            inputTokens: 0,
+            outputTokens: 0,
+            totalTokens: 0,
+            cachedTokens: 0,
+            reasoningTokens: 0,
+            compactedTurns: 0,
+        };
+    }
+    getUsage() {
+        return { ...this.lastUsage, compactedTurns: this.compaction.compactedTurns };
+    }
+    /**
+     * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
+     * Cached per client session.
+     */
+    async ensureSystemPrompt() {
+        if (this.instructions)
+            return;
         try {
-            const compactionResponse = await this.openai.responses.compact({
-                model: this.config.openaiModel,
+            // SDK typing may not expose getPrompt.
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const result = await this.client.getPrompt({ name: "system-prompt" });
+            const parts = [];
+            for (const msg of result.messages ?? []) {
                 // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                input: this.conversationHistory,
-            });
-            this.conversationHistory = compactionResponse.output;
-            this.lastCompaction = Date.now();
+                const c = msg.content;
+                if (typeof c === "string")
+                    parts.push(c);
+                else if (c?.text)
+                    parts.push(c.text);
+            }
+            const text = parts.join("\n\n").trim();
+            if (text) {
+                this.instructions = text;
+            }
         }
         catch (error) {
-            // Keep system message and last 25 items
-            if (this.conversationHistory.length > 26) {
-                const systemMessage = this.conversationHistory[0];
-                const recentItems = this.conversationHistory.slice(-25);
-                this.conversationHistory = [systemMessage, ...recentItems];
-            }
+            console.error("[MCPClient] Failed to fetch system prompt from MCP server:", error);
+            this.instructions = null;
         }
     }
-    async connect() {
-        await this.client.connect(this.transport);
+    /**
+     * Build request input:
+     * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
+     */
+    buildInput(newItems = []) {
+        const input = [];
+        if (this.compaction.item) {
+            input.push(this.compaction.item);
+        }
+        input.push(...this.conversationHistory);
+        input.push(...newItems);
+        return input;
     }
-    async processQuery(query, onThinking, abortSignal) {
-        // Check for cancellation at start
-        if (abortSignal?.aborted) {
-            throw new Error("Request was cancelled");
+    /**
+     * Count input tokens before making a request.
+     * Falls back to a simple rough estimate if the SDK method is unavailable.
+     */
+    async countInputTokens(input) {
+        try {
+            // Some SDK versions may expose this as responses.inputTokens.count(...)
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const result = await this.openai.responses.inputTokens.count({
+                model: this.config.openaiModel,
+                input,
+                instructions: this.instructions ?? undefined,
+                tools: [],
+            });
+            // Common guess for returned shape
+            return (result?.input_tokens ??
+                result?.total_tokens ??
+                result?.count ??
+                this.roughEstimateInputTokens(input));
+        }
+        catch {
+            return this.roughEstimateInputTokens(input);
+        }
+    }
+    /**
+     * Very rough fallback estimator.
+     * Only used if token counting endpoint is unavailable in the SDK version in use.
+     */
+    roughEstimateInputTokens(input) {
+        const serialized = JSON.stringify({
+            instructions: this.instructions,
+            input,
+        });
+        // Very rough English-ish heuristic.
+        return Math.ceil(serialized.length / 4);
+    }
+    /**
+     * Normalize usage from Responses API.
+     */
+    captureUsage(response) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const usage = response?.usage ?? {};
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const inputDetails = usage?.input_tokens_details ?? {};
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const outputDetails = usage?.output_tokens_details ?? {};
+        this.lastUsage = {
+            inputTokens: usage.input_tokens ?? 0,
+            outputTokens: usage.output_tokens ?? 0,
+            totalTokens: usage.total_tokens ?? 0,
+            cachedTokens: inputDetails.cached_tokens ?? 0,
+            reasoningTokens: outputDetails.reasoning_tokens ?? 0,
+            compactedTurns: this.compaction.compactedTurns,
+        };
+        this.lastInputTokens = this.lastUsage.inputTokens;
+    }
+    /**
+     * Compact oversized tool outputs before storing them in rolling history.
+     */
+    compactToolResult(value) {
+        const seen = new WeakSet();
+        const prune = (v) => {
+            if (v == null)
+                return v;
+            if (typeof v === "string") {
+                if (v.length <= this.config.maxToolOutputChars)
+                    return v;
+                return (v.slice(0, this.config.maxToolOutputChars) +
+                    `\n...[truncated ${v.length - this.config.maxToolOutputChars} chars]`);
+            }
+            if (typeof v !== "object")
+                return v;
+            if (Array.isArray(v)) {
+                const maxItems = 30;
+                const sliced = v.slice(0, maxItems).map(prune);
+                if (v.length > maxItems) {
+                    sliced.push(`...[truncated ${v.length - maxItems} items]`);
+                }
+                return sliced;
+            }
+            if (seen.has(v))
+                return "[circular]";
+            seen.add(v);
+            const obj = v;
+            const out = {};
+            const entries = Object.entries(obj);
+            // Prefer keeping fewer, more informative fields.
+            const preferredFirst = [
+                "title",
+                "name",
+                "id",
+                "url",
+                "summary",
+                "description",
+                "text",
+                "content",
+                "status",
+                "result",
+                "items",
+                "data",
+            ];
+            const sorted = entries.sort(([a], [b]) => {
+                const ai = preferredFirst.indexOf(a);
+                const bi = preferredFirst.indexOf(b);
+                const av = ai === -1 ? 999 : ai;
+                const bv = bi === -1 ? 999 : bi;
+                return av - bv;
+            });
+            const maxFields = 25;
+            for (const [k, val] of sorted.slice(0, maxFields)) {
+                out[k] = prune(val);
+            }
+            if (entries.length > maxFields) {
+                out.__truncated_fields__ = entries.length - maxFields;
+            }
+            return out;
+        };
+        try {
+            return JSON.stringify(prune(value));
         }
-        // Check if we should compact
-        const shouldCompact = this.conversationHistory.length >= 40 &&
-            (Date.now() - this.lastCompaction > 10 * 60 * 1000);
-        if (shouldCompact) {
-            await this.compactConversation();
+        catch {
+            const s = String(value);
+            return s.length <= this.config.maxToolOutputChars
+                ? s
+                : s.slice(0, this.config.maxToolOutputChars) +
+                    `\n...[truncated ${s.length - this.config.maxToolOutputChars} chars]`;
         }
-        // Add user message to conversation history
-        this.conversationHistory.push({
+    }
+    makeUserMessage(text) { // Wraps plain text as a user-role message input item.
+        return {
             type: "message",
             role: "user",
-            content: [
-                {
-                    type: "input_text",
-                    text: query,
+            content: [{ type: "input_text", text }], // single input_text part carrying the given text
+        };
+    }
+    makeFunctionOutput(callId, output) {
+        return {
+            type: "function_call_output",
+            call_id: callId,
+            output,
+        };
+    }
+    /**
+     * We treat a "turn" boundary as:
+     * - starts at a user message
+     * - ends right before the next user message, or end of array
+     *
+     * This lets us compact or trim in coherent chunks instead of arbitrary items.
+     *
+     * Items that precede the first user message are not covered by any turn,
+     * unless there is no user message at all, in which case the whole array
+     * is reported as a single chunk.
+     *
+     * @param items - Conversation items to scan, in order.
+     * @returns Array of { startIndex, endIndex, estimatedTokens } objects in
+     *   order of appearance; both indices are inclusive and estimatedTokens
+     *   comes from roughEstimateInputTokens on that slice.
+     */
+    getTurnBoundaries(items) {
+        const boundaries = [];
+        let currentStart = -1; // -1 means no user message has been seen yet
+        for (let i = 0; i < items.length; i++) {
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const item = items[i];
+            const isUserMessage = item?.type === "message" && item?.role === "user";
+            if (isUserMessage) {
+                if (currentStart !== -1) {
+                    const slice = items.slice(currentStart, i);
+                    boundaries.push({
+                        startIndex: currentStart,
+                        endIndex: i - 1,
+                        estimatedTokens: this.roughEstimateInputTokens(slice),
+                    });
                 }
-            ],
-        });
-        // Get available tools from MCP server
+                currentStart = i; // this user message opens the next turn
+            }
+        }
+        if (currentStart !== -1) { // close the last open turn at the end of the array
+            const slice = items.slice(currentStart);
+            boundaries.push({
+                startIndex: currentStart,
+                endIndex: items.length - 1,
+                estimatedTokens: this.roughEstimateInputTokens(slice),
+            });
+        }
+        // If there are no user turns, treat all as one chunk.
+        if (boundaries.length === 0 && items.length > 0) {
+            boundaries.push({
+                startIndex: 0,
+                endIndex: items.length - 1,
+                estimatedTokens: this.roughEstimateInputTokens(items),
+            });
+        }
+        return boundaries;
+    }
+    /**
+     * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
+     * Older turns become compaction candidates.
+     */
+    splitColdAndHotHistory(items) {
+        const turns = this.getTurnBoundaries(items);
+        if (turns.length === 0) {
+            return { coldItems: [], hotItems: items };
+        }
+        let running = 0;
+        let keepFromTurnIndex = turns.length;
+        for (let i = turns.length - 1; i >= 0; i--) {
+            const next = running + turns[i].estimatedTokens;
+            if (next > this.config.hotContextTargetInputTokens) {
+                break;
+            }
+            running = next;
+            keepFromTurnIndex = i;
+        }
+        if (keepFromTurnIndex === turns.length) {
+            // Even the newest turn is too large; keep at least the latest turn hot.
+            const lastTurn = turns[turns.length - 1];
+            return {
+                coldItems: items.slice(0, lastTurn.startIndex),
+                hotItems: items.slice(lastTurn.startIndex),
+            };
+        }
+        const splitIndex = turns[keepFromTurnIndex].startIndex;
+        return {
+            coldItems: items.slice(0, splitIndex),
+            hotItems: items.slice(splitIndex),
+        };
+    }
+    /**
+     * Incrementally update compaction using the cold slice only.
+     */
+    async compactColdHistory(coldItems) {
+        if (coldItems.length === 0)
+            return;
+        try {
+            // Depending on SDK version, the exact shape may vary.
+            // The intent is:
+            // - compact [existing compaction object?, ...new cold items]
+            // - receive an updated opaque compaction item
+            const compactInput = [];
+            if (this.compaction.item)
+                compactInput.push(this.compaction.item);
+            compactInput.push(...coldItems);
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const response = await this.openai.responses.compact({
+                model: this.config.openaiModel,
+                input: compactInput,
+                instructions: this.instructions ?? undefined,
+            });
+            // We expect the new compaction object to be reusable as input.
+            // Some SDKs may return `output`, some `compacted`, etc.
+            const newItem = response?.output?.[0] ??
+                response?.compacted ??
+                response?.item ??
+                null;
+            if (newItem) {
+                this.compaction.item = newItem;
+                this.compaction.compactedTurns += this.getTurnBoundaries(coldItems).length;
+            }
+            else {
+                throw new Error("Compaction response did not include a reusable compaction item");
+            }
+            console.error(`[MCPClient] Compacted ${coldItems.length} old items. Total compacted turns: ${this.compaction.compactedTurns}`);
+        }
+        catch (error) {
+            // Fallback: if compaction fails, just drop the cold part rather than
+            // keeping everything and risking repeated context overflows.
+            console.error("[MCPClient] Compaction failed, dropping cold history:", error);
+        }
+    }
+    /**
+     * Proactively compact when the history has grown past the trigger.
+     * Keeps the newest hot window uncompacted and compacts the older cold window.
+     */
+    async maybeCompactHistory() {
+        if (this.lastInputTokens < this.config.compactTriggerInputTokens) {
+            return;
+        }
+        const { coldItems, hotItems } = this.splitColdAndHotHistory(this.conversationHistory);
+        if (coldItems.length === 0) {
+            return;
+        }
+        await this.compactColdHistory(coldItems);
+        this.conversationHistory = hotItems;
+        this.lastInputTokens = 0;
+        console.error(`[MCPClient] Applied rolling compaction. Kept ${hotItems.length} recent items uncompacted.`);
+    }
+    /**
+     * Keep history from growing pathologically in item count even before token limits.
+     * Uses turn-aware trimming, not arbitrary item slicing.
+     */
+    enforceHardHistoryLimitByTurns(maxTurns = 20) {
+        const turns = this.getTurnBoundaries(this.conversationHistory);
+        if (turns.length <= maxTurns)
+            return;
+        const keepFrom = turns[turns.length - maxTurns].startIndex;
+        const dropped = this.conversationHistory.slice(0, keepFrom);
+        this.conversationHistory = this.conversationHistory.slice(keepFrom);
+        console.error(`[MCPClient] Hard-trimmed ${dropped.length} old uncompacted items, preserving last ${maxTurns} turns`);
+    }
+    /**
+     * Build MCP tool list for OpenAI Responses API.
+     *
+     * Lists the tools exposed by the MCP client and maps each one to a
+     * function tool definition (name, description, parameters, strict: false).
+     * A web_search_preview entry is always placed first, and MCP tools named
+     * web_search or fetch_webpage are left out of the list.
+     * NOTE(review): presumably those two are superseded by the
+     * web_search_preview entry - confirm.
+     *
+     * @returns Promise of the tools array to pass to a Responses API request.
+     */
+    async buildTools() {
         const toolsResponse = await this.client.listTools();
-        // Convert MCP tools to OpenAI Responses API format
-        const tools = toolsResponse.tools.map((tool) => ({
-            type: "function",
-            name: tool.name,
-            description: tool.description || "",
-            parameters: tool.inputSchema,
-            strict: false,
-        }));
-        // Multi-turn conversation with tool calling
+        return [
+            { type: "web_search_preview" }, // non-function tool entry, always listed first
+            ...toolsResponse.tools
+                .filter((t) => t.name !== "web_search" && t.name !== "fetch_webpage")
+                .map((tool) => ({
+                type: "function",
+                name: tool.name,
+                description: tool.description || "", // empty string when the tool has no description
+                parameters: tool.inputSchema,
+                strict: false,
+            })),
+        ];
+    }
+    /**
+     * Create a response against the current full context.
+     */
+    async createResponse(params) {
+        const response = await this.openai.responses.create({
+            model: this.config.openaiModel,
+            instructions: this.instructions ?? undefined,
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            input: params.input,
+            tools: params.tools,
+            previous_response_id: params.previousResponseId,
+            truncation: "disabled",
+            prompt_cache_retention: "24h",
+        });
+        this.captureUsage(response);
+        return response;
+    }
+    /**
+     * Main query method with rolling compaction.
+     *
+     * Appends the query to conversationHistory as a user message, then loops
+     * (at most 15 requests) calling the model and running any function calls
+     * through the MCP client until a reply without function calls arrives.
+     * Model output items and tool outputs are appended to conversationHistory
+     * as they are produced.
+     *
+     * @param query - User text for this turn.
+     * @param onThinking - Optional callback invoked with a short progress
+     *   string for each web search call and each tool call.
+     * @param abortSignal - Optional signal; once aborted, an Error with the
+     *   message Request was cancelled is thrown at the next check.
+     * @param bypassSystemPrompt - When true, the call is delegated to
+     *   processRawQuery and this method does not touch the rolling history.
+     * @returns Concatenated output_text of the assistant message(s), or a
+     *   fixed fallback sentence if the final request without tools fails.
+     */
+    async processQuery(query, onThinking, abortSignal, bypassSystemPrompt = false) {
+        if (abortSignal?.aborted) {
+            throw new Error("Request was cancelled");
+        }
+        if (bypassSystemPrompt) {
+            return this.processRawQuery(query, onThinking, abortSignal);
+        }
+        await this.ensureSystemPrompt();
+        // Proactive compaction based on last real measured request.
+        await this.maybeCompactHistory();
+        const tools = await this.buildTools();
+        const userMessage = this.makeUserMessage(query);
+        // Optional proactive token counting near/around threshold.
+        const projectedInputTokens = await this.countInputTokens(this.buildInput([userMessage]));
+        if (projectedInputTokens >= this.config.compactTriggerInputTokens) {
+            await this.maybeCompactHistory(); // NOTE(review): gated on lastInputTokens, not on projectedInputTokens, so this looks like a no-op unless countInputTokens updates lastInputTokens - confirm
+        }
+        // Add the new user message to rolling history now.
+        this.conversationHistory.push(userMessage);
         let loopCount = 0;
         const maxLoops = 15;
         let finalResponse = "";
         let outOfToolCalls = false;
+        let previousResponseId = undefined;
+        // Carries tool outputs across iterations so previous_response_id chain stays intact.
+        let pendingToolOutputs = null;
         while (loopCount < maxLoops) {
             loopCount++;
-            // Check for cancellation before each API call
             if (abortSignal?.aborted) {
                 throw new Error("Request was cancelled");
             }
-            // Call OpenAI Responses API with error handling
             let response;
             try {
-                response = await this.openai.responses.create({
-                    model: this.config.openaiModel,
-                    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                    input: this.conversationHistory,
-                    tools: outOfToolCalls ? [] : tools,
-                });
+                if (!previousResponseId) {
+                    // First request in this query: send full current context.
+                    response = await this.createResponse({
+                        input: this.buildInput(),
+                        tools: outOfToolCalls ? [] : tools,
+                    });
+                }
+                else {
+                    // Send pending tool outputs to continue the response chain.
+                    response = await this.createResponse({
+                        input: pendingToolOutputs ?? [],
+                        tools: outOfToolCalls ? [] : tools,
+                        previousResponseId,
+                    });
+                    pendingToolOutputs = null;
+                }
             }
             catch (error) {
                 const err = error;
-                // Handle context length exceeded
-                if (err.status === 400 &&
-                    (err.code === 'context_length_exceeded' ||
-                        err.message?.includes('context') ||
-                        err.message?.includes('length'))) {
-                    await this.compactConversation();
-                    response = await this.openai.responses.create({
-                        model: this.config.openaiModel,
-                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                        input: this.conversationHistory,
-                        tools: outOfToolCalls ? [] : tools,
-                    });
+                const message = err.message?.toLowerCase() || "";
+                const contextProblem = err.status === 400 &&
+                    (err.code === "context_length_exceeded" ||
+                        message.includes("context") ||
+                        message.includes("length"));
+                const toolProblem = err.status === 400 &&
+                    (err.code === "response_incomplete" ||
+                        message.includes("incomplete") ||
+                        message.includes("tool"));
+                if (contextProblem) {
+                    await this.maybeCompactHistory(); // NOTE(review): same lastInputTokens gate applies here; confirm the retry input actually shrinks after a failed request
+                    if (!previousResponseId) {
+                        response = await this.createResponse({
+                            input: this.buildInput(),
+                            tools: outOfToolCalls ? [] : tools,
+                        });
+                    }
+                    else {
+                        response = await this.createResponse({
+                            input: pendingToolOutputs ?? [],
+                            tools: outOfToolCalls ? [] : tools,
+                            previousResponseId,
+                        });
+                        pendingToolOutputs = null;
+                    }
                 }
-                // Handle tool calls exhausted
-                else if (err.status === 400 &&
-                    (err.code === 'response_incomplete' ||
-                        err.message?.includes('incomplete') ||
-                        err.message?.includes('tool'))) {
+                else if (toolProblem) {
                     outOfToolCalls = true;
-                    response = await this.openai.responses.create({
-                        model: this.config.openaiModel,
-                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                        input: this.conversationHistory,
-                        tools: [],
-                    });
+                    if (!previousResponseId) {
+                        response = await this.createResponse({
+                            input: this.buildInput(),
+                            tools: [],
+                        });
+                    }
+                    else {
+                        response = await this.createResponse({
+                            input: pendingToolOutputs ?? [],
+                            tools: [],
+                            previousResponseId,
+                        });
+                        pendingToolOutputs = null;
+                    }
                 }
                 else {
                     throw error;
                 }
             }
-            const output = response.output;
-            // Find function_call items
+            previousResponseId = response.id;
+            const output = response.output ?? [];
+            for (const item of output) {
+                if (item.type === "web_search_call") {
+                    onThinking?.("🔍 web_search_preview");
+                }
+            }
             const functionCalls = output.filter((item) => item.type === "function_call");
-            // Check if AI wants to call tools
             if (functionCalls.length > 0) {
+                // Persist model output items into rolling history.
                 this.conversationHistory.push(...output);
+                const toolOutputsForNextStep = [];
                 for (const functionCall of functionCalls) {
-                    // Check for cancellation before each tool call
                     if (abortSignal?.aborted) {
                         throw new Error("Request was cancelled");
                     }
                     const functionName = functionCall.name;
-                    const functionArgs = typeof functionCall.arguments === 'string'
+                    const functionArgs = typeof functionCall.arguments === "string"
                         ? JSON.parse(functionCall.arguments)
                         : functionCall.arguments;
-                    // Build a descriptive thinking message with key arguments
                     let toolDesc = functionName;
-                    if (functionName === "fetch_webpage" && functionArgs.url) {
+                    if (functionArgs?.url && functionName === "fetch_webpage") { // NOTE(review): the JSON.parse above runs outside the per-tool try below, so a parse failure aborts the query after the function_call items were already pushed to history
                         try {
                             toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
                         }
@@ -207,87 +606,156 @@ When responding:
                             toolDesc = `fetch_webpage → ${functionArgs.url}`;
                         }
                     }
-                    else if (functionName === "web_search" && functionArgs.query) {
+                    else if (functionArgs?.query && functionName === "web_search") {
                         toolDesc = `web_search → "${functionArgs.query}"`;
                     }
                     onThinking?.(`🔧 ${toolDesc}`);
                     try {
-                        // Execute the tool via MCP
                         const result = await this.client.callTool({
                             name: functionName,
                             arguments: functionArgs,
                         });
-                        // Add tool result to conversation history
-                        this.conversationHistory.push({
-                            type: "function_call_output",
-                            call_id: functionCall.call_id,
-                            output: JSON.stringify(result.content),
-                        });
+                        const compactOutput = this.compactToolResult(result.content);
+                        const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, compactOutput);
+                        toolOutputsForNextStep.push(toolOutputItem);
+                        this.conversationHistory.push(toolOutputItem);
                     }
                     catch (error) {
-                        this.conversationHistory.push({
-                            type: "function_call_output",
-                            call_id: functionCall.call_id,
-                            output: `Error: ${error instanceof Error ? error.message : String(error)}`,
-                        });
+                        const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`);
+                        toolOutputsForNextStep.push(toolOutputItem);
+                        this.conversationHistory.push(toolOutputItem);
                     }
                 }
+                // Carry tool outputs to the next iteration so the response chain stays intact.
+                pendingToolOutputs = toolOutputsForNextStep;
                 continue;
             }
-            else {
-                // No more tool calls, extract final response
-                for (const item of output) {
-                    if (item.type === "message" && item.role === "assistant") {
-                        for (const contentItem of item.content) {
-                            if (contentItem.type === "output_text") {
-                                finalResponse += contentItem.text;
-                            }
+            for (const item of output) {
+                if (item.type === "message" && item.role === "assistant") {
+                    for (const contentItem of item.content ?? []) {
+                        if (contentItem.type === "output_text") {
+                            finalResponse += contentItem.text;
                         }
                     }
                 }
-                this.conversationHistory.push(...output);
-                break;
             }
+            this.conversationHistory.push(...output);
+            break;
         }
-        // If we hit max loops, make one final request without tools
         if (loopCount >= maxLoops && !finalResponse) {
             try {
                 const finalApiResponse = await this.openai.responses.create({
                     model: this.config.openaiModel,
+                    instructions: this.instructions ?? undefined,
                     // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                    input: this.conversationHistory,
+                    input: this.buildInput(), // loop budget reached with no text collected: resend the current context with tools disabled
                     tools: [],
+                    truncation: "disabled",
+                    prompt_cache_retention: "24h",
                 });
-                const finalOutput = finalApiResponse.output;
-                for (const item of finalOutput) {
+                this.captureUsage(finalApiResponse);
+                for (const item of finalApiResponse.output ?? []) {
                     if (item.type === "message" && item.role === "assistant") {
-                        for (const contentItem of item.content) {
+                        for (const contentItem of item.content ?? []) {
                             if (contentItem.type === "output_text") {
                                 finalResponse += contentItem.text;
                             }
                         }
                     }
                 }
-                this.conversationHistory.push(...finalOutput);
+                this.conversationHistory.push(...(finalApiResponse.output ?? []));
             }
-            catch (error) {
-                finalResponse = "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
+            catch {
+                finalResponse =
+                    "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
             }
         }
-        // Keep conversation history manageable
-        if (this.conversationHistory.length > 50) {
-            const systemMessage = this.conversationHistory[0];
-            const recentItems = this.conversationHistory.slice(-49);
-            this.conversationHistory = [systemMessage, ...recentItems];
-        }
+        // Prevent pathological item growth even when tokens are still OK.
+        this.enforceHardHistoryLimitByTurns(20);
+        // Optional: if this turn caused a large input, compact immediately so the next turn starts cleaner.
+        await this.maybeCompactHistory();
         return finalResponse;
     }
-    clearHistory() {
-        // Keep the system message (first item) when clearing history
-        const systemMessage = this.conversationHistory[0];
-        this.conversationHistory = systemMessage ? [systemMessage] : [];
-    }
-    async cleanup() {
-        await this.client.close();
+    /**
+     * Raw mode: no cached instructions, no rolling history, no compaction state.
+     */
+    async processRawQuery(query, onThinking, abortSignal) {
+        const tools = await this.buildTools();
+        const isolatedHistory = [this.makeUserMessage(query)];
+        let loopCount = 0;
+        const maxLoops = 15;
+        let finalResponse = "";
+        let previousResponseId = undefined;
+        let pendingRawToolOutputs = null;
+        while (loopCount < maxLoops) {
+            loopCount++;
+            if (abortSignal?.aborted)
+                throw new Error("Request was cancelled");
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            const response = await this.openai.responses.create({
+                model: this.config.openaiModel,
+                // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                input: (!previousResponseId ? isolatedHistory : (pendingRawToolOutputs ?? [])),
+                previous_response_id: previousResponseId,
+                tools,
+                truncation: "disabled",
+            });
+            pendingRawToolOutputs = null;
+            this.captureUsage(response);
+            previousResponseId = response.id;
+            const output = response.output ?? [];
+            for (const item of output) {
+                if (item.type === "web_search_call") {
+                    onThinking?.("🔍 web_search_preview");
+                }
+            }
+            const functionCalls = output.filter((item) => item.type === "function_call");
+            if (functionCalls.length > 0) {
+                const newToolOutputs = [];
+                for (const functionCall of functionCalls) {
+                    if (abortSignal?.aborted)
+                        throw new Error("Request was cancelled");
+                    const functionName = functionCall.name;
+                    const functionArgs = typeof functionCall.arguments === "string"
+                        ? JSON.parse(functionCall.arguments)
+                        : functionCall.arguments;
+                    let toolDesc = functionName;
+                    if (functionName === "fetch_webpage" && functionArgs?.url) {
+                        try {
+                            toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
+                        }
+                        catch {
+                            toolDesc = `fetch_webpage → ${functionArgs.url}`;
+                        }
+                    }
+                    else if (functionName === "web_search" && functionArgs?.query) {
+                        toolDesc = `web_search → "${functionArgs.query}"`;
+                    }
+                    onThinking?.(`🔧 ${toolDesc}`);
+                    try {
+                        const result = await this.client.callTool({
+                            name: functionName,
+                            arguments: functionArgs,
+                        });
+                        newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, this.compactToolResult(result.content)));
+                    }
+                    catch (error) {
+                        newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`));
+                    }
+                }
+                pendingRawToolOutputs = newToolOutputs;
+                continue;
+            }
+            for (const item of output) {
+                if (item.type === "message" && item.role === "assistant") {
+                    for (const contentItem of item.content ?? []) {
+                        if (contentItem.type === "output_text")
+                            finalResponse += contentItem.text;
+                    }
+                }
+            }
+            break;
+        }
+        return finalResponse;
     }
 }

package/dist/styles/MCPChat.css CHANGED Viewed

@@ -446,6 +446,18 @@
   font-size: 12px;
   opacity: 0.6;
   margin-top: 6px;
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  flex-wrap: wrap;
+}
+.mcp-chat-token-info { /* small, slightly faded monospace label */
+  font-size: 11px;
+  opacity: 0.75;
+  font-family: monospace;
+  border-left: 1px solid currentColor; /* divider drawn in the element's own text color */
+  padding-left: 8px; /* gap between the divider and the text */
 }
 /* ───────────────────────────────────────────────

package/dist/types.d.ts CHANGED Viewed

@@ -8,6 +8,8 @@ export interface Message {
     isStreaming?: boolean;
     /** Hidden messages are sent to the AI but not shown in the chat bubble list */
     hidden?: boolean;
+    /** Token usage info shown in the footer of assistant messages — never fed back to AI */
+    tokenInfo?: string;
 }
 export interface ThinkingStep {
     id: string;
@@ -21,7 +23,7 @@ export interface MCPChatProps {
     className?: string;
 }
 export interface StreamEvent {
-    type: "thinking" | "content" | "done" | "error";
+    type: "thinking" | "content" | "done" | "error" | "usage";
     message?: string;
     chunk?: string;
 }

package/dist/types.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;~~CAClB~~;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;~~IAChD~~,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
1	+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wFAAwF;IACxF,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@nqminds/mcp-client",
-  "version": "1.0.8",
+  "version": "1.0.11",
   "description": "Reusable MCP client component with AI chat interface",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",