@nqminds/mcp-client 1.0.8 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBAgfd"}
1
+ {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBA6fd"}
package/dist/MCPChat.js CHANGED
@@ -76,7 +76,7 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
76
76
  setMessages((prev) => prev.filter((m) => !m.isStreaming));
77
77
  }
78
78
  };
79
- const sendMessage = useCallback(async (text, hidden = false) => {
79
+ const sendMessage = useCallback(async (text, hidden = false, bypass = false) => {
80
80
  if (!text.trim() || isLoading)
81
81
  return;
82
82
  setIsLoading(true);
@@ -108,6 +108,7 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
108
108
  body: JSON.stringify({
109
109
  message: userMessage.content,
110
110
  context: companyNumber ? { company_number: companyNumber } : undefined,
111
+ bypassSystemPrompt: bypass || undefined,
111
112
  }),
112
113
  signal: abortController.signal,
113
114
  });
@@ -166,6 +167,16 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
166
167
  return updated;
167
168
  });
168
169
  }
170
+ else if (parsed.type === "usage") {
171
+ setMessages((prev) => {
172
+ const updated = [...prev];
173
+ const lastIndex = updated.length - 1;
174
+ if (lastIndex >= 0) {
175
+ updated[lastIndex] = { ...updated[lastIndex], tokenInfo: parsed.message };
176
+ }
177
+ return updated;
178
+ });
179
+ }
169
180
  else if (parsed.type === "error") {
170
181
  throw new Error(parsed.message || "Stream error");
171
182
  }
@@ -231,8 +242,8 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
231
242
  if (!directPromptText.trim())
232
243
  return;
233
244
  setDirectPromptOpen(false);
234
- // Send as a visible message so you can see exactly what went to the agent
235
- await sendMessage(directPromptText, false);
245
+ // bypass=true: skips system prompt, sends raw to the model
246
+ await sendMessage(directPromptText, false, true);
236
247
  setDirectPromptText("");
237
248
  };
238
249
  const toggleTheme = () => {
@@ -314,7 +325,9 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
314
325
  React.createElement("div", { className: "mcp-chat-message-bubble" },
315
326
  msg.role === "assistant" ? (React.createElement("div", { className: "mcp-chat-message-content markdown-content" },
316
327
  React.createElement(ReactMarkdown, { remarkPlugins: [remarkGfm] }, msg.content))) : (React.createElement("div", { className: "mcp-chat-message-content" }, msg.content)),
317
- React.createElement("div", { className: "mcp-chat-message-timestamp" }, msg.timestamp.toLocaleTimeString()))))),
328
+ React.createElement("div", { className: "mcp-chat-message-timestamp" },
329
+ msg.timestamp.toLocaleTimeString(),
330
+ msg.role === "assistant" && msg.tokenInfo && (React.createElement("span", { className: "mcp-chat-token-info" }, msg.tokenInfo))))))),
318
331
  isLoading && (React.createElement("div", { className: "mcp-chat-message mcp-chat-message-assistant" },
319
332
  React.createElement("div", { className: "mcp-chat-thinking" },
320
333
  React.createElement("div", { className: "mcp-chat-thinking-title" },
@@ -1 +1 @@
1
- {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAgG/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
1
+ {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAmH/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
@@ -9,7 +9,7 @@ const clients = new Map();
9
9
  */
10
10
  export function createMCPChatHandler(config) {
11
11
  return async (request) => {
12
- const { message, context, sessionId = "default" } = await request.json();
12
+ const { message, context, sessionId = "default", bypassSystemPrompt = false } = await request.json();
13
13
  // Get or create client for this session
14
14
  let client = clients.get(sessionId);
15
15
  if (!client) {
@@ -41,12 +41,29 @@ export function createMCPChatHandler(config) {
41
41
  }
42
42
  };
43
43
  try {
44
- sendEvent("thinking", { message: "🤔 Analyzing your question..." });
44
+ sendEvent("thinking", { message: bypassSystemPrompt ? "🔧 Sending direct prompt (no system context)…" : "🤔 Analyzing your question..." });
45
45
  // Process the query with thinking callback and abort signal
46
46
  const response = await client.processQuery(context ? `${message}\nContext: ${JSON.stringify(context)}` : message, (thinkingMessage) => {
47
47
  sendEvent("thinking", { message: thinkingMessage });
48
- }, abortController.signal // Pass abort signal to enable cancellation
49
- );
48
+ }, abortController.signal, // Pass abort signal to enable cancellation
49
+ bypassSystemPrompt);
50
+ // Emit token usage summary for debugging
51
+ const usage = client.getUsage();
52
+ if (usage.inputTokens > 0 || usage.outputTokens > 0) {
53
+ const parts = [
54
+ `in: ${usage.inputTokens.toLocaleString()}`,
55
+ `out: ${usage.outputTokens.toLocaleString()}`,
56
+ `total: ${usage.totalTokens.toLocaleString()}`,
57
+ ];
58
+ if (usage.cachedTokens > 0) {
59
+ const cachedPct = Math.round((usage.cachedTokens / usage.inputTokens) * 100);
60
+ parts.push(`cached: ${usage.cachedTokens.toLocaleString()} (${cachedPct}%)`);
61
+ }
62
+ if (usage.compactedTurns > 0) {
63
+ parts.push(`compacted: ${usage.compactedTurns} turn${usage.compactedTurns !== 1 ? "s" : ""}`);
64
+ }
65
+ sendEvent("usage", { message: parts.join(" | ") });
66
+ }
50
67
  // Check if aborted before streaming response
51
68
  if (abortController.signal.aborted) {
52
69
  return;
@@ -1,6 +1,20 @@
1
1
  /**
2
2
  * OpenAI-powered MCP Client
3
- * Uses ChatGPT to intelligently interact with MCP tools
3
+ *
4
+ * Compaction strategy:
5
+ * - Keep the newest hotContextTargetInputTokens tokens' worth of conversation uncompacted.
6
+ * - Once the last measured input token count reaches compactTriggerInputTokens,
7
+ * compact the oldest cold portion and keep the newest portion verbatim.
8
+ * - Feed the returned compaction object back into future requests.
9
+ *
10
+ * Notes:
11
+ * - This is written to align with the OpenAI Responses API shape:
12
+ * - response usage fields
13
+ * - previous_response_id
14
+ * - input token counting
15
+ * - response compaction
16
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
17
+ * by SDK version. The logic here is the important part.
4
18
  */
5
19
  export interface MCPClientConfig {
6
20
  openaiApiKey: string;
@@ -8,19 +22,137 @@ export interface MCPClientConfig {
8
22
  openaiModel?: string;
9
23
  clientName?: string;
10
24
  clientVersion?: string;
25
+ /**
26
+ * Trigger compaction when the last measured input token count reaches this threshold.
27
+ * Example policy (matches the built-in default of 200000):
28
+ * - compact when last measured input >= 200k
29
+ */
30
+ compactTriggerInputTokens?: number;
31
+ /**
32
+ * Keep roughly this many of the most recent input tokens uncompacted.
33
+ * Example policy from your suggestion:
34
+ * - retain last ~100k uncompacted
35
+ */
36
+ hotContextTargetInputTokens?: number;
37
+ /**
38
+ * Guardrail for unusually large tool outputs stored in history.
39
+ */
40
+ maxToolOutputChars?: number;
41
+ }
42
+ interface UsageStats {
43
+ inputTokens: number;
44
+ outputTokens: number;
45
+ totalTokens: number;
46
+ cachedTokens: number;
47
+ reasoningTokens: number;
48
+ compactedTurns: number;
11
49
  }
12
50
  export declare class MCPClientOpenAI {
13
51
  private client;
14
52
  private openai;
15
53
  private transport;
54
+ /**
55
+ * Instructions are sent using the Responses API `instructions` field,
56
+ * not inserted as a fake message inside the rolling conversation items.
57
+ */
58
+ private instructions;
59
+ /**
60
+ * Rolling uncompacted conversation items.
61
+ * This contains the most recent "hot" context only.
62
+ */
16
63
  private conversationHistory;
17
- private lastCompaction;
64
+ /**
65
+ * Opaque compaction object returned by OpenAI.
66
+ * This represents older "cold" context that has been compacted.
67
+ */
68
+ private compaction;
69
+ /**
70
+ * Last measured input tokens from a real Responses API call.
71
+ */
72
+ private lastInputTokens;
73
+ /**
74
+ * Latest usage snapshot for logging/inspection.
75
+ */
76
+ private lastUsage;
18
77
  private config;
19
78
  constructor(config: MCPClientConfig);
20
- private compactConversation;
21
79
  connect(): Promise<void>;
22
- processQuery(query: string, onThinking?: (message: string) => void, abortSignal?: AbortSignal): Promise<string>;
23
- clearHistory(): void;
24
80
  cleanup(): Promise<void>;
81
+ clearHistory(): void;
82
+ getUsage(): UsageStats;
83
+ /**
84
+ * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
85
+ * Cached per client session.
86
+ */
87
+ private ensureSystemPrompt;
88
+ /**
89
+ * Build request input:
90
+ * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
91
+ */
92
+ private buildInput;
93
+ /**
94
+ * Count input tokens before making a request.
95
+ * Falls back to a simple rough estimate if the SDK method is unavailable.
96
+ */
97
+ private countInputTokens;
98
+ /**
99
+ * Very rough fallback estimator.
100
+ * Only used if token counting endpoint is unavailable in the SDK version in use.
101
+ */
102
+ private roughEstimateInputTokens;
103
+ /**
104
+ * Normalize usage from Responses API.
105
+ */
106
+ private captureUsage;
107
+ /**
108
+ * Compact oversized tool outputs before storing them in rolling history.
109
+ */
110
+ private compactToolResult;
111
+ private makeUserMessage;
112
+ private makeFunctionOutput;
113
+ /**
114
+ * We treat a "turn" boundary as:
115
+ * - starts at a user message
116
+ * - ends right before the next user message, or end of array
117
+ *
118
+ * This lets us compact or trim in coherent chunks instead of arbitrary items.
119
+ */
120
+ private getTurnBoundaries;
121
+ /**
122
+ * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
123
+ * Older turns become compaction candidates.
124
+ */
125
+ private splitColdAndHotHistory;
126
+ /**
127
+ * Incrementally update compaction using the cold slice only.
128
+ */
129
+ private compactColdHistory;
130
+ /**
131
+ * Proactively compact when the history has grown past the trigger.
132
+ * Keeps the newest hot window uncompacted and compacts the older cold window.
133
+ */
134
+ private maybeCompactHistory;
135
+ /**
136
+ * Keep history from growing pathologically in item count even before token limits.
137
+ * Uses turn-aware trimming, not arbitrary item slicing.
138
+ */
139
+ private enforceHardHistoryLimitByTurns;
140
+ /**
141
+ * Build MCP tool list for OpenAI Responses API.
142
+ */
143
+ private buildTools;
144
+ /**
145
+ * Create a response against the current full context.
146
+ */
147
+ private createResponse;
148
+ /**
149
+ * Main query method with rolling compaction.
150
+ */
151
+ processQuery(query: string, onThinking?: (message: string) => void, abortSignal?: AbortSignal, bypassSystemPrompt?: boolean): Promise<string>;
152
+ /**
153
+ * Raw mode: no cached instructions, no rolling history, no compaction state.
154
+ */
155
+ private processRawQuery;
25
156
  }
157
+ export {};
26
158
  //# sourceMappingURL=openai-client.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IACxC,OAAO,CAAC,mBAAmB,CAAsB;IACjD,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;YA4ErB,mBAAmB;IAoB3B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EAAE,WAAW,CAAC,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;IA+MrH,YAAY,IAAI,IAAI;IAMd,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
1
+ {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,UAAU,UAAU;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAiBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IAExC;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAuB;IAE3C;;;OAGG;IACH,OAAO,CAAC,mBAAmB,CAA2B;IAEtD;;;OAGG;IACH,OAAO,CAAC,UAAU,CAGhB;IAEF;;OAEG;IACH,OAAO,CAAC,eAAe,CAAK;IAE5B;;OAEG;IACH,OAAO,CAAC,SAAS,CAOf;IAEF,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;IAoC7B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,YAAY,IAAI,IAAI;IAiBpB,QAAQ,IAAI,UAAU;IAItB;;;OAGG;YACW,kBAAkB;IA0BhC;;;OAGG;IACH,OAAO,CAAC,UAAU;IAalB;;;OAGG;YACW,gBAAgB;IAuB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,OAAO,CAAC,YAAY;IAoBpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA8EzB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IA2CzB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;OAEG;YACW,kBAAkB;IA4ChC;;;OAGG;YACW,mBAAmB;IAoBjC;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAatC;;OAEG;YACW,UAAU;IAiBxB;;OAEG;YACW,cAAc;IAoB5B;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EACtC,WAAW,CAAC,EAAE,WAAW,EACzB,kBAAkB,UAAQ,GACzB,OAAO,CAAC,MAAM,CAAC;IA6OlB;;OAEG;YACW,eAAe;CAwG9B"}
@@ -1,31 +1,78 @@
1
1
  /**
2
2
  * OpenAI-powered MCP Client
3
- * Uses ChatGPT to intelligently interact with MCP tools
3
+ *
4
+ * Compaction strategy:
5
+ * - Keep the newest hotContextTargetInputTokens tokens' worth of conversation uncompacted.
6
+ * - Once the last measured input token count reaches compactTriggerInputTokens,
7
+ * compact the oldest cold portion and keep the newest portion verbatim.
8
+ * - Feed the returned compaction object back into future requests.
9
+ *
10
+ * Notes:
11
+ * - This is written to align with the OpenAI Responses API shape:
12
+ * - response usage fields
13
+ * - previous_response_id
14
+ * - input token counting
15
+ * - response compaction
16
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
17
+ * by SDK version. The logic here is the important part.
4
18
  */
5
19
  import { Client } from "@modelcontextprotocol/sdk/client/index.js";
6
20
  import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
7
21
  import OpenAI from "openai";
8
22
  export class MCPClientOpenAI {
9
23
  constructor(config) {
24
+ /**
25
+ * Instructions are sent using the Responses API `instructions` field,
26
+ * not inserted as a fake message inside the rolling conversation items.
27
+ */
28
+ this.instructions = null;
29
+ /**
30
+ * Rolling uncompacted conversation items.
31
+ * This contains the most recent "hot" context only.
32
+ */
10
33
  this.conversationHistory = [];
11
- this.lastCompaction = 0;
34
+ /**
35
+ * Opaque compaction object returned by OpenAI.
36
+ * This represents older "cold" context that has been compacted.
37
+ */
38
+ this.compaction = {
39
+ item: null,
40
+ compactedTurns: 0,
41
+ };
42
+ /**
43
+ * Last measured input tokens from a real Responses API call.
44
+ */
45
+ this.lastInputTokens = 0;
46
+ /**
47
+ * Latest usage snapshot for logging/inspection.
48
+ */
49
+ this.lastUsage = {
50
+ inputTokens: 0,
51
+ outputTokens: 0,
52
+ totalTokens: 0,
53
+ cachedTokens: 0,
54
+ reasoningTokens: 0,
55
+ compactedTurns: 0,
56
+ };
12
57
  this.config = {
13
58
  openaiApiKey: config.openaiApiKey,
14
59
  mcpServerCommand: config.mcpServerCommand,
15
- openaiModel: config.openaiModel || "chatgpt-5-mini",
60
+ openaiModel: config.openaiModel || "gpt-5-mini",
16
61
  clientName: config.clientName || "mcp-flair-client",
17
62
  clientVersion: config.clientVersion || "1.0.0",
63
+ compactTriggerInputTokens: config.compactTriggerInputTokens ?? 200000,
64
+ hotContextTargetInputTokens: config.hotContextTargetInputTokens ?? 100000,
65
+ maxToolOutputChars: config.maxToolOutputChars ?? 20000,
18
66
  };
19
67
  this.openai = new OpenAI({
20
68
  apiKey: this.config.openaiApiKey,
21
69
  });
22
- // Parse the server command and args
23
70
  const serverCmd = this.config.mcpServerCommand.split(" ");
24
71
  const command = serverCmd[0];
25
72
  const args = serverCmd.slice(1);
26
73
  this.transport = new StdioClientTransport({
27
- command: command,
28
- args: args,
74
+ command,
75
+ args,
29
76
  });
30
77
  this.client = new Client({
31
78
  name: this.config.clientName,
@@ -33,173 +80,525 @@ export class MCPClientOpenAI {
33
80
  }, {
34
81
  capabilities: {},
35
82
  });
36
- // Initialize conversation with system message
37
- this.conversationHistory = [
38
- {
39
- type: "message",
40
- role: "system",
41
- content: [
42
- {
43
- type: "input_text",
44
- text: `You are a helpful assistant with access to Companies House data through specialized tools.
45
-
46
- CRITICAL CONTEXT AWARENESS RULES:
47
- 1. Carefully track ALL entities you mention in your responses (company numbers, names, people, dates, etc.)
48
- 2. When the user refers to "that company," "the person," "those results," or uses similar references, ALWAYS look back at what you just discussed in the immediately preceding messages
49
- 3. If you mentioned specific company numbers, names, or other identifiers, remember them for follow-up questions
50
- 4. Before saying "I don't have a record of X," review your recent responses to check if you did mention it
51
- 5. Maintain awareness of the conversation flow - if you just provided information about something, the user's next question likely refers to it
52
-
53
- RESPONSE FORMATTING RULES:
54
- - NEVER show raw JSON data to users unless they explicitly ask for "JSON", "raw data", or similar
55
- - Use rich Markdown formatting — the UI renders it fully (bold, italic, headings, tables, code blocks)
56
- - Use **bold** for key facts, names, amounts, and important values
57
- - Use ## and ### headings to organise longer responses into clear sections
58
- - Use tables whenever comparing multiple entities or showing structured data (e.g. list of officers, financial figures across years, search results) — prefer tables over bullet lists for multi-field data
59
- - Use bullet lists only for genuinely unordered or enumerable items (e.g. a list of risks, a list of SIC codes) — do NOT default to bullets for everything
60
- - Convert dates to readable format (e.g., "15 March 2023" instead of "2023-03-15")
61
- - Format addresses as natural inline text, not as structured fields
62
- - When showing company officers or PSCs, use a table with columns like Name, Role, Nationality, DOB rather than a bullet per person
63
- - When showing financial figures, use a table with Year / Metric / Value columns
64
- - Only include the most relevant information — don't dump all available fields
65
- - Avoid walls of bullet points; use prose sentences for narrative context and reserve lists/tables for structured data
66
-
67
- When responding:
68
- - Be concise and direct
69
- - Use tools to fetch accurate, up-to-date Companies House data
70
- - Track key identifiers (company numbers, PSC names, etc.) across the conversation
71
- - If unclear what the user is referring to, check your previous response first before asking for clarification
72
- - Never expose internal implementation details like "MCP Server" or tool names to users`,
73
- },
74
- ],
75
- },
76
- ];
77
83
  }
78
- async compactConversation() {
84
+ async connect() {
85
+ await this.client.connect(this.transport);
86
+ }
87
+ async cleanup() {
88
+ await this.client.close();
89
+ }
90
+ clearHistory() {
91
+ this.conversationHistory = [];
92
+ this.compaction = {
93
+ item: null,
94
+ compactedTurns: 0,
95
+ };
96
+ this.lastInputTokens = 0;
97
+ this.lastUsage = {
98
+ inputTokens: 0,
99
+ outputTokens: 0,
100
+ totalTokens: 0,
101
+ cachedTokens: 0,
102
+ reasoningTokens: 0,
103
+ compactedTurns: 0,
104
+ };
105
+ }
106
+ getUsage() {
107
+ return { ...this.lastUsage, compactedTurns: this.compaction.compactedTurns };
108
+ }
109
+ /**
110
+ * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
111
+ * Cached per client session.
112
+ */
113
+ async ensureSystemPrompt() {
114
+ if (this.instructions)
115
+ return;
79
116
  try {
80
- const compactionResponse = await this.openai.responses.compact({
81
- model: this.config.openaiModel,
117
+ // SDK typing may not expose getPrompt.
118
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
119
+ const result = await this.client.getPrompt({ name: "system-prompt" });
120
+ const parts = [];
121
+ for (const msg of result.messages ?? []) {
82
122
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
83
- input: this.conversationHistory,
84
- });
85
- this.conversationHistory = compactionResponse.output;
86
- this.lastCompaction = Date.now();
123
+ const c = msg.content;
124
+ if (typeof c === "string")
125
+ parts.push(c);
126
+ else if (c?.text)
127
+ parts.push(c.text);
128
+ }
129
+ const text = parts.join("\n\n").trim();
130
+ if (text) {
131
+ this.instructions = text;
132
+ }
87
133
  }
88
134
  catch (error) {
89
- // Keep system message and last 25 items
90
- if (this.conversationHistory.length > 26) {
91
- const systemMessage = this.conversationHistory[0];
92
- const recentItems = this.conversationHistory.slice(-25);
93
- this.conversationHistory = [systemMessage, ...recentItems];
94
- }
135
+ console.error("[MCPClient] Failed to fetch system prompt from MCP server:", error);
136
+ this.instructions = null;
95
137
  }
96
138
  }
97
- async connect() {
98
- await this.client.connect(this.transport);
139
+ /**
140
+ * Build request input:
141
+ * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
142
+ */
143
+ buildInput(newItems = []) {
144
+ const input = [];
145
+ if (this.compaction.item) {
146
+ input.push(this.compaction.item);
147
+ }
148
+ input.push(...this.conversationHistory);
149
+ input.push(...newItems);
150
+ return input;
99
151
  }
100
- async processQuery(query, onThinking, abortSignal) {
101
- // Check for cancellation at start
102
- if (abortSignal?.aborted) {
103
- throw new Error("Request was cancelled");
152
+ /**
153
+ * Count input tokens before making a request.
154
+ * Falls back to a simple rough estimate if the SDK method is unavailable.
155
+ */
156
+ async countInputTokens(input) {
157
+ try {
158
+ // Some SDK versions may expose this as responses.inputTokens.count(...)
159
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
160
+ const result = await this.openai.responses.inputTokens.count({
161
+ model: this.config.openaiModel,
162
+ input,
163
+ instructions: this.instructions ?? undefined,
164
+ tools: [],
165
+ });
166
+ // The result shape is not certain; try the likely field names, then fall back to the rough estimate.
167
+ return (result?.input_tokens ??
168
+ result?.total_tokens ??
169
+ result?.count ??
170
+ this.roughEstimateInputTokens(input));
171
+ }
172
+ catch {
173
+ return this.roughEstimateInputTokens(input);
174
+ }
175
+ }
176
+ /**
177
+ * Very rough fallback estimator.
178
+ * Only used if token counting endpoint is unavailable in the SDK version in use.
179
+ */
180
+ roughEstimateInputTokens(input) {
181
+ const serialized = JSON.stringify({
182
+ instructions: this.instructions,
183
+ input,
184
+ });
185
+ // Rough heuristic: about 4 characters per token, which suits English-like text.
186
+ return Math.ceil(serialized.length / 4);
187
+ }
188
+ /**
189
+ * Normalize usage from Responses API.
190
+ */
191
+ captureUsage(response) {
192
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
193
+ const usage = response?.usage ?? {};
194
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
195
+ const inputDetails = usage?.input_tokens_details ?? {};
196
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
197
+ const outputDetails = usage?.output_tokens_details ?? {};
198
+ this.lastUsage = {
199
+ inputTokens: usage.input_tokens ?? 0,
200
+ outputTokens: usage.output_tokens ?? 0,
201
+ totalTokens: usage.total_tokens ?? 0,
202
+ cachedTokens: inputDetails.cached_tokens ?? 0,
203
+ reasoningTokens: outputDetails.reasoning_tokens ?? 0,
204
+ compactedTurns: this.compaction.compactedTurns,
205
+ };
206
+ this.lastInputTokens = this.lastUsage.inputTokens;
207
+ }
208
+ /**
209
+ * Compact oversized tool outputs before storing them in rolling history.
210
+ */
211
+ compactToolResult(value) {
212
+ const seen = new WeakSet();
213
+ const prune = (v) => {
214
+ if (v == null)
215
+ return v;
216
+ if (typeof v === "string") {
217
+ if (v.length <= this.config.maxToolOutputChars)
218
+ return v;
219
+ return (v.slice(0, this.config.maxToolOutputChars) +
220
+ `\n...[truncated ${v.length - this.config.maxToolOutputChars} chars]`);
221
+ }
222
+ if (typeof v !== "object")
223
+ return v;
224
+ if (Array.isArray(v)) {
225
+ const maxItems = 30;
226
+ const sliced = v.slice(0, maxItems).map(prune);
227
+ if (v.length > maxItems) {
228
+ sliced.push(`...[truncated ${v.length - maxItems} items]`);
229
+ }
230
+ return sliced;
231
+ }
232
+ if (seen.has(v))
233
+ return "[circular]";
234
+ seen.add(v);
235
+ const obj = v;
236
+ const out = {};
237
+ const entries = Object.entries(obj);
238
+ // Prefer keeping fewer, more informative fields.
239
+ const preferredFirst = [
240
+ "title",
241
+ "name",
242
+ "id",
243
+ "url",
244
+ "summary",
245
+ "description",
246
+ "text",
247
+ "content",
248
+ "status",
249
+ "result",
250
+ "items",
251
+ "data",
252
+ ];
253
+ const sorted = entries.sort(([a], [b]) => {
254
+ const ai = preferredFirst.indexOf(a);
255
+ const bi = preferredFirst.indexOf(b);
256
+ const av = ai === -1 ? 999 : ai;
257
+ const bv = bi === -1 ? 999 : bi;
258
+ return av - bv;
259
+ });
260
+ const maxFields = 25;
261
+ for (const [k, val] of sorted.slice(0, maxFields)) {
262
+ out[k] = prune(val);
263
+ }
264
+ if (entries.length > maxFields) {
265
+ out.__truncated_fields__ = entries.length - maxFields;
266
+ }
267
+ return out;
268
+ };
269
+ try {
270
+ return JSON.stringify(prune(value));
104
271
  }
105
- // Check if we should compact
106
- const shouldCompact = this.conversationHistory.length >= 40 &&
107
- (Date.now() - this.lastCompaction > 10 * 60 * 1000);
108
- if (shouldCompact) {
109
- await this.compactConversation();
272
+ catch {
273
+ const s = String(value);
274
+ return s.length <= this.config.maxToolOutputChars
275
+ ? s
276
+ : s.slice(0, this.config.maxToolOutputChars) +
277
+ `\n...[truncated ${s.length - this.config.maxToolOutputChars} chars]`;
110
278
  }
111
- // Add user message to conversation history
112
- this.conversationHistory.push({
279
+ }
280
+ makeUserMessage(text) {
281
+ return {
113
282
  type: "message",
114
283
  role: "user",
115
- content: [
116
- {
117
- type: "input_text",
118
- text: query,
284
+ content: [{ type: "input_text", text }],
285
+ };
286
+ }
287
+ makeFunctionOutput(callId, output) {
288
+ return {
289
+ type: "function_call_output",
290
+ call_id: callId,
291
+ output,
292
+ };
293
+ }
294
+ /**
295
+ * We treat a "turn" boundary as:
296
+ * - starts at a user message
297
+ * - ends right before the next user message, or end of array
298
+ *
299
+ * This lets us compact or trim in coherent chunks instead of arbitrary items.
300
+ */
301
+ getTurnBoundaries(items) {
302
+ const boundaries = [];
303
+ let currentStart = -1;
304
+ for (let i = 0; i < items.length; i++) {
305
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
306
+ const item = items[i];
307
+ const isUserMessage = item?.type === "message" && item?.role === "user";
308
+ if (isUserMessage) {
309
+ if (currentStart !== -1) {
310
+ const slice = items.slice(currentStart, i);
311
+ boundaries.push({
312
+ startIndex: currentStart,
313
+ endIndex: i - 1,
314
+ estimatedTokens: this.roughEstimateInputTokens(slice),
315
+ });
119
316
  }
120
- ],
121
- });
122
- // Get available tools from MCP server
317
+ currentStart = i;
318
+ }
319
+ }
320
+ if (currentStart !== -1) {
321
+ const slice = items.slice(currentStart);
322
+ boundaries.push({
323
+ startIndex: currentStart,
324
+ endIndex: items.length - 1,
325
+ estimatedTokens: this.roughEstimateInputTokens(slice),
326
+ });
327
+ }
328
+ // If there are no user turns, treat all as one chunk.
329
+ if (boundaries.length === 0 && items.length > 0) {
330
+ boundaries.push({
331
+ startIndex: 0,
332
+ endIndex: items.length - 1,
333
+ estimatedTokens: this.roughEstimateInputTokens(items),
334
+ });
335
+ }
336
+ return boundaries;
337
+ }
338
+ /**
339
+ * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
340
+ * Older turns become compaction candidates.
341
+ */
342
+ splitColdAndHotHistory(items) {
343
+ const turns = this.getTurnBoundaries(items);
344
+ if (turns.length === 0) {
345
+ return { coldItems: [], hotItems: items };
346
+ }
347
+ let running = 0;
348
+ let keepFromTurnIndex = turns.length;
349
+ for (let i = turns.length - 1; i >= 0; i--) {
350
+ const next = running + turns[i].estimatedTokens;
351
+ if (next > this.config.hotContextTargetInputTokens) {
352
+ break;
353
+ }
354
+ running = next;
355
+ keepFromTurnIndex = i;
356
+ }
357
+ if (keepFromTurnIndex === turns.length) {
358
+ // Even the newest turn is too large; keep at least the latest turn hot.
359
+ const lastTurn = turns[turns.length - 1];
360
+ return {
361
+ coldItems: items.slice(0, lastTurn.startIndex),
362
+ hotItems: items.slice(lastTurn.startIndex),
363
+ };
364
+ }
365
+ const splitIndex = turns[keepFromTurnIndex].startIndex;
366
+ return {
367
+ coldItems: items.slice(0, splitIndex),
368
+ hotItems: items.slice(splitIndex),
369
+ };
370
+ }
371
+ /**
372
+ * Incrementally update compaction using the cold slice only.
373
+ */
374
+ async compactColdHistory(coldItems) {
375
+ if (coldItems.length === 0)
376
+ return;
377
+ try {
378
+ // Depending on SDK version, the exact shape may vary.
379
+ // The intent is:
380
+ // - compact [existing compaction object?, ...new cold items]
381
+ // - receive an updated opaque compaction item
382
+ const compactInput = [];
383
+ if (this.compaction.item)
384
+ compactInput.push(this.compaction.item);
385
+ compactInput.push(...coldItems);
386
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
387
+ const response = await this.openai.responses.compact({
388
+ model: this.config.openaiModel,
389
+ input: compactInput,
390
+ instructions: this.instructions ?? undefined,
391
+ });
392
+ // We expect the new compaction object to be reusable as input.
393
+ // Some SDKs may return `output`, some `compacted`, etc.
394
+ const newItem = response?.output?.[0] ??
395
+ response?.compacted ??
396
+ response?.item ??
397
+ null;
398
+ if (newItem) {
399
+ this.compaction.item = newItem;
400
+ this.compaction.compactedTurns += this.getTurnBoundaries(coldItems).length;
401
+ }
402
+ else {
403
+ throw new Error("Compaction response did not include a reusable compaction item");
404
+ }
405
+ console.error(`[MCPClient] Compacted ${coldItems.length} old items. Total compacted turns: ${this.compaction.compactedTurns}`);
406
+ }
407
+ catch (error) {
408
+ // Fallback: if compaction fails, just drop the cold part rather than
409
+ // keeping everything and risking repeated context overflows.
410
+ console.error("[MCPClient] Compaction failed, dropping cold history:", error);
411
+ }
412
+ }
413
+ /**
414
+ * Proactively compact when the history has grown past the trigger.
415
+ * Keeps the newest hot window uncompacted and compacts the older cold window.
416
+ */
417
+ async maybeCompactHistory() {
418
+ if (this.lastInputTokens < this.config.compactTriggerInputTokens) {
419
+ return;
420
+ }
421
+ const { coldItems, hotItems } = this.splitColdAndHotHistory(this.conversationHistory);
422
+ if (coldItems.length === 0) {
423
+ return;
424
+ }
425
+ await this.compactColdHistory(coldItems);
426
+ this.conversationHistory = hotItems;
427
+ this.lastInputTokens = 0;
428
+ console.error(`[MCPClient] Applied rolling compaction. Kept ${hotItems.length} recent items uncompacted.`);
429
+ }
430
+ /**
431
+ * Keep history from growing pathologically in item count even before token limits.
432
+ * Uses turn-aware trimming, not arbitrary item slicing.
433
+ */
434
+ enforceHardHistoryLimitByTurns(maxTurns = 20) {
435
+ const turns = this.getTurnBoundaries(this.conversationHistory);
436
+ if (turns.length <= maxTurns)
437
+ return;
438
+ const keepFrom = turns[turns.length - maxTurns].startIndex;
439
+ const dropped = this.conversationHistory.slice(0, keepFrom);
440
+ this.conversationHistory = this.conversationHistory.slice(keepFrom);
441
+ console.error(`[MCPClient] Hard-trimmed ${dropped.length} old uncompacted items, preserving last ${maxTurns} turns`);
442
+ }
443
+ /**
444
+ * Build MCP tool list for OpenAI Responses API.
445
+ */
446
+ async buildTools() {
123
447
  const toolsResponse = await this.client.listTools();
124
- // Convert MCP tools to OpenAI Responses API format
125
- const tools = toolsResponse.tools.map((tool) => ({
126
- type: "function",
127
- name: tool.name,
128
- description: tool.description || "",
129
- parameters: tool.inputSchema,
130
- strict: false,
131
- }));
132
- // Multi-turn conversation with tool calling
448
+ return [
449
+ { type: "web_search_preview" },
450
+ ...toolsResponse.tools
451
+ .filter((t) => t.name !== "web_search" && t.name !== "fetch_webpage")
452
+ .map((tool) => ({
453
+ type: "function",
454
+ name: tool.name,
455
+ description: tool.description || "",
456
+ parameters: tool.inputSchema,
457
+ strict: false,
458
+ })),
459
+ ];
460
+ }
461
+ /**
462
+ * Create a response against the current full context.
463
+ */
464
+ async createResponse(params) {
465
+ const response = await this.openai.responses.create({
466
+ model: this.config.openaiModel,
467
+ instructions: this.instructions ?? undefined,
468
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
469
+ input: params.input,
470
+ tools: params.tools,
471
+ previous_response_id: params.previousResponseId,
472
+ truncation: "disabled",
473
+ prompt_cache_retention: "24h",
474
+ });
475
+ this.captureUsage(response);
476
+ return response;
477
+ }
478
+ /**
479
+ * Main query method with rolling compaction.
480
+ */
481
+ async processQuery(query, onThinking, abortSignal, bypassSystemPrompt = false) {
482
+ if (abortSignal?.aborted) {
483
+ throw new Error("Request was cancelled");
484
+ }
485
+ if (bypassSystemPrompt) {
486
+ return this.processRawQuery(query, onThinking, abortSignal);
487
+ }
488
+ await this.ensureSystemPrompt();
489
+ // Proactive compaction based on last real measured request.
490
+ await this.maybeCompactHistory();
491
+ const tools = await this.buildTools();
492
+ const userMessage = this.makeUserMessage(query);
493
+ // Optional proactive token counting near/around threshold.
494
+ const projectedInputTokens = await this.countInputTokens(this.buildInput([userMessage]));
495
+ if (projectedInputTokens >= this.config.compactTriggerInputTokens) {
496
+ await this.maybeCompactHistory();
497
+ }
498
+ // Add the new user message to rolling history now.
499
+ this.conversationHistory.push(userMessage);
133
500
  let loopCount = 0;
134
501
  const maxLoops = 15;
135
502
  let finalResponse = "";
136
503
  let outOfToolCalls = false;
504
+ let previousResponseId = undefined;
505
+ // Carries tool outputs across iterations so previous_response_id chain stays intact.
506
+ let pendingToolOutputs = null;
137
507
  while (loopCount < maxLoops) {
138
508
  loopCount++;
139
- // Check for cancellation before each API call
140
509
  if (abortSignal?.aborted) {
141
510
  throw new Error("Request was cancelled");
142
511
  }
143
- // Call OpenAI Responses API with error handling
144
512
  let response;
145
513
  try {
146
- response = await this.openai.responses.create({
147
- model: this.config.openaiModel,
148
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
149
- input: this.conversationHistory,
150
- tools: outOfToolCalls ? [] : tools,
151
- });
514
+ if (!previousResponseId) {
515
+ // First request in this query: send full current context.
516
+ response = await this.createResponse({
517
+ input: this.buildInput(),
518
+ tools: outOfToolCalls ? [] : tools,
519
+ });
520
+ }
521
+ else {
522
+ // Send pending tool outputs to continue the response chain.
523
+ response = await this.createResponse({
524
+ input: pendingToolOutputs ?? [],
525
+ tools: outOfToolCalls ? [] : tools,
526
+ previousResponseId,
527
+ });
528
+ pendingToolOutputs = null;
529
+ }
152
530
  }
153
531
  catch (error) {
154
532
  const err = error;
155
- // Handle context length exceeded
156
- if (err.status === 400 &&
157
- (err.code === 'context_length_exceeded' ||
158
- err.message?.includes('context') ||
159
- err.message?.includes('length'))) {
160
- await this.compactConversation();
161
- response = await this.openai.responses.create({
162
- model: this.config.openaiModel,
163
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
164
- input: this.conversationHistory,
165
- tools: outOfToolCalls ? [] : tools,
166
- });
533
+ const message = err.message?.toLowerCase() || "";
534
+ const contextProblem = err.status === 400 &&
535
+ (err.code === "context_length_exceeded" ||
536
+ message.includes("context") ||
537
+ message.includes("length"));
538
+ const toolProblem = err.status === 400 &&
539
+ (err.code === "response_incomplete" ||
540
+ message.includes("incomplete") ||
541
+ message.includes("tool"));
542
+ if (contextProblem) {
543
+ await this.maybeCompactHistory();
544
+ if (!previousResponseId) {
545
+ response = await this.createResponse({
546
+ input: this.buildInput(),
547
+ tools: outOfToolCalls ? [] : tools,
548
+ });
549
+ }
550
+ else {
551
+ response = await this.createResponse({
552
+ input: pendingToolOutputs ?? [],
553
+ tools: outOfToolCalls ? [] : tools,
554
+ previousResponseId,
555
+ });
556
+ pendingToolOutputs = null;
557
+ }
167
558
  }
168
- // Handle tool calls exhausted
169
- else if (err.status === 400 &&
170
- (err.code === 'response_incomplete' ||
171
- err.message?.includes('incomplete') ||
172
- err.message?.includes('tool'))) {
559
+ else if (toolProblem) {
173
560
  outOfToolCalls = true;
174
- response = await this.openai.responses.create({
175
- model: this.config.openaiModel,
176
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
177
- input: this.conversationHistory,
178
- tools: [],
179
- });
561
+ if (!previousResponseId) {
562
+ response = await this.createResponse({
563
+ input: this.buildInput(),
564
+ tools: [],
565
+ });
566
+ }
567
+ else {
568
+ response = await this.createResponse({
569
+ input: pendingToolOutputs ?? [],
570
+ tools: [],
571
+ previousResponseId,
572
+ });
573
+ pendingToolOutputs = null;
574
+ }
180
575
  }
181
576
  else {
182
577
  throw error;
183
578
  }
184
579
  }
185
- const output = response.output;
186
- // Find function_call items
580
+ previousResponseId = response.id;
581
+ const output = response.output ?? [];
582
+ for (const item of output) {
583
+ if (item.type === "web_search_call") {
584
+ onThinking?.("🔍 web_search_preview");
585
+ }
586
+ }
187
587
  const functionCalls = output.filter((item) => item.type === "function_call");
188
- // Check if AI wants to call tools
189
588
  if (functionCalls.length > 0) {
589
+ // Persist model output items into rolling history.
190
590
  this.conversationHistory.push(...output);
591
+ const toolOutputsForNextStep = [];
191
592
  for (const functionCall of functionCalls) {
192
- // Check for cancellation before each tool call
193
593
  if (abortSignal?.aborted) {
194
594
  throw new Error("Request was cancelled");
195
595
  }
196
596
  const functionName = functionCall.name;
197
- const functionArgs = typeof functionCall.arguments === 'string'
597
+ const functionArgs = typeof functionCall.arguments === "string"
198
598
  ? JSON.parse(functionCall.arguments)
199
599
  : functionCall.arguments;
200
- // Build a descriptive thinking message with key arguments
201
600
  let toolDesc = functionName;
202
- if (functionName === "fetch_webpage" && functionArgs.url) {
601
+ if (functionArgs?.url && functionName === "fetch_webpage") {
203
602
  try {
204
603
  toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
205
604
  }
@@ -207,87 +606,156 @@ When responding:
207
606
  toolDesc = `fetch_webpage → ${functionArgs.url}`;
208
607
  }
209
608
  }
210
- else if (functionName === "web_search" && functionArgs.query) {
609
+ else if (functionArgs?.query && functionName === "web_search") {
211
610
  toolDesc = `web_search → "${functionArgs.query}"`;
212
611
  }
213
612
  onThinking?.(`🔧 ${toolDesc}`);
214
613
  try {
215
- // Execute the tool via MCP
216
614
  const result = await this.client.callTool({
217
615
  name: functionName,
218
616
  arguments: functionArgs,
219
617
  });
220
- // Add tool result to conversation history
221
- this.conversationHistory.push({
222
- type: "function_call_output",
223
- call_id: functionCall.call_id,
224
- output: JSON.stringify(result.content),
225
- });
618
+ const compactOutput = this.compactToolResult(result.content);
619
+ const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, compactOutput);
620
+ toolOutputsForNextStep.push(toolOutputItem);
621
+ this.conversationHistory.push(toolOutputItem);
226
622
  }
227
623
  catch (error) {
228
- this.conversationHistory.push({
229
- type: "function_call_output",
230
- call_id: functionCall.call_id,
231
- output: `Error: ${error instanceof Error ? error.message : String(error)}`,
232
- });
624
+ const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`);
625
+ toolOutputsForNextStep.push(toolOutputItem);
626
+ this.conversationHistory.push(toolOutputItem);
233
627
  }
234
628
  }
629
+ // Carry tool outputs to the next iteration so the response chain stays intact.
630
+ pendingToolOutputs = toolOutputsForNextStep;
235
631
  continue;
236
632
  }
237
- else {
238
- // No more tool calls, extract final response
239
- for (const item of output) {
240
- if (item.type === "message" && item.role === "assistant") {
241
- for (const contentItem of item.content) {
242
- if (contentItem.type === "output_text") {
243
- finalResponse += contentItem.text;
244
- }
633
+ for (const item of output) {
634
+ if (item.type === "message" && item.role === "assistant") {
635
+ for (const contentItem of item.content ?? []) {
636
+ if (contentItem.type === "output_text") {
637
+ finalResponse += contentItem.text;
245
638
  }
246
639
  }
247
640
  }
248
- this.conversationHistory.push(...output);
249
- break;
250
641
  }
642
+ this.conversationHistory.push(...output);
643
+ break;
251
644
  }
252
- // If we hit max loops, make one final request without tools
253
645
  if (loopCount >= maxLoops && !finalResponse) {
254
646
  try {
255
647
  const finalApiResponse = await this.openai.responses.create({
256
648
  model: this.config.openaiModel,
649
+ instructions: this.instructions ?? undefined,
257
650
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
258
- input: this.conversationHistory,
651
+ input: this.buildInput(),
259
652
  tools: [],
653
+ truncation: "disabled",
654
+ prompt_cache_retention: "24h",
260
655
  });
261
- const finalOutput = finalApiResponse.output;
262
- for (const item of finalOutput) {
656
+ this.captureUsage(finalApiResponse);
657
+ for (const item of finalApiResponse.output ?? []) {
263
658
  if (item.type === "message" && item.role === "assistant") {
264
- for (const contentItem of item.content) {
659
+ for (const contentItem of item.content ?? []) {
265
660
  if (contentItem.type === "output_text") {
266
661
  finalResponse += contentItem.text;
267
662
  }
268
663
  }
269
664
  }
270
665
  }
271
- this.conversationHistory.push(...finalOutput);
666
+ this.conversationHistory.push(...(finalApiResponse.output ?? []));
272
667
  }
273
- catch (error) {
274
- finalResponse = "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
668
+ catch {
669
+ finalResponse =
670
+ "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
275
671
  }
276
672
  }
277
- // Keep conversation history manageable
278
- if (this.conversationHistory.length > 50) {
279
- const systemMessage = this.conversationHistory[0];
280
- const recentItems = this.conversationHistory.slice(-49);
281
- this.conversationHistory = [systemMessage, ...recentItems];
282
- }
673
+ // Prevent pathological item growth even when tokens are still OK.
674
+ this.enforceHardHistoryLimitByTurns(20);
675
+ // Optional: if this turn caused a large input, compact immediately so the next turn starts cleaner.
676
+ await this.maybeCompactHistory();
283
677
  return finalResponse;
284
678
  }
285
- clearHistory() {
286
- // Keep the system message (first item) when clearing history
287
- const systemMessage = this.conversationHistory[0];
288
- this.conversationHistory = systemMessage ? [systemMessage] : [];
289
- }
290
- async cleanup() {
291
- await this.client.close();
679
+ /**
680
+ * Raw mode: no cached instructions, no rolling history, no compaction state.
681
+ */
682
+ async processRawQuery(query, onThinking, abortSignal) {
683
+ const tools = await this.buildTools();
684
+ const isolatedHistory = [this.makeUserMessage(query)];
685
+ let loopCount = 0;
686
+ const maxLoops = 15;
687
+ let finalResponse = "";
688
+ let previousResponseId = undefined;
689
+ let pendingRawToolOutputs = null;
690
+ while (loopCount < maxLoops) {
691
+ loopCount++;
692
+ if (abortSignal?.aborted)
693
+ throw new Error("Request was cancelled");
694
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
695
+ const response = await this.openai.responses.create({
696
+ model: this.config.openaiModel,
697
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
698
+ input: (!previousResponseId ? isolatedHistory : (pendingRawToolOutputs ?? [])),
699
+ previous_response_id: previousResponseId,
700
+ tools,
701
+ truncation: "disabled",
702
+ });
703
+ pendingRawToolOutputs = null;
704
+ this.captureUsage(response);
705
+ previousResponseId = response.id;
706
+ const output = response.output ?? [];
707
+ for (const item of output) {
708
+ if (item.type === "web_search_call") {
709
+ onThinking?.("🔍 web_search_preview");
710
+ }
711
+ }
712
+ const functionCalls = output.filter((item) => item.type === "function_call");
713
+ if (functionCalls.length > 0) {
714
+ const newToolOutputs = [];
715
+ for (const functionCall of functionCalls) {
716
+ if (abortSignal?.aborted)
717
+ throw new Error("Request was cancelled");
718
+ const functionName = functionCall.name;
719
+ const functionArgs = typeof functionCall.arguments === "string"
720
+ ? JSON.parse(functionCall.arguments)
721
+ : functionCall.arguments;
722
+ let toolDesc = functionName;
723
+ if (functionName === "fetch_webpage" && functionArgs?.url) {
724
+ try {
725
+ toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
726
+ }
727
+ catch {
728
+ toolDesc = `fetch_webpage → ${functionArgs.url}`;
729
+ }
730
+ }
731
+ else if (functionName === "web_search" && functionArgs?.query) {
732
+ toolDesc = `web_search → "${functionArgs.query}"`;
733
+ }
734
+ onThinking?.(`🔧 ${toolDesc}`);
735
+ try {
736
+ const result = await this.client.callTool({
737
+ name: functionName,
738
+ arguments: functionArgs,
739
+ });
740
+ newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, this.compactToolResult(result.content)));
741
+ }
742
+ catch (error) {
743
+ newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`));
744
+ }
745
+ }
746
+ pendingRawToolOutputs = newToolOutputs;
747
+ continue;
748
+ }
749
+ for (const item of output) {
750
+ if (item.type === "message" && item.role === "assistant") {
751
+ for (const contentItem of item.content ?? []) {
752
+ if (contentItem.type === "output_text")
753
+ finalResponse += contentItem.text;
754
+ }
755
+ }
756
+ }
757
+ break;
758
+ }
759
+ return finalResponse;
292
760
  }
293
761
  }
@@ -446,6 +446,18 @@
446
446
  font-size: 12px;
447
447
  opacity: 0.6;
448
448
  margin-top: 6px;
449
+ display: flex;
450
+ align-items: center;
451
+ gap: 8px;
452
+ flex-wrap: wrap;
453
+ }
454
+
455
+ .mcp-chat-token-info {
456
+ font-size: 11px;
457
+ opacity: 0.75;
458
+ font-family: monospace;
459
+ border-left: 1px solid currentColor;
460
+ padding-left: 8px;
449
461
  }
450
462
 
451
463
  /* ───────────────────────────────────────────────
package/dist/types.d.ts CHANGED
@@ -8,6 +8,8 @@ export interface Message {
8
8
  isStreaming?: boolean;
9
9
  /** Hidden messages are sent to the AI but not shown in the chat bubble list */
10
10
  hidden?: boolean;
11
+ /** Token usage info shown in the footer of assistant messages — never fed back to AI */
12
+ tokenInfo?: string;
11
13
  }
12
14
  export interface ThinkingStep {
13
15
  id: string;
@@ -21,7 +23,7 @@ export interface MCPChatProps {
21
23
  className?: string;
22
24
  }
23
25
  export interface StreamEvent {
24
- type: "thinking" | "content" | "done" | "error";
26
+ type: "thinking" | "content" | "done" | "error" | "usage";
25
27
  message?: string;
26
28
  chunk?: string;
27
29
  }
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wFAAwF;IACxF,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nqminds/mcp-client",
3
- "version": "1.0.8",
3
+ "version": "1.0.11",
4
4
  "description": "Reusable MCP client component with AI chat interface",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",