@nqminds/mcp-client 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBAifd"}
1
+ {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBA6fd"}
package/dist/MCPChat.js CHANGED
@@ -167,6 +167,16 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
167
167
  return updated;
168
168
  });
169
169
  }
170
+ else if (parsed.type === "usage") {
171
+ setMessages((prev) => {
172
+ const updated = [...prev];
173
+ const lastIndex = updated.length - 1;
174
+ if (lastIndex >= 0) {
175
+ updated[lastIndex] = { ...updated[lastIndex], tokenInfo: parsed.message };
176
+ }
177
+ return updated;
178
+ });
179
+ }
170
180
  else if (parsed.type === "error") {
171
181
  throw new Error(parsed.message || "Stream error");
172
182
  }
@@ -315,7 +325,9 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
315
325
  React.createElement("div", { className: "mcp-chat-message-bubble" },
316
326
  msg.role === "assistant" ? (React.createElement("div", { className: "mcp-chat-message-content markdown-content" },
317
327
  React.createElement(ReactMarkdown, { remarkPlugins: [remarkGfm] }, msg.content))) : (React.createElement("div", { className: "mcp-chat-message-content" }, msg.content)),
318
- React.createElement("div", { className: "mcp-chat-message-timestamp" }, msg.timestamp.toLocaleTimeString()))))),
328
+ React.createElement("div", { className: "mcp-chat-message-timestamp" },
329
+ msg.timestamp.toLocaleTimeString(),
330
+ msg.role === "assistant" && msg.tokenInfo && (React.createElement("span", { className: "mcp-chat-token-info" }, msg.tokenInfo))))))),
319
331
  isLoading && (React.createElement("div", { className: "mcp-chat-message mcp-chat-message-assistant" },
320
332
  React.createElement("div", { className: "mcp-chat-thinking" },
321
333
  React.createElement("div", { className: "mcp-chat-thinking-title" },
@@ -1 +1 @@
1
- {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAiG/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
1
+ {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAmH/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
@@ -47,6 +47,23 @@ export function createMCPChatHandler(config) {
47
47
  sendEvent("thinking", { message: thinkingMessage });
48
48
  }, abortController.signal, // Pass abort signal to enable cancellation
49
49
  bypassSystemPrompt);
50
+ // Emit token usage summary for debugging
51
+ const usage = client.getUsage();
52
+ if (usage.inputTokens > 0 || usage.outputTokens > 0) {
53
+ const parts = [
54
+ `in: ${usage.inputTokens.toLocaleString()}`,
55
+ `out: ${usage.outputTokens.toLocaleString()}`,
56
+ `total: ${usage.totalTokens.toLocaleString()}`,
57
+ ];
58
+ if (usage.cachedTokens > 0) {
59
+ const cachedPct = Math.round((usage.cachedTokens / usage.inputTokens) * 100);
60
+ parts.push(`cached: ${usage.cachedTokens.toLocaleString()} (${cachedPct}%)`);
61
+ }
62
+ if (usage.compactedTurns > 0) {
63
+ parts.push(`compacted: ${usage.compactedTurns} turn${usage.compactedTurns !== 1 ? "s" : ""}`);
64
+ }
65
+ sendEvent("usage", { message: parts.join(" | ") });
66
+ }
50
67
  // Check if aborted before streaming response
51
68
  if (abortController.signal.aborted) {
52
69
  return;
@@ -1,6 +1,20 @@
1
1
  /**
2
2
  * OpenAI-powered MCP Client
3
- * Uses ChatGPT to intelligently interact with MCP tools
3
+ *
4
+ * Compaction strategy:
5
+ * - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
6
+ * - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
7
+ * compact the oldest cold portion and keep the newest portion verbatim.
8
+ * - Feed the returned compaction object back into future requests.
9
+ *
10
+ * Notes:
11
+ * - This is written to align with the OpenAI Responses API shape:
12
+ * - response usage fields
13
+ * - previous_response_id
14
+ * - input token counting
15
+ * - response compaction
16
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
17
+ * by SDK version. The logic here is the important part.
4
18
  */
5
19
  export interface MCPClientConfig {
6
20
  openaiApiKey: string;
@@ -8,30 +22,137 @@ export interface MCPClientConfig {
8
22
  openaiModel?: string;
9
23
  clientName?: string;
10
24
  clientVersion?: string;
25
+ /**
26
+ * Trigger compaction when the last measured input tokens reaches this threshold.
27
+ * Default policy:
28
+ * - compact when last measured input >= 200k
29
+ */
30
+ compactTriggerInputTokens?: number;
31
+ /**
32
+ * Keep roughly this many of the most recent input tokens uncompacted.
33
+ * Default policy:
34
+ * - retain last ~100k uncompacted
35
+ */
36
+ hotContextTargetInputTokens?: number;
37
+ /**
38
+ * Guardrail for unusually large tool outputs stored in history.
39
+ */
40
+ maxToolOutputChars?: number;
41
+ }
42
/**
 * Token accounting snapshot exposed through `MCPClientOpenAI.getUsage()`.
 * All fields are plain counters.
 */
interface UsageStats {
    /** Input token count of the usage snapshot. */
    inputTokens: number;
    /** Output token count of the usage snapshot. */
    outputTokens: number;
    /** Total token count of the usage snapshot. */
    totalTokens: number;
    /** Portion of the input tokens reported as cached. */
    cachedTokens: number;
    /** Portion of the output tokens reported as reasoning tokens. */
    reasoningTokens: number;
    /** Number of conversation turns folded into the compaction object so far. */
    compactedTurns: number;
}
12
50
  export declare class MCPClientOpenAI {
13
51
  private client;
14
52
  private openai;
15
53
  private transport;
54
+ /**
55
+ * Instructions are sent using the Responses API `instructions` field,
56
+ * not inserted as a fake message inside the rolling conversation items.
57
+ */
58
+ private instructions;
59
+ /**
60
+ * Rolling uncompacted conversation items.
61
+ * This contains the most recent "hot" context only.
62
+ */
16
63
  private conversationHistory;
17
- private lastCompaction;
64
+ /**
65
+ * Opaque compaction object returned by OpenAI.
66
+ * This represents older "cold" context that has been compacted.
67
+ */
68
+ private compaction;
69
+ /**
70
+ * Last measured input tokens from a real Responses API call.
71
+ */
72
+ private lastInputTokens;
73
+ /**
74
+ * Latest usage snapshot for logging/inspection.
75
+ */
76
+ private lastUsage;
18
77
  private config;
19
78
  constructor(config: MCPClientConfig);
20
- private compactConversation;
79
+ connect(): Promise<void>;
80
+ cleanup(): Promise<void>;
81
+ clearHistory(): void;
82
+ getUsage(): UsageStats;
21
83
  /**
22
- * Fetches the system prompt from the MCP server's registered "system-prompt" prompt
23
- * and prepends it to conversationHistory. Cached only runs once per session.
24
- * Direct Prompt (bypass mode) skips this entirely.
84
+ * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
85
+ * Cached per client session.
25
86
  */
26
87
  private ensureSystemPrompt;
27
- connect(): Promise<void>;
88
+ /**
89
+ * Build request input:
90
+ * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
91
+ */
92
+ private buildInput;
93
+ /**
94
+ * Count input tokens before making a request.
95
+ * Falls back to a simple rough estimate if the SDK method is unavailable.
96
+ */
97
+ private countInputTokens;
98
+ /**
99
+ * Very rough fallback estimator.
100
+ * Only used if token counting endpoint is unavailable in the SDK version in use.
101
+ */
102
+ private roughEstimateInputTokens;
103
+ /**
104
+ * Normalize usage from Responses API.
105
+ */
106
+ private captureUsage;
107
+ /**
108
+ * Compact oversized tool outputs before storing them in rolling history.
109
+ */
110
+ private compactToolResult;
111
+ private makeUserMessage;
112
+ private makeFunctionOutput;
113
+ /**
114
+ * We treat a "turn" boundary as:
115
+ * - starts at a user message
116
+ * - ends right before the next user message, or end of array
117
+ *
118
+ * This lets us compact or trim in coherent chunks instead of arbitrary items.
119
+ */
120
+ private getTurnBoundaries;
121
+ /**
122
+ * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
123
+ * Older turns become compaction candidates.
124
+ */
125
+ private splitColdAndHotHistory;
126
+ /**
127
+ * Incrementally update compaction using the cold slice only.
128
+ */
129
+ private compactColdHistory;
130
+ /**
131
+ * Proactively compact when the history has grown past the trigger.
132
+ * Keeps the newest hot window uncompacted and compacts the older cold window.
133
+ */
134
+ private maybeCompactHistory;
135
+ /**
136
+ * Keep history from growing pathologically in item count even before token limits.
137
+ * Uses turn-aware trimming, not arbitrary item slicing.
138
+ */
139
+ private enforceHardHistoryLimitByTurns;
140
+ /**
141
+ * Build MCP tool list for OpenAI Responses API.
142
+ */
143
+ private buildTools;
144
+ /**
145
+ * Create a response against the current full context.
146
+ */
147
+ private createResponse;
148
+ /**
149
+ * Main query method with rolling compaction.
150
+ */
28
151
  processQuery(query: string, onThinking?: (message: string) => void, abortSignal?: AbortSignal, bypassSystemPrompt?: boolean): Promise<string>;
29
152
  /**
30
- * Sends a raw query directly to the model — no system prompt, no conversation history.
31
- * Used by the Direct Prompt dev tool to test prompts verbatim.
153
+ * Raw mode: no cached instructions, no rolling history, no compaction state.
32
154
  */
33
155
  private processRawQuery;
34
- clearHistory(): void;
35
- cleanup(): Promise<void>;
36
156
  }
157
+ export {};
37
158
  //# sourceMappingURL=openai-client.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IACxC,OAAO,CAAC,mBAAmB,CAAsB;IACjD,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;YAsCrB,mBAAmB;IAoBjC;;;;OAIG;YACW,kBAAkB;IA4B1B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EAAE,WAAW,CAAC,EAAE,WAAW,EAAE,kBAAkB,UAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAuNjJ;;;OAGG;YACW,eAAe;IA4E7B,YAAY,IAAI,IAAI;IAOd,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
1
+ {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,UAAU,UAAU;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAiBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IAExC;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAuB;IAE3C;;;OAGG;IACH,OAAO,CAAC,mBAAmB,CAA2B;IAEtD;;;OAGG;IACH,OAAO,CAAC,UAAU,CAGhB;IAEF;;OAEG;IACH,OAAO,CAAC,eAAe,CAAK;IAE5B;;OAEG;IACH,OAAO,CAAC,SAAS,CAOf;IAEF,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;IAoC7B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,YAAY,IAAI,IAAI;IAiBpB,QAAQ,IAAI,UAAU;IAItB;;;OAGG;YACW,kBAAkB;IA0BhC;;;OAGG;IACH,OAAO,CAAC,UAAU;IAalB;;;OAGG;YACW,gBAAgB;IAuB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,OAAO,CAAC,YAAY;IAoBpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA8EzB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IA2CzB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;OAEG;YACW,kBAAkB;IA4ChC;;;OAGG;YACW,mBAAmB;IAoBjC;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAatC;;OAEG;YACW,UAAU;IAiBxB;;OAEG;YACW,cAAc;IAoB5B;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EACtC,WAAW,CAAC,EAAE,WAAW,EACzB,kBAAkB,UAAQ,GACzB,OAAO,CAAC,MAAM,CAAC;IA6OlB;;OAEG;YACW,eAAe;CAwG9B"}
@@ -1,31 +1,78 @@
1
1
  /**
2
2
  * OpenAI-powered MCP Client
3
- * Uses ChatGPT to intelligently interact with MCP tools
3
+ *
4
+ * Compaction strategy:
5
+ * - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
6
+ * - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
7
+ * compact the oldest cold portion and keep the newest portion verbatim.
8
+ * - Feed the returned compaction object back into future requests.
9
+ *
10
+ * Notes:
11
+ * - This is written to align with the OpenAI Responses API shape:
12
+ * - response usage fields
13
+ * - previous_response_id
14
+ * - input token counting
15
+ * - response compaction
16
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
17
+ * by SDK version. The logic here is the important part.
4
18
  */
5
19
  import { Client } from "@modelcontextprotocol/sdk/client/index.js";
6
20
  import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
7
21
  import OpenAI from "openai";
8
22
  export class MCPClientOpenAI {
9
23
  constructor(config) {
24
+ /**
25
+ * Instructions are sent using the Responses API `instructions` field,
26
+ * not inserted as a fake message inside the rolling conversation items.
27
+ */
28
+ this.instructions = null;
29
+ /**
30
+ * Rolling uncompacted conversation items.
31
+ * This contains the most recent "hot" context only.
32
+ */
10
33
  this.conversationHistory = [];
11
- this.lastCompaction = 0;
34
+ /**
35
+ * Opaque compaction object returned by OpenAI.
36
+ * This represents older "cold" context that has been compacted.
37
+ */
38
+ this.compaction = {
39
+ item: null,
40
+ compactedTurns: 0,
41
+ };
42
+ /**
43
+ * Last measured input tokens from a real Responses API call.
44
+ */
45
+ this.lastInputTokens = 0;
46
+ /**
47
+ * Latest usage snapshot for logging/inspection.
48
+ */
49
+ this.lastUsage = {
50
+ inputTokens: 0,
51
+ outputTokens: 0,
52
+ totalTokens: 0,
53
+ cachedTokens: 0,
54
+ reasoningTokens: 0,
55
+ compactedTurns: 0,
56
+ };
12
57
  this.config = {
13
58
  openaiApiKey: config.openaiApiKey,
14
59
  mcpServerCommand: config.mcpServerCommand,
15
- openaiModel: config.openaiModel || "chatgpt-5-mini",
60
+ openaiModel: config.openaiModel || "gpt-5-mini",
16
61
  clientName: config.clientName || "mcp-flair-client",
17
62
  clientVersion: config.clientVersion || "1.0.0",
63
+ compactTriggerInputTokens: config.compactTriggerInputTokens ?? 200000,
64
+ hotContextTargetInputTokens: config.hotContextTargetInputTokens ?? 100000,
65
+ maxToolOutputChars: config.maxToolOutputChars ?? 20000,
18
66
  };
19
67
  this.openai = new OpenAI({
20
68
  apiKey: this.config.openaiApiKey,
21
69
  });
22
- // Parse the server command and args
23
70
  const serverCmd = this.config.mcpServerCommand.split(" ");
24
71
  const command = serverCmd[0];
25
72
  const args = serverCmd.slice(1);
26
73
  this.transport = new StdioClientTransport({
27
- command: command,
28
- args: args,
74
+ command,
75
+ args,
29
76
  });
30
77
  this.client = new Client({
31
78
  name: this.config.clientName,
@@ -33,177 +80,525 @@ export class MCPClientOpenAI {
33
80
  }, {
34
81
  capabilities: {},
35
82
  });
36
- // Initialize conversation with system message
37
- // System prompt is fetched from the MCP server on first use (see ensureSystemPrompt)
83
+ }
84
+ async connect() {
85
+ await this.client.connect(this.transport);
86
+ }
87
+ async cleanup() {
88
+ await this.client.close();
89
+ }
90
+ clearHistory() {
38
91
  this.conversationHistory = [];
92
+ this.compaction = {
93
+ item: null,
94
+ compactedTurns: 0,
95
+ };
96
+ this.lastInputTokens = 0;
97
+ this.lastUsage = {
98
+ inputTokens: 0,
99
+ outputTokens: 0,
100
+ totalTokens: 0,
101
+ cachedTokens: 0,
102
+ reasoningTokens: 0,
103
+ compactedTurns: 0,
104
+ };
39
105
  }
40
- async compactConversation() {
41
- try {
42
- const compactionResponse = await this.openai.responses.compact({
43
- model: this.config.openaiModel,
44
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
45
- input: this.conversationHistory,
46
- });
47
- this.conversationHistory = compactionResponse.output;
48
- this.lastCompaction = Date.now();
49
- }
50
- catch (error) {
51
- // Keep system message and last 25 items
52
- if (this.conversationHistory.length > 26) {
53
- const systemMessage = this.conversationHistory[0];
54
- const recentItems = this.conversationHistory.slice(-25);
55
- this.conversationHistory = [systemMessage, ...recentItems];
56
- }
57
- }
106
+ getUsage() {
107
+ return { ...this.lastUsage, compactedTurns: this.compaction.compactedTurns };
58
108
  }
59
109
  /**
60
- * Fetches the system prompt from the MCP server's registered "system-prompt" prompt
61
- * and prepends it to conversationHistory. Cached only runs once per session.
62
- * Direct Prompt (bypass mode) skips this entirely.
110
+ * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
111
+ * Cached per client session.
63
112
  */
64
113
  async ensureSystemPrompt() {
65
- // Already loaded if history starts with a system message
66
- if (this.conversationHistory[0]?.role === "system")
114
+ if (this.instructions)
67
115
  return;
68
116
  try {
117
+ // SDK typing may not expose getPrompt.
69
118
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
70
119
  const result = await this.client.getPrompt({ name: "system-prompt" });
71
120
  const parts = [];
72
- for (const msg of result.messages) {
121
+ for (const msg of result.messages ?? []) {
122
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
73
123
  const c = msg.content;
74
124
  if (typeof c === "string")
75
125
  parts.push(c);
76
126
  else if (c?.text)
77
127
  parts.push(c.text);
78
128
  }
79
- const text = parts.join("\n\n");
129
+ const text = parts.join("\n\n").trim();
80
130
  if (text) {
81
- this.conversationHistory = [
82
- {
83
- type: "message",
84
- role: "system",
85
- content: [{ type: "input_text", text }],
86
- },
87
- ...this.conversationHistory,
88
- ];
131
+ this.instructions = text;
89
132
  }
90
133
  }
91
134
  catch (error) {
92
135
  console.error("[MCPClient] Failed to fetch system prompt from MCP server:", error);
136
+ this.instructions = null;
93
137
  }
94
138
  }
95
- async connect() {
96
- await this.client.connect(this.transport);
139
+ /**
140
+ * Build request input:
141
+ * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
142
+ */
143
+ buildInput(newItems = []) {
144
+ const input = [];
145
+ if (this.compaction.item) {
146
+ input.push(this.compaction.item);
147
+ }
148
+ input.push(...this.conversationHistory);
149
+ input.push(...newItems);
150
+ return input;
151
+ }
152
+ /**
153
+ * Count input tokens before making a request.
154
+ * Falls back to a simple rough estimate if the SDK method is unavailable.
155
+ */
156
+ async countInputTokens(input) {
157
+ try {
158
+ // Some SDK versions may expose this as responses.inputTokens.count(...)
159
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
160
+ const result = await this.openai.responses.inputTokens.count({
161
+ model: this.config.openaiModel,
162
+ input,
163
+ instructions: this.instructions ?? undefined,
164
+ tools: [],
165
+ });
166
+ // Common guess for returned shape
167
+ return (result?.input_tokens ??
168
+ result?.total_tokens ??
169
+ result?.count ??
170
+ this.roughEstimateInputTokens(input));
171
+ }
172
+ catch {
173
+ return this.roughEstimateInputTokens(input);
174
+ }
97
175
  }
176
+ /**
177
+ * Very rough fallback estimator.
178
+ * Only used if token counting endpoint is unavailable in the SDK version in use.
179
+ */
180
+ roughEstimateInputTokens(input) {
181
+ const serialized = JSON.stringify({
182
+ instructions: this.instructions,
183
+ input,
184
+ });
185
+ // Very rough English-ish heuristic.
186
+ return Math.ceil(serialized.length / 4);
187
+ }
188
+ /**
189
+ * Normalize usage from Responses API.
190
+ */
191
+ captureUsage(response) {
192
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
193
+ const usage = response?.usage ?? {};
194
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
195
+ const inputDetails = usage?.input_tokens_details ?? {};
196
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
197
+ const outputDetails = usage?.output_tokens_details ?? {};
198
+ this.lastUsage = {
199
+ inputTokens: usage.input_tokens ?? 0,
200
+ outputTokens: usage.output_tokens ?? 0,
201
+ totalTokens: usage.total_tokens ?? 0,
202
+ cachedTokens: inputDetails.cached_tokens ?? 0,
203
+ reasoningTokens: outputDetails.reasoning_tokens ?? 0,
204
+ compactedTurns: this.compaction.compactedTurns,
205
+ };
206
+ this.lastInputTokens = this.lastUsage.inputTokens;
207
+ }
208
+ /**
209
+ * Compact oversized tool outputs before storing them in rolling history.
210
+ */
211
+ compactToolResult(value) {
212
+ const seen = new WeakSet();
213
+ const prune = (v) => {
214
+ if (v == null)
215
+ return v;
216
+ if (typeof v === "string") {
217
+ if (v.length <= this.config.maxToolOutputChars)
218
+ return v;
219
+ return (v.slice(0, this.config.maxToolOutputChars) +
220
+ `\n...[truncated ${v.length - this.config.maxToolOutputChars} chars]`);
221
+ }
222
+ if (typeof v !== "object")
223
+ return v;
224
+ if (Array.isArray(v)) {
225
+ const maxItems = 30;
226
+ const sliced = v.slice(0, maxItems).map(prune);
227
+ if (v.length > maxItems) {
228
+ sliced.push(`...[truncated ${v.length - maxItems} items]`);
229
+ }
230
+ return sliced;
231
+ }
232
+ if (seen.has(v))
233
+ return "[circular]";
234
+ seen.add(v);
235
+ const obj = v;
236
+ const out = {};
237
+ const entries = Object.entries(obj);
238
+ // Prefer keeping fewer, more informative fields.
239
+ const preferredFirst = [
240
+ "title",
241
+ "name",
242
+ "id",
243
+ "url",
244
+ "summary",
245
+ "description",
246
+ "text",
247
+ "content",
248
+ "status",
249
+ "result",
250
+ "items",
251
+ "data",
252
+ ];
253
+ const sorted = entries.sort(([a], [b]) => {
254
+ const ai = preferredFirst.indexOf(a);
255
+ const bi = preferredFirst.indexOf(b);
256
+ const av = ai === -1 ? 999 : ai;
257
+ const bv = bi === -1 ? 999 : bi;
258
+ return av - bv;
259
+ });
260
+ const maxFields = 25;
261
+ for (const [k, val] of sorted.slice(0, maxFields)) {
262
+ out[k] = prune(val);
263
+ }
264
+ if (entries.length > maxFields) {
265
+ out.__truncated_fields__ = entries.length - maxFields;
266
+ }
267
+ return out;
268
+ };
269
+ try {
270
+ return JSON.stringify(prune(value));
271
+ }
272
+ catch {
273
+ const s = String(value);
274
+ return s.length <= this.config.maxToolOutputChars
275
+ ? s
276
+ : s.slice(0, this.config.maxToolOutputChars) +
277
+ `\n...[truncated ${s.length - this.config.maxToolOutputChars} chars]`;
278
+ }
279
+ }
280
+ makeUserMessage(text) {
281
+ return {
282
+ type: "message",
283
+ role: "user",
284
+ content: [{ type: "input_text", text }],
285
+ };
286
+ }
287
+ makeFunctionOutput(callId, output) {
288
+ return {
289
+ type: "function_call_output",
290
+ call_id: callId,
291
+ output,
292
+ };
293
+ }
294
+ /**
295
+ * We treat a "turn" boundary as:
296
+ * - starts at a user message
297
+ * - ends right before the next user message, or end of array
298
+ *
299
+ * This lets us compact or trim in coherent chunks instead of arbitrary items.
300
+ */
301
+ getTurnBoundaries(items) {
302
+ const boundaries = [];
303
+ let currentStart = -1;
304
+ for (let i = 0; i < items.length; i++) {
305
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
306
+ const item = items[i];
307
+ const isUserMessage = item?.type === "message" && item?.role === "user";
308
+ if (isUserMessage) {
309
+ if (currentStart !== -1) {
310
+ const slice = items.slice(currentStart, i);
311
+ boundaries.push({
312
+ startIndex: currentStart,
313
+ endIndex: i - 1,
314
+ estimatedTokens: this.roughEstimateInputTokens(slice),
315
+ });
316
+ }
317
+ currentStart = i;
318
+ }
319
+ }
320
+ if (currentStart !== -1) {
321
+ const slice = items.slice(currentStart);
322
+ boundaries.push({
323
+ startIndex: currentStart,
324
+ endIndex: items.length - 1,
325
+ estimatedTokens: this.roughEstimateInputTokens(slice),
326
+ });
327
+ }
328
+ // If there are no user turns, treat all as one chunk.
329
+ if (boundaries.length === 0 && items.length > 0) {
330
+ boundaries.push({
331
+ startIndex: 0,
332
+ endIndex: items.length - 1,
333
+ estimatedTokens: this.roughEstimateInputTokens(items),
334
+ });
335
+ }
336
+ return boundaries;
337
+ }
338
+ /**
339
+ * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
340
+ * Older turns become compaction candidates.
341
+ */
342
+ splitColdAndHotHistory(items) {
343
+ const turns = this.getTurnBoundaries(items);
344
+ if (turns.length === 0) {
345
+ return { coldItems: [], hotItems: items };
346
+ }
347
+ let running = 0;
348
+ let keepFromTurnIndex = turns.length;
349
+ for (let i = turns.length - 1; i >= 0; i--) {
350
+ const next = running + turns[i].estimatedTokens;
351
+ if (next > this.config.hotContextTargetInputTokens) {
352
+ break;
353
+ }
354
+ running = next;
355
+ keepFromTurnIndex = i;
356
+ }
357
+ if (keepFromTurnIndex === turns.length) {
358
+ // Even the newest turn is too large; keep at least the latest turn hot.
359
+ const lastTurn = turns[turns.length - 1];
360
+ return {
361
+ coldItems: items.slice(0, lastTurn.startIndex),
362
+ hotItems: items.slice(lastTurn.startIndex),
363
+ };
364
+ }
365
+ const splitIndex = turns[keepFromTurnIndex].startIndex;
366
+ return {
367
+ coldItems: items.slice(0, splitIndex),
368
+ hotItems: items.slice(splitIndex),
369
+ };
370
+ }
371
+ /**
372
+ * Incrementally update compaction using the cold slice only.
373
+ */
374
+ async compactColdHistory(coldItems) {
375
+ if (coldItems.length === 0)
376
+ return;
377
+ try {
378
+ // Depending on SDK version, the exact shape may vary.
379
+ // The intent is:
380
+ // - compact [existing compaction object?, ...new cold items]
381
+ // - receive an updated opaque compaction item
382
+ const compactInput = [];
383
+ if (this.compaction.item)
384
+ compactInput.push(this.compaction.item);
385
+ compactInput.push(...coldItems);
386
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
387
+ const response = await this.openai.responses.compact({
388
+ model: this.config.openaiModel,
389
+ input: compactInput,
390
+ instructions: this.instructions ?? undefined,
391
+ });
392
+ // We expect the new compaction object to be reusable as input.
393
+ // Some SDKs may return `output`, some `compacted`, etc.
394
+ const newItem = response?.output?.[0] ??
395
+ response?.compacted ??
396
+ response?.item ??
397
+ null;
398
+ if (newItem) {
399
+ this.compaction.item = newItem;
400
+ this.compaction.compactedTurns += this.getTurnBoundaries(coldItems).length;
401
+ }
402
+ else {
403
+ throw new Error("Compaction response did not include a reusable compaction item");
404
+ }
405
+ console.error(`[MCPClient] Compacted ${coldItems.length} old items. Total compacted turns: ${this.compaction.compactedTurns}`);
406
+ }
407
+ catch (error) {
408
+ // Fallback: if compaction fails, just drop the cold part rather than
409
+ // keeping everything and risking repeated context overflows.
410
+ console.error("[MCPClient] Compaction failed, dropping cold history:", error);
411
+ }
412
+ }
413
+ /**
414
+ * Proactively compact when the history has grown past the trigger.
415
+ * Keeps the newest hot window uncompacted and compacts the older cold window.
416
+ */
417
+ async maybeCompactHistory() {
418
+ if (this.lastInputTokens < this.config.compactTriggerInputTokens) {
419
+ return;
420
+ }
421
+ const { coldItems, hotItems } = this.splitColdAndHotHistory(this.conversationHistory);
422
+ if (coldItems.length === 0) {
423
+ return;
424
+ }
425
+ await this.compactColdHistory(coldItems);
426
+ this.conversationHistory = hotItems;
427
+ this.lastInputTokens = 0;
428
+ console.error(`[MCPClient] Applied rolling compaction. Kept ${hotItems.length} recent items uncompacted.`);
429
+ }
430
+ /**
431
+ * Keep history from growing pathologically in item count even before token limits.
432
+ * Uses turn-aware trimming, not arbitrary item slicing.
433
+ */
434
+ enforceHardHistoryLimitByTurns(maxTurns = 20) {
435
+ const turns = this.getTurnBoundaries(this.conversationHistory);
436
+ if (turns.length <= maxTurns)
437
+ return;
438
+ const keepFrom = turns[turns.length - maxTurns].startIndex;
439
+ const dropped = this.conversationHistory.slice(0, keepFrom);
440
+ this.conversationHistory = this.conversationHistory.slice(keepFrom);
441
+ console.error(`[MCPClient] Hard-trimmed ${dropped.length} old uncompacted items, preserving last ${maxTurns} turns`);
442
+ }
443
+ /**
444
+ * Build MCP tool list for OpenAI Responses API.
445
+ */
446
+ async buildTools() {
447
+ const toolsResponse = await this.client.listTools();
448
+ return [
449
+ { type: "web_search_preview" },
450
+ ...toolsResponse.tools
451
+ .filter((t) => t.name !== "web_search" && t.name !== "fetch_webpage")
452
+ .map((tool) => ({
453
+ type: "function",
454
+ name: tool.name,
455
+ description: tool.description || "",
456
+ parameters: tool.inputSchema,
457
+ strict: false,
458
+ })),
459
+ ];
460
+ }
461
+ /**
462
+ * Create a response against the current full context.
463
+ */
464
+ async createResponse(params) {
465
+ const response = await this.openai.responses.create({
466
+ model: this.config.openaiModel,
467
+ instructions: this.instructions ?? undefined,
468
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
469
+ input: params.input,
470
+ tools: params.tools,
471
+ previous_response_id: params.previousResponseId,
472
+ truncation: "disabled",
473
+ prompt_cache_retention: "24h",
474
+ });
475
+ this.captureUsage(response);
476
+ return response;
477
+ }
478
+ /**
479
+ * Main query method with rolling compaction.
480
+ */
98
481
  async processQuery(query, onThinking, abortSignal, bypassSystemPrompt = false) {
99
- // Check for cancellation at start
100
482
  if (abortSignal?.aborted) {
101
483
  throw new Error("Request was cancelled");
102
484
  }
103
- // Bypass mode: send the raw prompt directly without system message or conversation history
104
485
  if (bypassSystemPrompt) {
105
486
  return this.processRawQuery(query, onThinking, abortSignal);
106
487
  }
107
- // Load system prompt from MCP server (no-op after first call)
108
488
  await this.ensureSystemPrompt();
109
- // Check if we should compact
110
- const shouldCompact = this.conversationHistory.length >= 40 &&
111
- (Date.now() - this.lastCompaction > 10 * 60 * 1000);
112
- if (shouldCompact) {
113
- await this.compactConversation();
489
+ // Proactive compaction based on last real measured request.
490
+ await this.maybeCompactHistory();
491
+ const tools = await this.buildTools();
492
+ const userMessage = this.makeUserMessage(query);
493
+ // Optional proactive token counting near/around threshold.
494
+ const projectedInputTokens = await this.countInputTokens(this.buildInput([userMessage]));
495
+ if (projectedInputTokens >= this.config.compactTriggerInputTokens) {
496
+ await this.maybeCompactHistory();
114
497
  }
115
- // Add user message to conversation history
116
- this.conversationHistory.push({
117
- type: "message",
118
- role: "user",
119
- content: [
120
- {
121
- type: "input_text",
122
- text: query,
123
- }
124
- ],
125
- });
126
- // Get available tools from MCP server
127
- const toolsResponse = await this.client.listTools();
128
- // Convert MCP tools to OpenAI Responses API format
129
- const tools = toolsResponse.tools.map((tool) => ({
130
- type: "function",
131
- name: tool.name,
132
- description: tool.description || "",
133
- parameters: tool.inputSchema,
134
- strict: false,
135
- }));
136
- // Multi-turn conversation with tool calling
498
+ // Add the new user message to rolling history now.
499
+ this.conversationHistory.push(userMessage);
137
500
  let loopCount = 0;
138
501
  const maxLoops = 15;
139
502
  let finalResponse = "";
140
503
  let outOfToolCalls = false;
504
+ let previousResponseId = undefined;
505
+ // Carries tool outputs across iterations so previous_response_id chain stays intact.
506
+ let pendingToolOutputs = null;
141
507
  while (loopCount < maxLoops) {
142
508
  loopCount++;
143
- // Check for cancellation before each API call
144
509
  if (abortSignal?.aborted) {
145
510
  throw new Error("Request was cancelled");
146
511
  }
147
- // Call OpenAI Responses API with error handling
148
512
  let response;
149
513
  try {
150
- response = await this.openai.responses.create({
151
- model: this.config.openaiModel,
152
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
153
- input: this.conversationHistory,
154
- tools: outOfToolCalls ? [] : tools,
155
- });
514
+ if (!previousResponseId) {
515
+ // First request in this query: send full current context.
516
+ response = await this.createResponse({
517
+ input: this.buildInput(),
518
+ tools: outOfToolCalls ? [] : tools,
519
+ });
520
+ }
521
+ else {
522
+ // Send pending tool outputs to continue the response chain.
523
+ response = await this.createResponse({
524
+ input: pendingToolOutputs ?? [],
525
+ tools: outOfToolCalls ? [] : tools,
526
+ previousResponseId,
527
+ });
528
+ pendingToolOutputs = null;
529
+ }
156
530
  }
157
531
  catch (error) {
158
532
  const err = error;
159
- // Handle context length exceeded
160
- if (err.status === 400 &&
161
- (err.code === 'context_length_exceeded' ||
162
- err.message?.includes('context') ||
163
- err.message?.includes('length'))) {
164
- await this.compactConversation();
165
- response = await this.openai.responses.create({
166
- model: this.config.openaiModel,
167
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
168
- input: this.conversationHistory,
169
- tools: outOfToolCalls ? [] : tools,
170
- });
533
+ const message = err.message?.toLowerCase() || "";
534
+ const contextProblem = err.status === 400 &&
535
+ (err.code === "context_length_exceeded" ||
536
+ message.includes("context") ||
537
+ message.includes("length"));
538
+ const toolProblem = err.status === 400 &&
539
+ (err.code === "response_incomplete" ||
540
+ message.includes("incomplete") ||
541
+ message.includes("tool"));
542
+ if (contextProblem) {
543
+ await this.maybeCompactHistory();
544
+ if (!previousResponseId) {
545
+ response = await this.createResponse({
546
+ input: this.buildInput(),
547
+ tools: outOfToolCalls ? [] : tools,
548
+ });
549
+ }
550
+ else {
551
+ response = await this.createResponse({
552
+ input: pendingToolOutputs ?? [],
553
+ tools: outOfToolCalls ? [] : tools,
554
+ previousResponseId,
555
+ });
556
+ pendingToolOutputs = null;
557
+ }
171
558
  }
172
- // Handle tool calls exhausted
173
- else if (err.status === 400 &&
174
- (err.code === 'response_incomplete' ||
175
- err.message?.includes('incomplete') ||
176
- err.message?.includes('tool'))) {
559
+ else if (toolProblem) {
177
560
  outOfToolCalls = true;
178
- response = await this.openai.responses.create({
179
- model: this.config.openaiModel,
180
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
181
- input: this.conversationHistory,
182
- tools: [],
183
- });
561
+ if (!previousResponseId) {
562
+ response = await this.createResponse({
563
+ input: this.buildInput(),
564
+ tools: [],
565
+ });
566
+ }
567
+ else {
568
+ response = await this.createResponse({
569
+ input: pendingToolOutputs ?? [],
570
+ tools: [],
571
+ previousResponseId,
572
+ });
573
+ pendingToolOutputs = null;
574
+ }
184
575
  }
185
576
  else {
186
577
  throw error;
187
578
  }
188
579
  }
189
- const output = response.output;
190
- // Find function_call items
580
+ previousResponseId = response.id;
581
+ const output = response.output ?? [];
582
+ for (const item of output) {
583
+ if (item.type === "web_search_call") {
584
+ onThinking?.("🔍 web_search_preview");
585
+ }
586
+ }
191
587
  const functionCalls = output.filter((item) => item.type === "function_call");
192
- // Check if AI wants to call tools
193
588
  if (functionCalls.length > 0) {
589
+ // Persist model output items into rolling history.
194
590
  this.conversationHistory.push(...output);
591
+ const toolOutputsForNextStep = [];
195
592
  for (const functionCall of functionCalls) {
196
- // Check for cancellation before each tool call
197
593
  if (abortSignal?.aborted) {
198
594
  throw new Error("Request was cancelled");
199
595
  }
200
596
  const functionName = functionCall.name;
201
- const functionArgs = typeof functionCall.arguments === 'string'
597
+ const functionArgs = typeof functionCall.arguments === "string"
202
598
  ? JSON.parse(functionCall.arguments)
203
599
  : functionCall.arguments;
204
- // Build a descriptive thinking message with key arguments
205
600
  let toolDesc = functionName;
206
- if (functionName === "fetch_webpage" && functionArgs.url) {
601
+ if (functionArgs?.url && functionName === "fetch_webpage") {
207
602
  try {
208
603
  toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
209
604
  }
@@ -211,118 +606,112 @@ export class MCPClientOpenAI {
211
606
  toolDesc = `fetch_webpage → ${functionArgs.url}`;
212
607
  }
213
608
  }
214
- else if (functionName === "web_search" && functionArgs.query) {
609
+ else if (functionArgs?.query && functionName === "web_search") {
215
610
  toolDesc = `web_search → "${functionArgs.query}"`;
216
611
  }
217
612
  onThinking?.(`🔧 ${toolDesc}`);
218
613
  try {
219
- // Execute the tool via MCP
220
614
  const result = await this.client.callTool({
221
615
  name: functionName,
222
616
  arguments: functionArgs,
223
617
  });
224
- // Add tool result to conversation history
225
- this.conversationHistory.push({
226
- type: "function_call_output",
227
- call_id: functionCall.call_id,
228
- output: JSON.stringify(result.content),
229
- });
618
+ const compactOutput = this.compactToolResult(result.content);
619
+ const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, compactOutput);
620
+ toolOutputsForNextStep.push(toolOutputItem);
621
+ this.conversationHistory.push(toolOutputItem);
230
622
  }
231
623
  catch (error) {
232
- this.conversationHistory.push({
233
- type: "function_call_output",
234
- call_id: functionCall.call_id,
235
- output: `Error: ${error instanceof Error ? error.message : String(error)}`,
236
- });
624
+ const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`);
625
+ toolOutputsForNextStep.push(toolOutputItem);
626
+ this.conversationHistory.push(toolOutputItem);
237
627
  }
238
628
  }
629
+ // Carry tool outputs to the next iteration so the response chain stays intact.
630
+ pendingToolOutputs = toolOutputsForNextStep;
239
631
  continue;
240
632
  }
241
- else {
242
- // No more tool calls, extract final response
243
- for (const item of output) {
244
- if (item.type === "message" && item.role === "assistant") {
245
- for (const contentItem of item.content) {
246
- if (contentItem.type === "output_text") {
247
- finalResponse += contentItem.text;
248
- }
633
+ for (const item of output) {
634
+ if (item.type === "message" && item.role === "assistant") {
635
+ for (const contentItem of item.content ?? []) {
636
+ if (contentItem.type === "output_text") {
637
+ finalResponse += contentItem.text;
249
638
  }
250
639
  }
251
640
  }
252
- this.conversationHistory.push(...output);
253
- break;
254
641
  }
642
+ this.conversationHistory.push(...output);
643
+ break;
255
644
  }
256
- // If we hit max loops, make one final request without tools
257
645
  if (loopCount >= maxLoops && !finalResponse) {
258
646
  try {
259
647
  const finalApiResponse = await this.openai.responses.create({
260
648
  model: this.config.openaiModel,
649
+ instructions: this.instructions ?? undefined,
261
650
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
262
- input: this.conversationHistory,
651
+ input: this.buildInput(),
263
652
  tools: [],
653
+ truncation: "disabled",
654
+ prompt_cache_retention: "24h",
264
655
  });
265
- const finalOutput = finalApiResponse.output;
266
- for (const item of finalOutput) {
656
+ this.captureUsage(finalApiResponse);
657
+ for (const item of finalApiResponse.output ?? []) {
267
658
  if (item.type === "message" && item.role === "assistant") {
268
- for (const contentItem of item.content) {
659
+ for (const contentItem of item.content ?? []) {
269
660
  if (contentItem.type === "output_text") {
270
661
  finalResponse += contentItem.text;
271
662
  }
272
663
  }
273
664
  }
274
665
  }
275
- this.conversationHistory.push(...finalOutput);
666
+ this.conversationHistory.push(...(finalApiResponse.output ?? []));
276
667
  }
277
- catch (error) {
278
- finalResponse = "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
668
+ catch {
669
+ finalResponse =
670
+ "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
279
671
  }
280
672
  }
281
- // Keep conversation history manageable
282
- if (this.conversationHistory.length > 50) {
283
- const systemMessage = this.conversationHistory[0];
284
- const recentItems = this.conversationHistory.slice(-49);
285
- this.conversationHistory = [systemMessage, ...recentItems];
286
- }
673
+ // Prevent pathological item growth even when tokens are still OK.
674
+ this.enforceHardHistoryLimitByTurns(20);
675
+ // Optional: if this turn caused a large input, compact immediately so the next turn starts cleaner.
676
+ await this.maybeCompactHistory();
287
677
  return finalResponse;
288
678
  }
289
679
  /**
290
- * Sends a raw query directly to the model — no system prompt, no conversation history.
291
- * Used by the Direct Prompt dev tool to test prompts verbatim.
680
+ * Raw mode: no cached instructions, no rolling history, no compaction state.
292
681
  */
293
682
  async processRawQuery(query, onThinking, abortSignal) {
294
- const toolsResponse = await this.client.listTools();
295
- const tools = toolsResponse.tools.map((tool) => ({
296
- type: "function",
297
- name: tool.name,
298
- description: tool.description || "",
299
- parameters: tool.inputSchema,
300
- strict: false,
301
- }));
302
- // Isolated history — just this message, no system prompt
303
- const isolatedHistory = [
304
- {
305
- type: "message",
306
- role: "user",
307
- content: [{ type: "input_text", text: query }],
308
- },
309
- ];
683
+ const tools = await this.buildTools();
684
+ const isolatedHistory = [this.makeUserMessage(query)];
310
685
  let loopCount = 0;
311
686
  const maxLoops = 15;
312
687
  let finalResponse = "";
688
+ let previousResponseId = undefined;
689
+ let pendingRawToolOutputs = null;
313
690
  while (loopCount < maxLoops) {
314
691
  loopCount++;
315
692
  if (abortSignal?.aborted)
316
693
  throw new Error("Request was cancelled");
694
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
317
695
  const response = await this.openai.responses.create({
318
696
  model: this.config.openaiModel,
319
- input: isolatedHistory,
697
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
698
+ input: (!previousResponseId ? isolatedHistory : (pendingRawToolOutputs ?? [])),
699
+ previous_response_id: previousResponseId,
320
700
  tools,
701
+ truncation: "disabled",
321
702
  });
322
- const output = response.output;
703
+ pendingRawToolOutputs = null;
704
+ this.captureUsage(response);
705
+ previousResponseId = response.id;
706
+ const output = response.output ?? [];
707
+ for (const item of output) {
708
+ if (item.type === "web_search_call") {
709
+ onThinking?.("🔍 web_search_preview");
710
+ }
711
+ }
323
712
  const functionCalls = output.filter((item) => item.type === "function_call");
324
713
  if (functionCalls.length > 0) {
325
- isolatedHistory.push(...output);
714
+ const newToolOutputs = [];
326
715
  for (const functionCall of functionCalls) {
327
716
  if (abortSignal?.aborted)
328
717
  throw new Error("Request was cancelled");
@@ -331,7 +720,7 @@ export class MCPClientOpenAI {
331
720
  ? JSON.parse(functionCall.arguments)
332
721
  : functionCall.arguments;
333
722
  let toolDesc = functionName;
334
- if (functionName === "fetch_webpage" && functionArgs.url) {
723
+ if (functionName === "fetch_webpage" && functionArgs?.url) {
335
724
  try {
336
725
  toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
337
726
  }
@@ -339,23 +728,27 @@ export class MCPClientOpenAI {
339
728
  toolDesc = `fetch_webpage → ${functionArgs.url}`;
340
729
  }
341
730
  }
342
- else if (functionName === "web_search" && functionArgs.query) {
731
+ else if (functionName === "web_search" && functionArgs?.query) {
343
732
  toolDesc = `web_search → "${functionArgs.query}"`;
344
733
  }
345
734
  onThinking?.(`🔧 ${toolDesc}`);
346
735
  try {
347
- const result = await this.client.callTool({ name: functionName, arguments: functionArgs });
348
- isolatedHistory.push({ type: "function_call_output", call_id: functionCall.call_id, output: JSON.stringify(result.content) });
736
+ const result = await this.client.callTool({
737
+ name: functionName,
738
+ arguments: functionArgs,
739
+ });
740
+ newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, this.compactToolResult(result.content)));
349
741
  }
350
742
  catch (error) {
351
- isolatedHistory.push({ type: "function_call_output", call_id: functionCall.call_id, output: `Error: ${error instanceof Error ? error.message : String(error)}` });
743
+ newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`));
352
744
  }
353
745
  }
746
+ pendingRawToolOutputs = newToolOutputs;
354
747
  continue;
355
748
  }
356
749
  for (const item of output) {
357
750
  if (item.type === "message" && item.role === "assistant") {
358
- for (const contentItem of item.content) {
751
+ for (const contentItem of item.content ?? []) {
359
752
  if (contentItem.type === "output_text")
360
753
  finalResponse += contentItem.text;
361
754
  }
@@ -365,13 +758,4 @@ export class MCPClientOpenAI {
365
758
  }
366
759
  return finalResponse;
367
760
  }
368
- clearHistory() {
369
- // Keep system message only if it genuinely is a system role message
370
- const first = this.conversationHistory[0];
371
- const systemMessage = first?.role === "system" ? this.conversationHistory[0] : undefined;
372
- this.conversationHistory = systemMessage ? [systemMessage] : [];
373
- }
374
- async cleanup() {
375
- await this.client.close();
376
- }
377
761
  }
@@ -446,6 +446,18 @@
446
446
  font-size: 12px;
447
447
  opacity: 0.6;
448
448
  margin-top: 6px;
449
+ display: flex;
450
+ align-items: center;
451
+ gap: 8px;
452
+ flex-wrap: wrap;
453
+ }
454
+
455
+ .mcp-chat-token-info {
456
+ font-size: 11px;
457
+ opacity: 0.75;
458
+ font-family: monospace;
459
+ border-left: 1px solid currentColor;
460
+ padding-left: 8px;
449
461
  }
450
462
 
451
463
  /* ───────────────────────────────────────────────
package/dist/types.d.ts CHANGED
@@ -8,6 +8,8 @@ export interface Message {
8
8
  isStreaming?: boolean;
9
9
  /** Hidden messages are sent to the AI but not shown in the chat bubble list */
10
10
  hidden?: boolean;
11
+ /** Token usage info shown in the footer of assistant messages — never fed back to AI */
12
+ tokenInfo?: string;
11
13
  }
12
14
  export interface ThinkingStep {
13
15
  id: string;
@@ -21,7 +23,7 @@ export interface MCPChatProps {
21
23
  className?: string;
22
24
  }
23
25
  export interface StreamEvent {
24
- type: "thinking" | "content" | "done" | "error";
26
+ type: "thinking" | "content" | "done" | "error" | "usage";
25
27
  message?: string;
26
28
  chunk?: string;
27
29
  }
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wFAAwF;IACxF,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nqminds/mcp-client",
3
- "version": "1.0.9",
3
+ "version": "1.0.11",
4
4
  "description": "Reusable MCP client component with AI chat interface",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",