@nqminds/mcp-client 1.0.9 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBAifd"}
1
+ {"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBA6fd"}
package/dist/MCPChat.js CHANGED
@@ -167,6 +167,16 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
167
167
  return updated;
168
168
  });
169
169
  }
170
+ else if (parsed.type === "usage") {
171
+ setMessages((prev) => {
172
+ const updated = [...prev];
173
+ const lastIndex = updated.length - 1;
174
+ if (lastIndex >= 0) {
175
+ updated[lastIndex] = { ...updated[lastIndex], tokenInfo: parsed.message };
176
+ }
177
+ return updated;
178
+ });
179
+ }
170
180
  else if (parsed.type === "error") {
171
181
  throw new Error(parsed.message || "Stream error");
172
182
  }
@@ -315,7 +325,9 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
315
325
  React.createElement("div", { className: "mcp-chat-message-bubble" },
316
326
  msg.role === "assistant" ? (React.createElement("div", { className: "mcp-chat-message-content markdown-content" },
317
327
  React.createElement(ReactMarkdown, { remarkPlugins: [remarkGfm] }, msg.content))) : (React.createElement("div", { className: "mcp-chat-message-content" }, msg.content)),
318
- React.createElement("div", { className: "mcp-chat-message-timestamp" }, msg.timestamp.toLocaleTimeString()))))),
328
+ React.createElement("div", { className: "mcp-chat-message-timestamp" },
329
+ msg.timestamp.toLocaleTimeString(),
330
+ msg.role === "assistant" && msg.tokenInfo && (React.createElement("span", { className: "mcp-chat-token-info" }, msg.tokenInfo))))))),
319
331
  isLoading && (React.createElement("div", { className: "mcp-chat-message mcp-chat-message-assistant" },
320
332
  React.createElement("div", { className: "mcp-chat-thinking" },
321
333
  React.createElement("div", { className: "mcp-chat-thinking-title" },
@@ -1 +1 @@
1
- {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAiG/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
1
+ {"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAmH/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
@@ -47,6 +47,23 @@ export function createMCPChatHandler(config) {
47
47
  sendEvent("thinking", { message: thinkingMessage });
48
48
  }, abortController.signal, // Pass abort signal to enable cancellation
49
49
  bypassSystemPrompt);
50
+ // Emit token usage summary for debugging
51
+ const usage = client.getUsage();
52
+ if (usage.inputTokens > 0 || usage.outputTokens > 0) {
53
+ const parts = [
54
+ `in: ${usage.inputTokens.toLocaleString()}`,
55
+ `out: ${usage.outputTokens.toLocaleString()}`,
56
+ `total: ${usage.totalTokens.toLocaleString()}`,
57
+ ];
58
+ if (usage.cachedTokens > 0) {
59
+ const cachedPct = Math.round((usage.cachedTokens / usage.inputTokens) * 100);
60
+ parts.push(`cached: ${usage.cachedTokens.toLocaleString()} (${cachedPct}%)`);
61
+ }
62
+ if (usage.compactedTurns > 0) {
63
+ parts.push(`compacted: ${usage.compactedTurns} turn${usage.compactedTurns !== 1 ? "s" : ""}`);
64
+ }
65
+ sendEvent("usage", { message: parts.join(" | ") });
66
+ }
50
67
  // Check if aborted before streaming response
51
68
  if (abortController.signal.aborted) {
52
69
  return;
@@ -1,6 +1,20 @@
1
1
  /**
2
2
  * OpenAI-powered MCP Client
3
- * Uses ChatGPT to intelligently interact with MCP tools
3
+ *
4
+ * Compaction strategy:
5
+ * - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
6
+ * - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
7
+ * compact the oldest cold portion and keep the newest portion verbatim.
8
+ * - Feed the returned compaction object back into future requests.
9
+ *
10
+ * Notes:
11
+ * - This is written to align with the OpenAI Responses API shape:
12
+ * - response usage fields
13
+ * - previous_response_id
14
+ * - input token counting
15
+ * - response compaction
16
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
17
+ * by SDK version. The logic here is the important part.
4
18
  */
5
19
  export interface MCPClientConfig {
6
20
  openaiApiKey: string;
@@ -8,30 +22,153 @@ export interface MCPClientConfig {
8
22
  openaiModel?: string;
9
23
  clientName?: string;
10
24
  clientVersion?: string;
25
+ /**
26
+ * Trigger compaction when the last measured input tokens reaches this threshold.
27
+ * Example policy from your suggestion:
28
+ * - compact when last measured input >= 200k
29
+ */
30
+ compactTriggerInputTokens?: number;
31
+ /**
32
+ * Keep roughly this many of the most recent input tokens uncompacted.
33
+ * Example policy from your suggestion:
34
+ * - retain last ~100k uncompacted
35
+ */
36
+ hotContextTargetInputTokens?: number;
37
+ /**
38
+ * Guardrail for unusually large tool outputs stored in history.
39
+ */
40
+ maxToolOutputChars?: number;
41
+ }
42
+ interface UsageStats {
43
+ inputTokens: number;
44
+ outputTokens: number;
45
+ totalTokens: number;
46
+ cachedTokens: number;
47
+ reasoningTokens: number;
48
+ compactedTurns: number;
11
49
  }
12
50
  export declare class MCPClientOpenAI {
13
51
  private client;
14
52
  private openai;
15
53
  private transport;
54
+ /**
55
+ * Instructions are sent using the Responses API `instructions` field,
56
+ * not inserted as a fake message inside the rolling conversation items.
57
+ */
58
+ private instructions;
59
+ /**
60
+ * Rolling uncompacted conversation items.
61
+ * This contains the most recent "hot" context only.
62
+ */
16
63
  private conversationHistory;
17
- private lastCompaction;
64
+ /**
65
+ * Opaque compaction object returned by OpenAI.
66
+ * This represents older "cold" context that has been compacted.
67
+ */
68
+ private compaction;
69
+ /**
70
+ * Last measured input tokens from a real Responses API call.
71
+ */
72
+ private lastInputTokens;
73
+ /**
74
+ * Latest usage snapshot for logging/inspection.
75
+ */
76
+ private lastUsage;
77
+ /**
78
+ * Per-turn accumulator — reset at the start of each processQuery / processRawQuery call.
79
+ * Tracks the max input tokens seen (largest context = most representative) and
80
+ * the sum of output tokens across all API calls in the turn.
81
+ */
82
+ private turnStats;
18
83
  private config;
19
84
  constructor(config: MCPClientConfig);
20
- private compactConversation;
85
+ connect(): Promise<void>;
86
+ cleanup(): Promise<void>;
87
+ clearHistory(): void;
88
+ getUsage(): UsageStats;
21
89
  /**
22
- * Fetches the system prompt from the MCP server's registered "system-prompt" prompt
23
- * and prepends it to conversationHistory. Cached only runs once per session.
24
- * Direct Prompt (bypass mode) skips this entirely.
90
+ * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
91
+ * Cached per client session.
25
92
  */
26
93
  private ensureSystemPrompt;
27
- connect(): Promise<void>;
94
+ /**
95
+ * Build request input:
96
+ * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
97
+ */
98
+ private buildInput;
99
+ /**
100
+ * Count input tokens before making a request.
101
+ * Falls back to a simple rough estimate if the SDK method is unavailable.
102
+ */
103
+ private countInputTokens;
104
+ /**
105
+ * Very rough fallback estimator.
106
+ * Only used if token counting endpoint is unavailable in the SDK version in use.
107
+ */
108
+ private roughEstimateInputTokens;
109
+ /**
110
+ * Reset the per-turn accumulator at the start of each processQuery / processRawQuery.
111
+ */
112
+ private startTurn;
113
+ /**
114
+ * Normalize usage from Responses API and accumulate into the per-turn stats.
115
+ *
116
+ * With previous_response_id chaining there are multiple API calls per user turn:
117
+ * - inputTokens: use max across calls (the call with the largest context is most representative)
118
+ * - cachedTokens: taken from the same call that has the max input tokens (keeps % ≤ 100%)
119
+ * - outputTokens: sum across calls (each call generates separately-billed tokens)
120
+ * - reasoningTokens: sum across calls
121
+ */
122
+ private captureUsage;
123
+ /**
124
+ * Compact oversized tool outputs before storing them in rolling history.
125
+ */
126
+ private compactToolResult;
127
+ private makeUserMessage;
128
+ private makeFunctionOutput;
129
+ /**
130
+ * We treat a "turn" boundary as:
131
+ * - starts at a user message
132
+ * - ends right before the next user message, or end of array
133
+ *
134
+ * This lets us compact or trim in coherent chunks instead of arbitrary items.
135
+ */
136
+ private getTurnBoundaries;
137
+ /**
138
+ * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
139
+ * Older turns become compaction candidates.
140
+ */
141
+ private splitColdAndHotHistory;
142
+ /**
143
+ * Incrementally update compaction using the cold slice only.
144
+ */
145
+ private compactColdHistory;
146
+ /**
147
+ * Proactively compact when the history has grown past the trigger.
148
+ * Keeps the newest hot window uncompacted and compacts the older cold window.
149
+ */
150
+ private maybeCompactHistory;
151
+ /**
152
+ * Keep history from growing pathologically in item count even before token limits.
153
+ * Uses turn-aware trimming, not arbitrary item slicing.
154
+ */
155
+ private enforceHardHistoryLimitByTurns;
156
+ /**
157
+ * Build MCP tool list for OpenAI Responses API.
158
+ */
159
+ private buildTools;
160
+ /**
161
+ * Create a response against the current full context.
162
+ */
163
+ private createResponse;
164
+ /**
165
+ * Main query method with rolling compaction.
166
+ */
28
167
  processQuery(query: string, onThinking?: (message: string) => void, abortSignal?: AbortSignal, bypassSystemPrompt?: boolean): Promise<string>;
29
168
  /**
30
- * Sends a raw query directly to the model — no system prompt, no conversation history.
31
- * Used by the Direct Prompt dev tool to test prompts verbatim.
169
+ * Raw mode: no cached instructions, no rolling history, no compaction state.
32
170
  */
33
171
  private processRawQuery;
34
- clearHistory(): void;
35
- cleanup(): Promise<void>;
36
172
  }
173
+ export {};
37
174
  //# sourceMappingURL=openai-client.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IACxC,OAAO,CAAC,mBAAmB,CAAsB;IACjD,OAAO,CAAC,cAAc,CAAa;IACnC,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;YAsCrB,mBAAmB;IAoBjC;;;;OAIG;YACW,kBAAkB;IA4B1B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EAAE,WAAW,CAAC,EAAE,WAAW,EAAE,kBAAkB,UAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAuNjJ;;;OAGG;YACW,eAAe;IA4E7B,YAAY,IAAI,IAAI;IAOd,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
1
+ {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,UAAU,UAAU;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAiBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IAExC;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAuB;IAE3C;;;OAGG;IACH,OAAO,CAAC,mBAAmB,CAA2B;IAEtD;;;OAGG;IACH,OAAO,CAAC,UAAU,CAGhB;IAEF;;OAEG;IACH,OAAO,CAAC,eAAe,CAAK;IAE5B;;OAEG;IACH,OAAO,CAAC,SAAS,CAOf;IAEF;;;;OAIG;IACH,OAAO,CAAC,SAAS,CAKf;IAEF,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;IAoC7B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,YAAY,IAAI,IAAI;IAkBpB,QAAQ,IAAI,UAAU;IAItB;;;OAGG;YACW,kBAAkB;IA0BhC;;;OAGG;IACH,OAAO,CAAC,UAAU;IAalB;;;OAGG;YACW,gBAAgB;IAuB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,OAAO,CAAC,SAAS;IAIjB;;;;;;;;OAQG;IACH,OAAO,CAAC,YAAY;IAmCpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA8EzB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IA2CzB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;OAEG;YACW,kBAAkB;IA4ChC;;;OAGG;YACW,mBAAmB;IAoBjC;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAatC;;OAEG;YACW,UAAU;IAiBxB;;OAEG;YACW,cAAc;IAoB5B;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EACtC,WAAW,CAAC,EAAE,WAAW,EACzB,kBAAkB,UAAQ,GACzB,OAAO,CAAC,MAAM,CAAC;IA8OlB;;OAEG;YACW,eAAe;CAyG9B"}
@@ -1,31 +1,89 @@
1
1
  /**
2
2
  * OpenAI-powered MCP Client
3
- * Uses ChatGPT to intelligently interact with MCP tools
3
+ *
4
+ * Compaction strategy:
5
+ * - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
6
+ * - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
7
+ * compact the oldest cold portion and keep the newest portion verbatim.
8
+ * - Feed the returned compaction object back into future requests.
9
+ *
10
+ * Notes:
11
+ * - This is written to align with the OpenAI Responses API shape:
12
+ * - response usage fields
13
+ * - previous_response_id
14
+ * - input token counting
15
+ * - response compaction
16
+ * - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
17
+ * by SDK version. The logic here is the important part.
4
18
  */
5
19
  import { Client } from "@modelcontextprotocol/sdk/client/index.js";
6
20
  import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
7
21
  import OpenAI from "openai";
8
22
  export class MCPClientOpenAI {
9
23
  constructor(config) {
24
+ /**
25
+ * Instructions are sent using the Responses API `instructions` field,
26
+ * not inserted as a fake message inside the rolling conversation items.
27
+ */
28
+ this.instructions = null;
29
+ /**
30
+ * Rolling uncompacted conversation items.
31
+ * This contains the most recent "hot" context only.
32
+ */
10
33
  this.conversationHistory = [];
11
- this.lastCompaction = 0;
34
+ /**
35
+ * Opaque compaction object returned by OpenAI.
36
+ * This represents older "cold" context that has been compacted.
37
+ */
38
+ this.compaction = {
39
+ item: null,
40
+ compactedTurns: 0,
41
+ };
42
+ /**
43
+ * Last measured input tokens from a real Responses API call.
44
+ */
45
+ this.lastInputTokens = 0;
46
+ /**
47
+ * Latest usage snapshot for logging/inspection.
48
+ */
49
+ this.lastUsage = {
50
+ inputTokens: 0,
51
+ outputTokens: 0,
52
+ totalTokens: 0,
53
+ cachedTokens: 0,
54
+ reasoningTokens: 0,
55
+ compactedTurns: 0,
56
+ };
57
+ /**
58
+ * Per-turn accumulator — reset at the start of each processQuery / processRawQuery call.
59
+ * Tracks the max input tokens seen (largest context = most representative) and
60
+ * the sum of output tokens across all API calls in the turn.
61
+ */
62
+ this.turnStats = {
63
+ inputTokens: 0,
64
+ outputTokens: 0,
65
+ cachedTokens: 0,
66
+ reasoningTokens: 0,
67
+ };
12
68
  this.config = {
13
69
  openaiApiKey: config.openaiApiKey,
14
70
  mcpServerCommand: config.mcpServerCommand,
15
- openaiModel: config.openaiModel || "chatgpt-5-mini",
71
+ openaiModel: config.openaiModel || "gpt-5-mini",
16
72
  clientName: config.clientName || "mcp-flair-client",
17
73
  clientVersion: config.clientVersion || "1.0.0",
74
+ compactTriggerInputTokens: config.compactTriggerInputTokens ?? 200000,
75
+ hotContextTargetInputTokens: config.hotContextTargetInputTokens ?? 100000,
76
+ maxToolOutputChars: config.maxToolOutputChars ?? 20000,
18
77
  };
19
78
  this.openai = new OpenAI({
20
79
  apiKey: this.config.openaiApiKey,
21
80
  });
22
- // Parse the server command and args
23
81
  const serverCmd = this.config.mcpServerCommand.split(" ");
24
82
  const command = serverCmd[0];
25
83
  const args = serverCmd.slice(1);
26
84
  this.transport = new StdioClientTransport({
27
- command: command,
28
- args: args,
85
+ command,
86
+ args,
29
87
  });
30
88
  this.client = new Client({
31
89
  name: this.config.clientName,
@@ -33,177 +91,552 @@ export class MCPClientOpenAI {
33
91
  }, {
34
92
  capabilities: {},
35
93
  });
36
- // Initialize conversation with system message
37
- // System prompt is fetched from the MCP server on first use (see ensureSystemPrompt)
94
+ }
95
+ async connect() {
96
+ await this.client.connect(this.transport);
97
+ }
98
+ async cleanup() {
99
+ await this.client.close();
100
+ }
101
+ clearHistory() {
38
102
  this.conversationHistory = [];
103
+ this.compaction = {
104
+ item: null,
105
+ compactedTurns: 0,
106
+ };
107
+ this.lastInputTokens = 0;
108
+ this.turnStats = { inputTokens: 0, outputTokens: 0, cachedTokens: 0, reasoningTokens: 0 };
109
+ this.lastUsage = {
110
+ inputTokens: 0,
111
+ outputTokens: 0,
112
+ totalTokens: 0,
113
+ cachedTokens: 0,
114
+ reasoningTokens: 0,
115
+ compactedTurns: 0,
116
+ };
39
117
  }
40
- async compactConversation() {
41
- try {
42
- const compactionResponse = await this.openai.responses.compact({
43
- model: this.config.openaiModel,
44
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
45
- input: this.conversationHistory,
46
- });
47
- this.conversationHistory = compactionResponse.output;
48
- this.lastCompaction = Date.now();
49
- }
50
- catch (error) {
51
- // Keep system message and last 25 items
52
- if (this.conversationHistory.length > 26) {
53
- const systemMessage = this.conversationHistory[0];
54
- const recentItems = this.conversationHistory.slice(-25);
55
- this.conversationHistory = [systemMessage, ...recentItems];
56
- }
57
- }
118
+ getUsage() {
119
+ return { ...this.lastUsage, compactedTurns: this.compaction.compactedTurns };
58
120
  }
59
121
  /**
60
- * Fetches the system prompt from the MCP server's registered "system-prompt" prompt
61
- * and prepends it to conversationHistory. Cached only runs once per session.
62
- * Direct Prompt (bypass mode) skips this entirely.
122
+ * Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
123
+ * Cached per client session.
63
124
  */
64
125
  async ensureSystemPrompt() {
65
- // Already loaded if history starts with a system message
66
- if (this.conversationHistory[0]?.role === "system")
126
+ if (this.instructions)
67
127
  return;
68
128
  try {
129
+ // SDK typing may not expose getPrompt.
69
130
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
70
131
  const result = await this.client.getPrompt({ name: "system-prompt" });
71
132
  const parts = [];
72
- for (const msg of result.messages) {
133
+ for (const msg of result.messages ?? []) {
134
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
73
135
  const c = msg.content;
74
136
  if (typeof c === "string")
75
137
  parts.push(c);
76
138
  else if (c?.text)
77
139
  parts.push(c.text);
78
140
  }
79
- const text = parts.join("\n\n");
141
+ const text = parts.join("\n\n").trim();
80
142
  if (text) {
81
- this.conversationHistory = [
82
- {
83
- type: "message",
84
- role: "system",
85
- content: [{ type: "input_text", text }],
86
- },
87
- ...this.conversationHistory,
88
- ];
143
+ this.instructions = text;
89
144
  }
90
145
  }
91
146
  catch (error) {
92
147
  console.error("[MCPClient] Failed to fetch system prompt from MCP server:", error);
148
+ this.instructions = null;
93
149
  }
94
150
  }
95
- async connect() {
96
- await this.client.connect(this.transport);
151
+ /**
152
+ * Build request input:
153
+ * [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
154
+ */
155
+ buildInput(newItems = []) {
156
+ const input = [];
157
+ if (this.compaction.item) {
158
+ input.push(this.compaction.item);
159
+ }
160
+ input.push(...this.conversationHistory);
161
+ input.push(...newItems);
162
+ return input;
163
+ }
164
+ /**
165
+ * Count input tokens before making a request.
166
+ * Falls back to a simple rough estimate if the SDK method is unavailable.
167
+ */
168
+ async countInputTokens(input) {
169
+ try {
170
+ // Some SDK versions may expose this as responses.inputTokens.count(...)
171
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
172
+ const result = await this.openai.responses.inputTokens.count({
173
+ model: this.config.openaiModel,
174
+ input,
175
+ instructions: this.instructions ?? undefined,
176
+ tools: [],
177
+ });
178
+ // Common guess for returned shape
179
+ return (result?.input_tokens ??
180
+ result?.total_tokens ??
181
+ result?.count ??
182
+ this.roughEstimateInputTokens(input));
183
+ }
184
+ catch {
185
+ return this.roughEstimateInputTokens(input);
186
+ }
187
+ }
188
+ /**
189
+ * Very rough fallback estimator.
190
+ * Only used if token counting endpoint is unavailable in the SDK version in use.
191
+ */
192
+ roughEstimateInputTokens(input) {
193
+ const serialized = JSON.stringify({
194
+ instructions: this.instructions,
195
+ input,
196
+ });
197
+ // Very rough English-ish heuristic.
198
+ return Math.ceil(serialized.length / 4);
199
+ }
200
+ /**
201
+ * Reset the per-turn accumulator at the start of each processQuery / processRawQuery.
202
+ */
203
+ startTurn() {
204
+ this.turnStats = { inputTokens: 0, outputTokens: 0, cachedTokens: 0, reasoningTokens: 0 };
205
+ }
206
+ /**
207
+ * Normalize usage from Responses API and accumulate into the per-turn stats.
208
+ *
209
+ * With previous_response_id chaining there are multiple API calls per user turn:
210
+ * - inputTokens: use max across calls (the call with the largest context is most representative)
211
+ * - cachedTokens: taken from the same call that has the max input tokens (keeps % ≤ 100%)
212
+ * - outputTokens: sum across calls (each call generates separately-billed tokens)
213
+ * - reasoningTokens: sum across calls
214
+ */
215
+ captureUsage(response) {
216
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
217
+ const usage = response?.usage ?? {};
218
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
219
+ const inputDetails = usage?.input_tokens_details ?? {};
220
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
221
+ const outputDetails = usage?.output_tokens_details ?? {};
222
+ const inputTokens = usage.input_tokens ?? 0;
223
+ const outputTokens = usage.output_tokens ?? 0;
224
+ const cachedTokens = inputDetails.cached_tokens ?? 0;
225
+ const reasoningTokens = outputDetails.reasoning_tokens ?? 0;
226
+ // cachedTokens is only meaningful relative to its own call's inputTokens.
227
+ // Track it alongside the max-input call so the percentage stays ≤ 100%.
228
+ if (inputTokens >= this.turnStats.inputTokens) {
229
+ this.turnStats.inputTokens = inputTokens;
230
+ this.turnStats.cachedTokens = cachedTokens;
231
+ }
232
+ this.turnStats.outputTokens += outputTokens;
233
+ this.turnStats.reasoningTokens += reasoningTokens;
234
+ this.lastUsage = {
235
+ inputTokens: this.turnStats.inputTokens,
236
+ outputTokens: this.turnStats.outputTokens,
237
+ totalTokens: this.turnStats.inputTokens + this.turnStats.outputTokens,
238
+ cachedTokens: this.turnStats.cachedTokens,
239
+ reasoningTokens: this.turnStats.reasoningTokens,
240
+ compactedTurns: this.compaction.compactedTurns,
241
+ };
242
+ // Keep lastInputTokens as the raw per-call value for compaction threshold checks.
243
+ this.lastInputTokens = inputTokens;
244
+ }
245
+ /**
246
+ * Compact oversized tool outputs before storing them in rolling history.
247
+ */
248
+ compactToolResult(value) {
249
+ const seen = new WeakSet();
250
+ const prune = (v) => {
251
+ if (v == null)
252
+ return v;
253
+ if (typeof v === "string") {
254
+ if (v.length <= this.config.maxToolOutputChars)
255
+ return v;
256
+ return (v.slice(0, this.config.maxToolOutputChars) +
257
+ `\n...[truncated ${v.length - this.config.maxToolOutputChars} chars]`);
258
+ }
259
+ if (typeof v !== "object")
260
+ return v;
261
+ if (Array.isArray(v)) {
262
+ const maxItems = 30;
263
+ const sliced = v.slice(0, maxItems).map(prune);
264
+ if (v.length > maxItems) {
265
+ sliced.push(`...[truncated ${v.length - maxItems} items]`);
266
+ }
267
+ return sliced;
268
+ }
269
+ if (seen.has(v))
270
+ return "[circular]";
271
+ seen.add(v);
272
+ const obj = v;
273
+ const out = {};
274
+ const entries = Object.entries(obj);
275
+ // Prefer keeping fewer, more informative fields.
276
+ const preferredFirst = [
277
+ "title",
278
+ "name",
279
+ "id",
280
+ "url",
281
+ "summary",
282
+ "description",
283
+ "text",
284
+ "content",
285
+ "status",
286
+ "result",
287
+ "items",
288
+ "data",
289
+ ];
290
+ const sorted = entries.sort(([a], [b]) => {
291
+ const ai = preferredFirst.indexOf(a);
292
+ const bi = preferredFirst.indexOf(b);
293
+ const av = ai === -1 ? 999 : ai;
294
+ const bv = bi === -1 ? 999 : bi;
295
+ return av - bv;
296
+ });
297
+ const maxFields = 25;
298
+ for (const [k, val] of sorted.slice(0, maxFields)) {
299
+ out[k] = prune(val);
300
+ }
301
+ if (entries.length > maxFields) {
302
+ out.__truncated_fields__ = entries.length - maxFields;
303
+ }
304
+ return out;
305
+ };
306
+ try {
307
+ return JSON.stringify(prune(value));
308
+ }
309
+ catch {
310
+ const s = String(value);
311
+ return s.length <= this.config.maxToolOutputChars
312
+ ? s
313
+ : s.slice(0, this.config.maxToolOutputChars) +
314
+ `\n...[truncated ${s.length - this.config.maxToolOutputChars} chars]`;
315
+ }
316
+ }
317
+ makeUserMessage(text) {
318
+ return {
319
+ type: "message",
320
+ role: "user",
321
+ content: [{ type: "input_text", text }],
322
+ };
323
+ }
324
+ makeFunctionOutput(callId, output) {
325
+ return {
326
+ type: "function_call_output",
327
+ call_id: callId,
328
+ output,
329
+ };
330
+ }
331
+ /**
332
+ * We treat a "turn" boundary as:
333
+ * - starts at a user message
334
+ * - ends right before the next user message, or end of array
335
+ *
336
+ * This lets us compact or trim in coherent chunks instead of arbitrary items.
337
+ */
338
+ getTurnBoundaries(items) {
339
+ const boundaries = [];
340
+ let currentStart = -1;
341
+ for (let i = 0; i < items.length; i++) {
342
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
343
+ const item = items[i];
344
+ const isUserMessage = item?.type === "message" && item?.role === "user";
345
+ if (isUserMessage) {
346
+ if (currentStart !== -1) {
347
+ const slice = items.slice(currentStart, i);
348
+ boundaries.push({
349
+ startIndex: currentStart,
350
+ endIndex: i - 1,
351
+ estimatedTokens: this.roughEstimateInputTokens(slice),
352
+ });
353
+ }
354
+ currentStart = i;
355
+ }
356
+ }
357
+ if (currentStart !== -1) {
358
+ const slice = items.slice(currentStart);
359
+ boundaries.push({
360
+ startIndex: currentStart,
361
+ endIndex: items.length - 1,
362
+ estimatedTokens: this.roughEstimateInputTokens(slice),
363
+ });
364
+ }
365
+ // If there are no user turns, treat all as one chunk.
366
+ if (boundaries.length === 0 && items.length > 0) {
367
+ boundaries.push({
368
+ startIndex: 0,
369
+ endIndex: items.length - 1,
370
+ estimatedTokens: this.roughEstimateInputTokens(items),
371
+ });
372
+ }
373
+ return boundaries;
374
+ }
375
+ /**
376
+ * Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
377
+ * Older turns become compaction candidates.
378
+ */
379
+ splitColdAndHotHistory(items) {
380
+ const turns = this.getTurnBoundaries(items);
381
+ if (turns.length === 0) {
382
+ return { coldItems: [], hotItems: items };
383
+ }
384
+ let running = 0;
385
+ let keepFromTurnIndex = turns.length;
386
+ for (let i = turns.length - 1; i >= 0; i--) {
387
+ const next = running + turns[i].estimatedTokens;
388
+ if (next > this.config.hotContextTargetInputTokens) {
389
+ break;
390
+ }
391
+ running = next;
392
+ keepFromTurnIndex = i;
393
+ }
394
+ if (keepFromTurnIndex === turns.length) {
395
+ // Even the newest turn is too large; keep at least the latest turn hot.
396
+ const lastTurn = turns[turns.length - 1];
397
+ return {
398
+ coldItems: items.slice(0, lastTurn.startIndex),
399
+ hotItems: items.slice(lastTurn.startIndex),
400
+ };
401
+ }
402
+ const splitIndex = turns[keepFromTurnIndex].startIndex;
403
+ return {
404
+ coldItems: items.slice(0, splitIndex),
405
+ hotItems: items.slice(splitIndex),
406
+ };
407
+ }
408
+ /**
409
+ * Incrementally update compaction using the cold slice only.
410
+ */
411
+ async compactColdHistory(coldItems) {
412
+ if (coldItems.length === 0)
413
+ return;
414
+ try {
415
+ // Depending on SDK version, the exact shape may vary.
416
+ // The intent is:
417
+ // - compact [existing compaction object?, ...new cold items]
418
+ // - receive an updated opaque compaction item
419
+ const compactInput = [];
420
+ if (this.compaction.item)
421
+ compactInput.push(this.compaction.item);
422
+ compactInput.push(...coldItems);
423
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
424
+ const response = await this.openai.responses.compact({
425
+ model: this.config.openaiModel,
426
+ input: compactInput,
427
+ instructions: this.instructions ?? undefined,
428
+ });
429
+ // We expect the new compaction object to be reusable as input.
430
+ // Some SDKs may return `output`, some `compacted`, etc.
431
+ const newItem = response?.output?.[0] ??
432
+ response?.compacted ??
433
+ response?.item ??
434
+ null;
435
+ if (newItem) {
436
+ this.compaction.item = newItem;
437
+ this.compaction.compactedTurns += this.getTurnBoundaries(coldItems).length;
438
+ }
439
+ else {
440
+ throw new Error("Compaction response did not include a reusable compaction item");
441
+ }
442
+ console.error(`[MCPClient] Compacted ${coldItems.length} old items. Total compacted turns: ${this.compaction.compactedTurns}`);
443
+ }
444
+ catch (error) {
445
+ // Fallback: if compaction fails, just drop the cold part rather than
446
+ // keeping everything and risking repeated context overflows.
447
+ console.error("[MCPClient] Compaction failed, dropping cold history:", error);
448
+ }
449
+ }
450
+ /**
451
+ * Proactively compact when the history has grown past the trigger.
452
+ * Keeps the newest hot window uncompacted and compacts the older cold window.
453
+ */
454
+ async maybeCompactHistory() {
455
+ if (this.lastInputTokens < this.config.compactTriggerInputTokens) {
456
+ return;
457
+ }
458
+ const { coldItems, hotItems } = this.splitColdAndHotHistory(this.conversationHistory);
459
+ if (coldItems.length === 0) {
460
+ return;
461
+ }
462
+ await this.compactColdHistory(coldItems);
463
+ this.conversationHistory = hotItems;
464
+ this.lastInputTokens = 0;
465
+ console.error(`[MCPClient] Applied rolling compaction. Kept ${hotItems.length} recent items uncompacted.`);
466
+ }
467
+ /**
468
+ * Keep history from growing pathologically in item count even before token limits.
469
+ * Uses turn-aware trimming, not arbitrary item slicing.
470
+ */
471
+ enforceHardHistoryLimitByTurns(maxTurns = 20) {
472
+ const turns = this.getTurnBoundaries(this.conversationHistory);
473
+ if (turns.length <= maxTurns)
474
+ return;
475
+ const keepFrom = turns[turns.length - maxTurns].startIndex;
476
+ const dropped = this.conversationHistory.slice(0, keepFrom);
477
+ this.conversationHistory = this.conversationHistory.slice(keepFrom);
478
+ console.error(`[MCPClient] Hard-trimmed ${dropped.length} old uncompacted items, preserving last ${maxTurns} turns`);
97
479
  }
480
+ /**
481
+ * Build MCP tool list for OpenAI Responses API.
482
+ */
483
+ async buildTools() {
484
+ const toolsResponse = await this.client.listTools();
485
+ return [
486
+ { type: "web_search_preview" },
487
+ ...toolsResponse.tools
488
+ .filter((t) => t.name !== "web_search" && t.name !== "fetch_webpage")
489
+ .map((tool) => ({
490
+ type: "function",
491
+ name: tool.name,
492
+ description: tool.description || "",
493
+ parameters: tool.inputSchema,
494
+ strict: false,
495
+ })),
496
+ ];
497
+ }
498
+ /**
499
+ * Create a response against the current full context.
500
+ */
501
+ async createResponse(params) {
502
+ const response = await this.openai.responses.create({
503
+ model: this.config.openaiModel,
504
+ instructions: this.instructions ?? undefined,
505
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
506
+ input: params.input,
507
+ tools: params.tools,
508
+ previous_response_id: params.previousResponseId,
509
+ truncation: "disabled",
510
+ prompt_cache_retention: "24h",
511
+ });
512
+ this.captureUsage(response);
513
+ return response;
514
+ }
515
+ /**
516
+ * Main query method with rolling compaction.
517
+ */
98
518
  async processQuery(query, onThinking, abortSignal, bypassSystemPrompt = false) {
99
- // Check for cancellation at start
100
519
  if (abortSignal?.aborted) {
101
520
  throw new Error("Request was cancelled");
102
521
  }
103
- // Bypass mode: send the raw prompt directly without system message or conversation history
104
522
  if (bypassSystemPrompt) {
105
523
  return this.processRawQuery(query, onThinking, abortSignal);
106
524
  }
107
- // Load system prompt from MCP server (no-op after first call)
525
+ this.startTurn();
108
526
  await this.ensureSystemPrompt();
109
- // Check if we should compact
110
- const shouldCompact = this.conversationHistory.length >= 40 &&
111
- (Date.now() - this.lastCompaction > 10 * 60 * 1000);
112
- if (shouldCompact) {
113
- await this.compactConversation();
527
+ // Proactive compaction based on last real measured request.
528
+ await this.maybeCompactHistory();
529
+ const tools = await this.buildTools();
530
+ const userMessage = this.makeUserMessage(query);
531
+ // Optional proactive token counting near/around threshold.
532
+ const projectedInputTokens = await this.countInputTokens(this.buildInput([userMessage]));
533
+ if (projectedInputTokens >= this.config.compactTriggerInputTokens) {
534
+ await this.maybeCompactHistory();
114
535
  }
115
- // Add user message to conversation history
116
- this.conversationHistory.push({
117
- type: "message",
118
- role: "user",
119
- content: [
120
- {
121
- type: "input_text",
122
- text: query,
123
- }
124
- ],
125
- });
126
- // Get available tools from MCP server
127
- const toolsResponse = await this.client.listTools();
128
- // Convert MCP tools to OpenAI Responses API format
129
- const tools = toolsResponse.tools.map((tool) => ({
130
- type: "function",
131
- name: tool.name,
132
- description: tool.description || "",
133
- parameters: tool.inputSchema,
134
- strict: false,
135
- }));
136
- // Multi-turn conversation with tool calling
536
+ // Add the new user message to rolling history now.
537
+ this.conversationHistory.push(userMessage);
137
538
  let loopCount = 0;
138
539
  const maxLoops = 15;
139
540
  let finalResponse = "";
140
541
  let outOfToolCalls = false;
542
+ let previousResponseId = undefined;
543
+ // Carries tool outputs across iterations so previous_response_id chain stays intact.
544
+ let pendingToolOutputs = null;
141
545
  while (loopCount < maxLoops) {
142
546
  loopCount++;
143
- // Check for cancellation before each API call
144
547
  if (abortSignal?.aborted) {
145
548
  throw new Error("Request was cancelled");
146
549
  }
147
- // Call OpenAI Responses API with error handling
148
550
  let response;
149
551
  try {
150
- response = await this.openai.responses.create({
151
- model: this.config.openaiModel,
152
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
153
- input: this.conversationHistory,
154
- tools: outOfToolCalls ? [] : tools,
155
- });
552
+ if (!previousResponseId) {
553
+ // First request in this query: send full current context.
554
+ response = await this.createResponse({
555
+ input: this.buildInput(),
556
+ tools: outOfToolCalls ? [] : tools,
557
+ });
558
+ }
559
+ else {
560
+ // Send pending tool outputs to continue the response chain.
561
+ response = await this.createResponse({
562
+ input: pendingToolOutputs ?? [],
563
+ tools: outOfToolCalls ? [] : tools,
564
+ previousResponseId,
565
+ });
566
+ pendingToolOutputs = null;
567
+ }
156
568
  }
157
569
  catch (error) {
158
570
  const err = error;
159
- // Handle context length exceeded
160
- if (err.status === 400 &&
161
- (err.code === 'context_length_exceeded' ||
162
- err.message?.includes('context') ||
163
- err.message?.includes('length'))) {
164
- await this.compactConversation();
165
- response = await this.openai.responses.create({
166
- model: this.config.openaiModel,
167
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
168
- input: this.conversationHistory,
169
- tools: outOfToolCalls ? [] : tools,
170
- });
571
+ const message = err.message?.toLowerCase() || "";
572
+ const contextProblem = err.status === 400 &&
573
+ (err.code === "context_length_exceeded" ||
574
+ message.includes("context") ||
575
+ message.includes("length"));
576
+ const toolProblem = err.status === 400 &&
577
+ (err.code === "response_incomplete" ||
578
+ message.includes("incomplete") ||
579
+ message.includes("tool"));
580
+ if (contextProblem) {
581
+ await this.maybeCompactHistory();
582
+ if (!previousResponseId) {
583
+ response = await this.createResponse({
584
+ input: this.buildInput(),
585
+ tools: outOfToolCalls ? [] : tools,
586
+ });
587
+ }
588
+ else {
589
+ response = await this.createResponse({
590
+ input: pendingToolOutputs ?? [],
591
+ tools: outOfToolCalls ? [] : tools,
592
+ previousResponseId,
593
+ });
594
+ pendingToolOutputs = null;
595
+ }
171
596
  }
172
- // Handle tool calls exhausted
173
- else if (err.status === 400 &&
174
- (err.code === 'response_incomplete' ||
175
- err.message?.includes('incomplete') ||
176
- err.message?.includes('tool'))) {
597
+ else if (toolProblem) {
177
598
  outOfToolCalls = true;
178
- response = await this.openai.responses.create({
179
- model: this.config.openaiModel,
180
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
181
- input: this.conversationHistory,
182
- tools: [],
183
- });
599
+ if (!previousResponseId) {
600
+ response = await this.createResponse({
601
+ input: this.buildInput(),
602
+ tools: [],
603
+ });
604
+ }
605
+ else {
606
+ response = await this.createResponse({
607
+ input: pendingToolOutputs ?? [],
608
+ tools: [],
609
+ previousResponseId,
610
+ });
611
+ pendingToolOutputs = null;
612
+ }
184
613
  }
185
614
  else {
186
615
  throw error;
187
616
  }
188
617
  }
189
- const output = response.output;
190
- // Find function_call items
618
+ previousResponseId = response.id;
619
+ const output = response.output ?? [];
620
+ for (const item of output) {
621
+ if (item.type === "web_search_call") {
622
+ onThinking?.("🔍 web_search_preview");
623
+ }
624
+ }
191
625
  const functionCalls = output.filter((item) => item.type === "function_call");
192
- // Check if AI wants to call tools
193
626
  if (functionCalls.length > 0) {
627
+ // Persist model output items into rolling history.
194
628
  this.conversationHistory.push(...output);
629
+ const toolOutputsForNextStep = [];
195
630
  for (const functionCall of functionCalls) {
196
- // Check for cancellation before each tool call
197
631
  if (abortSignal?.aborted) {
198
632
  throw new Error("Request was cancelled");
199
633
  }
200
634
  const functionName = functionCall.name;
201
- const functionArgs = typeof functionCall.arguments === 'string'
635
+ const functionArgs = typeof functionCall.arguments === "string"
202
636
  ? JSON.parse(functionCall.arguments)
203
637
  : functionCall.arguments;
204
- // Build a descriptive thinking message with key arguments
205
638
  let toolDesc = functionName;
206
- if (functionName === "fetch_webpage" && functionArgs.url) {
639
+ if (functionArgs?.url && functionName === "fetch_webpage") {
207
640
  try {
208
641
  toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
209
642
  }
@@ -211,118 +644,113 @@ export class MCPClientOpenAI {
211
644
  toolDesc = `fetch_webpage → ${functionArgs.url}`;
212
645
  }
213
646
  }
214
- else if (functionName === "web_search" && functionArgs.query) {
647
+ else if (functionArgs?.query && functionName === "web_search") {
215
648
  toolDesc = `web_search → "${functionArgs.query}"`;
216
649
  }
217
650
  onThinking?.(`🔧 ${toolDesc}`);
218
651
  try {
219
- // Execute the tool via MCP
220
652
  const result = await this.client.callTool({
221
653
  name: functionName,
222
654
  arguments: functionArgs,
223
655
  });
224
- // Add tool result to conversation history
225
- this.conversationHistory.push({
226
- type: "function_call_output",
227
- call_id: functionCall.call_id,
228
- output: JSON.stringify(result.content),
229
- });
656
+ const compactOutput = this.compactToolResult(result.content);
657
+ const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, compactOutput);
658
+ toolOutputsForNextStep.push(toolOutputItem);
659
+ this.conversationHistory.push(toolOutputItem);
230
660
  }
231
661
  catch (error) {
232
- this.conversationHistory.push({
233
- type: "function_call_output",
234
- call_id: functionCall.call_id,
235
- output: `Error: ${error instanceof Error ? error.message : String(error)}`,
236
- });
662
+ const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`);
663
+ toolOutputsForNextStep.push(toolOutputItem);
664
+ this.conversationHistory.push(toolOutputItem);
237
665
  }
238
666
  }
667
+ // Carry tool outputs to the next iteration so the response chain stays intact.
668
+ pendingToolOutputs = toolOutputsForNextStep;
239
669
  continue;
240
670
  }
241
- else {
242
- // No more tool calls, extract final response
243
- for (const item of output) {
244
- if (item.type === "message" && item.role === "assistant") {
245
- for (const contentItem of item.content) {
246
- if (contentItem.type === "output_text") {
247
- finalResponse += contentItem.text;
248
- }
671
+ for (const item of output) {
672
+ if (item.type === "message" && item.role === "assistant") {
673
+ for (const contentItem of item.content ?? []) {
674
+ if (contentItem.type === "output_text") {
675
+ finalResponse += contentItem.text;
249
676
  }
250
677
  }
251
678
  }
252
- this.conversationHistory.push(...output);
253
- break;
254
679
  }
680
+ this.conversationHistory.push(...output);
681
+ break;
255
682
  }
256
- // If we hit max loops, make one final request without tools
257
683
  if (loopCount >= maxLoops && !finalResponse) {
258
684
  try {
259
685
  const finalApiResponse = await this.openai.responses.create({
260
686
  model: this.config.openaiModel,
687
+ instructions: this.instructions ?? undefined,
261
688
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
262
- input: this.conversationHistory,
689
+ input: this.buildInput(),
263
690
  tools: [],
691
+ truncation: "disabled",
692
+ prompt_cache_retention: "24h",
264
693
  });
265
- const finalOutput = finalApiResponse.output;
266
- for (const item of finalOutput) {
694
+ this.captureUsage(finalApiResponse);
695
+ for (const item of finalApiResponse.output ?? []) {
267
696
  if (item.type === "message" && item.role === "assistant") {
268
- for (const contentItem of item.content) {
697
+ for (const contentItem of item.content ?? []) {
269
698
  if (contentItem.type === "output_text") {
270
699
  finalResponse += contentItem.text;
271
700
  }
272
701
  }
273
702
  }
274
703
  }
275
- this.conversationHistory.push(...finalOutput);
704
+ this.conversationHistory.push(...(finalApiResponse.output ?? []));
276
705
  }
277
- catch (error) {
278
- finalResponse = "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
706
+ catch {
707
+ finalResponse =
708
+ "I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
279
709
  }
280
710
  }
281
- // Keep conversation history manageable
282
- if (this.conversationHistory.length > 50) {
283
- const systemMessage = this.conversationHistory[0];
284
- const recentItems = this.conversationHistory.slice(-49);
285
- this.conversationHistory = [systemMessage, ...recentItems];
286
- }
711
+ // Prevent pathological item growth even when tokens are still OK.
712
+ this.enforceHardHistoryLimitByTurns(20);
713
+ // Optional: if this turn caused a large input, compact immediately so the next turn starts cleaner.
714
+ await this.maybeCompactHistory();
287
715
  return finalResponse;
288
716
  }
289
717
  /**
290
- * Sends a raw query directly to the model — no system prompt, no conversation history.
291
- * Used by the Direct Prompt dev tool to test prompts verbatim.
718
+ * Raw mode: no cached instructions, no rolling history, no compaction state.
292
719
  */
293
720
  async processRawQuery(query, onThinking, abortSignal) {
294
- const toolsResponse = await this.client.listTools();
295
- const tools = toolsResponse.tools.map((tool) => ({
296
- type: "function",
297
- name: tool.name,
298
- description: tool.description || "",
299
- parameters: tool.inputSchema,
300
- strict: false,
301
- }));
302
- // Isolated history — just this message, no system prompt
303
- const isolatedHistory = [
304
- {
305
- type: "message",
306
- role: "user",
307
- content: [{ type: "input_text", text: query }],
308
- },
309
- ];
721
+ this.startTurn();
722
+ const tools = await this.buildTools();
723
+ const isolatedHistory = [this.makeUserMessage(query)];
310
724
  let loopCount = 0;
311
725
  const maxLoops = 15;
312
726
  let finalResponse = "";
727
+ let previousResponseId = undefined;
728
+ let pendingRawToolOutputs = null;
313
729
  while (loopCount < maxLoops) {
314
730
  loopCount++;
315
731
  if (abortSignal?.aborted)
316
732
  throw new Error("Request was cancelled");
733
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
317
734
  const response = await this.openai.responses.create({
318
735
  model: this.config.openaiModel,
319
- input: isolatedHistory,
736
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
737
+ input: (!previousResponseId ? isolatedHistory : (pendingRawToolOutputs ?? [])),
738
+ previous_response_id: previousResponseId,
320
739
  tools,
740
+ truncation: "disabled",
321
741
  });
322
- const output = response.output;
742
+ pendingRawToolOutputs = null;
743
+ this.captureUsage(response);
744
+ previousResponseId = response.id;
745
+ const output = response.output ?? [];
746
+ for (const item of output) {
747
+ if (item.type === "web_search_call") {
748
+ onThinking?.("🔍 web_search_preview");
749
+ }
750
+ }
323
751
  const functionCalls = output.filter((item) => item.type === "function_call");
324
752
  if (functionCalls.length > 0) {
325
- isolatedHistory.push(...output);
753
+ const newToolOutputs = [];
326
754
  for (const functionCall of functionCalls) {
327
755
  if (abortSignal?.aborted)
328
756
  throw new Error("Request was cancelled");
@@ -331,7 +759,7 @@ export class MCPClientOpenAI {
331
759
  ? JSON.parse(functionCall.arguments)
332
760
  : functionCall.arguments;
333
761
  let toolDesc = functionName;
334
- if (functionName === "fetch_webpage" && functionArgs.url) {
762
+ if (functionName === "fetch_webpage" && functionArgs?.url) {
335
763
  try {
336
764
  toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
337
765
  }
@@ -339,23 +767,27 @@ export class MCPClientOpenAI {
339
767
  toolDesc = `fetch_webpage → ${functionArgs.url}`;
340
768
  }
341
769
  }
342
- else if (functionName === "web_search" && functionArgs.query) {
770
+ else if (functionName === "web_search" && functionArgs?.query) {
343
771
  toolDesc = `web_search → "${functionArgs.query}"`;
344
772
  }
345
773
  onThinking?.(`🔧 ${toolDesc}`);
346
774
  try {
347
- const result = await this.client.callTool({ name: functionName, arguments: functionArgs });
348
- isolatedHistory.push({ type: "function_call_output", call_id: functionCall.call_id, output: JSON.stringify(result.content) });
775
+ const result = await this.client.callTool({
776
+ name: functionName,
777
+ arguments: functionArgs,
778
+ });
779
+ newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, this.compactToolResult(result.content)));
349
780
  }
350
781
  catch (error) {
351
- isolatedHistory.push({ type: "function_call_output", call_id: functionCall.call_id, output: `Error: ${error instanceof Error ? error.message : String(error)}` });
782
+ newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`));
352
783
  }
353
784
  }
785
+ pendingRawToolOutputs = newToolOutputs;
354
786
  continue;
355
787
  }
356
788
  for (const item of output) {
357
789
  if (item.type === "message" && item.role === "assistant") {
358
- for (const contentItem of item.content) {
790
+ for (const contentItem of item.content ?? []) {
359
791
  if (contentItem.type === "output_text")
360
792
  finalResponse += contentItem.text;
361
793
  }
@@ -365,13 +797,4 @@ export class MCPClientOpenAI {
365
797
  }
366
798
  return finalResponse;
367
799
  }
368
- clearHistory() {
369
- // Keep system message only if it genuinely is a system role message
370
- const first = this.conversationHistory[0];
371
- const systemMessage = first?.role === "system" ? this.conversationHistory[0] : undefined;
372
- this.conversationHistory = systemMessage ? [systemMessage] : [];
373
- }
374
- async cleanup() {
375
- await this.client.close();
376
- }
377
800
  }
@@ -446,6 +446,18 @@
446
446
  font-size: 12px;
447
447
  opacity: 0.6;
448
448
  margin-top: 6px;
449
+ display: flex;
450
+ align-items: center;
451
+ gap: 8px;
452
+ flex-wrap: wrap;
453
+ }
454
+
455
+ .mcp-chat-token-info {
456
+ font-size: 11px;
457
+ opacity: 0.75;
458
+ font-family: monospace;
459
+ border-left: 1px solid currentColor;
460
+ padding-left: 8px;
449
461
  }
450
462
 
451
463
  /* ───────────────────────────────────────────────
package/dist/types.d.ts CHANGED
@@ -8,6 +8,8 @@ export interface Message {
8
8
  isStreaming?: boolean;
9
9
  /** Hidden messages are sent to the AI but not shown in the chat bubble list */
10
10
  hidden?: boolean;
11
+ /** Token usage info shown in the footer of assistant messages — never fed back to AI */
12
+ tokenInfo?: string;
11
13
  }
12
14
  export interface ThinkingStep {
13
15
  id: string;
@@ -21,7 +23,7 @@ export interface MCPChatProps {
21
23
  className?: string;
22
24
  }
23
25
  export interface StreamEvent {
24
- type: "thinking" | "content" | "done" | "error";
26
+ type: "thinking" | "content" | "done" | "error" | "usage";
25
27
  message?: string;
26
28
  chunk?: string;
27
29
  }
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,CAAC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wFAAwF;IACxF,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nqminds/mcp-client",
3
- "version": "1.0.9",
3
+ "version": "1.0.12",
4
4
  "description": "Reusable MCP client component with AI chat interface",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",