@nqminds/mcp-client 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/MCPChat.d.ts.map +1 -1
- package/dist/MCPChat.js +13 -1
- package/dist/api-helpers.d.ts.map +1 -1
- package/dist/api-helpers.js +17 -0
- package/dist/openai-client.d.ts +132 -11
- package/dist/openai-client.d.ts.map +1 -1
- package/dist/openai-client.js +568 -184
- package/dist/styles/MCPChat.css +12 -0
- package/dist/types.d.ts +3 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/MCPChat.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,
|
|
1
|
+
{"version":3,"file":"MCPChat.d.ts","sourceRoot":"","sources":["../src/MCPChat.tsx"],"names":[],"mappings":"AAEA,OAAO,KAAmD,MAAM,OAAO,CAAC;AAGxE,OAAO,KAAK,EAAyB,YAAY,EAAe,MAAM,SAAS,CAAC;AA+ChF,wBAAgB,OAAO,CAAC,EACtB,aAAa,EACb,WAA6B,EAC7B,YAAiB,EACjB,SAAc,GACf,EAAE,YAAY,qBA6fd"}
|
package/dist/MCPChat.js
CHANGED
|
@@ -167,6 +167,16 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
|
|
|
167
167
|
return updated;
|
|
168
168
|
});
|
|
169
169
|
}
|
|
170
|
+
else if (parsed.type === "usage") {
|
|
171
|
+
setMessages((prev) => {
|
|
172
|
+
const updated = [...prev];
|
|
173
|
+
const lastIndex = updated.length - 1;
|
|
174
|
+
if (lastIndex >= 0) {
|
|
175
|
+
updated[lastIndex] = { ...updated[lastIndex], tokenInfo: parsed.message };
|
|
176
|
+
}
|
|
177
|
+
return updated;
|
|
178
|
+
});
|
|
179
|
+
}
|
|
170
180
|
else if (parsed.type === "error") {
|
|
171
181
|
throw new Error(parsed.message || "Stream error");
|
|
172
182
|
}
|
|
@@ -315,7 +325,9 @@ export function MCPChat({ companyNumber, apiEndpoint = "/api/mcp/chat", customSt
|
|
|
315
325
|
React.createElement("div", { className: "mcp-chat-message-bubble" },
|
|
316
326
|
msg.role === "assistant" ? (React.createElement("div", { className: "mcp-chat-message-content markdown-content" },
|
|
317
327
|
React.createElement(ReactMarkdown, { remarkPlugins: [remarkGfm] }, msg.content))) : (React.createElement("div", { className: "mcp-chat-message-content" }, msg.content)),
|
|
318
|
-
React.createElement("div", { className: "mcp-chat-message-timestamp" },
|
|
328
|
+
React.createElement("div", { className: "mcp-chat-message-timestamp" },
|
|
329
|
+
msg.timestamp.toLocaleTimeString(),
|
|
330
|
+
msg.role === "assistant" && msg.tokenInfo && (React.createElement("span", { className: "mcp-chat-token-info" }, msg.tokenInfo))))))),
|
|
319
331
|
isLoading && (React.createElement("div", { className: "mcp-chat-message mcp-chat-message-assistant" },
|
|
320
332
|
React.createElement("div", { className: "mcp-chat-thinking" },
|
|
321
333
|
React.createElement("div", { className: "mcp-chat-thinking-title" },
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,
|
|
1
|
+
{"version":3,"file":"api-helpers.d.ts","sourceRoot":"","sources":["../src/api-helpers.ts"],"names":[],"mappings":"AAAA;;GAEG;AAOH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,sBAAsB,IACnD,SAAS,OAAO,uBAmH/B;AAED;;GAEG;AACH,wBAAgB,qBAAqB,KACrB,SAAS,OAAO,uBAU/B;AAED;;GAEG;AACH,wBAAsB,iBAAiB,kBAKtC"}
|
package/dist/api-helpers.js
CHANGED
|
@@ -47,6 +47,23 @@ export function createMCPChatHandler(config) {
|
|
|
47
47
|
sendEvent("thinking", { message: thinkingMessage });
|
|
48
48
|
}, abortController.signal, // Pass abort signal to enable cancellation
|
|
49
49
|
bypassSystemPrompt);
|
|
50
|
+
// Emit token usage summary for debugging
|
|
51
|
+
const usage = client.getUsage();
|
|
52
|
+
if (usage.inputTokens > 0 || usage.outputTokens > 0) {
|
|
53
|
+
const parts = [
|
|
54
|
+
`in: ${usage.inputTokens.toLocaleString()}`,
|
|
55
|
+
`out: ${usage.outputTokens.toLocaleString()}`,
|
|
56
|
+
`total: ${usage.totalTokens.toLocaleString()}`,
|
|
57
|
+
];
|
|
58
|
+
if (usage.cachedTokens > 0) {
|
|
59
|
+
const cachedPct = Math.round((usage.cachedTokens / usage.inputTokens) * 100);
|
|
60
|
+
parts.push(`cached: ${usage.cachedTokens.toLocaleString()} (${cachedPct}%)`);
|
|
61
|
+
}
|
|
62
|
+
if (usage.compactedTurns > 0) {
|
|
63
|
+
parts.push(`compacted: ${usage.compactedTurns} turn${usage.compactedTurns !== 1 ? "s" : ""}`);
|
|
64
|
+
}
|
|
65
|
+
sendEvent("usage", { message: parts.join(" | ") });
|
|
66
|
+
}
|
|
50
67
|
// Check if aborted before streaming response
|
|
51
68
|
if (abortController.signal.aborted) {
|
|
52
69
|
return;
|
package/dist/openai-client.d.ts
CHANGED
|
@@ -1,6 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* OpenAI-powered MCP Client
|
|
3
|
-
*
|
|
3
|
+
*
|
|
4
|
+
* Compaction strategy:
|
|
5
|
+
* - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
|
|
6
|
+
* - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
|
|
7
|
+
* compact the oldest cold portion and keep the newest portion verbatim.
|
|
8
|
+
* - Feed the returned compaction object back into future requests.
|
|
9
|
+
*
|
|
10
|
+
* Notes:
|
|
11
|
+
* - This is written to align with the OpenAI Responses API shape:
|
|
12
|
+
* - response usage fields
|
|
13
|
+
* - previous_response_id
|
|
14
|
+
* - input token counting
|
|
15
|
+
* - response compaction
|
|
16
|
+
* - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
|
|
17
|
+
* by SDK version. The logic here is the important part.
|
|
4
18
|
*/
|
|
5
19
|
export interface MCPClientConfig {
|
|
6
20
|
openaiApiKey: string;
|
|
@@ -8,30 +22,137 @@ export interface MCPClientConfig {
|
|
|
8
22
|
openaiModel?: string;
|
|
9
23
|
clientName?: string;
|
|
10
24
|
clientVersion?: string;
|
|
25
|
+
/**
|
|
26
|
+
* Trigger compaction when the last measured input tokens reaches this threshold.
|
|
27
|
+
* Example policy:
|
|
28
|
+
* - compact when last measured input >= 200k
|
|
29
|
+
*/
|
|
30
|
+
compactTriggerInputTokens?: number;
|
|
31
|
+
/**
|
|
32
|
+
* Keep roughly this many of the most recent input tokens uncompacted.
|
|
33
|
+
* Example policy:
|
|
34
|
+
* - retain last ~100k uncompacted
|
|
35
|
+
*/
|
|
36
|
+
hotContextTargetInputTokens?: number;
|
|
37
|
+
/**
|
|
38
|
+
* Guardrail for unusually large tool outputs stored in history.
|
|
39
|
+
*/
|
|
40
|
+
maxToolOutputChars?: number;
|
|
41
|
+
}
|
|
42
|
+
interface UsageStats {
|
|
43
|
+
inputTokens: number;
|
|
44
|
+
outputTokens: number;
|
|
45
|
+
totalTokens: number;
|
|
46
|
+
cachedTokens: number;
|
|
47
|
+
reasoningTokens: number;
|
|
48
|
+
compactedTurns: number;
|
|
11
49
|
}
|
|
12
50
|
export declare class MCPClientOpenAI {
|
|
13
51
|
private client;
|
|
14
52
|
private openai;
|
|
15
53
|
private transport;
|
|
54
|
+
/**
|
|
55
|
+
* Instructions are sent using the Responses API `instructions` field,
|
|
56
|
+
* not inserted as a fake message inside the rolling conversation items.
|
|
57
|
+
*/
|
|
58
|
+
private instructions;
|
|
59
|
+
/**
|
|
60
|
+
* Rolling uncompacted conversation items.
|
|
61
|
+
* This contains the most recent "hot" context only.
|
|
62
|
+
*/
|
|
16
63
|
private conversationHistory;
|
|
17
|
-
|
|
64
|
+
/**
|
|
65
|
+
* Opaque compaction object returned by OpenAI.
|
|
66
|
+
* This represents older "cold" context that has been compacted.
|
|
67
|
+
*/
|
|
68
|
+
private compaction;
|
|
69
|
+
/**
|
|
70
|
+
* Last measured input tokens from a real Responses API call.
|
|
71
|
+
*/
|
|
72
|
+
private lastInputTokens;
|
|
73
|
+
/**
|
|
74
|
+
* Latest usage snapshot for logging/inspection.
|
|
75
|
+
*/
|
|
76
|
+
private lastUsage;
|
|
18
77
|
private config;
|
|
19
78
|
constructor(config: MCPClientConfig);
|
|
20
|
-
|
|
79
|
+
connect(): Promise<void>;
|
|
80
|
+
cleanup(): Promise<void>;
|
|
81
|
+
clearHistory(): void;
|
|
82
|
+
getUsage(): UsageStats;
|
|
21
83
|
/**
|
|
22
|
-
* Fetches the system prompt from the MCP server's registered "system-prompt" prompt
|
|
23
|
-
*
|
|
24
|
-
* Direct Prompt (bypass mode) skips this entirely.
|
|
84
|
+
* Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
|
|
85
|
+
* Cached per client session.
|
|
25
86
|
*/
|
|
26
87
|
private ensureSystemPrompt;
|
|
27
|
-
|
|
88
|
+
/**
|
|
89
|
+
* Build request input:
|
|
90
|
+
* [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
|
|
91
|
+
*/
|
|
92
|
+
private buildInput;
|
|
93
|
+
/**
|
|
94
|
+
* Count input tokens before making a request.
|
|
95
|
+
* Falls back to a simple rough estimate if the SDK method is unavailable.
|
|
96
|
+
*/
|
|
97
|
+
private countInputTokens;
|
|
98
|
+
/**
|
|
99
|
+
* Very rough fallback estimator.
|
|
100
|
+
* Only used if token counting endpoint is unavailable in the SDK version in use.
|
|
101
|
+
*/
|
|
102
|
+
private roughEstimateInputTokens;
|
|
103
|
+
/**
|
|
104
|
+
* Normalize usage from Responses API.
|
|
105
|
+
*/
|
|
106
|
+
private captureUsage;
|
|
107
|
+
/**
|
|
108
|
+
* Compact oversized tool outputs before storing them in rolling history.
|
|
109
|
+
*/
|
|
110
|
+
private compactToolResult;
|
|
111
|
+
private makeUserMessage;
|
|
112
|
+
private makeFunctionOutput;
|
|
113
|
+
/**
|
|
114
|
+
* We treat a "turn" boundary as:
|
|
115
|
+
* - starts at a user message
|
|
116
|
+
* - ends right before the next user message, or end of array
|
|
117
|
+
*
|
|
118
|
+
* This lets us compact or trim in coherent chunks instead of arbitrary items.
|
|
119
|
+
*/
|
|
120
|
+
private getTurnBoundaries;
|
|
121
|
+
/**
|
|
122
|
+
* Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
|
|
123
|
+
* Older turns become compaction candidates.
|
|
124
|
+
*/
|
|
125
|
+
private splitColdAndHotHistory;
|
|
126
|
+
/**
|
|
127
|
+
* Incrementally update compaction using the cold slice only.
|
|
128
|
+
*/
|
|
129
|
+
private compactColdHistory;
|
|
130
|
+
/**
|
|
131
|
+
* Proactively compact when the history has grown past the trigger.
|
|
132
|
+
* Keeps the newest hot window uncompacted and compacts the older cold window.
|
|
133
|
+
*/
|
|
134
|
+
private maybeCompactHistory;
|
|
135
|
+
/**
|
|
136
|
+
* Keep history from growing pathologically in item count even before token limits.
|
|
137
|
+
* Uses turn-aware trimming, not arbitrary item slicing.
|
|
138
|
+
*/
|
|
139
|
+
private enforceHardHistoryLimitByTurns;
|
|
140
|
+
/**
|
|
141
|
+
* Build MCP tool list for OpenAI Responses API.
|
|
142
|
+
*/
|
|
143
|
+
private buildTools;
|
|
144
|
+
/**
|
|
145
|
+
* Create a response against the current full context.
|
|
146
|
+
*/
|
|
147
|
+
private createResponse;
|
|
148
|
+
/**
|
|
149
|
+
* Main query method with rolling compaction.
|
|
150
|
+
*/
|
|
28
151
|
processQuery(query: string, onThinking?: (message: string) => void, abortSignal?: AbortSignal, bypassSystemPrompt?: boolean): Promise<string>;
|
|
29
152
|
/**
|
|
30
|
-
*
|
|
31
|
-
* Used by the Direct Prompt dev tool to test prompts verbatim.
|
|
153
|
+
* Raw mode: no cached instructions, no rolling history, no compaction state.
|
|
32
154
|
*/
|
|
33
155
|
private processRawQuery;
|
|
34
|
-
clearHistory(): void;
|
|
35
|
-
cleanup(): Promise<void>;
|
|
36
156
|
}
|
|
157
|
+
export {};
|
|
37
158
|
//# sourceMappingURL=openai-client.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,UAAU,UAAU;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAiBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IAExC;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAuB;IAE3C;;;OAGG;IACH,OAAO,CAAC,mBAAmB,CAA2B;IAEtD;;;OAGG;IACH,OAAO,CAAC,UAAU,CAGhB;IAEF;;OAEG;IACH,OAAO,CAAC,eAAe,CAAK;IAE5B;;OAEG;IACH,OAAO,CAAC,SAAS,CAOf;IAEF,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;IAoC7B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,YAAY,IAAI,IAAI;IAiBpB,QAAQ,IAAI,UAAU;IAItB;;;OAGG;YACW,kBAAkB;IA0BhC;;;OAGG;IACH,OAAO,CAAC,UAAU;IAalB;;;OAGG;YACW,gBAAgB;IAuB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,OAAO,CAAC,YAAY;IAoBpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA8EzB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IA2CzB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;OAEG;YACW,kBAAkB;IA4ChC;;;OAGG;YACW,mBAAmB;IAoBjC;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAatC;;OAEG;YACW,UAAU;IAiBxB;;OAEG;YACW,cAAc;IAoB5B;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EACtC,WAAW,CAAC,EAAE,WAAW,EACzB,kBAAkB,UAAQ,GACzB,OAAO,CAAC,MAAM,CAAC;IA6OlB;;OAEG;YACW,eAAe;CAwG9B"}
|
package/dist/openai-client.js
CHANGED
|
@@ -1,31 +1,78 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* OpenAI-powered MCP Client
|
|
3
|
-
*
|
|
3
|
+
*
|
|
4
|
+
* Compaction strategy:
|
|
5
|
+
* - Keep the newest HOT_CONTEXT_TARGET_INPUT_TOKENS worth of conversation uncompressed.
|
|
6
|
+
* - Once the last measured input reaches COMPACT_TRIGGER_INPUT_TOKENS,
|
|
7
|
+
* compact the oldest cold portion and keep the newest portion verbatim.
|
|
8
|
+
* - Feed the returned compaction object back into future requests.
|
|
9
|
+
*
|
|
10
|
+
* Notes:
|
|
11
|
+
* - This is written to align with the OpenAI Responses API shape:
|
|
12
|
+
* - response usage fields
|
|
13
|
+
* - previous_response_id
|
|
14
|
+
* - input token counting
|
|
15
|
+
* - response compaction
|
|
16
|
+
* - The SDK surface for responses.compact / responses.inputTokens.count may differ slightly
|
|
17
|
+
* by SDK version. The logic here is the important part.
|
|
4
18
|
*/
|
|
5
19
|
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
|
6
20
|
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
|
|
7
21
|
import OpenAI from "openai";
|
|
8
22
|
export class MCPClientOpenAI {
|
|
9
23
|
constructor(config) {
|
|
24
|
+
/**
|
|
25
|
+
* Instructions are sent using the Responses API `instructions` field,
|
|
26
|
+
* not inserted as a fake message inside the rolling conversation items.
|
|
27
|
+
*/
|
|
28
|
+
this.instructions = null;
|
|
29
|
+
/**
|
|
30
|
+
* Rolling uncompacted conversation items.
|
|
31
|
+
* This contains the most recent "hot" context only.
|
|
32
|
+
*/
|
|
10
33
|
this.conversationHistory = [];
|
|
11
|
-
|
|
34
|
+
/**
|
|
35
|
+
* Opaque compaction object returned by OpenAI.
|
|
36
|
+
* This represents older "cold" context that has been compacted.
|
|
37
|
+
*/
|
|
38
|
+
this.compaction = {
|
|
39
|
+
item: null,
|
|
40
|
+
compactedTurns: 0,
|
|
41
|
+
};
|
|
42
|
+
/**
|
|
43
|
+
* Last measured input tokens from a real Responses API call.
|
|
44
|
+
*/
|
|
45
|
+
this.lastInputTokens = 0;
|
|
46
|
+
/**
|
|
47
|
+
* Latest usage snapshot for logging/inspection.
|
|
48
|
+
*/
|
|
49
|
+
this.lastUsage = {
|
|
50
|
+
inputTokens: 0,
|
|
51
|
+
outputTokens: 0,
|
|
52
|
+
totalTokens: 0,
|
|
53
|
+
cachedTokens: 0,
|
|
54
|
+
reasoningTokens: 0,
|
|
55
|
+
compactedTurns: 0,
|
|
56
|
+
};
|
|
12
57
|
this.config = {
|
|
13
58
|
openaiApiKey: config.openaiApiKey,
|
|
14
59
|
mcpServerCommand: config.mcpServerCommand,
|
|
15
|
-
openaiModel: config.openaiModel || "
|
|
60
|
+
openaiModel: config.openaiModel || "gpt-5-mini",
|
|
16
61
|
clientName: config.clientName || "mcp-flair-client",
|
|
17
62
|
clientVersion: config.clientVersion || "1.0.0",
|
|
63
|
+
compactTriggerInputTokens: config.compactTriggerInputTokens ?? 200000,
|
|
64
|
+
hotContextTargetInputTokens: config.hotContextTargetInputTokens ?? 100000,
|
|
65
|
+
maxToolOutputChars: config.maxToolOutputChars ?? 20000,
|
|
18
66
|
};
|
|
19
67
|
this.openai = new OpenAI({
|
|
20
68
|
apiKey: this.config.openaiApiKey,
|
|
21
69
|
});
|
|
22
|
-
// Parse the server command and args
|
|
23
70
|
const serverCmd = this.config.mcpServerCommand.split(" ");
|
|
24
71
|
const command = serverCmd[0];
|
|
25
72
|
const args = serverCmd.slice(1);
|
|
26
73
|
this.transport = new StdioClientTransport({
|
|
27
|
-
command
|
|
28
|
-
args
|
|
74
|
+
command,
|
|
75
|
+
args,
|
|
29
76
|
});
|
|
30
77
|
this.client = new Client({
|
|
31
78
|
name: this.config.clientName,
|
|
@@ -33,177 +80,525 @@ export class MCPClientOpenAI {
|
|
|
33
80
|
}, {
|
|
34
81
|
capabilities: {},
|
|
35
82
|
});
|
|
36
|
-
|
|
37
|
-
|
|
83
|
+
}
|
|
84
|
+
async connect() {
|
|
85
|
+
await this.client.connect(this.transport);
|
|
86
|
+
}
|
|
87
|
+
async cleanup() {
|
|
88
|
+
await this.client.close();
|
|
89
|
+
}
|
|
90
|
+
clearHistory() {
|
|
38
91
|
this.conversationHistory = [];
|
|
92
|
+
this.compaction = {
|
|
93
|
+
item: null,
|
|
94
|
+
compactedTurns: 0,
|
|
95
|
+
};
|
|
96
|
+
this.lastInputTokens = 0;
|
|
97
|
+
this.lastUsage = {
|
|
98
|
+
inputTokens: 0,
|
|
99
|
+
outputTokens: 0,
|
|
100
|
+
totalTokens: 0,
|
|
101
|
+
cachedTokens: 0,
|
|
102
|
+
reasoningTokens: 0,
|
|
103
|
+
compactedTurns: 0,
|
|
104
|
+
};
|
|
39
105
|
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const compactionResponse = await this.openai.responses.compact({
|
|
43
|
-
model: this.config.openaiModel,
|
|
44
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
45
|
-
input: this.conversationHistory,
|
|
46
|
-
});
|
|
47
|
-
this.conversationHistory = compactionResponse.output;
|
|
48
|
-
this.lastCompaction = Date.now();
|
|
49
|
-
}
|
|
50
|
-
catch (error) {
|
|
51
|
-
// Keep system message and last 25 items
|
|
52
|
-
if (this.conversationHistory.length > 26) {
|
|
53
|
-
const systemMessage = this.conversationHistory[0];
|
|
54
|
-
const recentItems = this.conversationHistory.slice(-25);
|
|
55
|
-
this.conversationHistory = [systemMessage, ...recentItems];
|
|
56
|
-
}
|
|
57
|
-
}
|
|
106
|
+
getUsage() {
|
|
107
|
+
return { ...this.lastUsage, compactedTurns: this.compaction.compactedTurns };
|
|
58
108
|
}
|
|
59
109
|
/**
|
|
60
|
-
* Fetches the system prompt from the MCP server's registered "system-prompt" prompt
|
|
61
|
-
*
|
|
62
|
-
* Direct Prompt (bypass mode) skips this entirely.
|
|
110
|
+
* Fetches the system prompt from the MCP server's registered "system-prompt" prompt.
|
|
111
|
+
* Cached per client session.
|
|
63
112
|
*/
|
|
64
113
|
async ensureSystemPrompt() {
|
|
65
|
-
|
|
66
|
-
if (this.conversationHistory[0]?.role === "system")
|
|
114
|
+
if (this.instructions)
|
|
67
115
|
return;
|
|
68
116
|
try {
|
|
117
|
+
// SDK typing may not expose getPrompt.
|
|
69
118
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
70
119
|
const result = await this.client.getPrompt({ name: "system-prompt" });
|
|
71
120
|
const parts = [];
|
|
72
|
-
for (const msg of result.messages) {
|
|
121
|
+
for (const msg of result.messages ?? []) {
|
|
122
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
73
123
|
const c = msg.content;
|
|
74
124
|
if (typeof c === "string")
|
|
75
125
|
parts.push(c);
|
|
76
126
|
else if (c?.text)
|
|
77
127
|
parts.push(c.text);
|
|
78
128
|
}
|
|
79
|
-
const text = parts.join("\n\n");
|
|
129
|
+
const text = parts.join("\n\n").trim();
|
|
80
130
|
if (text) {
|
|
81
|
-
this.
|
|
82
|
-
{
|
|
83
|
-
type: "message",
|
|
84
|
-
role: "system",
|
|
85
|
-
content: [{ type: "input_text", text }],
|
|
86
|
-
},
|
|
87
|
-
...this.conversationHistory,
|
|
88
|
-
];
|
|
131
|
+
this.instructions = text;
|
|
89
132
|
}
|
|
90
133
|
}
|
|
91
134
|
catch (error) {
|
|
92
135
|
console.error("[MCPClient] Failed to fetch system prompt from MCP server:", error);
|
|
136
|
+
this.instructions = null;
|
|
93
137
|
}
|
|
94
138
|
}
|
|
95
|
-
|
|
96
|
-
|
|
139
|
+
/**
|
|
140
|
+
* Build request input:
|
|
141
|
+
* [compactionObject?, ...recentUncompactedHistory, ...newInputItems]
|
|
142
|
+
*/
|
|
143
|
+
buildInput(newItems = []) {
|
|
144
|
+
const input = [];
|
|
145
|
+
if (this.compaction.item) {
|
|
146
|
+
input.push(this.compaction.item);
|
|
147
|
+
}
|
|
148
|
+
input.push(...this.conversationHistory);
|
|
149
|
+
input.push(...newItems);
|
|
150
|
+
return input;
|
|
151
|
+
}
|
|
152
|
+
/**
|
|
153
|
+
* Count input tokens before making a request.
|
|
154
|
+
* Falls back to a simple rough estimate if the SDK method is unavailable.
|
|
155
|
+
*/
|
|
156
|
+
async countInputTokens(input) {
|
|
157
|
+
try {
|
|
158
|
+
// Some SDK versions may expose this as responses.inputTokens.count(...)
|
|
159
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
160
|
+
const result = await this.openai.responses.inputTokens.count({
|
|
161
|
+
model: this.config.openaiModel,
|
|
162
|
+
input,
|
|
163
|
+
instructions: this.instructions ?? undefined,
|
|
164
|
+
tools: [],
|
|
165
|
+
});
|
|
166
|
+
// Common guess for returned shape
|
|
167
|
+
return (result?.input_tokens ??
|
|
168
|
+
result?.total_tokens ??
|
|
169
|
+
result?.count ??
|
|
170
|
+
this.roughEstimateInputTokens(input));
|
|
171
|
+
}
|
|
172
|
+
catch {
|
|
173
|
+
return this.roughEstimateInputTokens(input);
|
|
174
|
+
}
|
|
97
175
|
}
|
|
176
|
+
/**
|
|
177
|
+
* Very rough fallback estimator.
|
|
178
|
+
* Only used if token counting endpoint is unavailable in the SDK version in use.
|
|
179
|
+
*/
|
|
180
|
+
roughEstimateInputTokens(input) {
|
|
181
|
+
const serialized = JSON.stringify({
|
|
182
|
+
instructions: this.instructions,
|
|
183
|
+
input,
|
|
184
|
+
});
|
|
185
|
+
// Very rough English-ish heuristic.
|
|
186
|
+
return Math.ceil(serialized.length / 4);
|
|
187
|
+
}
|
|
188
|
+
/**
|
|
189
|
+
* Normalize usage from Responses API.
|
|
190
|
+
*/
|
|
191
|
+
captureUsage(response) {
|
|
192
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
193
|
+
const usage = response?.usage ?? {};
|
|
194
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
195
|
+
const inputDetails = usage?.input_tokens_details ?? {};
|
|
196
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
197
|
+
const outputDetails = usage?.output_tokens_details ?? {};
|
|
198
|
+
this.lastUsage = {
|
|
199
|
+
inputTokens: usage.input_tokens ?? 0,
|
|
200
|
+
outputTokens: usage.output_tokens ?? 0,
|
|
201
|
+
totalTokens: usage.total_tokens ?? 0,
|
|
202
|
+
cachedTokens: inputDetails.cached_tokens ?? 0,
|
|
203
|
+
reasoningTokens: outputDetails.reasoning_tokens ?? 0,
|
|
204
|
+
compactedTurns: this.compaction.compactedTurns,
|
|
205
|
+
};
|
|
206
|
+
this.lastInputTokens = this.lastUsage.inputTokens;
|
|
207
|
+
}
|
|
208
|
+
/**
|
|
209
|
+
* Compact oversized tool outputs before storing them in rolling history.
|
|
210
|
+
*/
|
|
211
|
+
compactToolResult(value) {
|
|
212
|
+
const seen = new WeakSet();
|
|
213
|
+
const prune = (v) => {
|
|
214
|
+
if (v == null)
|
|
215
|
+
return v;
|
|
216
|
+
if (typeof v === "string") {
|
|
217
|
+
if (v.length <= this.config.maxToolOutputChars)
|
|
218
|
+
return v;
|
|
219
|
+
return (v.slice(0, this.config.maxToolOutputChars) +
|
|
220
|
+
`\n...[truncated ${v.length - this.config.maxToolOutputChars} chars]`);
|
|
221
|
+
}
|
|
222
|
+
if (typeof v !== "object")
|
|
223
|
+
return v;
|
|
224
|
+
if (Array.isArray(v)) {
|
|
225
|
+
const maxItems = 30;
|
|
226
|
+
const sliced = v.slice(0, maxItems).map(prune);
|
|
227
|
+
if (v.length > maxItems) {
|
|
228
|
+
sliced.push(`...[truncated ${v.length - maxItems} items]`);
|
|
229
|
+
}
|
|
230
|
+
return sliced;
|
|
231
|
+
}
|
|
232
|
+
if (seen.has(v))
|
|
233
|
+
return "[circular]";
|
|
234
|
+
seen.add(v);
|
|
235
|
+
const obj = v;
|
|
236
|
+
const out = {};
|
|
237
|
+
const entries = Object.entries(obj);
|
|
238
|
+
// Prefer keeping fewer, more informative fields.
|
|
239
|
+
const preferredFirst = [
|
|
240
|
+
"title",
|
|
241
|
+
"name",
|
|
242
|
+
"id",
|
|
243
|
+
"url",
|
|
244
|
+
"summary",
|
|
245
|
+
"description",
|
|
246
|
+
"text",
|
|
247
|
+
"content",
|
|
248
|
+
"status",
|
|
249
|
+
"result",
|
|
250
|
+
"items",
|
|
251
|
+
"data",
|
|
252
|
+
];
|
|
253
|
+
const sorted = entries.sort(([a], [b]) => {
|
|
254
|
+
const ai = preferredFirst.indexOf(a);
|
|
255
|
+
const bi = preferredFirst.indexOf(b);
|
|
256
|
+
const av = ai === -1 ? 999 : ai;
|
|
257
|
+
const bv = bi === -1 ? 999 : bi;
|
|
258
|
+
return av - bv;
|
|
259
|
+
});
|
|
260
|
+
const maxFields = 25;
|
|
261
|
+
for (const [k, val] of sorted.slice(0, maxFields)) {
|
|
262
|
+
out[k] = prune(val);
|
|
263
|
+
}
|
|
264
|
+
if (entries.length > maxFields) {
|
|
265
|
+
out.__truncated_fields__ = entries.length - maxFields;
|
|
266
|
+
}
|
|
267
|
+
return out;
|
|
268
|
+
};
|
|
269
|
+
try {
|
|
270
|
+
return JSON.stringify(prune(value));
|
|
271
|
+
}
|
|
272
|
+
catch {
|
|
273
|
+
const s = String(value);
|
|
274
|
+
return s.length <= this.config.maxToolOutputChars
|
|
275
|
+
? s
|
|
276
|
+
: s.slice(0, this.config.maxToolOutputChars) +
|
|
277
|
+
`\n...[truncated ${s.length - this.config.maxToolOutputChars} chars]`;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
makeUserMessage(text) {
|
|
281
|
+
return {
|
|
282
|
+
type: "message",
|
|
283
|
+
role: "user",
|
|
284
|
+
content: [{ type: "input_text", text }],
|
|
285
|
+
};
|
|
286
|
+
}
|
|
287
|
+
makeFunctionOutput(callId, output) {
|
|
288
|
+
return {
|
|
289
|
+
type: "function_call_output",
|
|
290
|
+
call_id: callId,
|
|
291
|
+
output,
|
|
292
|
+
};
|
|
293
|
+
}
|
|
294
|
+
/**
|
|
295
|
+
* We treat a "turn" boundary as:
|
|
296
|
+
* - starts at a user message
|
|
297
|
+
* - ends right before the next user message, or end of array
|
|
298
|
+
*
|
|
299
|
+
* This lets us compact or trim in coherent chunks instead of arbitrary items.
|
|
300
|
+
*/
|
|
301
|
+
getTurnBoundaries(items) {
|
|
302
|
+
const boundaries = [];
|
|
303
|
+
let currentStart = -1;
|
|
304
|
+
for (let i = 0; i < items.length; i++) {
|
|
305
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
306
|
+
const item = items[i];
|
|
307
|
+
const isUserMessage = item?.type === "message" && item?.role === "user";
|
|
308
|
+
if (isUserMessage) {
|
|
309
|
+
if (currentStart !== -1) {
|
|
310
|
+
const slice = items.slice(currentStart, i);
|
|
311
|
+
boundaries.push({
|
|
312
|
+
startIndex: currentStart,
|
|
313
|
+
endIndex: i - 1,
|
|
314
|
+
estimatedTokens: this.roughEstimateInputTokens(slice),
|
|
315
|
+
});
|
|
316
|
+
}
|
|
317
|
+
currentStart = i;
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
if (currentStart !== -1) {
|
|
321
|
+
const slice = items.slice(currentStart);
|
|
322
|
+
boundaries.push({
|
|
323
|
+
startIndex: currentStart,
|
|
324
|
+
endIndex: items.length - 1,
|
|
325
|
+
estimatedTokens: this.roughEstimateInputTokens(slice),
|
|
326
|
+
});
|
|
327
|
+
}
|
|
328
|
+
// If there are no user turns, treat all as one chunk.
|
|
329
|
+
if (boundaries.length === 0 && items.length > 0) {
|
|
330
|
+
boundaries.push({
|
|
331
|
+
startIndex: 0,
|
|
332
|
+
endIndex: items.length - 1,
|
|
333
|
+
estimatedTokens: this.roughEstimateInputTokens(items),
|
|
334
|
+
});
|
|
335
|
+
}
|
|
336
|
+
return boundaries;
|
|
337
|
+
}
|
|
338
|
+
/**
|
|
339
|
+
* Keep the newest turns whose estimated sum stays within hotContextTargetInputTokens.
|
|
340
|
+
* Older turns become compaction candidates.
|
|
341
|
+
*/
|
|
342
|
+
splitColdAndHotHistory(items) {
|
|
343
|
+
const turns = this.getTurnBoundaries(items);
|
|
344
|
+
if (turns.length === 0) {
|
|
345
|
+
return { coldItems: [], hotItems: items };
|
|
346
|
+
}
|
|
347
|
+
let running = 0;
|
|
348
|
+
let keepFromTurnIndex = turns.length;
|
|
349
|
+
for (let i = turns.length - 1; i >= 0; i--) {
|
|
350
|
+
const next = running + turns[i].estimatedTokens;
|
|
351
|
+
if (next > this.config.hotContextTargetInputTokens) {
|
|
352
|
+
break;
|
|
353
|
+
}
|
|
354
|
+
running = next;
|
|
355
|
+
keepFromTurnIndex = i;
|
|
356
|
+
}
|
|
357
|
+
if (keepFromTurnIndex === turns.length) {
|
|
358
|
+
// Even the newest turn is too large; keep at least the latest turn hot.
|
|
359
|
+
const lastTurn = turns[turns.length - 1];
|
|
360
|
+
return {
|
|
361
|
+
coldItems: items.slice(0, lastTurn.startIndex),
|
|
362
|
+
hotItems: items.slice(lastTurn.startIndex),
|
|
363
|
+
};
|
|
364
|
+
}
|
|
365
|
+
const splitIndex = turns[keepFromTurnIndex].startIndex;
|
|
366
|
+
return {
|
|
367
|
+
coldItems: items.slice(0, splitIndex),
|
|
368
|
+
hotItems: items.slice(splitIndex),
|
|
369
|
+
};
|
|
370
|
+
}
|
|
371
|
+
/**
|
|
372
|
+
* Incrementally update compaction using the cold slice only.
|
|
373
|
+
*/
|
|
374
|
+
async compactColdHistory(coldItems) {
|
|
375
|
+
if (coldItems.length === 0)
|
|
376
|
+
return;
|
|
377
|
+
try {
|
|
378
|
+
// Depending on SDK version, the exact shape may vary.
|
|
379
|
+
// The intent is:
|
|
380
|
+
// - compact [existing compaction object?, ...new cold items]
|
|
381
|
+
// - receive an updated opaque compaction item
|
|
382
|
+
const compactInput = [];
|
|
383
|
+
if (this.compaction.item)
|
|
384
|
+
compactInput.push(this.compaction.item);
|
|
385
|
+
compactInput.push(...coldItems);
|
|
386
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
387
|
+
const response = await this.openai.responses.compact({
|
|
388
|
+
model: this.config.openaiModel,
|
|
389
|
+
input: compactInput,
|
|
390
|
+
instructions: this.instructions ?? undefined,
|
|
391
|
+
});
|
|
392
|
+
// We expect the new compaction object to be reusable as input.
|
|
393
|
+
// Some SDKs may return `output`, some `compacted`, etc.
|
|
394
|
+
const newItem = response?.output?.[0] ??
|
|
395
|
+
response?.compacted ??
|
|
396
|
+
response?.item ??
|
|
397
|
+
null;
|
|
398
|
+
if (newItem) {
|
|
399
|
+
this.compaction.item = newItem;
|
|
400
|
+
this.compaction.compactedTurns += this.getTurnBoundaries(coldItems).length;
|
|
401
|
+
}
|
|
402
|
+
else {
|
|
403
|
+
throw new Error("Compaction response did not include a reusable compaction item");
|
|
404
|
+
}
|
|
405
|
+
console.error(`[MCPClient] Compacted ${coldItems.length} old items. Total compacted turns: ${this.compaction.compactedTurns}`);
|
|
406
|
+
}
|
|
407
|
+
catch (error) {
|
|
408
|
+
// Fallback: if compaction fails, just drop the cold part rather than
|
|
409
|
+
// keeping everything and risking repeated context overflows.
|
|
410
|
+
console.error("[MCPClient] Compaction failed, dropping cold history:", error);
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
/**
|
|
414
|
+
* Proactively compact when the history has grown past the trigger.
|
|
415
|
+
* Keeps the newest hot window uncompacted and compacts the older cold window.
|
|
416
|
+
*/
|
|
417
|
+
async maybeCompactHistory() {
|
|
418
|
+
if (this.lastInputTokens < this.config.compactTriggerInputTokens) {
|
|
419
|
+
return;
|
|
420
|
+
}
|
|
421
|
+
const { coldItems, hotItems } = this.splitColdAndHotHistory(this.conversationHistory);
|
|
422
|
+
if (coldItems.length === 0) {
|
|
423
|
+
return;
|
|
424
|
+
}
|
|
425
|
+
await this.compactColdHistory(coldItems);
|
|
426
|
+
this.conversationHistory = hotItems;
|
|
427
|
+
this.lastInputTokens = 0;
|
|
428
|
+
console.error(`[MCPClient] Applied rolling compaction. Kept ${hotItems.length} recent items uncompacted.`);
|
|
429
|
+
}
|
|
430
|
+
/**
|
|
431
|
+
* Keep history from growing pathologically in item count even before token limits.
|
|
432
|
+
* Uses turn-aware trimming, not arbitrary item slicing.
|
|
433
|
+
*/
|
|
434
|
+
enforceHardHistoryLimitByTurns(maxTurns = 20) {
|
|
435
|
+
const turns = this.getTurnBoundaries(this.conversationHistory);
|
|
436
|
+
if (turns.length <= maxTurns)
|
|
437
|
+
return;
|
|
438
|
+
const keepFrom = turns[turns.length - maxTurns].startIndex;
|
|
439
|
+
const dropped = this.conversationHistory.slice(0, keepFrom);
|
|
440
|
+
this.conversationHistory = this.conversationHistory.slice(keepFrom);
|
|
441
|
+
console.error(`[MCPClient] Hard-trimmed ${dropped.length} old uncompacted items, preserving last ${maxTurns} turns`);
|
|
442
|
+
}
|
|
443
|
+
/**
|
|
444
|
+
* Build MCP tool list for OpenAI Responses API.
|
|
445
|
+
*/
|
|
446
|
+
async buildTools() {
|
|
447
|
+
const toolsResponse = await this.client.listTools();
|
|
448
|
+
return [
|
|
449
|
+
{ type: "web_search_preview" },
|
|
450
|
+
...toolsResponse.tools
|
|
451
|
+
.filter((t) => t.name !== "web_search" && t.name !== "fetch_webpage")
|
|
452
|
+
.map((tool) => ({
|
|
453
|
+
type: "function",
|
|
454
|
+
name: tool.name,
|
|
455
|
+
description: tool.description || "",
|
|
456
|
+
parameters: tool.inputSchema,
|
|
457
|
+
strict: false,
|
|
458
|
+
})),
|
|
459
|
+
];
|
|
460
|
+
}
|
|
461
|
+
/**
|
|
462
|
+
* Create a response against the current full context.
|
|
463
|
+
*/
|
|
464
|
+
async createResponse(params) {
|
|
465
|
+
const response = await this.openai.responses.create({
|
|
466
|
+
model: this.config.openaiModel,
|
|
467
|
+
instructions: this.instructions ?? undefined,
|
|
468
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
469
|
+
input: params.input,
|
|
470
|
+
tools: params.tools,
|
|
471
|
+
previous_response_id: params.previousResponseId,
|
|
472
|
+
truncation: "disabled",
|
|
473
|
+
prompt_cache_retention: "24h",
|
|
474
|
+
});
|
|
475
|
+
this.captureUsage(response);
|
|
476
|
+
return response;
|
|
477
|
+
}
|
|
478
|
+
/**
|
|
479
|
+
* Main query method with rolling compaction.
|
|
480
|
+
*/
|
|
98
481
|
async processQuery(query, onThinking, abortSignal, bypassSystemPrompt = false) {
|
|
99
|
-
// Check for cancellation at start
|
|
100
482
|
if (abortSignal?.aborted) {
|
|
101
483
|
throw new Error("Request was cancelled");
|
|
102
484
|
}
|
|
103
|
-
// Bypass mode: send the raw prompt directly without system message or conversation history
|
|
104
485
|
if (bypassSystemPrompt) {
|
|
105
486
|
return this.processRawQuery(query, onThinking, abortSignal);
|
|
106
487
|
}
|
|
107
|
-
// Load system prompt from MCP server (no-op after first call)
|
|
108
488
|
await this.ensureSystemPrompt();
|
|
109
|
-
//
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
489
|
+
// Proactive compaction based on last real measured request.
|
|
490
|
+
await this.maybeCompactHistory();
|
|
491
|
+
const tools = await this.buildTools();
|
|
492
|
+
const userMessage = this.makeUserMessage(query);
|
|
493
|
+
// Optional proactive token counting near/around threshold.
|
|
494
|
+
const projectedInputTokens = await this.countInputTokens(this.buildInput([userMessage]));
|
|
495
|
+
if (projectedInputTokens >= this.config.compactTriggerInputTokens) {
|
|
496
|
+
await this.maybeCompactHistory();
|
|
114
497
|
}
|
|
115
|
-
// Add user message to
|
|
116
|
-
this.conversationHistory.push(
|
|
117
|
-
type: "message",
|
|
118
|
-
role: "user",
|
|
119
|
-
content: [
|
|
120
|
-
{
|
|
121
|
-
type: "input_text",
|
|
122
|
-
text: query,
|
|
123
|
-
}
|
|
124
|
-
],
|
|
125
|
-
});
|
|
126
|
-
// Get available tools from MCP server
|
|
127
|
-
const toolsResponse = await this.client.listTools();
|
|
128
|
-
// Convert MCP tools to OpenAI Responses API format
|
|
129
|
-
const tools = toolsResponse.tools.map((tool) => ({
|
|
130
|
-
type: "function",
|
|
131
|
-
name: tool.name,
|
|
132
|
-
description: tool.description || "",
|
|
133
|
-
parameters: tool.inputSchema,
|
|
134
|
-
strict: false,
|
|
135
|
-
}));
|
|
136
|
-
// Multi-turn conversation with tool calling
|
|
498
|
+
// Add the new user message to rolling history now.
|
|
499
|
+
this.conversationHistory.push(userMessage);
|
|
137
500
|
let loopCount = 0;
|
|
138
501
|
const maxLoops = 15;
|
|
139
502
|
let finalResponse = "";
|
|
140
503
|
let outOfToolCalls = false;
|
|
504
|
+
let previousResponseId = undefined;
|
|
505
|
+
// Carries tool outputs across iterations so previous_response_id chain stays intact.
|
|
506
|
+
let pendingToolOutputs = null;
|
|
141
507
|
while (loopCount < maxLoops) {
|
|
142
508
|
loopCount++;
|
|
143
|
-
// Check for cancellation before each API call
|
|
144
509
|
if (abortSignal?.aborted) {
|
|
145
510
|
throw new Error("Request was cancelled");
|
|
146
511
|
}
|
|
147
|
-
// Call OpenAI Responses API with error handling
|
|
148
512
|
let response;
|
|
149
513
|
try {
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
514
|
+
if (!previousResponseId) {
|
|
515
|
+
// First request in this query: send full current context.
|
|
516
|
+
response = await this.createResponse({
|
|
517
|
+
input: this.buildInput(),
|
|
518
|
+
tools: outOfToolCalls ? [] : tools,
|
|
519
|
+
});
|
|
520
|
+
}
|
|
521
|
+
else {
|
|
522
|
+
// Send pending tool outputs to continue the response chain.
|
|
523
|
+
response = await this.createResponse({
|
|
524
|
+
input: pendingToolOutputs ?? [],
|
|
525
|
+
tools: outOfToolCalls ? [] : tools,
|
|
526
|
+
previousResponseId,
|
|
527
|
+
});
|
|
528
|
+
pendingToolOutputs = null;
|
|
529
|
+
}
|
|
156
530
|
}
|
|
157
531
|
catch (error) {
|
|
158
532
|
const err = error;
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
(err.code ===
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
533
|
+
const message = err.message?.toLowerCase() || "";
|
|
534
|
+
const contextProblem = err.status === 400 &&
|
|
535
|
+
(err.code === "context_length_exceeded" ||
|
|
536
|
+
message.includes("context") ||
|
|
537
|
+
message.includes("length"));
|
|
538
|
+
const toolProblem = err.status === 400 &&
|
|
539
|
+
(err.code === "response_incomplete" ||
|
|
540
|
+
message.includes("incomplete") ||
|
|
541
|
+
message.includes("tool"));
|
|
542
|
+
if (contextProblem) {
|
|
543
|
+
await this.maybeCompactHistory();
|
|
544
|
+
if (!previousResponseId) {
|
|
545
|
+
response = await this.createResponse({
|
|
546
|
+
input: this.buildInput(),
|
|
547
|
+
tools: outOfToolCalls ? [] : tools,
|
|
548
|
+
});
|
|
549
|
+
}
|
|
550
|
+
else {
|
|
551
|
+
response = await this.createResponse({
|
|
552
|
+
input: pendingToolOutputs ?? [],
|
|
553
|
+
tools: outOfToolCalls ? [] : tools,
|
|
554
|
+
previousResponseId,
|
|
555
|
+
});
|
|
556
|
+
pendingToolOutputs = null;
|
|
557
|
+
}
|
|
171
558
|
}
|
|
172
|
-
|
|
173
|
-
else if (err.status === 400 &&
|
|
174
|
-
(err.code === 'response_incomplete' ||
|
|
175
|
-
err.message?.includes('incomplete') ||
|
|
176
|
-
err.message?.includes('tool'))) {
|
|
559
|
+
else if (toolProblem) {
|
|
177
560
|
outOfToolCalls = true;
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
}
|
|
561
|
+
if (!previousResponseId) {
|
|
562
|
+
response = await this.createResponse({
|
|
563
|
+
input: this.buildInput(),
|
|
564
|
+
tools: [],
|
|
565
|
+
});
|
|
566
|
+
}
|
|
567
|
+
else {
|
|
568
|
+
response = await this.createResponse({
|
|
569
|
+
input: pendingToolOutputs ?? [],
|
|
570
|
+
tools: [],
|
|
571
|
+
previousResponseId,
|
|
572
|
+
});
|
|
573
|
+
pendingToolOutputs = null;
|
|
574
|
+
}
|
|
184
575
|
}
|
|
185
576
|
else {
|
|
186
577
|
throw error;
|
|
187
578
|
}
|
|
188
579
|
}
|
|
189
|
-
|
|
190
|
-
|
|
580
|
+
previousResponseId = response.id;
|
|
581
|
+
const output = response.output ?? [];
|
|
582
|
+
for (const item of output) {
|
|
583
|
+
if (item.type === "web_search_call") {
|
|
584
|
+
onThinking?.("🔍 web_search_preview");
|
|
585
|
+
}
|
|
586
|
+
}
|
|
191
587
|
const functionCalls = output.filter((item) => item.type === "function_call");
|
|
192
|
-
// Check if AI wants to call tools
|
|
193
588
|
if (functionCalls.length > 0) {
|
|
589
|
+
// Persist model output items into rolling history.
|
|
194
590
|
this.conversationHistory.push(...output);
|
|
591
|
+
const toolOutputsForNextStep = [];
|
|
195
592
|
for (const functionCall of functionCalls) {
|
|
196
|
-
// Check for cancellation before each tool call
|
|
197
593
|
if (abortSignal?.aborted) {
|
|
198
594
|
throw new Error("Request was cancelled");
|
|
199
595
|
}
|
|
200
596
|
const functionName = functionCall.name;
|
|
201
|
-
const functionArgs = typeof functionCall.arguments ===
|
|
597
|
+
const functionArgs = typeof functionCall.arguments === "string"
|
|
202
598
|
? JSON.parse(functionCall.arguments)
|
|
203
599
|
: functionCall.arguments;
|
|
204
|
-
// Build a descriptive thinking message with key arguments
|
|
205
600
|
let toolDesc = functionName;
|
|
206
|
-
if (functionName === "fetch_webpage"
|
|
601
|
+
if (functionArgs?.url && functionName === "fetch_webpage") {
|
|
207
602
|
try {
|
|
208
603
|
toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
|
|
209
604
|
}
|
|
@@ -211,118 +606,112 @@ export class MCPClientOpenAI {
|
|
|
211
606
|
toolDesc = `fetch_webpage → ${functionArgs.url}`;
|
|
212
607
|
}
|
|
213
608
|
}
|
|
214
|
-
else if (functionName === "web_search"
|
|
609
|
+
else if (functionArgs?.query && functionName === "web_search") {
|
|
215
610
|
toolDesc = `web_search → "${functionArgs.query}"`;
|
|
216
611
|
}
|
|
217
612
|
onThinking?.(`🔧 ${toolDesc}`);
|
|
218
613
|
try {
|
|
219
|
-
// Execute the tool via MCP
|
|
220
614
|
const result = await this.client.callTool({
|
|
221
615
|
name: functionName,
|
|
222
616
|
arguments: functionArgs,
|
|
223
617
|
});
|
|
224
|
-
|
|
225
|
-
this.
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
output: JSON.stringify(result.content),
|
|
229
|
-
});
|
|
618
|
+
const compactOutput = this.compactToolResult(result.content);
|
|
619
|
+
const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, compactOutput);
|
|
620
|
+
toolOutputsForNextStep.push(toolOutputItem);
|
|
621
|
+
this.conversationHistory.push(toolOutputItem);
|
|
230
622
|
}
|
|
231
623
|
catch (error) {
|
|
232
|
-
this.
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
output: `Error: ${error instanceof Error ? error.message : String(error)}`,
|
|
236
|
-
});
|
|
624
|
+
const toolOutputItem = this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`);
|
|
625
|
+
toolOutputsForNextStep.push(toolOutputItem);
|
|
626
|
+
this.conversationHistory.push(toolOutputItem);
|
|
237
627
|
}
|
|
238
628
|
}
|
|
629
|
+
// Carry tool outputs to the next iteration so the response chain stays intact.
|
|
630
|
+
pendingToolOutputs = toolOutputsForNextStep;
|
|
239
631
|
continue;
|
|
240
632
|
}
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
if (contentItem.type === "output_text") {
|
|
247
|
-
finalResponse += contentItem.text;
|
|
248
|
-
}
|
|
633
|
+
for (const item of output) {
|
|
634
|
+
if (item.type === "message" && item.role === "assistant") {
|
|
635
|
+
for (const contentItem of item.content ?? []) {
|
|
636
|
+
if (contentItem.type === "output_text") {
|
|
637
|
+
finalResponse += contentItem.text;
|
|
249
638
|
}
|
|
250
639
|
}
|
|
251
640
|
}
|
|
252
|
-
this.conversationHistory.push(...output);
|
|
253
|
-
break;
|
|
254
641
|
}
|
|
642
|
+
this.conversationHistory.push(...output);
|
|
643
|
+
break;
|
|
255
644
|
}
|
|
256
|
-
// If we hit max loops, make one final request without tools
|
|
257
645
|
if (loopCount >= maxLoops && !finalResponse) {
|
|
258
646
|
try {
|
|
259
647
|
const finalApiResponse = await this.openai.responses.create({
|
|
260
648
|
model: this.config.openaiModel,
|
|
649
|
+
instructions: this.instructions ?? undefined,
|
|
261
650
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
262
|
-
input: this.
|
|
651
|
+
input: this.buildInput(),
|
|
263
652
|
tools: [],
|
|
653
|
+
truncation: "disabled",
|
|
654
|
+
prompt_cache_retention: "24h",
|
|
264
655
|
});
|
|
265
|
-
|
|
266
|
-
for (const item of
|
|
656
|
+
this.captureUsage(finalApiResponse);
|
|
657
|
+
for (const item of finalApiResponse.output ?? []) {
|
|
267
658
|
if (item.type === "message" && item.role === "assistant") {
|
|
268
|
-
for (const contentItem of item.content) {
|
|
659
|
+
for (const contentItem of item.content ?? []) {
|
|
269
660
|
if (contentItem.type === "output_text") {
|
|
270
661
|
finalResponse += contentItem.text;
|
|
271
662
|
}
|
|
272
663
|
}
|
|
273
664
|
}
|
|
274
665
|
}
|
|
275
|
-
this.conversationHistory.push(...
|
|
666
|
+
this.conversationHistory.push(...(finalApiResponse.output ?? []));
|
|
276
667
|
}
|
|
277
|
-
catch
|
|
278
|
-
finalResponse =
|
|
668
|
+
catch {
|
|
669
|
+
finalResponse =
|
|
670
|
+
"I've gathered information but reached my reasoning limit. Please try rephrasing your question.";
|
|
279
671
|
}
|
|
280
672
|
}
|
|
281
|
-
//
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
this.conversationHistory = [systemMessage, ...recentItems];
|
|
286
|
-
}
|
|
673
|
+
// Prevent pathological item growth even when tokens are still OK.
|
|
674
|
+
this.enforceHardHistoryLimitByTurns(20);
|
|
675
|
+
// Optional: if this turn caused a large input, compact immediately so the next turn starts cleaner.
|
|
676
|
+
await this.maybeCompactHistory();
|
|
287
677
|
return finalResponse;
|
|
288
678
|
}
|
|
289
679
|
/**
|
|
290
|
-
*
|
|
291
|
-
* Used by the Direct Prompt dev tool to test prompts verbatim.
|
|
680
|
+
* Raw mode: no cached instructions, no rolling history, no compaction state.
|
|
292
681
|
*/
|
|
293
682
|
async processRawQuery(query, onThinking, abortSignal) {
|
|
294
|
-
const
|
|
295
|
-
const
|
|
296
|
-
type: "function",
|
|
297
|
-
name: tool.name,
|
|
298
|
-
description: tool.description || "",
|
|
299
|
-
parameters: tool.inputSchema,
|
|
300
|
-
strict: false,
|
|
301
|
-
}));
|
|
302
|
-
// Isolated history — just this message, no system prompt
|
|
303
|
-
const isolatedHistory = [
|
|
304
|
-
{
|
|
305
|
-
type: "message",
|
|
306
|
-
role: "user",
|
|
307
|
-
content: [{ type: "input_text", text: query }],
|
|
308
|
-
},
|
|
309
|
-
];
|
|
683
|
+
const tools = await this.buildTools();
|
|
684
|
+
const isolatedHistory = [this.makeUserMessage(query)];
|
|
310
685
|
let loopCount = 0;
|
|
311
686
|
const maxLoops = 15;
|
|
312
687
|
let finalResponse = "";
|
|
688
|
+
let previousResponseId = undefined;
|
|
689
|
+
let pendingRawToolOutputs = null;
|
|
313
690
|
while (loopCount < maxLoops) {
|
|
314
691
|
loopCount++;
|
|
315
692
|
if (abortSignal?.aborted)
|
|
316
693
|
throw new Error("Request was cancelled");
|
|
694
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
317
695
|
const response = await this.openai.responses.create({
|
|
318
696
|
model: this.config.openaiModel,
|
|
319
|
-
|
|
697
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
698
|
+
input: (!previousResponseId ? isolatedHistory : (pendingRawToolOutputs ?? [])),
|
|
699
|
+
previous_response_id: previousResponseId,
|
|
320
700
|
tools,
|
|
701
|
+
truncation: "disabled",
|
|
321
702
|
});
|
|
322
|
-
|
|
703
|
+
pendingRawToolOutputs = null;
|
|
704
|
+
this.captureUsage(response);
|
|
705
|
+
previousResponseId = response.id;
|
|
706
|
+
const output = response.output ?? [];
|
|
707
|
+
for (const item of output) {
|
|
708
|
+
if (item.type === "web_search_call") {
|
|
709
|
+
onThinking?.("🔍 web_search_preview");
|
|
710
|
+
}
|
|
711
|
+
}
|
|
323
712
|
const functionCalls = output.filter((item) => item.type === "function_call");
|
|
324
713
|
if (functionCalls.length > 0) {
|
|
325
|
-
|
|
714
|
+
const newToolOutputs = [];
|
|
326
715
|
for (const functionCall of functionCalls) {
|
|
327
716
|
if (abortSignal?.aborted)
|
|
328
717
|
throw new Error("Request was cancelled");
|
|
@@ -331,7 +720,7 @@ export class MCPClientOpenAI {
|
|
|
331
720
|
? JSON.parse(functionCall.arguments)
|
|
332
721
|
: functionCall.arguments;
|
|
333
722
|
let toolDesc = functionName;
|
|
334
|
-
if (functionName === "fetch_webpage" && functionArgs
|
|
723
|
+
if (functionName === "fetch_webpage" && functionArgs?.url) {
|
|
335
724
|
try {
|
|
336
725
|
toolDesc = `fetch_webpage → ${new URL(functionArgs.url).hostname}`;
|
|
337
726
|
}
|
|
@@ -339,23 +728,27 @@ export class MCPClientOpenAI {
|
|
|
339
728
|
toolDesc = `fetch_webpage → ${functionArgs.url}`;
|
|
340
729
|
}
|
|
341
730
|
}
|
|
342
|
-
else if (functionName === "web_search" && functionArgs
|
|
731
|
+
else if (functionName === "web_search" && functionArgs?.query) {
|
|
343
732
|
toolDesc = `web_search → "${functionArgs.query}"`;
|
|
344
733
|
}
|
|
345
734
|
onThinking?.(`🔧 ${toolDesc}`);
|
|
346
735
|
try {
|
|
347
|
-
const result = await this.client.callTool({
|
|
348
|
-
|
|
736
|
+
const result = await this.client.callTool({
|
|
737
|
+
name: functionName,
|
|
738
|
+
arguments: functionArgs,
|
|
739
|
+
});
|
|
740
|
+
newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, this.compactToolResult(result.content)));
|
|
349
741
|
}
|
|
350
742
|
catch (error) {
|
|
351
|
-
|
|
743
|
+
newToolOutputs.push(this.makeFunctionOutput(functionCall.call_id, `Error: ${error instanceof Error ? error.message : String(error)}`));
|
|
352
744
|
}
|
|
353
745
|
}
|
|
746
|
+
pendingRawToolOutputs = newToolOutputs;
|
|
354
747
|
continue;
|
|
355
748
|
}
|
|
356
749
|
for (const item of output) {
|
|
357
750
|
if (item.type === "message" && item.role === "assistant") {
|
|
358
|
-
for (const contentItem of item.content) {
|
|
751
|
+
for (const contentItem of item.content ?? []) {
|
|
359
752
|
if (contentItem.type === "output_text")
|
|
360
753
|
finalResponse += contentItem.text;
|
|
361
754
|
}
|
|
@@ -365,13 +758,4 @@ export class MCPClientOpenAI {
|
|
|
365
758
|
}
|
|
366
759
|
return finalResponse;
|
|
367
760
|
}
|
|
368
|
-
clearHistory() {
|
|
369
|
-
// Keep system message only if it genuinely is a system role message
|
|
370
|
-
const first = this.conversationHistory[0];
|
|
371
|
-
const systemMessage = first?.role === "system" ? this.conversationHistory[0] : undefined;
|
|
372
|
-
this.conversationHistory = systemMessage ? [systemMessage] : [];
|
|
373
|
-
}
|
|
374
|
-
async cleanup() {
|
|
375
|
-
await this.client.close();
|
|
376
|
-
}
|
|
377
761
|
}
|
package/dist/styles/MCPChat.css
CHANGED
|
@@ -446,6 +446,18 @@
|
|
|
446
446
|
font-size: 12px;
|
|
447
447
|
opacity: 0.6;
|
|
448
448
|
margin-top: 6px;
|
|
449
|
+
display: flex;
|
|
450
|
+
align-items: center;
|
|
451
|
+
gap: 8px;
|
|
452
|
+
flex-wrap: wrap;
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
.mcp-chat-token-info {
|
|
456
|
+
font-size: 11px;
|
|
457
|
+
opacity: 0.75;
|
|
458
|
+
font-family: monospace;
|
|
459
|
+
border-left: 1px solid currentColor;
|
|
460
|
+
padding-left: 8px;
|
|
449
461
|
}
|
|
450
462
|
|
|
451
463
|
/* ───────────────────────────────────────────────
|
package/dist/types.d.ts
CHANGED
|
@@ -8,6 +8,8 @@ export interface Message {
|
|
|
8
8
|
isStreaming?: boolean;
|
|
9
9
|
/** Hidden messages are sent to the AI but not shown in the chat bubble list */
|
|
10
10
|
hidden?: boolean;
|
|
11
|
+
/** Token usage info shown in the footer of assistant messages — never fed back to AI */
|
|
12
|
+
tokenInfo?: string;
|
|
11
13
|
}
|
|
12
14
|
export interface ThinkingStep {
|
|
13
15
|
id: string;
|
|
@@ -21,7 +23,7 @@ export interface MCPChatProps {
|
|
|
21
23
|
className?: string;
|
|
22
24
|
}
|
|
23
25
|
export interface StreamEvent {
|
|
24
|
-
type: "thinking" | "content" | "done" | "error";
|
|
26
|
+
type: "thinking" | "content" | "done" | "error" | "usage";
|
|
25
27
|
message?: string;
|
|
26
28
|
chunk?: string;
|
|
27
29
|
}
|
package/dist/types.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,OAAO,CAAC;IACtB,+EAA+E;IAC/E,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,wFAAwF;IACxF,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,YAAY;IAC3B,EAAE,EAAE,MAAM,CAAC;IACX,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,IAAI,CAAC;CACjB;AAED,MAAM,WAAW,YAAY;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,YAAY,CAAC,EAAE,KAAK,CAAC,aAAa,CAAC;IACnC,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,UAAU,GAAG,SAAS,GAAG,MAAM,GAAG,OAAO,GAAG,OAAO,CAAC;IAC1D,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB"}
|