@nqminds/mcp-client 1.0.11 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,6 +74,12 @@ export declare class MCPClientOpenAI {
74
74
  * Latest usage snapshot for logging/inspection.
75
75
  */
76
76
  private lastUsage;
77
+ /**
78
+ * Per-turn accumulator — reset at the start of each processQuery / processRawQuery call.
79
+ * Tracks the max input tokens seen (largest context = most representative) and
80
+ * the sum of output tokens across all API calls in the turn.
81
+ */
82
+ private turnStats;
77
83
  private config;
78
84
  constructor(config: MCPClientConfig);
79
85
  connect(): Promise<void>;
@@ -101,7 +107,17 @@ export declare class MCPClientOpenAI {
101
107
  */
102
108
  private roughEstimateInputTokens;
103
109
  /**
104
- * Normalize usage from Responses API.
110
+ * Reset the per-turn accumulator at the start of each processQuery / processRawQuery.
111
+ */
112
+ private startTurn;
113
+ /**
114
+ * Normalize usage from Responses API and accumulate into the per-turn stats.
115
+ *
116
+ * With previous_response_id chaining there are multiple API calls per user turn:
117
+ * - inputTokens: use max across calls (the call with the largest context is most representative)
118
+ * - cachedTokens: taken from the same call that has the max input tokens (keeps % ≤ 100%)
119
+ * - outputTokens: sum across calls (each call generates separately-billed tokens)
120
+ * - reasoningTokens: sum across calls
105
121
  */
106
122
  private captureUsage;
107
123
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,UAAU,UAAU;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAiBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IAExC;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAuB;IAE3C;;;OAGG;IACH,OAAO,CAAC,mBAAmB,CAA2B;IAEtD;;;OAGG;IACH,OAAO,CAAC,UAAU,CAGhB;IAEF;;OAEG;IACH,OAAO,CAAC,eAAe,CAAK;IAE5B;;OAEG;IACH,OAAO,CAAC,SAAS,CAOf;IAEF,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;IAoC7B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,YAAY,IAAI,IAAI;IAiBpB,QAAQ,IAAI,UAAU;IAItB;;;OAGG;YACW,kBAAkB;IA0BhC;;;OAGG;IACH,OAAO,CAAC,UAAU;IAalB;;;OAGG;YACW,gBAAgB;IAuB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,OAAO,CAAC,YAAY;IAoBpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA8EzB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IA2CzB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;OAEG;YACW,kBAAkB;IA4ChC;;;OAGG;YACW,mBAAmB;IAoBjC;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAatC;;OAEG;YACW,UAAU;IAiBxB;;OAEG;YACW,cAAc;IAoB5B;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EACtC,WAAW,CAAC,EAAE,WAAW,EACzB,kBAAkB,UAAQ,GACzB,OAAO,CAAC,MAAM,CAAC;IA6OlB;;OAEG;YACW,eAAe;CAwG9B"}
1
+ {"version":3,"file":"openai-client.d.ts","sourceRoot":"","sources":["../src/openai-client.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,MAAM,CAAC;IACrB,gBAAgB,EAAE,MAAM,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAEnC;;;;OAIG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;OAEG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAID,UAAU,UAAU;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,EAAE,MAAM,CAAC;CACxB;AAiBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,SAAS,CAAuB;IAExC;;;OAGG;IACH,OAAO,CAAC,YAAY,CAAuB;IAE3C;;;OAGG;IACH,OAAO,CAAC,mBAAmB,CAA2B;IAEtD;;;OAGG;IACH,OAAO,CAAC,UAAU,CAGhB;IAEF;;OAEG;IACH,OAAO,CAAC,eAAe,CAAK;IAE5B;;OAEG;IACH,OAAO,CAAC,SAAS,CAOf;IAEF;;;;OAIG;IACH,OAAO,CAAC,SAAS,CAKf;IAEF,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,EAAE,eAAe;IAoC7B,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAIxB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B,YAAY,IAAI,IAAI;IAkBpB,QAAQ,IAAI,UAAU;IAItB;;;OAGG;YACW,kBAAkB;IA0BhC;;;OAGG;IACH,OAAO,CAAC,UAAU;IAalB;;;OAGG;YACW,gBAAgB;IAuB9B;;;OAGG;IACH,OAAO,CAAC,wBAAwB;IAUhC;;OAEG;IACH,OAAO,CAAC,SAAS;IAIjB;;;;;;;;OAQG;IACH,OAAO,CAAC,YAAY;IAmCpB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA8EzB,OAAO,CAAC,eAAe;IAQvB,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB;IA2CzB;;;OAGG;IACH,OAAO,CAAC,sBAAsB;IAsC9B;;OAEG;YACW,kBAAkB;IA4ChC;;;OAGG;YACW,mBAAmB;IAoBjC;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAatC;;OAEG;YACW,UAAU;IAiBxB;;OAEG;YACW,cAAc;IAoB5B;;OAEG;IACG,YAAY,CAChB,KAAK,EAAE,MAAM,EACb,UAAU,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,IAAI,EACtC,WAAW,CAAC,EAAE,WAAW,EACzB,kBAAkB,UAAQ,GACzB,OAAO,CAAC,MAAM,CAAC;IA8OlB;;OAEG;YACW,eAAe;CAyG9B"}
@@ -54,6 +54,17 @@ export class MCPClientOpenAI {
54
54
  reasoningTokens: 0,
55
55
  compactedTurns: 0,
56
56
  };
57
+ /**
58
+ * Per-turn accumulator — reset at the start of each processQuery / processRawQuery call.
59
+ * Tracks the max input tokens seen (largest context = most representative) and
60
+ * the sum of output tokens across all API calls in the turn.
61
+ */
62
+ this.turnStats = {
63
+ inputTokens: 0,
64
+ outputTokens: 0,
65
+ cachedTokens: 0,
66
+ reasoningTokens: 0,
67
+ };
57
68
  this.config = {
58
69
  openaiApiKey: config.openaiApiKey,
59
70
  mcpServerCommand: config.mcpServerCommand,
@@ -94,6 +105,7 @@ export class MCPClientOpenAI {
94
105
  compactedTurns: 0,
95
106
  };
96
107
  this.lastInputTokens = 0;
108
+ this.turnStats = { inputTokens: 0, outputTokens: 0, cachedTokens: 0, reasoningTokens: 0 };
97
109
  this.lastUsage = {
98
110
  inputTokens: 0,
99
111
  outputTokens: 0,
@@ -186,7 +198,19 @@ export class MCPClientOpenAI {
186
198
  return Math.ceil(serialized.length / 4);
187
199
  }
188
200
  /**
189
- * Normalize usage from Responses API.
201
+ * Reset the per-turn accumulator at the start of each processQuery / processRawQuery.
202
+ */
203
+ startTurn() {
204
+ this.turnStats = { inputTokens: 0, outputTokens: 0, cachedTokens: 0, reasoningTokens: 0 };
205
+ }
206
+ /**
207
+ * Normalize usage from Responses API and accumulate into the per-turn stats.
208
+ *
209
+ * With previous_response_id chaining there are multiple API calls per user turn:
210
+ * - inputTokens: use max across calls (the call with the largest context is most representative)
211
+ * - cachedTokens: taken from the same call that has the max input tokens (keeps % ≤ 100%)
212
+ * - outputTokens: sum across calls (each call generates separately-billed tokens)
213
+ * - reasoningTokens: sum across calls
190
214
  */
191
215
  captureUsage(response) {
192
216
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -195,15 +219,28 @@ export class MCPClientOpenAI {
195
219
  const inputDetails = usage?.input_tokens_details ?? {};
196
220
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
197
221
  const outputDetails = usage?.output_tokens_details ?? {};
222
+ const inputTokens = usage.input_tokens ?? 0;
223
+ const outputTokens = usage.output_tokens ?? 0;
224
+ const cachedTokens = inputDetails.cached_tokens ?? 0;
225
+ const reasoningTokens = outputDetails.reasoning_tokens ?? 0;
226
+ // cachedTokens is only meaningful relative to its own call's inputTokens.
227
+ // Track it alongside the max-input call so the percentage stays ≤ 100%.
228
+ if (inputTokens >= this.turnStats.inputTokens) {
229
+ this.turnStats.inputTokens = inputTokens;
230
+ this.turnStats.cachedTokens = cachedTokens;
231
+ }
232
+ this.turnStats.outputTokens += outputTokens;
233
+ this.turnStats.reasoningTokens += reasoningTokens;
198
234
  this.lastUsage = {
199
- inputTokens: usage.input_tokens ?? 0,
200
- outputTokens: usage.output_tokens ?? 0,
201
- totalTokens: usage.total_tokens ?? 0,
202
- cachedTokens: inputDetails.cached_tokens ?? 0,
203
- reasoningTokens: outputDetails.reasoning_tokens ?? 0,
235
+ inputTokens: this.turnStats.inputTokens,
236
+ outputTokens: this.turnStats.outputTokens,
237
+ totalTokens: this.turnStats.inputTokens + this.turnStats.outputTokens,
238
+ cachedTokens: this.turnStats.cachedTokens,
239
+ reasoningTokens: this.turnStats.reasoningTokens,
204
240
  compactedTurns: this.compaction.compactedTurns,
205
241
  };
206
- this.lastInputTokens = this.lastUsage.inputTokens;
242
+ // Keep lastInputTokens as the raw per-call value for compaction threshold checks.
243
+ this.lastInputTokens = inputTokens;
207
244
  }
208
245
  /**
209
246
  * Compact oversized tool outputs before storing them in rolling history.
@@ -485,6 +522,7 @@ export class MCPClientOpenAI {
485
522
  if (bypassSystemPrompt) {
486
523
  return this.processRawQuery(query, onThinking, abortSignal);
487
524
  }
525
+ this.startTurn();
488
526
  await this.ensureSystemPrompt();
489
527
  // Proactive compaction based on last real measured request.
490
528
  await this.maybeCompactHistory();
@@ -680,6 +718,7 @@ export class MCPClientOpenAI {
680
718
  * Raw mode: no cached instructions, no rolling history, no compaction state.
681
719
  */
682
720
  async processRawQuery(query, onThinking, abortSignal) {
721
+ this.startTurn();
683
722
  const tools = await this.buildTools();
684
723
  const isolatedHistory = [this.makeUserMessage(query)];
685
724
  let loopCount = 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nqminds/mcp-client",
3
- "version": "1.0.11",
3
+ "version": "1.0.12",
4
4
  "description": "Reusable MCP client component with AI chat interface",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",