@dexto/core 1.5.4 → 1.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. package/dist/agent/DextoAgent.cjs +61 -155
  2. package/dist/agent/DextoAgent.d.ts +11 -54
  3. package/dist/agent/DextoAgent.d.ts.map +1 -1
  4. package/dist/agent/DextoAgent.js +61 -155
  5. package/dist/context/compaction/strategies/reactive-overflow.cjs +5 -3
  6. package/dist/context/compaction/strategies/reactive-overflow.d.ts +1 -0
  7. package/dist/context/compaction/strategies/reactive-overflow.d.ts.map +1 -1
  8. package/dist/context/compaction/strategies/reactive-overflow.js +5 -3
  9. package/dist/context/compaction/types.d.ts +13 -1
  10. package/dist/context/compaction/types.d.ts.map +1 -1
  11. package/dist/errors/types.cjs +0 -2
  12. package/dist/errors/types.d.ts +1 -5
  13. package/dist/errors/types.d.ts.map +1 -1
  14. package/dist/errors/types.js +0 -2
  15. package/dist/events/index.cjs +0 -2
  16. package/dist/events/index.d.ts +6 -25
  17. package/dist/events/index.d.ts.map +1 -1
  18. package/dist/events/index.js +0 -2
  19. package/dist/llm/executor/stream-processor.cjs +85 -27
  20. package/dist/llm/executor/stream-processor.d.ts +4 -0
  21. package/dist/llm/executor/stream-processor.d.ts.map +1 -1
  22. package/dist/llm/executor/stream-processor.js +85 -27
  23. package/dist/llm/executor/turn-executor.cjs +58 -130
  24. package/dist/llm/executor/turn-executor.d.ts +9 -43
  25. package/dist/llm/executor/turn-executor.d.ts.map +1 -1
  26. package/dist/llm/executor/turn-executor.js +58 -130
  27. package/dist/llm/executor/types.d.ts +0 -28
  28. package/dist/llm/executor/types.d.ts.map +1 -1
  29. package/dist/llm/services/vercel.cjs +2 -5
  30. package/dist/llm/services/vercel.d.ts +1 -6
  31. package/dist/llm/services/vercel.d.ts.map +1 -1
  32. package/dist/llm/services/vercel.js +2 -5
  33. package/dist/logger/logger.cjs +6 -7
  34. package/dist/logger/logger.d.ts +1 -0
  35. package/dist/logger/logger.d.ts.map +1 -1
  36. package/dist/logger/logger.js +6 -7
  37. package/dist/session/chat-session.cjs +19 -12
  38. package/dist/session/chat-session.d.ts +3 -6
  39. package/dist/session/chat-session.d.ts.map +1 -1
  40. package/dist/session/chat-session.js +19 -12
  41. package/dist/session/session-manager.cjs +0 -135
  42. package/dist/session/session-manager.d.ts +0 -43
  43. package/dist/session/session-manager.d.ts.map +1 -1
  44. package/dist/session/session-manager.js +0 -135
  45. package/dist/telemetry/telemetry.cjs +12 -5
  46. package/dist/telemetry/telemetry.d.ts.map +1 -1
  47. package/dist/telemetry/telemetry.js +12 -5
  48. package/dist/tools/schemas.cjs +2 -2
  49. package/dist/tools/schemas.js +2 -2
  50. package/package.json +15 -5
  51. package/dist/filesystem/error-codes.cjs +0 -53
  52. package/dist/filesystem/error-codes.d.ts +0 -31
  53. package/dist/filesystem/error-codes.d.ts.map +0 -1
  54. package/dist/filesystem/error-codes.js +0 -30
  55. package/dist/filesystem/errors.cjs +0 -303
  56. package/dist/filesystem/errors.d.ts +0 -109
  57. package/dist/filesystem/errors.d.ts.map +0 -1
  58. package/dist/filesystem/errors.js +0 -280
  59. package/dist/filesystem/filesystem-service.cjs +0 -534
  60. package/dist/filesystem/filesystem-service.d.ts +0 -97
  61. package/dist/filesystem/filesystem-service.d.ts.map +0 -1
  62. package/dist/filesystem/filesystem-service.js +0 -501
  63. package/dist/filesystem/index.cjs +0 -37
  64. package/dist/filesystem/index.d.ts +0 -11
  65. package/dist/filesystem/index.d.ts.map +0 -1
  66. package/dist/filesystem/index.js +0 -11
  67. package/dist/filesystem/path-validator.cjs +0 -250
  68. package/dist/filesystem/path-validator.d.ts +0 -103
  69. package/dist/filesystem/path-validator.d.ts.map +0 -1
  70. package/dist/filesystem/path-validator.js +0 -217
  71. package/dist/filesystem/types.cjs +0 -16
  72. package/dist/filesystem/types.d.ts +0 -175
  73. package/dist/filesystem/types.d.ts.map +0 -1
  74. package/dist/filesystem/types.js +0 -0
  75. package/dist/process/command-validator.cjs +0 -554
  76. package/dist/process/command-validator.d.ts +0 -49
  77. package/dist/process/command-validator.d.ts.map +0 -1
  78. package/dist/process/command-validator.js +0 -531
  79. package/dist/process/error-codes.cjs +0 -47
  80. package/dist/process/error-codes.d.ts +0 -25
  81. package/dist/process/error-codes.d.ts.map +0 -1
  82. package/dist/process/error-codes.js +0 -24
  83. package/dist/process/errors.cjs +0 -244
  84. package/dist/process/errors.d.ts +0 -87
  85. package/dist/process/errors.d.ts.map +0 -1
  86. package/dist/process/errors.js +0 -221
  87. package/dist/process/index.cjs +0 -37
  88. package/dist/process/index.d.ts +0 -11
  89. package/dist/process/index.d.ts.map +0 -1
  90. package/dist/process/index.js +0 -11
  91. package/dist/process/process-service.cjs +0 -497
  92. package/dist/process/process-service.d.ts +0 -69
  93. package/dist/process/process-service.d.ts.map +0 -1
  94. package/dist/process/process-service.js +0 -464
  95. package/dist/process/types.cjs +0 -16
  96. package/dist/process/types.d.ts +0 -107
  97. package/dist/process/types.d.ts.map +0 -1
  98. package/dist/process/types.js +0 -0
  99. package/dist/session/compaction-service.cjs +0 -139
  100. package/dist/session/compaction-service.d.ts +0 -81
  101. package/dist/session/compaction-service.d.ts.map +0 -1
  102. package/dist/session/compaction-service.js +0 -106
@@ -52,6 +52,7 @@ class StreamProcessor {
52
52
  reasoningMetadata;
53
53
  accumulatedText = "";
54
54
  logger;
55
+ hasStepUsage = false;
55
56
  /**
56
57
  * Track pending tool calls (added to context but no result yet).
57
58
  * On cancel/abort, we add synthetic "cancelled" results to maintain tool_use/tool_result pairing.
@@ -162,41 +163,63 @@ class StreamProcessor {
162
163
  }
163
164
  case "finish-step":
164
165
  if (event.usage) {
165
- const anthropicMeta = event.providerMetadata?.["anthropic"];
166
- const bedrockMeta = event.providerMetadata?.["bedrock"];
167
- const cacheWriteTokens = anthropicMeta?.["cacheCreationInputTokens"] ?? bedrockMeta?.usage?.["cacheWriteInputTokens"] ?? 0;
168
- const cacheReadTokens = anthropicMeta?.["cacheReadInputTokens"] ?? bedrockMeta?.usage?.["cacheReadInputTokens"] ?? event.usage.cachedInputTokens ?? 0;
166
+ const providerMetadata = this.getProviderMetadata(event);
167
+ const stepUsage = this.normalizeUsage(event.usage, providerMetadata);
169
168
  this.actualTokens = {
170
- inputTokens: (this.actualTokens.inputTokens ?? 0) + (event.usage.inputTokens ?? 0),
171
- outputTokens: (this.actualTokens.outputTokens ?? 0) + (event.usage.outputTokens ?? 0),
172
- totalTokens: (this.actualTokens.totalTokens ?? 0) + (event.usage.totalTokens ?? 0),
173
- ...event.usage.reasoningTokens !== void 0 && {
174
- reasoningTokens: (this.actualTokens.reasoningTokens ?? 0) + event.usage.reasoningTokens
169
+ inputTokens: (this.actualTokens.inputTokens ?? 0) + (stepUsage.inputTokens ?? 0),
170
+ outputTokens: (this.actualTokens.outputTokens ?? 0) + (stepUsage.outputTokens ?? 0),
171
+ totalTokens: (this.actualTokens.totalTokens ?? 0) + (stepUsage.totalTokens ?? 0),
172
+ ...stepUsage.reasoningTokens !== void 0 && {
173
+ reasoningTokens: (this.actualTokens.reasoningTokens ?? 0) + stepUsage.reasoningTokens
175
174
  },
176
175
  // Cache tokens
177
- cacheReadTokens: (this.actualTokens.cacheReadTokens ?? 0) + cacheReadTokens,
178
- cacheWriteTokens: (this.actualTokens.cacheWriteTokens ?? 0) + cacheWriteTokens
176
+ cacheReadTokens: (this.actualTokens.cacheReadTokens ?? 0) + (stepUsage.cacheReadTokens ?? 0),
177
+ cacheWriteTokens: (this.actualTokens.cacheWriteTokens ?? 0) + (stepUsage.cacheWriteTokens ?? 0)
179
178
  };
179
+ this.hasStepUsage = true;
180
180
  }
181
181
  break;
182
182
  case "finish": {
183
183
  this.finishReason = event.finishReason;
184
- const cacheReadTokens = this.actualTokens.cacheReadTokens ?? event.totalUsage.cachedInputTokens ?? 0;
185
- const cacheWriteTokens = this.actualTokens.cacheWriteTokens ?? 0;
186
- const providerExcludesCached = this.config.provider === "anthropic" || this.config.provider === "bedrock";
187
- const adjustedInputTokens = providerExcludesCached ? event.totalUsage.inputTokens ?? 0 : (event.totalUsage.inputTokens ?? 0) - cacheReadTokens;
188
- const usage = {
189
- inputTokens: adjustedInputTokens,
190
- outputTokens: event.totalUsage.outputTokens ?? 0,
191
- totalTokens: event.totalUsage.totalTokens ?? 0,
192
- // Capture reasoning tokens if available (from Claude extended thinking, etc.)
193
- ...event.totalUsage.reasoningTokens !== void 0 && {
194
- reasoningTokens: event.totalUsage.reasoningTokens
195
- },
196
- // Cache tokens from accumulated finish-step events or totalUsage fallback
197
- cacheReadTokens,
198
- cacheWriteTokens
199
- };
184
+ const providerMetadata = this.getProviderMetadata(event);
185
+ const fallbackUsage = this.normalizeUsage(
186
+ event.totalUsage,
187
+ providerMetadata
188
+ );
189
+ const usage = this.hasStepUsage ? { ...this.actualTokens } : fallbackUsage;
190
+ if (this.hasStepUsage) {
191
+ const fallbackInput = fallbackUsage.inputTokens ?? 0;
192
+ if ((usage.inputTokens ?? 0) === 0 && fallbackInput > 0) {
193
+ this.logger.debug(
194
+ "Backfilling inputTokens from fallback usage (step reported 0)",
195
+ { stepValue: usage.inputTokens, fallbackValue: fallbackInput }
196
+ );
197
+ usage.inputTokens = fallbackInput;
198
+ }
199
+ const fallbackOutput = fallbackUsage.outputTokens ?? 0;
200
+ if ((usage.outputTokens ?? 0) === 0 && fallbackOutput > 0) {
201
+ this.logger.debug(
202
+ "Backfilling outputTokens from fallback usage (step reported 0)",
203
+ { stepValue: usage.outputTokens, fallbackValue: fallbackOutput }
204
+ );
205
+ usage.outputTokens = fallbackOutput;
206
+ }
207
+ const fallbackCacheRead = fallbackUsage.cacheReadTokens ?? 0;
208
+ if ((usage.cacheReadTokens ?? 0) === 0 && fallbackCacheRead > 0) {
209
+ usage.cacheReadTokens = fallbackCacheRead;
210
+ }
211
+ const fallbackCacheWrite = fallbackUsage.cacheWriteTokens ?? 0;
212
+ if ((usage.cacheWriteTokens ?? 0) === 0 && fallbackCacheWrite > 0) {
213
+ usage.cacheWriteTokens = fallbackCacheWrite;
214
+ }
215
+ const fallbackTotalTokens = fallbackUsage.totalTokens ?? 0;
216
+ if ((usage.totalTokens ?? 0) === 0 && fallbackTotalTokens > 0) {
217
+ usage.totalTokens = fallbackTotalTokens;
218
+ }
219
+ if (usage.reasoningTokens === void 0 && fallbackUsage.reasoningTokens !== void 0) {
220
+ usage.reasoningTokens = fallbackUsage.reasoningTokens;
221
+ }
222
+ }
200
223
  this.actualTokens = usage;
201
224
  this.logger.info("LLM response complete", {
202
225
  finishReason: event.finishReason,
@@ -344,6 +367,41 @@ class StreamProcessor {
344
367
  usage: this.actualTokens
345
368
  };
346
369
  }
370
+ getCacheTokensFromProviderMetadata(providerMetadata) {
371
+ const anthropicMeta = providerMetadata?.["anthropic"];
372
+ const bedrockMeta = providerMetadata?.["bedrock"];
373
+ const cacheWriteTokens = anthropicMeta?.["cacheCreationInputTokens"] ?? bedrockMeta?.usage?.["cacheWriteInputTokens"] ?? 0;
374
+ const cacheReadTokens = anthropicMeta?.["cacheReadInputTokens"] ?? bedrockMeta?.usage?.["cacheReadInputTokens"] ?? 0;
375
+ return { cacheReadTokens, cacheWriteTokens };
376
+ }
377
+ normalizeUsage(usage, providerMetadata) {
378
+ const inputTokensRaw = usage?.inputTokens ?? 0;
379
+ const outputTokens = usage?.outputTokens ?? 0;
380
+ const totalTokens = usage?.totalTokens ?? 0;
381
+ const reasoningTokens = usage?.reasoningTokens;
382
+ const cachedInputTokens = usage?.cachedInputTokens;
383
+ const inputTokenDetails = usage?.inputTokenDetails;
384
+ const providerCache = this.getCacheTokensFromProviderMetadata(providerMetadata);
385
+ const cacheReadTokens = inputTokenDetails?.cacheReadTokens ?? cachedInputTokens ?? providerCache.cacheReadTokens ?? 0;
386
+ const cacheWriteTokens = inputTokenDetails?.cacheWriteTokens ?? providerCache.cacheWriteTokens ?? 0;
387
+ const needsCacheWriteAdjustment = inputTokenDetails === void 0 && cachedInputTokens !== void 0 && providerCache.cacheWriteTokens > 0;
388
+ const noCacheTokens = inputTokenDetails?.noCacheTokens ?? (cachedInputTokens !== void 0 ? inputTokensRaw - cachedInputTokens - (needsCacheWriteAdjustment ? providerCache.cacheWriteTokens : 0) : inputTokensRaw);
389
+ return {
390
+ inputTokens: Math.max(0, noCacheTokens),
391
+ outputTokens,
392
+ totalTokens,
393
+ ...reasoningTokens !== void 0 && { reasoningTokens },
394
+ cacheReadTokens,
395
+ cacheWriteTokens
396
+ };
397
+ }
398
+ getProviderMetadata(event) {
399
+ const metadata = "providerMetadata" in event ? event.providerMetadata : void 0;
400
+ if (!metadata || typeof metadata !== "object") {
401
+ return void 0;
402
+ }
403
+ return metadata;
404
+ }
347
405
  async createAssistantMessage() {
348
406
  await this.contextManager.addAssistantMessage("", [], {});
349
407
  return this.getLastMessageId();
@@ -26,6 +26,7 @@ export declare class StreamProcessor {
26
26
  private reasoningMetadata;
27
27
  private accumulatedText;
28
28
  private logger;
29
+ private hasStepUsage;
29
30
  /**
30
31
  * Track pending tool calls (added to context but no result yet).
31
32
  * On cancel/abort, we add synthetic "cancelled" results to maintain tool_use/tool_result pairing.
@@ -46,6 +47,9 @@ export declare class StreamProcessor {
46
47
  approvalStatus?: "approved" | "rejected";
47
48
  }> | undefined);
48
49
  process(streamFn: () => StreamTextResult<VercelToolSet, unknown>): Promise<StreamProcessorResult>;
50
+ private getCacheTokensFromProviderMetadata;
51
+ private normalizeUsage;
52
+ private getProviderMetadata;
49
53
  private createAssistantMessage;
50
54
  private getLastMessageId;
51
55
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"stream-processor.d.ts","sourceRoot":"","sources":["../../../src/llm/executor/stream-processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,OAAO,IAAI,aAAa,EAAE,MAAM,IAAI,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAmB,MAAM,uBAAuB,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AAGnD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,WAAW,EAAc,MAAM,aAAa,CAAC;AAEtD,MAAM,WAAW,qBAAqB;IAClC,QAAQ,EAAE,WAAW,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,yEAAyE;IACzE,oBAAoB,CAAC,EAAE,MAAM,CAAC;CACjC;AAED,qBAAa,eAAe;IAyBpB,OAAO,CAAC,cAAc;IACtB,OAAO,CAAC,QAAQ;IAChB,OAAO,CAAC,eAAe;IACvB,OAAO,CAAC,WAAW;IACnB,OAAO,CAAC,MAAM;IAEd,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,gBAAgB,CAAC;IA/B7B,OAAO,CAAC,kBAAkB,CAAuB;IACjD,OAAO,CAAC,YAAY,CAAmE;IACvF,OAAO,CAAC,YAAY,CAA8B;IAClD,OAAO,CAAC,aAAa,CAAc;IACnC,OAAO,CAAC,iBAAiB,CAAsC;IAC/D,OAAO,CAAC,eAAe,CAAc;IACrC,OAAO,CAAC,MAAM,CAAe;IAC7B;;;OAGG;IACH,OAAO,CAAC,gBAAgB,CAAgD;IAExE;;;;;;;;;OASG;gBAES,cAAc,EAAE,cAAc,EAC9B,QAAQ,EAAE,eAAe,EACzB,eAAe,EAAE,eAAe,EAChC,WAAW,EAAE,WAAW,EACxB,MAAM,EAAE,qBAAqB,EACrC,MAAM,EAAE,YAAY,EACZ,SAAS,GAAE,OAAc,EACzB,gBAAgB,CAAC,EAAE,GAAG,CAC1B,MAAM,EACN;QAAE,eAAe,EAAE,OAAO,CAAC;QAAC,cAAc,CAAC,EAAE,UAAU,GAAG,UAAU,CAAA;KAAE,CACzE,YAAA;IAKC,OAAO,CACT,QAAQ,EAAE,MAAM,gBAAgB,CAAC,aAAa,EAAE,OAAO,CAAC,GACzD,OAAO,CAAC,qBAAqB,CAAC;YAubnB,sBAAsB;YAKtB,gBAAgB;IAO9B;;;;OAIG;YACW,2BAA2B;CAmC5C"}
1
+ {"version":3,"file":"stream-processor.d.ts","sourceRoot":"","sources":["../../../src/llm/executor/stream-processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,OAAO,IAAI,aAAa,EAAE,MAAM,IAAI,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAmB,MAAM,uBAAuB,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AAGnD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,WAAW,EAAc,MAAM,aAAa,CAAC;AAetD,MAAM,WAAW,qBAAqB;IAClC,QAAQ,EAAE,WAAW,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,yEAAyE;IACzE,oBAAoB,CAAC,EAAE,MAAM,CAAC;CACjC;AAED,qBAAa,eAAe;IA0BpB,OAAO,CAAC,cAAc;IACtB,OAAO,CAAC,QAAQ;IAChB,OAAO,CAAC,eAAe;IACvB,OAAO,CAAC,WAAW;IACnB,OAAO,CAAC,MAAM;IAEd,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,gBAAgB,CAAC;IAhC7B,OAAO,CAAC,kBAAkB,CAAuB;IACjD,OAAO,CAAC,YAAY,CAAmE;IACvF,OAAO,CAAC,YAAY,CAA8B;IAClD,OAAO,CAAC,aAAa,CAAc;IACnC,OAAO,CAAC,iBAAiB,CAAsC;IAC/D,OAAO,CAAC,eAAe,CAAc;IACrC,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,YAAY,CAAS;IAC7B;;;OAGG;IACH,OAAO,CAAC,gBAAgB,CAAgD;IAExE;;;;;;;;;OASG;gBAES,cAAc,EAAE,cAAc,EAC9B,QAAQ,EAAE,eAAe,EACzB,eAAe,EAAE,eAAe,EAChC,WAAW,EAAE,WAAW,EACxB,MAAM,EAAE,qBAAqB,EACrC,MAAM,EAAE,YAAY,EACZ,SAAS,GAAE,OAAc,EACzB,gBAAgB,CAAC,EAAE,GAAG,CAC1B,MAAM,EACN;QAAE,eAAe,EAAE,OAAO,CAAC;QAAC,cAAc,CAAC,EAAE,UAAU,GAAG,UAAU,CAAA;KAAE,CACzE,YAAA;IAKC,OAAO,CACT,QAAQ,EAAE,MAAM,gBAAgB,CAAC,aAAa,EAAE,OAAO,CAAC,GACzD,OAAO,CAAC,qBAAqB,CAAC;IA4bjC,OAAO,CAAC,kCAAkC;IAoB1C,OAAO,CAAC,cAAc;IA0CtB,OAAO,CAAC,mBAAmB;YAab,sBAAsB;YAKtB,gBAAgB;IAO9B;;;;OAIG;YACW,2BAA2B;CAmC5C"}
@@ -30,6 +30,7 @@ class StreamProcessor {
30
30
  reasoningMetadata;
31
31
  accumulatedText = "";
32
32
  logger;
33
+ hasStepUsage = false;
33
34
  /**
34
35
  * Track pending tool calls (added to context but no result yet).
35
36
  * On cancel/abort, we add synthetic "cancelled" results to maintain tool_use/tool_result pairing.
@@ -140,41 +141,63 @@ class StreamProcessor {
140
141
  }
141
142
  case "finish-step":
142
143
  if (event.usage) {
143
- const anthropicMeta = event.providerMetadata?.["anthropic"];
144
- const bedrockMeta = event.providerMetadata?.["bedrock"];
145
- const cacheWriteTokens = anthropicMeta?.["cacheCreationInputTokens"] ?? bedrockMeta?.usage?.["cacheWriteInputTokens"] ?? 0;
146
- const cacheReadTokens = anthropicMeta?.["cacheReadInputTokens"] ?? bedrockMeta?.usage?.["cacheReadInputTokens"] ?? event.usage.cachedInputTokens ?? 0;
144
+ const providerMetadata = this.getProviderMetadata(event);
145
+ const stepUsage = this.normalizeUsage(event.usage, providerMetadata);
147
146
  this.actualTokens = {
148
- inputTokens: (this.actualTokens.inputTokens ?? 0) + (event.usage.inputTokens ?? 0),
149
- outputTokens: (this.actualTokens.outputTokens ?? 0) + (event.usage.outputTokens ?? 0),
150
- totalTokens: (this.actualTokens.totalTokens ?? 0) + (event.usage.totalTokens ?? 0),
151
- ...event.usage.reasoningTokens !== void 0 && {
152
- reasoningTokens: (this.actualTokens.reasoningTokens ?? 0) + event.usage.reasoningTokens
147
+ inputTokens: (this.actualTokens.inputTokens ?? 0) + (stepUsage.inputTokens ?? 0),
148
+ outputTokens: (this.actualTokens.outputTokens ?? 0) + (stepUsage.outputTokens ?? 0),
149
+ totalTokens: (this.actualTokens.totalTokens ?? 0) + (stepUsage.totalTokens ?? 0),
150
+ ...stepUsage.reasoningTokens !== void 0 && {
151
+ reasoningTokens: (this.actualTokens.reasoningTokens ?? 0) + stepUsage.reasoningTokens
153
152
  },
154
153
  // Cache tokens
155
- cacheReadTokens: (this.actualTokens.cacheReadTokens ?? 0) + cacheReadTokens,
156
- cacheWriteTokens: (this.actualTokens.cacheWriteTokens ?? 0) + cacheWriteTokens
154
+ cacheReadTokens: (this.actualTokens.cacheReadTokens ?? 0) + (stepUsage.cacheReadTokens ?? 0),
155
+ cacheWriteTokens: (this.actualTokens.cacheWriteTokens ?? 0) + (stepUsage.cacheWriteTokens ?? 0)
157
156
  };
157
+ this.hasStepUsage = true;
158
158
  }
159
159
  break;
160
160
  case "finish": {
161
161
  this.finishReason = event.finishReason;
162
- const cacheReadTokens = this.actualTokens.cacheReadTokens ?? event.totalUsage.cachedInputTokens ?? 0;
163
- const cacheWriteTokens = this.actualTokens.cacheWriteTokens ?? 0;
164
- const providerExcludesCached = this.config.provider === "anthropic" || this.config.provider === "bedrock";
165
- const adjustedInputTokens = providerExcludesCached ? event.totalUsage.inputTokens ?? 0 : (event.totalUsage.inputTokens ?? 0) - cacheReadTokens;
166
- const usage = {
167
- inputTokens: adjustedInputTokens,
168
- outputTokens: event.totalUsage.outputTokens ?? 0,
169
- totalTokens: event.totalUsage.totalTokens ?? 0,
170
- // Capture reasoning tokens if available (from Claude extended thinking, etc.)
171
- ...event.totalUsage.reasoningTokens !== void 0 && {
172
- reasoningTokens: event.totalUsage.reasoningTokens
173
- },
174
- // Cache tokens from accumulated finish-step events or totalUsage fallback
175
- cacheReadTokens,
176
- cacheWriteTokens
177
- };
162
+ const providerMetadata = this.getProviderMetadata(event);
163
+ const fallbackUsage = this.normalizeUsage(
164
+ event.totalUsage,
165
+ providerMetadata
166
+ );
167
+ const usage = this.hasStepUsage ? { ...this.actualTokens } : fallbackUsage;
168
+ if (this.hasStepUsage) {
169
+ const fallbackInput = fallbackUsage.inputTokens ?? 0;
170
+ if ((usage.inputTokens ?? 0) === 0 && fallbackInput > 0) {
171
+ this.logger.debug(
172
+ "Backfilling inputTokens from fallback usage (step reported 0)",
173
+ { stepValue: usage.inputTokens, fallbackValue: fallbackInput }
174
+ );
175
+ usage.inputTokens = fallbackInput;
176
+ }
177
+ const fallbackOutput = fallbackUsage.outputTokens ?? 0;
178
+ if ((usage.outputTokens ?? 0) === 0 && fallbackOutput > 0) {
179
+ this.logger.debug(
180
+ "Backfilling outputTokens from fallback usage (step reported 0)",
181
+ { stepValue: usage.outputTokens, fallbackValue: fallbackOutput }
182
+ );
183
+ usage.outputTokens = fallbackOutput;
184
+ }
185
+ const fallbackCacheRead = fallbackUsage.cacheReadTokens ?? 0;
186
+ if ((usage.cacheReadTokens ?? 0) === 0 && fallbackCacheRead > 0) {
187
+ usage.cacheReadTokens = fallbackCacheRead;
188
+ }
189
+ const fallbackCacheWrite = fallbackUsage.cacheWriteTokens ?? 0;
190
+ if ((usage.cacheWriteTokens ?? 0) === 0 && fallbackCacheWrite > 0) {
191
+ usage.cacheWriteTokens = fallbackCacheWrite;
192
+ }
193
+ const fallbackTotalTokens = fallbackUsage.totalTokens ?? 0;
194
+ if ((usage.totalTokens ?? 0) === 0 && fallbackTotalTokens > 0) {
195
+ usage.totalTokens = fallbackTotalTokens;
196
+ }
197
+ if (usage.reasoningTokens === void 0 && fallbackUsage.reasoningTokens !== void 0) {
198
+ usage.reasoningTokens = fallbackUsage.reasoningTokens;
199
+ }
200
+ }
178
201
  this.actualTokens = usage;
179
202
  this.logger.info("LLM response complete", {
180
203
  finishReason: event.finishReason,
@@ -322,6 +345,41 @@ class StreamProcessor {
322
345
  usage: this.actualTokens
323
346
  };
324
347
  }
348
+ getCacheTokensFromProviderMetadata(providerMetadata) {
349
+ const anthropicMeta = providerMetadata?.["anthropic"];
350
+ const bedrockMeta = providerMetadata?.["bedrock"];
351
+ const cacheWriteTokens = anthropicMeta?.["cacheCreationInputTokens"] ?? bedrockMeta?.usage?.["cacheWriteInputTokens"] ?? 0;
352
+ const cacheReadTokens = anthropicMeta?.["cacheReadInputTokens"] ?? bedrockMeta?.usage?.["cacheReadInputTokens"] ?? 0;
353
+ return { cacheReadTokens, cacheWriteTokens };
354
+ }
355
+ normalizeUsage(usage, providerMetadata) {
356
+ const inputTokensRaw = usage?.inputTokens ?? 0;
357
+ const outputTokens = usage?.outputTokens ?? 0;
358
+ const totalTokens = usage?.totalTokens ?? 0;
359
+ const reasoningTokens = usage?.reasoningTokens;
360
+ const cachedInputTokens = usage?.cachedInputTokens;
361
+ const inputTokenDetails = usage?.inputTokenDetails;
362
+ const providerCache = this.getCacheTokensFromProviderMetadata(providerMetadata);
363
+ const cacheReadTokens = inputTokenDetails?.cacheReadTokens ?? cachedInputTokens ?? providerCache.cacheReadTokens ?? 0;
364
+ const cacheWriteTokens = inputTokenDetails?.cacheWriteTokens ?? providerCache.cacheWriteTokens ?? 0;
365
+ const needsCacheWriteAdjustment = inputTokenDetails === void 0 && cachedInputTokens !== void 0 && providerCache.cacheWriteTokens > 0;
366
+ const noCacheTokens = inputTokenDetails?.noCacheTokens ?? (cachedInputTokens !== void 0 ? inputTokensRaw - cachedInputTokens - (needsCacheWriteAdjustment ? providerCache.cacheWriteTokens : 0) : inputTokensRaw);
367
+ return {
368
+ inputTokens: Math.max(0, noCacheTokens),
369
+ outputTokens,
370
+ totalTokens,
371
+ ...reasoningTokens !== void 0 && { reasoningTokens },
372
+ cacheReadTokens,
373
+ cacheWriteTokens
374
+ };
375
+ }
376
+ getProviderMetadata(event) {
377
+ const metadata = "providerMetadata" in event ? event.providerMetadata : void 0;
378
+ if (!metadata || typeof metadata !== "object") {
379
+ return void 0;
380
+ }
381
+ return metadata;
382
+ }
325
383
  async createAssistantMessage() {
326
384
  await this.contextManager.addAssistantMessage("", [], {});
327
385
  return this.getLastMessageId();
@@ -124,23 +124,6 @@ class TurnExecutor {
124
124
  * Used to pass approval info from tool execution to result persistence.
125
125
  */
126
126
  approvalMetadata = /* @__PURE__ */ new Map();
127
- /**
128
- * Tracks whether compaction occurred during this turn.
129
- * Used to signal to the caller that session continuation may be needed.
130
- */
131
- compactionOccurred = false;
132
- /**
133
- * Compaction data captured during this turn.
134
- * Contains summary text and preserved messages for session continuation.
135
- * This data is passed up the call chain (NOT persisted to original session).
136
- */
137
- compactionData = null;
138
- /**
139
- * Virtual context for remaining iterations after compaction.
140
- * When set, the main loop uses this instead of calling getFormattedMessagesForLLM().
141
- * This provides reduced context to the LLM without persisting to the original session.
142
- */
143
- virtualContext = null;
144
127
  /**
145
128
  * Get StreamProcessor config from TurnExecutor state.
146
129
  * @param estimatedInputTokens Optional estimated input tokens for analytics
@@ -206,7 +189,7 @@ class TurnExecutor {
206
189
  await this.injectQueuedMessages(coalesced);
207
190
  }
208
191
  await this.pruneOldToolOutputs();
209
- let prepared = this.virtualContext ? await this.buildMessagesFromVirtualContext(contributorContext) : await this.contextManager.getFormattedMessagesForLLM(
192
+ let prepared = await this.contextManager.getFormattedMessagesForLLM(
210
193
  contributorContext,
211
194
  this.llmContext
212
195
  );
@@ -220,9 +203,16 @@ class TurnExecutor {
220
203
  this.logger.debug(
221
204
  `Pre-check: estimated ${estimatedTokens} tokens exceeds threshold, compacting`
222
205
  );
223
- await this.compactToVirtualContext(estimatedTokens);
224
- if (this.virtualContext) {
225
- prepared = await this.buildMessagesFromVirtualContext(contributorContext);
206
+ const didCompact = await this.compactContext(
207
+ estimatedTokens,
208
+ contributorContext,
209
+ toolDefinitions
210
+ );
211
+ if (didCompact) {
212
+ prepared = await this.contextManager.getFormattedMessagesForLLM(
213
+ contributorContext,
214
+ this.llmContext
215
+ );
226
216
  estimatedTokens = await this.contextManager.getEstimatedNextInputTokens(
227
217
  prepared.systemPrompt,
228
218
  prepared.preparedHistory,
@@ -284,22 +274,29 @@ class TurnExecutor {
284
274
  `Context estimation (cancelled): keeping last known actuals, partial response (${result.text.length} chars) will be estimated`
285
275
  );
286
276
  } else if (result.usage?.inputTokens !== void 0) {
287
- const diff = estimatedTokens - result.usage.inputTokens;
288
- const diffPercent = result.usage.inputTokens > 0 ? (diff / result.usage.inputTokens * 100).toFixed(1) : "0.0";
277
+ const contextInputTokens2 = this.getContextInputTokens(result.usage);
278
+ const actualInputTokens = contextInputTokens2 ?? result.usage.inputTokens;
279
+ const diff = estimatedTokens - actualInputTokens;
280
+ const diffPercent = actualInputTokens > 0 ? (diff / actualInputTokens * 100).toFixed(1) : "0.0";
289
281
  this.logger.info(
290
- `Context estimation accuracy: estimated=${estimatedTokens}, actual=${result.usage.inputTokens}, error=${diff} (${diffPercent}%)`
282
+ `Context estimation accuracy: estimated=${estimatedTokens}, actual=${actualInputTokens}, error=${diff} (${diffPercent}%)`
291
283
  );
292
- this.contextManager.setLastActualInputTokens(result.usage.inputTokens);
284
+ this.contextManager.setLastActualInputTokens(actualInputTokens);
293
285
  if (result.usage?.outputTokens !== void 0) {
294
286
  this.contextManager.setLastActualOutputTokens(result.usage.outputTokens);
295
287
  }
296
288
  await this.contextManager.recordLastCallMessageCount();
297
289
  }
298
- if (!this.virtualContext && result.usage?.inputTokens && this.shouldCompactFromActual(result.usage.inputTokens)) {
290
+ const contextInputTokens = result.usage ? this.getContextInputTokens(result.usage) : null;
291
+ if (contextInputTokens && this.shouldCompactFromActual(contextInputTokens)) {
299
292
  this.logger.debug(
300
- `Post-response: actual ${result.usage.inputTokens} tokens exceeds threshold, compacting`
293
+ `Post-response: actual ${contextInputTokens} tokens exceeds threshold, compacting`
294
+ );
295
+ await this.compactContext(
296
+ contextInputTokens,
297
+ contributorContext,
298
+ toolDefinitions
301
299
  );
302
- await this.compactToVirtualContext(result.usage.inputTokens);
303
300
  }
304
301
  if (result.finishReason !== "tool-calls") {
305
302
  const queuedOnTerminate = this.messageQueue.dequeueAll();
@@ -353,14 +350,7 @@ class TurnExecutor {
353
350
  text: lastText,
354
351
  stepCount,
355
352
  usage: lastStepTokens,
356
- finishReason: lastFinishReason,
357
- // Signal to caller that compaction occurred during this turn
358
- // Caller can use this to trigger session-native continuation
359
- didCompact: this.compactionOccurred,
360
- // Pass compaction data up the chain (NOT persisted to original session)
361
- // Caller uses this to create the continuation session with summary
362
- // Use spread to conditionally include only when data exists (exactOptionalPropertyTypes)
363
- ...this.compactionData && { compaction: this.compactionData }
353
+ finishReason: lastFinishReason
364
354
  };
365
355
  } catch (_2) {
366
356
  var _error = _2, _hasError = true;
@@ -761,27 +751,31 @@ class TurnExecutor {
761
751
  );
762
752
  }
763
753
  /**
764
- * Compact context using ReactiveOverflowStrategy WITHOUT persisting to original session.
765
- *
766
- * Key design: Creates a virtual context (summary + preserved messages) that will be used
767
- * for the remaining iterations of this turn. The compaction data is passed up the call chain
768
- * so the caller can create a continuation session with the summary.
754
+ * Compact context by generating a summary and adding it to the same session.
769
755
  *
770
- * The original session remains UNTOUCHED - no messages are added or modified.
756
+ * The summary message is added to the conversation history with `isSummary: true` metadata.
757
+ * When the context is loaded via getFormattedMessagesForLLM(), filterCompacted() will
758
+ * exclude all messages before the summary, effectively compacting the context.
771
759
  *
772
760
  * @param originalTokens The estimated input token count that triggered overflow
761
+ * @param contributorContext Context for system prompt contributors (needed for accurate token estimation)
762
+ * @param tools Tool definitions (needed for accurate token estimation)
763
+ * @returns true if compaction occurred, false if skipped
773
764
  */
774
- async compactToVirtualContext(originalTokens) {
765
+ async compactContext(originalTokens, contributorContext, tools) {
775
766
  if (!this.compactionStrategy) {
776
- return;
767
+ return false;
777
768
  }
778
769
  this.logger.info(
779
770
  `Context overflow detected (${originalTokens} tokens), checking if compression is possible`
780
771
  );
781
772
  const history = await this.contextManager.getHistory();
773
+ const { filterCompacted } = await import("../../context/utils.js");
774
+ const originalFiltered = filterCompacted(history);
775
+ const originalMessages = originalFiltered.length;
782
776
  if (history.length < 4) {
783
777
  this.logger.debug("Compaction skipped: history too short to summarize");
784
- return;
778
+ return false;
785
779
  }
786
780
  this.eventBus.emit("context:compacting", {
787
781
  estimatedTokens: originalTokens
@@ -795,106 +789,36 @@ class TurnExecutor {
795
789
  originalTokens,
796
790
  compactedTokens: originalTokens,
797
791
  // No change
798
- originalMessages: history.length,
799
- compactedMessages: history.length,
792
+ originalMessages,
793
+ compactedMessages: originalMessages,
800
794
  // No change
801
795
  strategy: this.compactionStrategy.name,
802
796
  reason: "overflow"
803
797
  });
804
- return;
805
- }
806
- const summaryMessage = summaryMessages[0];
807
- if (!summaryMessage) {
808
- this.logger.warn("Compaction returned empty summary message array");
809
- return;
798
+ return false;
810
799
  }
811
- const summaryText = this.extractSummaryText(summaryMessage);
812
- const summarizedCount = summaryMessage.metadata?.originalMessageCount ?? 0;
813
- let existingSummaryIndex = -1;
814
- for (let i = history.length - 1; i >= 0; i--) {
815
- const msg = history[i];
816
- if (msg?.metadata?.isSummary === true || msg?.metadata?.isSessionSummary === true) {
817
- existingSummaryIndex = i;
818
- break;
819
- }
800
+ for (const summary of summaryMessages) {
801
+ await this.contextManager.addMessage(summary);
820
802
  }
821
- const baseIndex = existingSummaryIndex >= 0 ? existingSummaryIndex + 1 : 0;
822
- const preservedMessages = history.slice(baseIndex + summarizedCount);
823
- const firstTimestamp = summaryMessage.metadata?.originalFirstTimestamp;
824
- const lastTimestamp = summaryMessage.metadata?.originalLastTimestamp;
825
- this.compactionData = {
826
- summaryText,
827
- preservedMessages: [...preservedMessages],
828
- // Copy to avoid mutation
829
- summarizedCount,
830
- ...firstTimestamp !== void 0 && { originalFirstTimestamp: firstTimestamp },
831
- ...lastTimestamp !== void 0 && { originalLastTimestamp: lastTimestamp }
832
- };
833
- this.virtualContext = {
834
- summaryMessage,
835
- preservedMessages: [...preservedMessages]
836
- };
837
- this.compactionOccurred = true;
838
803
  this.contextManager.resetActualTokenTracking();
839
- const { estimateMessagesTokens: estimateTokens } = await import("../../context/utils.js");
840
- const virtualMessages = [summaryMessage, ...preservedMessages];
841
- const compactedTokens = estimateTokens(virtualMessages);
804
+ const afterEstimate = await this.contextManager.getContextTokenEstimate(
805
+ contributorContext,
806
+ tools
807
+ );
808
+ const compactedTokens = afterEstimate.estimated;
809
+ const compactedMessages = afterEstimate.stats.filteredMessageCount;
842
810
  this.eventBus.emit("context:compacted", {
843
811
  originalTokens,
844
812
  compactedTokens,
845
- originalMessages: history.length,
846
- compactedMessages: virtualMessages.length,
813
+ originalMessages,
814
+ compactedMessages,
847
815
  strategy: this.compactionStrategy.name,
848
816
  reason: "overflow"
849
817
  });
850
818
  this.logger.info(
851
- `Compaction complete (virtual context): ${originalTokens} \u2192 ~${compactedTokens} tokens (${history.length} \u2192 ${virtualMessages.length} messages). Original session unchanged - summary will be passed to continuation session.`
819
+ `Compaction complete: ${originalTokens} \u2192 ~${compactedTokens} tokens (${originalMessages} \u2192 ${compactedMessages} messages after filtering)`
852
820
  );
853
- }
854
- /**
855
- * Extract the summary text from a summary message.
856
- */
857
- extractSummaryText(summaryMessage) {
858
- if (typeof summaryMessage.content === "string") {
859
- return summaryMessage.content;
860
- }
861
- if (Array.isArray(summaryMessage.content)) {
862
- return summaryMessage.content.filter((part) => part.type === "text").map((part) => part.text).join("\n");
863
- }
864
- return "";
865
- }
866
- /**
867
- * Build formatted messages from virtual context (after compaction).
868
- *
869
- * This creates LLM-ready messages using:
870
- * - System prompt (same as normal flow)
871
- * - Summary message (as first message)
872
- * - Preserved messages (formatted for LLM)
873
- *
874
- * Uses the same formatting pipeline as getFormattedMessagesForLLM()
875
- * but with our virtual history instead of the stored history.
876
- *
877
- * @param contributorContext Context for system prompt contributors
878
- * @returns Formatted messages ready for LLM call, matching getFormattedMessagesForLLM return type
879
- */
880
- async buildMessagesFromVirtualContext(contributorContext) {
881
- if (!this.virtualContext) {
882
- throw new Error("buildMessagesFromVirtualContext called without virtual context");
883
- }
884
- const { summaryMessage, preservedMessages } = this.virtualContext;
885
- const systemPrompt = await this.contextManager.getSystemPrompt(contributorContext);
886
- const virtualHistory = [summaryMessage, ...preservedMessages];
887
- const formattedMessages = await this.contextManager.getFormattedMessages(
888
- contributorContext,
889
- this.llmContext,
890
- systemPrompt,
891
- virtualHistory
892
- );
893
- return {
894
- formattedMessages,
895
- systemPrompt,
896
- preparedHistory: virtualHistory
897
- };
821
+ return true;
898
822
  }
899
823
  /**
900
824
  * Set telemetry span attributes for token usage.
@@ -917,6 +841,10 @@ class TurnExecutor {
917
841
  activeSpan.setAttribute("gen_ai.usage.reasoning_tokens", usage.reasoningTokens);
918
842
  }
919
843
  }
844
+ getContextInputTokens(usage) {
845
+ if (usage.inputTokens === void 0) return null;
846
+ return usage.inputTokens + (usage.cacheReadTokens ?? 0) + (usage.cacheWriteTokens ?? 0);
847
+ }
920
848
  /**
921
849
  * Map provider errors to DextoRuntimeError.
922
850
  */