@dexto/core 1.5.4 → 1.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/DextoAgent.cjs +61 -155
- package/dist/agent/DextoAgent.d.ts +11 -54
- package/dist/agent/DextoAgent.d.ts.map +1 -1
- package/dist/agent/DextoAgent.js +61 -155
- package/dist/context/compaction/strategies/reactive-overflow.cjs +5 -3
- package/dist/context/compaction/strategies/reactive-overflow.d.ts +1 -0
- package/dist/context/compaction/strategies/reactive-overflow.d.ts.map +1 -1
- package/dist/context/compaction/strategies/reactive-overflow.js +5 -3
- package/dist/context/compaction/types.d.ts +13 -1
- package/dist/context/compaction/types.d.ts.map +1 -1
- package/dist/errors/types.cjs +0 -2
- package/dist/errors/types.d.ts +1 -5
- package/dist/errors/types.d.ts.map +1 -1
- package/dist/errors/types.js +0 -2
- package/dist/events/index.cjs +0 -2
- package/dist/events/index.d.ts +6 -25
- package/dist/events/index.d.ts.map +1 -1
- package/dist/events/index.js +0 -2
- package/dist/llm/executor/stream-processor.cjs +85 -27
- package/dist/llm/executor/stream-processor.d.ts +4 -0
- package/dist/llm/executor/stream-processor.d.ts.map +1 -1
- package/dist/llm/executor/stream-processor.js +85 -27
- package/dist/llm/executor/turn-executor.cjs +58 -130
- package/dist/llm/executor/turn-executor.d.ts +9 -43
- package/dist/llm/executor/turn-executor.d.ts.map +1 -1
- package/dist/llm/executor/turn-executor.js +58 -130
- package/dist/llm/executor/types.d.ts +0 -28
- package/dist/llm/executor/types.d.ts.map +1 -1
- package/dist/llm/services/vercel.cjs +2 -5
- package/dist/llm/services/vercel.d.ts +1 -6
- package/dist/llm/services/vercel.d.ts.map +1 -1
- package/dist/llm/services/vercel.js +2 -5
- package/dist/logger/logger.cjs +6 -7
- package/dist/logger/logger.d.ts +1 -0
- package/dist/logger/logger.d.ts.map +1 -1
- package/dist/logger/logger.js +6 -7
- package/dist/session/chat-session.cjs +19 -12
- package/dist/session/chat-session.d.ts +3 -6
- package/dist/session/chat-session.d.ts.map +1 -1
- package/dist/session/chat-session.js +19 -12
- package/dist/session/session-manager.cjs +0 -135
- package/dist/session/session-manager.d.ts +0 -43
- package/dist/session/session-manager.d.ts.map +1 -1
- package/dist/session/session-manager.js +0 -135
- package/dist/telemetry/telemetry.cjs +12 -5
- package/dist/telemetry/telemetry.d.ts.map +1 -1
- package/dist/telemetry/telemetry.js +12 -5
- package/dist/tools/schemas.cjs +2 -2
- package/dist/tools/schemas.js +2 -2
- package/package.json +15 -5
- package/dist/filesystem/error-codes.cjs +0 -53
- package/dist/filesystem/error-codes.d.ts +0 -31
- package/dist/filesystem/error-codes.d.ts.map +0 -1
- package/dist/filesystem/error-codes.js +0 -30
- package/dist/filesystem/errors.cjs +0 -303
- package/dist/filesystem/errors.d.ts +0 -109
- package/dist/filesystem/errors.d.ts.map +0 -1
- package/dist/filesystem/errors.js +0 -280
- package/dist/filesystem/filesystem-service.cjs +0 -534
- package/dist/filesystem/filesystem-service.d.ts +0 -97
- package/dist/filesystem/filesystem-service.d.ts.map +0 -1
- package/dist/filesystem/filesystem-service.js +0 -501
- package/dist/filesystem/index.cjs +0 -37
- package/dist/filesystem/index.d.ts +0 -11
- package/dist/filesystem/index.d.ts.map +0 -1
- package/dist/filesystem/index.js +0 -11
- package/dist/filesystem/path-validator.cjs +0 -250
- package/dist/filesystem/path-validator.d.ts +0 -103
- package/dist/filesystem/path-validator.d.ts.map +0 -1
- package/dist/filesystem/path-validator.js +0 -217
- package/dist/filesystem/types.cjs +0 -16
- package/dist/filesystem/types.d.ts +0 -175
- package/dist/filesystem/types.d.ts.map +0 -1
- package/dist/filesystem/types.js +0 -0
- package/dist/process/command-validator.cjs +0 -554
- package/dist/process/command-validator.d.ts +0 -49
- package/dist/process/command-validator.d.ts.map +0 -1
- package/dist/process/command-validator.js +0 -531
- package/dist/process/error-codes.cjs +0 -47
- package/dist/process/error-codes.d.ts +0 -25
- package/dist/process/error-codes.d.ts.map +0 -1
- package/dist/process/error-codes.js +0 -24
- package/dist/process/errors.cjs +0 -244
- package/dist/process/errors.d.ts +0 -87
- package/dist/process/errors.d.ts.map +0 -1
- package/dist/process/errors.js +0 -221
- package/dist/process/index.cjs +0 -37
- package/dist/process/index.d.ts +0 -11
- package/dist/process/index.d.ts.map +0 -1
- package/dist/process/index.js +0 -11
- package/dist/process/process-service.cjs +0 -497
- package/dist/process/process-service.d.ts +0 -69
- package/dist/process/process-service.d.ts.map +0 -1
- package/dist/process/process-service.js +0 -464
- package/dist/process/types.cjs +0 -16
- package/dist/process/types.d.ts +0 -107
- package/dist/process/types.d.ts.map +0 -1
- package/dist/process/types.js +0 -0
- package/dist/session/compaction-service.cjs +0 -139
- package/dist/session/compaction-service.d.ts +0 -81
- package/dist/session/compaction-service.d.ts.map +0 -1
- package/dist/session/compaction-service.js +0 -106
|
@@ -52,6 +52,7 @@ class StreamProcessor {
|
|
|
52
52
|
reasoningMetadata;
|
|
53
53
|
accumulatedText = "";
|
|
54
54
|
logger;
|
|
55
|
+
hasStepUsage = false;
|
|
55
56
|
/**
|
|
56
57
|
* Track pending tool calls (added to context but no result yet).
|
|
57
58
|
* On cancel/abort, we add synthetic "cancelled" results to maintain tool_use/tool_result pairing.
|
|
@@ -162,41 +163,63 @@ class StreamProcessor {
|
|
|
162
163
|
}
|
|
163
164
|
case "finish-step":
|
|
164
165
|
if (event.usage) {
|
|
165
|
-
const
|
|
166
|
-
const
|
|
167
|
-
const cacheWriteTokens = anthropicMeta?.["cacheCreationInputTokens"] ?? bedrockMeta?.usage?.["cacheWriteInputTokens"] ?? 0;
|
|
168
|
-
const cacheReadTokens = anthropicMeta?.["cacheReadInputTokens"] ?? bedrockMeta?.usage?.["cacheReadInputTokens"] ?? event.usage.cachedInputTokens ?? 0;
|
|
166
|
+
const providerMetadata = this.getProviderMetadata(event);
|
|
167
|
+
const stepUsage = this.normalizeUsage(event.usage, providerMetadata);
|
|
169
168
|
this.actualTokens = {
|
|
170
|
-
inputTokens: (this.actualTokens.inputTokens ?? 0) + (
|
|
171
|
-
outputTokens: (this.actualTokens.outputTokens ?? 0) + (
|
|
172
|
-
totalTokens: (this.actualTokens.totalTokens ?? 0) + (
|
|
173
|
-
...
|
|
174
|
-
reasoningTokens: (this.actualTokens.reasoningTokens ?? 0) +
|
|
169
|
+
inputTokens: (this.actualTokens.inputTokens ?? 0) + (stepUsage.inputTokens ?? 0),
|
|
170
|
+
outputTokens: (this.actualTokens.outputTokens ?? 0) + (stepUsage.outputTokens ?? 0),
|
|
171
|
+
totalTokens: (this.actualTokens.totalTokens ?? 0) + (stepUsage.totalTokens ?? 0),
|
|
172
|
+
...stepUsage.reasoningTokens !== void 0 && {
|
|
173
|
+
reasoningTokens: (this.actualTokens.reasoningTokens ?? 0) + stepUsage.reasoningTokens
|
|
175
174
|
},
|
|
176
175
|
// Cache tokens
|
|
177
|
-
cacheReadTokens: (this.actualTokens.cacheReadTokens ?? 0) + cacheReadTokens,
|
|
178
|
-
cacheWriteTokens: (this.actualTokens.cacheWriteTokens ?? 0) + cacheWriteTokens
|
|
176
|
+
cacheReadTokens: (this.actualTokens.cacheReadTokens ?? 0) + (stepUsage.cacheReadTokens ?? 0),
|
|
177
|
+
cacheWriteTokens: (this.actualTokens.cacheWriteTokens ?? 0) + (stepUsage.cacheWriteTokens ?? 0)
|
|
179
178
|
};
|
|
179
|
+
this.hasStepUsage = true;
|
|
180
180
|
}
|
|
181
181
|
break;
|
|
182
182
|
case "finish": {
|
|
183
183
|
this.finishReason = event.finishReason;
|
|
184
|
-
const
|
|
185
|
-
const
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
184
|
+
const providerMetadata = this.getProviderMetadata(event);
|
|
185
|
+
const fallbackUsage = this.normalizeUsage(
|
|
186
|
+
event.totalUsage,
|
|
187
|
+
providerMetadata
|
|
188
|
+
);
|
|
189
|
+
const usage = this.hasStepUsage ? { ...this.actualTokens } : fallbackUsage;
|
|
190
|
+
if (this.hasStepUsage) {
|
|
191
|
+
const fallbackInput = fallbackUsage.inputTokens ?? 0;
|
|
192
|
+
if ((usage.inputTokens ?? 0) === 0 && fallbackInput > 0) {
|
|
193
|
+
this.logger.debug(
|
|
194
|
+
"Backfilling inputTokens from fallback usage (step reported 0)",
|
|
195
|
+
{ stepValue: usage.inputTokens, fallbackValue: fallbackInput }
|
|
196
|
+
);
|
|
197
|
+
usage.inputTokens = fallbackInput;
|
|
198
|
+
}
|
|
199
|
+
const fallbackOutput = fallbackUsage.outputTokens ?? 0;
|
|
200
|
+
if ((usage.outputTokens ?? 0) === 0 && fallbackOutput > 0) {
|
|
201
|
+
this.logger.debug(
|
|
202
|
+
"Backfilling outputTokens from fallback usage (step reported 0)",
|
|
203
|
+
{ stepValue: usage.outputTokens, fallbackValue: fallbackOutput }
|
|
204
|
+
);
|
|
205
|
+
usage.outputTokens = fallbackOutput;
|
|
206
|
+
}
|
|
207
|
+
const fallbackCacheRead = fallbackUsage.cacheReadTokens ?? 0;
|
|
208
|
+
if ((usage.cacheReadTokens ?? 0) === 0 && fallbackCacheRead > 0) {
|
|
209
|
+
usage.cacheReadTokens = fallbackCacheRead;
|
|
210
|
+
}
|
|
211
|
+
const fallbackCacheWrite = fallbackUsage.cacheWriteTokens ?? 0;
|
|
212
|
+
if ((usage.cacheWriteTokens ?? 0) === 0 && fallbackCacheWrite > 0) {
|
|
213
|
+
usage.cacheWriteTokens = fallbackCacheWrite;
|
|
214
|
+
}
|
|
215
|
+
const fallbackTotalTokens = fallbackUsage.totalTokens ?? 0;
|
|
216
|
+
if ((usage.totalTokens ?? 0) === 0 && fallbackTotalTokens > 0) {
|
|
217
|
+
usage.totalTokens = fallbackTotalTokens;
|
|
218
|
+
}
|
|
219
|
+
if (usage.reasoningTokens === void 0 && fallbackUsage.reasoningTokens !== void 0) {
|
|
220
|
+
usage.reasoningTokens = fallbackUsage.reasoningTokens;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
200
223
|
this.actualTokens = usage;
|
|
201
224
|
this.logger.info("LLM response complete", {
|
|
202
225
|
finishReason: event.finishReason,
|
|
@@ -344,6 +367,41 @@ class StreamProcessor {
|
|
|
344
367
|
usage: this.actualTokens
|
|
345
368
|
};
|
|
346
369
|
}
|
|
370
|
+
getCacheTokensFromProviderMetadata(providerMetadata) {
|
|
371
|
+
const anthropicMeta = providerMetadata?.["anthropic"];
|
|
372
|
+
const bedrockMeta = providerMetadata?.["bedrock"];
|
|
373
|
+
const cacheWriteTokens = anthropicMeta?.["cacheCreationInputTokens"] ?? bedrockMeta?.usage?.["cacheWriteInputTokens"] ?? 0;
|
|
374
|
+
const cacheReadTokens = anthropicMeta?.["cacheReadInputTokens"] ?? bedrockMeta?.usage?.["cacheReadInputTokens"] ?? 0;
|
|
375
|
+
return { cacheReadTokens, cacheWriteTokens };
|
|
376
|
+
}
|
|
377
|
+
normalizeUsage(usage, providerMetadata) {
|
|
378
|
+
const inputTokensRaw = usage?.inputTokens ?? 0;
|
|
379
|
+
const outputTokens = usage?.outputTokens ?? 0;
|
|
380
|
+
const totalTokens = usage?.totalTokens ?? 0;
|
|
381
|
+
const reasoningTokens = usage?.reasoningTokens;
|
|
382
|
+
const cachedInputTokens = usage?.cachedInputTokens;
|
|
383
|
+
const inputTokenDetails = usage?.inputTokenDetails;
|
|
384
|
+
const providerCache = this.getCacheTokensFromProviderMetadata(providerMetadata);
|
|
385
|
+
const cacheReadTokens = inputTokenDetails?.cacheReadTokens ?? cachedInputTokens ?? providerCache.cacheReadTokens ?? 0;
|
|
386
|
+
const cacheWriteTokens = inputTokenDetails?.cacheWriteTokens ?? providerCache.cacheWriteTokens ?? 0;
|
|
387
|
+
const needsCacheWriteAdjustment = inputTokenDetails === void 0 && cachedInputTokens !== void 0 && providerCache.cacheWriteTokens > 0;
|
|
388
|
+
const noCacheTokens = inputTokenDetails?.noCacheTokens ?? (cachedInputTokens !== void 0 ? inputTokensRaw - cachedInputTokens - (needsCacheWriteAdjustment ? providerCache.cacheWriteTokens : 0) : inputTokensRaw);
|
|
389
|
+
return {
|
|
390
|
+
inputTokens: Math.max(0, noCacheTokens),
|
|
391
|
+
outputTokens,
|
|
392
|
+
totalTokens,
|
|
393
|
+
...reasoningTokens !== void 0 && { reasoningTokens },
|
|
394
|
+
cacheReadTokens,
|
|
395
|
+
cacheWriteTokens
|
|
396
|
+
};
|
|
397
|
+
}
|
|
398
|
+
getProviderMetadata(event) {
|
|
399
|
+
const metadata = "providerMetadata" in event ? event.providerMetadata : void 0;
|
|
400
|
+
if (!metadata || typeof metadata !== "object") {
|
|
401
|
+
return void 0;
|
|
402
|
+
}
|
|
403
|
+
return metadata;
|
|
404
|
+
}
|
|
347
405
|
async createAssistantMessage() {
|
|
348
406
|
await this.contextManager.addAssistantMessage("", [], {});
|
|
349
407
|
return this.getLastMessageId();
|
|
@@ -26,6 +26,7 @@ export declare class StreamProcessor {
|
|
|
26
26
|
private reasoningMetadata;
|
|
27
27
|
private accumulatedText;
|
|
28
28
|
private logger;
|
|
29
|
+
private hasStepUsage;
|
|
29
30
|
/**
|
|
30
31
|
* Track pending tool calls (added to context but no result yet).
|
|
31
32
|
* On cancel/abort, we add synthetic "cancelled" results to maintain tool_use/tool_result pairing.
|
|
@@ -46,6 +47,9 @@ export declare class StreamProcessor {
|
|
|
46
47
|
approvalStatus?: "approved" | "rejected";
|
|
47
48
|
}> | undefined);
|
|
48
49
|
process(streamFn: () => StreamTextResult<VercelToolSet, unknown>): Promise<StreamProcessorResult>;
|
|
50
|
+
private getCacheTokensFromProviderMetadata;
|
|
51
|
+
private normalizeUsage;
|
|
52
|
+
private getProviderMetadata;
|
|
49
53
|
private createAssistantMessage;
|
|
50
54
|
private getLastMessageId;
|
|
51
55
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stream-processor.d.ts","sourceRoot":"","sources":["../../../src/llm/executor/stream-processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,OAAO,IAAI,aAAa,EAAE,MAAM,IAAI,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAmB,MAAM,uBAAuB,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AAGnD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,WAAW,EAAc,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"stream-processor.d.ts","sourceRoot":"","sources":["../../../src/llm/executor/stream-processor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,OAAO,IAAI,aAAa,EAAE,MAAM,IAAI,CAAC;AAChE,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAmB,MAAM,uBAAuB,CAAC;AACzE,OAAO,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAE,qBAAqB,EAAE,MAAM,YAAY,CAAC;AAGnD,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,WAAW,EAAc,MAAM,aAAa,CAAC;AAetD,MAAM,WAAW,qBAAqB;IAClC,QAAQ,EAAE,WAAW,CAAC;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,yEAAyE;IACzE,oBAAoB,CAAC,EAAE,MAAM,CAAC;CACjC;AAED,qBAAa,eAAe;IA0BpB,OAAO,CAAC,cAAc;IACtB,OAAO,CAAC,QAAQ;IAChB,OAAO,CAAC,eAAe;IACvB,OAAO,CAAC,WAAW;IACnB,OAAO,CAAC,MAAM;IAEd,OAAO,CAAC,SAAS;IACjB,OAAO,CAAC,gBAAgB,CAAC;IAhC7B,OAAO,CAAC,kBAAkB,CAAuB;IACjD,OAAO,CAAC,YAAY,CAAmE;IACvF,OAAO,CAAC,YAAY,CAA8B;IAClD,OAAO,CAAC,aAAa,CAAc;IACnC,OAAO,CAAC,iBAAiB,CAAsC;IAC/D,OAAO,CAAC,eAAe,CAAc;IACrC,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,YAAY,CAAS;IAC7B;;;OAGG;IACH,OAAO,CAAC,gBAAgB,CAAgD;IAExE;;;;;;;;;OASG;gBAES,cAAc,EAAE,cAAc,EAC9B,QAAQ,EAAE,eAAe,EACzB,eAAe,EAAE,eAAe,EAChC,WAAW,EAAE,WAAW,EACxB,MAAM,EAAE,qBAAqB,EACrC,MAAM,EAAE,YAAY,EACZ,SAAS,GAAE,OAAc,EACzB,gBAAgB,CAAC,EAAE,GAAG,CAC1B,MAAM,EACN;QAAE,eAAe,EAAE,OAAO,CAAC;QAAC,cAAc,CAAC,EAAE,UAAU,GAAG,UAAU,CAAA;KAAE,CACzE,YAAA;IAKC,OAAO,CACT,QAAQ,EAAE,MAAM,gBAAgB,CAAC,aAAa,EAAE,OAAO,CAAC,GACzD,OAAO,CAAC,qBAAqB,CAAC;IA4bjC,OAAO,CAAC,kCAAkC;IAoB1C,OAAO,CAAC,cAAc;IA0CtB,OAAO,CAAC,mBAAmB;YAab,sBAAsB;YAKtB,gBAAgB;IAO9B;;;;OAIG;YACW,2BAA2B;CAmC5C"}
|
|
@@ -30,6 +30,7 @@ class StreamProcessor {
|
|
|
30
30
|
reasoningMetadata;
|
|
31
31
|
accumulatedText = "";
|
|
32
32
|
logger;
|
|
33
|
+
hasStepUsage = false;
|
|
33
34
|
/**
|
|
34
35
|
* Track pending tool calls (added to context but no result yet).
|
|
35
36
|
* On cancel/abort, we add synthetic "cancelled" results to maintain tool_use/tool_result pairing.
|
|
@@ -140,41 +141,63 @@ class StreamProcessor {
|
|
|
140
141
|
}
|
|
141
142
|
case "finish-step":
|
|
142
143
|
if (event.usage) {
|
|
143
|
-
const
|
|
144
|
-
const
|
|
145
|
-
const cacheWriteTokens = anthropicMeta?.["cacheCreationInputTokens"] ?? bedrockMeta?.usage?.["cacheWriteInputTokens"] ?? 0;
|
|
146
|
-
const cacheReadTokens = anthropicMeta?.["cacheReadInputTokens"] ?? bedrockMeta?.usage?.["cacheReadInputTokens"] ?? event.usage.cachedInputTokens ?? 0;
|
|
144
|
+
const providerMetadata = this.getProviderMetadata(event);
|
|
145
|
+
const stepUsage = this.normalizeUsage(event.usage, providerMetadata);
|
|
147
146
|
this.actualTokens = {
|
|
148
|
-
inputTokens: (this.actualTokens.inputTokens ?? 0) + (
|
|
149
|
-
outputTokens: (this.actualTokens.outputTokens ?? 0) + (
|
|
150
|
-
totalTokens: (this.actualTokens.totalTokens ?? 0) + (
|
|
151
|
-
...
|
|
152
|
-
reasoningTokens: (this.actualTokens.reasoningTokens ?? 0) +
|
|
147
|
+
inputTokens: (this.actualTokens.inputTokens ?? 0) + (stepUsage.inputTokens ?? 0),
|
|
148
|
+
outputTokens: (this.actualTokens.outputTokens ?? 0) + (stepUsage.outputTokens ?? 0),
|
|
149
|
+
totalTokens: (this.actualTokens.totalTokens ?? 0) + (stepUsage.totalTokens ?? 0),
|
|
150
|
+
...stepUsage.reasoningTokens !== void 0 && {
|
|
151
|
+
reasoningTokens: (this.actualTokens.reasoningTokens ?? 0) + stepUsage.reasoningTokens
|
|
153
152
|
},
|
|
154
153
|
// Cache tokens
|
|
155
|
-
cacheReadTokens: (this.actualTokens.cacheReadTokens ?? 0) + cacheReadTokens,
|
|
156
|
-
cacheWriteTokens: (this.actualTokens.cacheWriteTokens ?? 0) + cacheWriteTokens
|
|
154
|
+
cacheReadTokens: (this.actualTokens.cacheReadTokens ?? 0) + (stepUsage.cacheReadTokens ?? 0),
|
|
155
|
+
cacheWriteTokens: (this.actualTokens.cacheWriteTokens ?? 0) + (stepUsage.cacheWriteTokens ?? 0)
|
|
157
156
|
};
|
|
157
|
+
this.hasStepUsage = true;
|
|
158
158
|
}
|
|
159
159
|
break;
|
|
160
160
|
case "finish": {
|
|
161
161
|
this.finishReason = event.finishReason;
|
|
162
|
-
const
|
|
163
|
-
const
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
162
|
+
const providerMetadata = this.getProviderMetadata(event);
|
|
163
|
+
const fallbackUsage = this.normalizeUsage(
|
|
164
|
+
event.totalUsage,
|
|
165
|
+
providerMetadata
|
|
166
|
+
);
|
|
167
|
+
const usage = this.hasStepUsage ? { ...this.actualTokens } : fallbackUsage;
|
|
168
|
+
if (this.hasStepUsage) {
|
|
169
|
+
const fallbackInput = fallbackUsage.inputTokens ?? 0;
|
|
170
|
+
if ((usage.inputTokens ?? 0) === 0 && fallbackInput > 0) {
|
|
171
|
+
this.logger.debug(
|
|
172
|
+
"Backfilling inputTokens from fallback usage (step reported 0)",
|
|
173
|
+
{ stepValue: usage.inputTokens, fallbackValue: fallbackInput }
|
|
174
|
+
);
|
|
175
|
+
usage.inputTokens = fallbackInput;
|
|
176
|
+
}
|
|
177
|
+
const fallbackOutput = fallbackUsage.outputTokens ?? 0;
|
|
178
|
+
if ((usage.outputTokens ?? 0) === 0 && fallbackOutput > 0) {
|
|
179
|
+
this.logger.debug(
|
|
180
|
+
"Backfilling outputTokens from fallback usage (step reported 0)",
|
|
181
|
+
{ stepValue: usage.outputTokens, fallbackValue: fallbackOutput }
|
|
182
|
+
);
|
|
183
|
+
usage.outputTokens = fallbackOutput;
|
|
184
|
+
}
|
|
185
|
+
const fallbackCacheRead = fallbackUsage.cacheReadTokens ?? 0;
|
|
186
|
+
if ((usage.cacheReadTokens ?? 0) === 0 && fallbackCacheRead > 0) {
|
|
187
|
+
usage.cacheReadTokens = fallbackCacheRead;
|
|
188
|
+
}
|
|
189
|
+
const fallbackCacheWrite = fallbackUsage.cacheWriteTokens ?? 0;
|
|
190
|
+
if ((usage.cacheWriteTokens ?? 0) === 0 && fallbackCacheWrite > 0) {
|
|
191
|
+
usage.cacheWriteTokens = fallbackCacheWrite;
|
|
192
|
+
}
|
|
193
|
+
const fallbackTotalTokens = fallbackUsage.totalTokens ?? 0;
|
|
194
|
+
if ((usage.totalTokens ?? 0) === 0 && fallbackTotalTokens > 0) {
|
|
195
|
+
usage.totalTokens = fallbackTotalTokens;
|
|
196
|
+
}
|
|
197
|
+
if (usage.reasoningTokens === void 0 && fallbackUsage.reasoningTokens !== void 0) {
|
|
198
|
+
usage.reasoningTokens = fallbackUsage.reasoningTokens;
|
|
199
|
+
}
|
|
200
|
+
}
|
|
178
201
|
this.actualTokens = usage;
|
|
179
202
|
this.logger.info("LLM response complete", {
|
|
180
203
|
finishReason: event.finishReason,
|
|
@@ -322,6 +345,41 @@ class StreamProcessor {
|
|
|
322
345
|
usage: this.actualTokens
|
|
323
346
|
};
|
|
324
347
|
}
|
|
348
|
+
getCacheTokensFromProviderMetadata(providerMetadata) {
|
|
349
|
+
const anthropicMeta = providerMetadata?.["anthropic"];
|
|
350
|
+
const bedrockMeta = providerMetadata?.["bedrock"];
|
|
351
|
+
const cacheWriteTokens = anthropicMeta?.["cacheCreationInputTokens"] ?? bedrockMeta?.usage?.["cacheWriteInputTokens"] ?? 0;
|
|
352
|
+
const cacheReadTokens = anthropicMeta?.["cacheReadInputTokens"] ?? bedrockMeta?.usage?.["cacheReadInputTokens"] ?? 0;
|
|
353
|
+
return { cacheReadTokens, cacheWriteTokens };
|
|
354
|
+
}
|
|
355
|
+
normalizeUsage(usage, providerMetadata) {
|
|
356
|
+
const inputTokensRaw = usage?.inputTokens ?? 0;
|
|
357
|
+
const outputTokens = usage?.outputTokens ?? 0;
|
|
358
|
+
const totalTokens = usage?.totalTokens ?? 0;
|
|
359
|
+
const reasoningTokens = usage?.reasoningTokens;
|
|
360
|
+
const cachedInputTokens = usage?.cachedInputTokens;
|
|
361
|
+
const inputTokenDetails = usage?.inputTokenDetails;
|
|
362
|
+
const providerCache = this.getCacheTokensFromProviderMetadata(providerMetadata);
|
|
363
|
+
const cacheReadTokens = inputTokenDetails?.cacheReadTokens ?? cachedInputTokens ?? providerCache.cacheReadTokens ?? 0;
|
|
364
|
+
const cacheWriteTokens = inputTokenDetails?.cacheWriteTokens ?? providerCache.cacheWriteTokens ?? 0;
|
|
365
|
+
const needsCacheWriteAdjustment = inputTokenDetails === void 0 && cachedInputTokens !== void 0 && providerCache.cacheWriteTokens > 0;
|
|
366
|
+
const noCacheTokens = inputTokenDetails?.noCacheTokens ?? (cachedInputTokens !== void 0 ? inputTokensRaw - cachedInputTokens - (needsCacheWriteAdjustment ? providerCache.cacheWriteTokens : 0) : inputTokensRaw);
|
|
367
|
+
return {
|
|
368
|
+
inputTokens: Math.max(0, noCacheTokens),
|
|
369
|
+
outputTokens,
|
|
370
|
+
totalTokens,
|
|
371
|
+
...reasoningTokens !== void 0 && { reasoningTokens },
|
|
372
|
+
cacheReadTokens,
|
|
373
|
+
cacheWriteTokens
|
|
374
|
+
};
|
|
375
|
+
}
|
|
376
|
+
getProviderMetadata(event) {
|
|
377
|
+
const metadata = "providerMetadata" in event ? event.providerMetadata : void 0;
|
|
378
|
+
if (!metadata || typeof metadata !== "object") {
|
|
379
|
+
return void 0;
|
|
380
|
+
}
|
|
381
|
+
return metadata;
|
|
382
|
+
}
|
|
325
383
|
async createAssistantMessage() {
|
|
326
384
|
await this.contextManager.addAssistantMessage("", [], {});
|
|
327
385
|
return this.getLastMessageId();
|
|
@@ -124,23 +124,6 @@ class TurnExecutor {
|
|
|
124
124
|
* Used to pass approval info from tool execution to result persistence.
|
|
125
125
|
*/
|
|
126
126
|
approvalMetadata = /* @__PURE__ */ new Map();
|
|
127
|
-
/**
|
|
128
|
-
* Tracks whether compaction occurred during this turn.
|
|
129
|
-
* Used to signal to the caller that session continuation may be needed.
|
|
130
|
-
*/
|
|
131
|
-
compactionOccurred = false;
|
|
132
|
-
/**
|
|
133
|
-
* Compaction data captured during this turn.
|
|
134
|
-
* Contains summary text and preserved messages for session continuation.
|
|
135
|
-
* This data is passed up the call chain (NOT persisted to original session).
|
|
136
|
-
*/
|
|
137
|
-
compactionData = null;
|
|
138
|
-
/**
|
|
139
|
-
* Virtual context for remaining iterations after compaction.
|
|
140
|
-
* When set, the main loop uses this instead of calling getFormattedMessagesForLLM().
|
|
141
|
-
* This provides reduced context to the LLM without persisting to the original session.
|
|
142
|
-
*/
|
|
143
|
-
virtualContext = null;
|
|
144
127
|
/**
|
|
145
128
|
* Get StreamProcessor config from TurnExecutor state.
|
|
146
129
|
* @param estimatedInputTokens Optional estimated input tokens for analytics
|
|
@@ -206,7 +189,7 @@ class TurnExecutor {
|
|
|
206
189
|
await this.injectQueuedMessages(coalesced);
|
|
207
190
|
}
|
|
208
191
|
await this.pruneOldToolOutputs();
|
|
209
|
-
let prepared =
|
|
192
|
+
let prepared = await this.contextManager.getFormattedMessagesForLLM(
|
|
210
193
|
contributorContext,
|
|
211
194
|
this.llmContext
|
|
212
195
|
);
|
|
@@ -220,9 +203,16 @@ class TurnExecutor {
|
|
|
220
203
|
this.logger.debug(
|
|
221
204
|
`Pre-check: estimated ${estimatedTokens} tokens exceeds threshold, compacting`
|
|
222
205
|
);
|
|
223
|
-
await this.
|
|
224
|
-
|
|
225
|
-
|
|
206
|
+
const didCompact = await this.compactContext(
|
|
207
|
+
estimatedTokens,
|
|
208
|
+
contributorContext,
|
|
209
|
+
toolDefinitions
|
|
210
|
+
);
|
|
211
|
+
if (didCompact) {
|
|
212
|
+
prepared = await this.contextManager.getFormattedMessagesForLLM(
|
|
213
|
+
contributorContext,
|
|
214
|
+
this.llmContext
|
|
215
|
+
);
|
|
226
216
|
estimatedTokens = await this.contextManager.getEstimatedNextInputTokens(
|
|
227
217
|
prepared.systemPrompt,
|
|
228
218
|
prepared.preparedHistory,
|
|
@@ -284,22 +274,29 @@ class TurnExecutor {
|
|
|
284
274
|
`Context estimation (cancelled): keeping last known actuals, partial response (${result.text.length} chars) will be estimated`
|
|
285
275
|
);
|
|
286
276
|
} else if (result.usage?.inputTokens !== void 0) {
|
|
287
|
-
const
|
|
288
|
-
const
|
|
277
|
+
const contextInputTokens2 = this.getContextInputTokens(result.usage);
|
|
278
|
+
const actualInputTokens = contextInputTokens2 ?? result.usage.inputTokens;
|
|
279
|
+
const diff = estimatedTokens - actualInputTokens;
|
|
280
|
+
const diffPercent = actualInputTokens > 0 ? (diff / actualInputTokens * 100).toFixed(1) : "0.0";
|
|
289
281
|
this.logger.info(
|
|
290
|
-
`Context estimation accuracy: estimated=${estimatedTokens}, actual=${
|
|
282
|
+
`Context estimation accuracy: estimated=${estimatedTokens}, actual=${actualInputTokens}, error=${diff} (${diffPercent}%)`
|
|
291
283
|
);
|
|
292
|
-
this.contextManager.setLastActualInputTokens(
|
|
284
|
+
this.contextManager.setLastActualInputTokens(actualInputTokens);
|
|
293
285
|
if (result.usage?.outputTokens !== void 0) {
|
|
294
286
|
this.contextManager.setLastActualOutputTokens(result.usage.outputTokens);
|
|
295
287
|
}
|
|
296
288
|
await this.contextManager.recordLastCallMessageCount();
|
|
297
289
|
}
|
|
298
|
-
|
|
290
|
+
const contextInputTokens = result.usage ? this.getContextInputTokens(result.usage) : null;
|
|
291
|
+
if (contextInputTokens && this.shouldCompactFromActual(contextInputTokens)) {
|
|
299
292
|
this.logger.debug(
|
|
300
|
-
`Post-response: actual ${
|
|
293
|
+
`Post-response: actual ${contextInputTokens} tokens exceeds threshold, compacting`
|
|
294
|
+
);
|
|
295
|
+
await this.compactContext(
|
|
296
|
+
contextInputTokens,
|
|
297
|
+
contributorContext,
|
|
298
|
+
toolDefinitions
|
|
301
299
|
);
|
|
302
|
-
await this.compactToVirtualContext(result.usage.inputTokens);
|
|
303
300
|
}
|
|
304
301
|
if (result.finishReason !== "tool-calls") {
|
|
305
302
|
const queuedOnTerminate = this.messageQueue.dequeueAll();
|
|
@@ -353,14 +350,7 @@ class TurnExecutor {
|
|
|
353
350
|
text: lastText,
|
|
354
351
|
stepCount,
|
|
355
352
|
usage: lastStepTokens,
|
|
356
|
-
finishReason: lastFinishReason
|
|
357
|
-
// Signal to caller that compaction occurred during this turn
|
|
358
|
-
// Caller can use this to trigger session-native continuation
|
|
359
|
-
didCompact: this.compactionOccurred,
|
|
360
|
-
// Pass compaction data up the chain (NOT persisted to original session)
|
|
361
|
-
// Caller uses this to create the continuation session with summary
|
|
362
|
-
// Use spread to conditionally include only when data exists (exactOptionalPropertyTypes)
|
|
363
|
-
...this.compactionData && { compaction: this.compactionData }
|
|
353
|
+
finishReason: lastFinishReason
|
|
364
354
|
};
|
|
365
355
|
} catch (_2) {
|
|
366
356
|
var _error = _2, _hasError = true;
|
|
@@ -761,27 +751,31 @@ class TurnExecutor {
|
|
|
761
751
|
);
|
|
762
752
|
}
|
|
763
753
|
/**
|
|
764
|
-
* Compact context
|
|
765
|
-
*
|
|
766
|
-
* Key design: Creates a virtual context (summary + preserved messages) that will be used
|
|
767
|
-
* for the remaining iterations of this turn. The compaction data is passed up the call chain
|
|
768
|
-
* so the caller can create a continuation session with the summary.
|
|
754
|
+
* Compact context by generating a summary and adding it to the same session.
|
|
769
755
|
*
|
|
770
|
-
* The
|
|
756
|
+
* The summary message is added to the conversation history with `isSummary: true` metadata.
|
|
757
|
+
* When the context is loaded via getFormattedMessagesForLLM(), filterCompacted() will
|
|
758
|
+
* exclude all messages before the summary, effectively compacting the context.
|
|
771
759
|
*
|
|
772
760
|
* @param originalTokens The estimated input token count that triggered overflow
|
|
761
|
+
* @param contributorContext Context for system prompt contributors (needed for accurate token estimation)
|
|
762
|
+
* @param tools Tool definitions (needed for accurate token estimation)
|
|
763
|
+
* @returns true if compaction occurred, false if skipped
|
|
773
764
|
*/
|
|
774
|
-
async
|
|
765
|
+
async compactContext(originalTokens, contributorContext, tools) {
|
|
775
766
|
if (!this.compactionStrategy) {
|
|
776
|
-
return;
|
|
767
|
+
return false;
|
|
777
768
|
}
|
|
778
769
|
this.logger.info(
|
|
779
770
|
`Context overflow detected (${originalTokens} tokens), checking if compression is possible`
|
|
780
771
|
);
|
|
781
772
|
const history = await this.contextManager.getHistory();
|
|
773
|
+
const { filterCompacted } = await import("../../context/utils.js");
|
|
774
|
+
const originalFiltered = filterCompacted(history);
|
|
775
|
+
const originalMessages = originalFiltered.length;
|
|
782
776
|
if (history.length < 4) {
|
|
783
777
|
this.logger.debug("Compaction skipped: history too short to summarize");
|
|
784
|
-
return;
|
|
778
|
+
return false;
|
|
785
779
|
}
|
|
786
780
|
this.eventBus.emit("context:compacting", {
|
|
787
781
|
estimatedTokens: originalTokens
|
|
@@ -795,106 +789,36 @@ class TurnExecutor {
|
|
|
795
789
|
originalTokens,
|
|
796
790
|
compactedTokens: originalTokens,
|
|
797
791
|
// No change
|
|
798
|
-
originalMessages
|
|
799
|
-
compactedMessages:
|
|
792
|
+
originalMessages,
|
|
793
|
+
compactedMessages: originalMessages,
|
|
800
794
|
// No change
|
|
801
795
|
strategy: this.compactionStrategy.name,
|
|
802
796
|
reason: "overflow"
|
|
803
797
|
});
|
|
804
|
-
return;
|
|
805
|
-
}
|
|
806
|
-
const summaryMessage = summaryMessages[0];
|
|
807
|
-
if (!summaryMessage) {
|
|
808
|
-
this.logger.warn("Compaction returned empty summary message array");
|
|
809
|
-
return;
|
|
798
|
+
return false;
|
|
810
799
|
}
|
|
811
|
-
const
|
|
812
|
-
|
|
813
|
-
let existingSummaryIndex = -1;
|
|
814
|
-
for (let i = history.length - 1; i >= 0; i--) {
|
|
815
|
-
const msg = history[i];
|
|
816
|
-
if (msg?.metadata?.isSummary === true || msg?.metadata?.isSessionSummary === true) {
|
|
817
|
-
existingSummaryIndex = i;
|
|
818
|
-
break;
|
|
819
|
-
}
|
|
800
|
+
for (const summary of summaryMessages) {
|
|
801
|
+
await this.contextManager.addMessage(summary);
|
|
820
802
|
}
|
|
821
|
-
const baseIndex = existingSummaryIndex >= 0 ? existingSummaryIndex + 1 : 0;
|
|
822
|
-
const preservedMessages = history.slice(baseIndex + summarizedCount);
|
|
823
|
-
const firstTimestamp = summaryMessage.metadata?.originalFirstTimestamp;
|
|
824
|
-
const lastTimestamp = summaryMessage.metadata?.originalLastTimestamp;
|
|
825
|
-
this.compactionData = {
|
|
826
|
-
summaryText,
|
|
827
|
-
preservedMessages: [...preservedMessages],
|
|
828
|
-
// Copy to avoid mutation
|
|
829
|
-
summarizedCount,
|
|
830
|
-
...firstTimestamp !== void 0 && { originalFirstTimestamp: firstTimestamp },
|
|
831
|
-
...lastTimestamp !== void 0 && { originalLastTimestamp: lastTimestamp }
|
|
832
|
-
};
|
|
833
|
-
this.virtualContext = {
|
|
834
|
-
summaryMessage,
|
|
835
|
-
preservedMessages: [...preservedMessages]
|
|
836
|
-
};
|
|
837
|
-
this.compactionOccurred = true;
|
|
838
803
|
this.contextManager.resetActualTokenTracking();
|
|
839
|
-
const
|
|
840
|
-
|
|
841
|
-
|
|
804
|
+
const afterEstimate = await this.contextManager.getContextTokenEstimate(
|
|
805
|
+
contributorContext,
|
|
806
|
+
tools
|
|
807
|
+
);
|
|
808
|
+
const compactedTokens = afterEstimate.estimated;
|
|
809
|
+
const compactedMessages = afterEstimate.stats.filteredMessageCount;
|
|
842
810
|
this.eventBus.emit("context:compacted", {
|
|
843
811
|
originalTokens,
|
|
844
812
|
compactedTokens,
|
|
845
|
-
originalMessages
|
|
846
|
-
compactedMessages
|
|
813
|
+
originalMessages,
|
|
814
|
+
compactedMessages,
|
|
847
815
|
strategy: this.compactionStrategy.name,
|
|
848
816
|
reason: "overflow"
|
|
849
817
|
});
|
|
850
818
|
this.logger.info(
|
|
851
|
-
`Compaction complete
|
|
819
|
+
`Compaction complete: ${originalTokens} \u2192 ~${compactedTokens} tokens (${originalMessages} \u2192 ${compactedMessages} messages after filtering)`
|
|
852
820
|
);
|
|
853
|
-
|
|
854
|
-
/**
|
|
855
|
-
* Extract the summary text from a summary message.
|
|
856
|
-
*/
|
|
857
|
-
extractSummaryText(summaryMessage) {
|
|
858
|
-
if (typeof summaryMessage.content === "string") {
|
|
859
|
-
return summaryMessage.content;
|
|
860
|
-
}
|
|
861
|
-
if (Array.isArray(summaryMessage.content)) {
|
|
862
|
-
return summaryMessage.content.filter((part) => part.type === "text").map((part) => part.text).join("\n");
|
|
863
|
-
}
|
|
864
|
-
return "";
|
|
865
|
-
}
|
|
866
|
-
/**
|
|
867
|
-
* Build formatted messages from virtual context (after compaction).
|
|
868
|
-
*
|
|
869
|
-
* This creates LLM-ready messages using:
|
|
870
|
-
* - System prompt (same as normal flow)
|
|
871
|
-
* - Summary message (as first message)
|
|
872
|
-
* - Preserved messages (formatted for LLM)
|
|
873
|
-
*
|
|
874
|
-
* Uses the same formatting pipeline as getFormattedMessagesForLLM()
|
|
875
|
-
* but with our virtual history instead of the stored history.
|
|
876
|
-
*
|
|
877
|
-
* @param contributorContext Context for system prompt contributors
|
|
878
|
-
* @returns Formatted messages ready for LLM call, matching getFormattedMessagesForLLM return type
|
|
879
|
-
*/
|
|
880
|
-
async buildMessagesFromVirtualContext(contributorContext) {
|
|
881
|
-
if (!this.virtualContext) {
|
|
882
|
-
throw new Error("buildMessagesFromVirtualContext called without virtual context");
|
|
883
|
-
}
|
|
884
|
-
const { summaryMessage, preservedMessages } = this.virtualContext;
|
|
885
|
-
const systemPrompt = await this.contextManager.getSystemPrompt(contributorContext);
|
|
886
|
-
const virtualHistory = [summaryMessage, ...preservedMessages];
|
|
887
|
-
const formattedMessages = await this.contextManager.getFormattedMessages(
|
|
888
|
-
contributorContext,
|
|
889
|
-
this.llmContext,
|
|
890
|
-
systemPrompt,
|
|
891
|
-
virtualHistory
|
|
892
|
-
);
|
|
893
|
-
return {
|
|
894
|
-
formattedMessages,
|
|
895
|
-
systemPrompt,
|
|
896
|
-
preparedHistory: virtualHistory
|
|
897
|
-
};
|
|
821
|
+
return true;
|
|
898
822
|
}
|
|
899
823
|
/**
|
|
900
824
|
* Set telemetry span attributes for token usage.
|
|
@@ -917,6 +841,10 @@ class TurnExecutor {
|
|
|
917
841
|
activeSpan.setAttribute("gen_ai.usage.reasoning_tokens", usage.reasoningTokens);
|
|
918
842
|
}
|
|
919
843
|
}
|
|
844
|
+
getContextInputTokens(usage) {
|
|
845
|
+
if (usage.inputTokens === void 0) return null;
|
|
846
|
+
return usage.inputTokens + (usage.cacheReadTokens ?? 0) + (usage.cacheWriteTokens ?? 0);
|
|
847
|
+
}
|
|
920
848
|
/**
|
|
921
849
|
* Map provider errors to DextoRuntimeError.
|
|
922
850
|
*/
|