@dexto/core 1.5.3 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/DextoAgent.cjs +284 -1
- package/dist/agent/DextoAgent.d.ts +114 -0
- package/dist/agent/DextoAgent.d.ts.map +1 -1
- package/dist/agent/DextoAgent.js +275 -1
- package/dist/agent/schemas.d.ts +51 -21
- package/dist/agent/schemas.d.ts.map +1 -1
- package/dist/context/compaction/overflow.cjs +6 -10
- package/dist/context/compaction/overflow.d.ts +14 -11
- package/dist/context/compaction/overflow.d.ts.map +1 -1
- package/dist/context/compaction/overflow.js +6 -10
- package/dist/context/compaction/providers/reactive-overflow-provider.cjs +15 -0
- package/dist/context/compaction/providers/reactive-overflow-provider.d.ts +15 -0
- package/dist/context/compaction/providers/reactive-overflow-provider.d.ts.map +1 -1
- package/dist/context/compaction/providers/reactive-overflow-provider.js +15 -0
- package/dist/context/compaction/schemas.cjs +22 -2
- package/dist/context/compaction/schemas.d.ts +45 -0
- package/dist/context/compaction/schemas.d.ts.map +1 -1
- package/dist/context/compaction/schemas.js +22 -2
- package/dist/context/compaction/strategies/reactive-overflow.cjs +166 -26
- package/dist/context/compaction/strategies/reactive-overflow.d.ts +21 -0
- package/dist/context/compaction/strategies/reactive-overflow.d.ts.map +1 -1
- package/dist/context/compaction/strategies/reactive-overflow.js +166 -26
- package/dist/context/manager.cjs +278 -31
- package/dist/context/manager.d.ts +192 -5
- package/dist/context/manager.d.ts.map +1 -1
- package/dist/context/manager.js +285 -32
- package/dist/context/types.d.ts +6 -0
- package/dist/context/types.d.ts.map +1 -1
- package/dist/context/utils.cjs +77 -11
- package/dist/context/utils.d.ts +86 -8
- package/dist/context/utils.d.ts.map +1 -1
- package/dist/context/utils.js +71 -11
- package/dist/events/index.cjs +4 -0
- package/dist/events/index.d.ts +41 -7
- package/dist/events/index.d.ts.map +1 -1
- package/dist/events/index.js +4 -0
- package/dist/llm/executor/stream-processor.cjs +19 -1
- package/dist/llm/executor/stream-processor.d.ts +3 -0
- package/dist/llm/executor/stream-processor.d.ts.map +1 -1
- package/dist/llm/executor/stream-processor.js +19 -1
- package/dist/llm/executor/turn-executor.cjs +219 -30
- package/dist/llm/executor/turn-executor.d.ts +62 -10
- package/dist/llm/executor/turn-executor.d.ts.map +1 -1
- package/dist/llm/executor/turn-executor.js +219 -30
- package/dist/llm/executor/types.d.ts +28 -0
- package/dist/llm/executor/types.d.ts.map +1 -1
- package/dist/llm/formatters/vercel.cjs +36 -28
- package/dist/llm/formatters/vercel.d.ts.map +1 -1
- package/dist/llm/formatters/vercel.js +36 -28
- package/dist/llm/services/factory.cjs +3 -2
- package/dist/llm/services/factory.d.ts +3 -1
- package/dist/llm/services/factory.d.ts.map +1 -1
- package/dist/llm/services/factory.js +3 -2
- package/dist/llm/services/vercel.cjs +34 -6
- package/dist/llm/services/vercel.d.ts +23 -3
- package/dist/llm/services/vercel.d.ts.map +1 -1
- package/dist/llm/services/vercel.js +34 -6
- package/dist/session/chat-session.cjs +20 -11
- package/dist/session/chat-session.d.ts +9 -4
- package/dist/session/chat-session.d.ts.map +1 -1
- package/dist/session/chat-session.js +20 -11
- package/dist/session/compaction-service.cjs +139 -0
- package/dist/session/compaction-service.d.ts +81 -0
- package/dist/session/compaction-service.d.ts.map +1 -0
- package/dist/session/compaction-service.js +106 -0
- package/dist/session/session-manager.cjs +146 -0
- package/dist/session/session-manager.d.ts +50 -0
- package/dist/session/session-manager.d.ts.map +1 -1
- package/dist/session/session-manager.js +146 -0
- package/dist/session/title-generator.cjs +2 -2
- package/dist/session/title-generator.js +2 -2
- package/dist/systemPrompt/in-built-prompts.cjs +36 -0
- package/dist/systemPrompt/in-built-prompts.d.ts +18 -1
- package/dist/systemPrompt/in-built-prompts.d.ts.map +1 -1
- package/dist/systemPrompt/in-built-prompts.js +25 -0
- package/dist/systemPrompt/manager.cjs +22 -0
- package/dist/systemPrompt/manager.d.ts +10 -0
- package/dist/systemPrompt/manager.d.ts.map +1 -1
- package/dist/systemPrompt/manager.js +22 -0
- package/dist/systemPrompt/registry.cjs +2 -1
- package/dist/systemPrompt/registry.d.ts +1 -1
- package/dist/systemPrompt/registry.d.ts.map +1 -1
- package/dist/systemPrompt/registry.js +2 -1
- package/dist/systemPrompt/schemas.cjs +7 -0
- package/dist/systemPrompt/schemas.d.ts +13 -13
- package/dist/systemPrompt/schemas.d.ts.map +1 -1
- package/dist/systemPrompt/schemas.js +7 -0
- package/dist/utils/index.cjs +3 -1
- package/dist/utils/index.d.ts +1 -0
- package/dist/utils/index.d.ts.map +1 -1
- package/dist/utils/index.js +1 -0
- package/package.json +1 -1
|
@@ -4,15 +4,36 @@ import { isAssistantMessage, isToolMessage } from "../../types.js";
|
|
|
4
4
|
const DEFAULT_OPTIONS = {
|
|
5
5
|
preserveLastNTurns: 2,
|
|
6
6
|
maxSummaryTokens: 2e3,
|
|
7
|
-
summaryPrompt: `You are a conversation summarizer
|
|
8
|
-
- What tasks were attempted and their outcomes
|
|
9
|
-
- Current state and context the assistant needs to remember
|
|
10
|
-
- Any important decisions or information discovered
|
|
11
|
-
- What the user was trying to accomplish
|
|
7
|
+
summaryPrompt: `You are a conversation summarizer creating a structured summary for session continuation.
|
|
12
8
|
|
|
13
|
-
|
|
9
|
+
Analyze the conversation and produce a summary in the following XML format:
|
|
14
10
|
|
|
15
|
-
|
|
11
|
+
<session_compaction>
|
|
12
|
+
<conversation_history>
|
|
13
|
+
A concise summary of what happened in the conversation:
|
|
14
|
+
- Tasks attempted and their outcomes (success/failure/in-progress)
|
|
15
|
+
- Important decisions made
|
|
16
|
+
- Key information discovered (file paths, configurations, errors encountered)
|
|
17
|
+
- Tools used and their results
|
|
18
|
+
</conversation_history>
|
|
19
|
+
|
|
20
|
+
<current_task>
|
|
21
|
+
The most recent task or instruction the user requested that may still be in progress.
|
|
22
|
+
Be specific - include the exact request and current status.
|
|
23
|
+
</current_task>
|
|
24
|
+
|
|
25
|
+
<important_context>
|
|
26
|
+
Critical state that must be preserved:
|
|
27
|
+
- File paths being worked on
|
|
28
|
+
- Variable values or configurations
|
|
29
|
+
- Error messages that need addressing
|
|
30
|
+
- Any pending actions or next steps
|
|
31
|
+
</important_context>
|
|
32
|
+
</session_compaction>
|
|
33
|
+
|
|
34
|
+
IMPORTANT: The assistant will continue working based on this summary. Ensure the current_task section clearly states what needs to be done next.
|
|
35
|
+
|
|
36
|
+
Conversation to summarize:
|
|
16
37
|
{conversation}`
|
|
17
38
|
};
|
|
18
39
|
class ReactiveOverflowStrategy {
|
|
@@ -41,15 +62,71 @@ class ReactiveOverflowStrategy {
|
|
|
41
62
|
this.logger.debug("ReactiveOverflowStrategy: History too short, skipping compaction");
|
|
42
63
|
return [];
|
|
43
64
|
}
|
|
65
|
+
let existingSummaryIndex = -1;
|
|
66
|
+
for (let i = history.length - 1; i >= 0; i--) {
|
|
67
|
+
const msg = history[i];
|
|
68
|
+
if (msg?.metadata?.isSummary === true || msg?.metadata?.isSessionSummary === true) {
|
|
69
|
+
existingSummaryIndex = i;
|
|
70
|
+
break;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
if (existingSummaryIndex !== -1) {
|
|
74
|
+
const messagesAfterSummary = history.slice(existingSummaryIndex + 1);
|
|
75
|
+
if (messagesAfterSummary.length <= 4) {
|
|
76
|
+
this.logger.debug(
|
|
77
|
+
`ReactiveOverflowStrategy: Only ${messagesAfterSummary.length} messages after existing summary, skipping re-compaction`
|
|
78
|
+
);
|
|
79
|
+
return [];
|
|
80
|
+
}
|
|
81
|
+
this.logger.info(
|
|
82
|
+
`ReactiveOverflowStrategy: Found existing summary at index ${existingSummaryIndex}, working with ${messagesAfterSummary.length} messages after it`
|
|
83
|
+
);
|
|
84
|
+
return this.compactSubset(messagesAfterSummary, history);
|
|
85
|
+
}
|
|
44
86
|
const { toSummarize, toKeep } = this.splitHistory(history);
|
|
45
87
|
if (toSummarize.length === 0) {
|
|
46
88
|
this.logger.debug("ReactiveOverflowStrategy: No messages to summarize");
|
|
47
89
|
return [];
|
|
48
90
|
}
|
|
91
|
+
const currentTaskMessage = this.findCurrentTaskMessage(history);
|
|
49
92
|
this.logger.info(
|
|
50
93
|
`ReactiveOverflowStrategy: Summarizing ${toSummarize.length} messages, keeping ${toKeep.length}`
|
|
51
94
|
);
|
|
52
|
-
const summary = await this.generateSummary(toSummarize);
|
|
95
|
+
const summary = await this.generateSummary(toSummarize, currentTaskMessage);
|
|
96
|
+
const summaryMessage = {
|
|
97
|
+
role: "assistant",
|
|
98
|
+
content: [{ type: "text", text: summary }],
|
|
99
|
+
timestamp: Date.now(),
|
|
100
|
+
metadata: {
|
|
101
|
+
isSummary: true,
|
|
102
|
+
summarizedAt: Date.now(),
|
|
103
|
+
originalMessageCount: toSummarize.length,
|
|
104
|
+
originalFirstTimestamp: toSummarize[0]?.timestamp,
|
|
105
|
+
originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp
|
|
106
|
+
}
|
|
107
|
+
};
|
|
108
|
+
return [summaryMessage];
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Handle re-compaction when there's already a summary in history.
|
|
112
|
+
* Only summarizes messages AFTER the existing summary, preventing
|
|
113
|
+
* cascading summaries of summaries.
|
|
114
|
+
*
|
|
115
|
+
* @param messagesAfterSummary Messages after the existing summary
|
|
116
|
+
* @param fullHistory The complete history (for current task detection)
|
|
117
|
+
* @returns Array with single summary message, or empty if nothing to summarize
|
|
118
|
+
*/
|
|
119
|
+
async compactSubset(messagesAfterSummary, fullHistory) {
|
|
120
|
+
const { toSummarize, toKeep } = this.splitHistory(messagesAfterSummary);
|
|
121
|
+
if (toSummarize.length === 0) {
|
|
122
|
+
this.logger.debug("ReactiveOverflowStrategy: No messages to summarize in subset");
|
|
123
|
+
return [];
|
|
124
|
+
}
|
|
125
|
+
const currentTaskMessage = this.findCurrentTaskMessage(fullHistory);
|
|
126
|
+
this.logger.info(
|
|
127
|
+
`ReactiveOverflowStrategy (re-compact): Summarizing ${toSummarize.length} messages after existing summary, keeping ${toKeep.length}`
|
|
128
|
+
);
|
|
129
|
+
const summary = await this.generateSummary(toSummarize, currentTaskMessage);
|
|
53
130
|
const summaryMessage = {
|
|
54
131
|
role: "assistant",
|
|
55
132
|
content: [{ type: "text", text: summary }],
|
|
@@ -57,16 +134,43 @@ class ReactiveOverflowStrategy {
|
|
|
57
134
|
metadata: {
|
|
58
135
|
isSummary: true,
|
|
59
136
|
summarizedAt: Date.now(),
|
|
60
|
-
|
|
137
|
+
originalMessageCount: toSummarize.length,
|
|
138
|
+
isRecompaction: true,
|
|
139
|
+
// Mark that this is a re-compaction
|
|
61
140
|
originalFirstTimestamp: toSummarize[0]?.timestamp,
|
|
62
141
|
originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp
|
|
63
142
|
}
|
|
64
143
|
};
|
|
65
144
|
return [summaryMessage];
|
|
66
145
|
}
|
|
146
|
+
/**
|
|
147
|
+
* Find the most recent user message that represents the current task.
|
|
148
|
+
* This helps preserve context about what the user is currently asking for.
|
|
149
|
+
*/
|
|
150
|
+
findCurrentTaskMessage(history) {
|
|
151
|
+
for (let i = history.length - 1; i >= 0; i--) {
|
|
152
|
+
const msg = history[i];
|
|
153
|
+
if (msg?.role === "user") {
|
|
154
|
+
if (typeof msg.content === "string") {
|
|
155
|
+
return msg.content;
|
|
156
|
+
} else if (Array.isArray(msg.content)) {
|
|
157
|
+
const textParts = msg.content.filter(
|
|
158
|
+
(part) => part.type === "text"
|
|
159
|
+
).map((part) => part.text).join("\n");
|
|
160
|
+
if (textParts.length > 0) {
|
|
161
|
+
return textParts;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
return null;
|
|
167
|
+
}
|
|
67
168
|
/**
|
|
68
169
|
* Split history into messages to summarize and messages to keep.
|
|
69
170
|
* Keeps the last N turns (user + assistant pairs) intact.
|
|
171
|
+
*
|
|
172
|
+
* For long agentic conversations with many tool calls, this also ensures
|
|
173
|
+
* we don't try to keep too many messages even within preserved turns.
|
|
70
174
|
*/
|
|
71
175
|
splitHistory(history) {
|
|
72
176
|
const turnsToKeep = this.options.preserveLastNTurns;
|
|
@@ -81,20 +185,25 @@ class ReactiveOverflowStrategy {
|
|
|
81
185
|
}
|
|
82
186
|
if (userMessageIndices.length > 0) {
|
|
83
187
|
const splitIndex = userMessageIndices[0];
|
|
84
|
-
if (splitIndex !== void 0) {
|
|
85
|
-
if (splitIndex === 0) {
|
|
86
|
-
return {
|
|
87
|
-
toSummarize: [],
|
|
88
|
-
toKeep: history
|
|
89
|
-
};
|
|
90
|
-
}
|
|
188
|
+
if (splitIndex !== void 0 && splitIndex > 0) {
|
|
91
189
|
return {
|
|
92
190
|
toSummarize: history.slice(0, splitIndex),
|
|
93
191
|
toKeep: history.slice(splitIndex)
|
|
94
192
|
};
|
|
95
193
|
}
|
|
96
194
|
}
|
|
97
|
-
const
|
|
195
|
+
const minKeep = 3;
|
|
196
|
+
const maxKeepPercent = 0.2;
|
|
197
|
+
const keepCount = Math.max(minKeep, Math.floor(history.length * maxKeepPercent));
|
|
198
|
+
if (keepCount >= history.length) {
|
|
199
|
+
return {
|
|
200
|
+
toSummarize: [],
|
|
201
|
+
toKeep: history
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
this.logger.debug(
|
|
205
|
+
`splitHistory: Using fallback - keeping last ${keepCount} of ${history.length} messages`
|
|
206
|
+
);
|
|
98
207
|
return {
|
|
99
208
|
toSummarize: history.slice(0, -keepCount),
|
|
100
209
|
toKeep: history.slice(-keepCount)
|
|
@@ -102,21 +211,36 @@ class ReactiveOverflowStrategy {
|
|
|
102
211
|
}
|
|
103
212
|
/**
|
|
104
213
|
* Generate an LLM summary of the messages.
|
|
214
|
+
*
|
|
215
|
+
* @param messages Messages to summarize
|
|
216
|
+
* @param currentTask The most recent user message (current task context)
|
|
105
217
|
*/
|
|
106
|
-
async generateSummary(messages) {
|
|
218
|
+
async generateSummary(messages, currentTask) {
|
|
107
219
|
const formattedConversation = this.formatMessagesForSummary(messages);
|
|
108
|
-
|
|
220
|
+
let conversationWithContext = formattedConversation;
|
|
221
|
+
if (currentTask) {
|
|
222
|
+
conversationWithContext += `
|
|
223
|
+
|
|
224
|
+
--- CURRENT TASK (most recent user request) ---
|
|
225
|
+
${currentTask}`;
|
|
226
|
+
}
|
|
227
|
+
const prompt = this.options.summaryPrompt.replace(
|
|
228
|
+
"{conversation}",
|
|
229
|
+
conversationWithContext
|
|
230
|
+
);
|
|
109
231
|
try {
|
|
110
232
|
const result = await generateText({
|
|
111
233
|
model: this.model,
|
|
112
234
|
prompt,
|
|
113
235
|
maxOutputTokens: this.options.maxSummaryTokens
|
|
114
236
|
});
|
|
115
|
-
return `[
|
|
237
|
+
return `[Session Compaction Summary]
|
|
116
238
|
${result.text}`;
|
|
117
239
|
} catch (error) {
|
|
118
|
-
this.logger.error(
|
|
119
|
-
|
|
240
|
+
this.logger.error(
|
|
241
|
+
`ReactiveOverflowStrategy: Failed to generate summary - ${error instanceof Error ? error.message : String(error)}`
|
|
242
|
+
);
|
|
243
|
+
return this.createFallbackSummary(messages, currentTask);
|
|
120
244
|
}
|
|
121
245
|
}
|
|
122
246
|
/**
|
|
@@ -152,7 +276,7 @@ ${result.text}`;
|
|
|
152
276
|
/**
|
|
153
277
|
* Create a fallback summary if LLM call fails.
|
|
154
278
|
*/
|
|
155
|
-
createFallbackSummary(messages) {
|
|
279
|
+
createFallbackSummary(messages, currentTask) {
|
|
156
280
|
const userMessages = messages.filter((m) => m.role === "user");
|
|
157
281
|
const assistantWithTools = messages.filter(
|
|
158
282
|
(m) => isAssistantMessage(m) && !!m.toolCalls && m.toolCalls.length > 0
|
|
@@ -168,9 +292,25 @@ ${result.text}`;
|
|
|
168
292
|
assistantWithTools.flatMap((m) => m.toolCalls.map((tc) => tc.function.name))
|
|
169
293
|
)
|
|
170
294
|
].join(", ");
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
295
|
+
let fallback = `[Session Compaction Summary - Fallback]
|
|
296
|
+
<session_compaction>
|
|
297
|
+
<conversation_history>
|
|
298
|
+
User discussed: ${userTopics || "various topics"}
|
|
299
|
+
Tools used: ${toolsUsed || "none"}
|
|
300
|
+
Messages summarized: ${messages.length}
|
|
301
|
+
</conversation_history>`;
|
|
302
|
+
if (currentTask) {
|
|
303
|
+
fallback += `
|
|
304
|
+
<current_task>
|
|
305
|
+
${currentTask.slice(0, 500)}${currentTask.length > 500 ? "..." : ""}
|
|
306
|
+
</current_task>`;
|
|
307
|
+
}
|
|
308
|
+
fallback += `
|
|
309
|
+
<important_context>
|
|
310
|
+
Note: This is a fallback summary due to LLM error. Context may be incomplete.
|
|
311
|
+
</important_context>
|
|
312
|
+
</session_compaction>`;
|
|
313
|
+
return fallback;
|
|
174
314
|
}
|
|
175
315
|
}
|
|
176
316
|
export {
|
package/dist/context/manager.cjs
CHANGED
|
@@ -54,6 +54,23 @@ class ContextManager {
|
|
|
54
54
|
* Maximum number of tokens allowed in the conversation (if specified)
|
|
55
55
|
*/
|
|
56
56
|
maxInputTokens;
|
|
57
|
+
/**
|
|
58
|
+
* Last known actual input token count from the LLM API response.
|
|
59
|
+
* Updated after each LLM call. Used by /context for accurate reporting.
|
|
60
|
+
*/
|
|
61
|
+
lastActualInputTokens = null;
|
|
62
|
+
/**
|
|
63
|
+
* Last known actual output token count from the LLM API response.
|
|
64
|
+
* Updated after each LLM call. Used in the context estimation formula:
|
|
65
|
+
* estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
|
|
66
|
+
*/
|
|
67
|
+
lastActualOutputTokens = null;
|
|
68
|
+
/**
|
|
69
|
+
* Message count at the time of the last LLM call.
|
|
70
|
+
* Used to identify which messages are "new" since the last call.
|
|
71
|
+
* Messages after this index are estimated with length/4 heuristic.
|
|
72
|
+
*/
|
|
73
|
+
lastCallMessageCount = null;
|
|
57
74
|
historyProvider;
|
|
58
75
|
sessionId;
|
|
59
76
|
/**
|
|
@@ -152,6 +169,119 @@ class ContextManager {
|
|
|
152
169
|
getMaxInputTokens() {
|
|
153
170
|
return this.maxInputTokens;
|
|
154
171
|
}
|
|
172
|
+
/**
|
|
173
|
+
* Returns the last known actual input token count from the LLM API.
|
|
174
|
+
* Returns null if no LLM call has been made yet.
|
|
175
|
+
*/
|
|
176
|
+
getLastActualInputTokens() {
|
|
177
|
+
return this.lastActualInputTokens;
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Updates the last known actual input token count.
|
|
181
|
+
* Called after each LLM response with the actual usage from the API.
|
|
182
|
+
*/
|
|
183
|
+
setLastActualInputTokens(tokens) {
|
|
184
|
+
this.lastActualInputTokens = tokens;
|
|
185
|
+
this.logger.debug(`Updated lastActualInputTokens: ${tokens}`);
|
|
186
|
+
}
|
|
187
|
+
/**
|
|
188
|
+
* Returns the last known actual output token count from the LLM API.
|
|
189
|
+
* Returns null if no LLM call has been made yet.
|
|
190
|
+
*/
|
|
191
|
+
getLastActualOutputTokens() {
|
|
192
|
+
return this.lastActualOutputTokens;
|
|
193
|
+
}
|
|
194
|
+
/**
|
|
195
|
+
* Updates the last known actual output token count.
|
|
196
|
+
* Called after each LLM response with the actual usage from the API.
|
|
197
|
+
*/
|
|
198
|
+
setLastActualOutputTokens(tokens) {
|
|
199
|
+
this.lastActualOutputTokens = tokens;
|
|
200
|
+
this.logger.debug(`Updated lastActualOutputTokens: ${tokens}`);
|
|
201
|
+
}
|
|
202
|
+
/**
|
|
203
|
+
* Returns the message count at the time of the last LLM call.
|
|
204
|
+
* Returns null if no LLM call has been made yet.
|
|
205
|
+
*/
|
|
206
|
+
getLastCallMessageCount() {
|
|
207
|
+
return this.lastCallMessageCount;
|
|
208
|
+
}
|
|
209
|
+
/**
|
|
210
|
+
* Records the current message count after an LLM call completes.
|
|
211
|
+
* This marks the boundary for "new messages" calculation.
|
|
212
|
+
*/
|
|
213
|
+
async recordLastCallMessageCount() {
|
|
214
|
+
const history = await this.historyProvider.getHistory();
|
|
215
|
+
this.lastCallMessageCount = history.length;
|
|
216
|
+
this.logger.debug(`Recorded lastCallMessageCount: ${this.lastCallMessageCount}`);
|
|
217
|
+
}
|
|
218
|
+
/**
|
|
219
|
+
* Resets the actual token tracking state.
|
|
220
|
+
* Called after compaction since the context has fundamentally changed.
|
|
221
|
+
*/
|
|
222
|
+
resetActualTokenTracking() {
|
|
223
|
+
this.lastActualInputTokens = null;
|
|
224
|
+
this.lastActualOutputTokens = null;
|
|
225
|
+
this.lastCallMessageCount = null;
|
|
226
|
+
this.logger.debug("Reset actual token tracking state (after compaction)");
|
|
227
|
+
}
|
|
228
|
+
// ============= HISTORY PREPARATION =============
|
|
229
|
+
/**
|
|
230
|
+
* Placeholder text used when tool outputs are pruned.
|
|
231
|
+
* Shared constant to ensure consistency between preparation and estimation.
|
|
232
|
+
*/
|
|
233
|
+
static PRUNED_TOOL_PLACEHOLDER = "[Old tool result content cleared]";
|
|
234
|
+
/**
|
|
235
|
+
* Prepares conversation history for LLM consumption.
|
|
236
|
+
* This is the single source of truth for history transformation logic.
|
|
237
|
+
*
|
|
238
|
+
* Transformations applied:
|
|
239
|
+
* 1. filterCompacted - Remove pre-summary messages (messages before the most recent summary)
|
|
240
|
+
* 2. Transform pruned tool messages - Replace compactedAt messages with placeholder text
|
|
241
|
+
*
|
|
242
|
+
* Used by both:
|
|
243
|
+
* - getFormattedMessagesForLLM() - For actual LLM calls
|
|
244
|
+
* - getContextTokenEstimate() - For /context command estimation
|
|
245
|
+
*
|
|
246
|
+
* @returns Prepared history and statistics about the transformations
|
|
247
|
+
*/
|
|
248
|
+
async prepareHistory() {
|
|
249
|
+
const fullHistory = await this.historyProvider.getHistory();
|
|
250
|
+
const originalCount = fullHistory.length;
|
|
251
|
+
let history = (0, import_utils.filterCompacted)(fullHistory);
|
|
252
|
+
const filteredCount = history.length;
|
|
253
|
+
if (filteredCount < originalCount) {
|
|
254
|
+
this.logger.debug(
|
|
255
|
+
`prepareHistory: filterCompacted reduced from ${originalCount} to ${filteredCount} messages`
|
|
256
|
+
);
|
|
257
|
+
}
|
|
258
|
+
let prunedToolCount = 0;
|
|
259
|
+
history = history.map((msg) => {
|
|
260
|
+
if (msg.role === "tool" && msg.compactedAt) {
|
|
261
|
+
prunedToolCount++;
|
|
262
|
+
return {
|
|
263
|
+
...msg,
|
|
264
|
+
content: [
|
|
265
|
+
{ type: "text", text: ContextManager.PRUNED_TOOL_PLACEHOLDER }
|
|
266
|
+
]
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
return msg;
|
|
270
|
+
});
|
|
271
|
+
if (prunedToolCount > 0) {
|
|
272
|
+
this.logger.debug(
|
|
273
|
+
`prepareHistory: Transformed ${prunedToolCount} pruned tool messages to placeholders`
|
|
274
|
+
);
|
|
275
|
+
}
|
|
276
|
+
return {
|
|
277
|
+
preparedHistory: history,
|
|
278
|
+
stats: {
|
|
279
|
+
originalCount,
|
|
280
|
+
filteredCount,
|
|
281
|
+
prunedToolCount
|
|
282
|
+
}
|
|
283
|
+
};
|
|
284
|
+
}
|
|
155
285
|
/**
|
|
156
286
|
* Assembles and returns the current system prompt by invoking the SystemPromptManager.
|
|
157
287
|
*/
|
|
@@ -200,6 +330,7 @@ ${prompt}`);
|
|
|
200
330
|
}
|
|
201
331
|
};
|
|
202
332
|
await this.addMessage(clearMarker);
|
|
333
|
+
this.resetActualTokenTracking();
|
|
203
334
|
this.logger.debug(`Context cleared for session: ${this.sessionId}`);
|
|
204
335
|
}
|
|
205
336
|
/**
|
|
@@ -571,51 +702,166 @@ ${prompt}`);
|
|
|
571
702
|
/**
|
|
572
703
|
* Gets the conversation ready for LLM consumption with proper flow:
|
|
573
704
|
* 1. Get system prompt
|
|
574
|
-
* 2.
|
|
575
|
-
* 3. Format messages
|
|
576
|
-
* This method implements the correct ordering to avoid circular dependencies.
|
|
705
|
+
* 2. Prepare history (filter + transform pruned messages)
|
|
706
|
+
* 3. Format messages for LLM API
|
|
577
707
|
*
|
|
578
708
|
* @param contributorContext The DynamicContributorContext for system prompt contributors and formatting
|
|
579
709
|
* @param llmContext The llmContext for the formatter to decide which messages to include based on the model's capabilities
|
|
580
|
-
* @returns Object containing formatted messages and
|
|
710
|
+
* @returns Object containing formatted messages, system prompt, and prepared history
|
|
581
711
|
*/
|
|
582
|
-
async
|
|
712
|
+
async getFormattedMessagesForLLM(contributorContext, llmContext) {
|
|
583
713
|
const systemPrompt = await this.getSystemPrompt(contributorContext);
|
|
584
|
-
const
|
|
585
|
-
let history = (0, import_utils.filterCompacted)(fullHistory);
|
|
586
|
-
if (history.length < fullHistory.length) {
|
|
587
|
-
this.logger.debug(
|
|
588
|
-
`filterCompacted: Reduced history from ${fullHistory.length} to ${history.length} messages (summary present)`
|
|
589
|
-
);
|
|
590
|
-
}
|
|
591
|
-
const compactedCount = history.filter((m) => m.role === "tool" && m.compactedAt).length;
|
|
592
|
-
if (compactedCount > 0) {
|
|
593
|
-
history = history.map((msg) => {
|
|
594
|
-
if (msg.role === "tool" && msg.compactedAt) {
|
|
595
|
-
return {
|
|
596
|
-
...msg,
|
|
597
|
-
content: [
|
|
598
|
-
{ type: "text", text: "[Old tool result content cleared]" }
|
|
599
|
-
]
|
|
600
|
-
};
|
|
601
|
-
}
|
|
602
|
-
return msg;
|
|
603
|
-
});
|
|
604
|
-
this.logger.debug(
|
|
605
|
-
`Transformed ${compactedCount} compacted tool messages to placeholders`
|
|
606
|
-
);
|
|
607
|
-
}
|
|
714
|
+
const { preparedHistory } = await this.prepareHistory();
|
|
608
715
|
const formattedMessages = await this.getFormattedMessages(
|
|
609
716
|
contributorContext,
|
|
610
717
|
llmContext,
|
|
611
718
|
systemPrompt,
|
|
612
|
-
|
|
719
|
+
preparedHistory
|
|
613
720
|
);
|
|
614
721
|
return {
|
|
615
722
|
formattedMessages,
|
|
616
|
-
systemPrompt
|
|
723
|
+
systemPrompt,
|
|
724
|
+
preparedHistory
|
|
725
|
+
};
|
|
726
|
+
}
|
|
727
|
+
/**
|
|
728
|
+
* Estimates context token usage for the /context command and compaction decisions.
|
|
729
|
+
* Uses the same prepareHistory() logic as getFormattedMessagesForLLM() to ensure consistency.
|
|
730
|
+
*
|
|
731
|
+
* When actuals are available from previous LLM calls:
|
|
732
|
+
* estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
|
|
733
|
+
*
|
|
734
|
+
* This formula is more accurate because:
|
|
735
|
+
* - lastInputTokens: exactly what the API processed (ground truth)
|
|
736
|
+
* - lastOutputTokens: exactly what the LLM returned (ground truth)
|
|
737
|
+
* - newMessagesEstimate: only estimate the delta (tool results, new user messages)
|
|
738
|
+
*
|
|
739
|
+
* When no LLM call has been made yet (or after compaction), falls back to pure estimation.
|
|
740
|
+
*
|
|
741
|
+
* @param contributorContext Context for building the system prompt
|
|
742
|
+
* @param tools Tool definitions to include in the estimate
|
|
743
|
+
* @returns Token estimates with breakdown and comparison to actual (if available)
|
|
744
|
+
*/
|
|
745
|
+
async getContextTokenEstimate(contributorContext, tools) {
|
|
746
|
+
const systemPrompt = await this.getSystemPrompt(contributorContext);
|
|
747
|
+
const { preparedHistory, stats } = await this.prepareHistory();
|
|
748
|
+
const lastInput = this.lastActualInputTokens;
|
|
749
|
+
const lastOutput = this.lastActualOutputTokens;
|
|
750
|
+
const lastMsgCount = this.lastCallMessageCount;
|
|
751
|
+
const currentHistory = await this.historyProvider.getHistory();
|
|
752
|
+
const pureEstimate = (0, import_utils.estimateContextTokens)(systemPrompt, preparedHistory, tools);
|
|
753
|
+
let total;
|
|
754
|
+
let calculationBasis;
|
|
755
|
+
if (lastInput !== null && lastOutput !== null && lastMsgCount !== null) {
|
|
756
|
+
const newMessages = currentHistory.slice(lastMsgCount);
|
|
757
|
+
const newMessagesEstimate = (0, import_utils.estimateMessagesTokens)(newMessages);
|
|
758
|
+
total = lastInput + lastOutput + newMessagesEstimate;
|
|
759
|
+
calculationBasis = {
|
|
760
|
+
method: "actuals",
|
|
761
|
+
lastInputTokens: lastInput,
|
|
762
|
+
lastOutputTokens: lastOutput,
|
|
763
|
+
newMessagesEstimate
|
|
764
|
+
};
|
|
765
|
+
this.logger.info(
|
|
766
|
+
`Context estimate (actuals-based): lastInput=${lastInput}, lastOutput=${lastOutput}, newMsgs=${newMessagesEstimate} (${newMessages.length} messages), total=${total}`
|
|
767
|
+
);
|
|
768
|
+
} else {
|
|
769
|
+
total = pureEstimate.total;
|
|
770
|
+
calculationBasis = {
|
|
771
|
+
method: "estimate"
|
|
772
|
+
};
|
|
773
|
+
this.logger.debug(
|
|
774
|
+
`Context estimate (pure estimate): total=${total} (no actuals available yet)`
|
|
775
|
+
);
|
|
776
|
+
}
|
|
777
|
+
const systemPromptTokens = pureEstimate.breakdown.systemPrompt;
|
|
778
|
+
const toolsTokens = pureEstimate.breakdown.tools;
|
|
779
|
+
const messagesDisplay = Math.max(0, total - systemPromptTokens - toolsTokens.total);
|
|
780
|
+
if (lastInput !== null) {
|
|
781
|
+
const pureTotal = pureEstimate.total;
|
|
782
|
+
const diff = pureTotal - lastInput;
|
|
783
|
+
const diffPercent = lastInput > 0 ? (diff / lastInput * 100).toFixed(1) : "0.0";
|
|
784
|
+
this.logger.info(
|
|
785
|
+
`Context token calibration: pureEstimate=${pureTotal}, lastActual=${lastInput}, diff=${diff} (${diffPercent}%)`
|
|
786
|
+
);
|
|
787
|
+
}
|
|
788
|
+
return {
|
|
789
|
+
estimated: total,
|
|
790
|
+
actual: lastInput,
|
|
791
|
+
breakdown: {
|
|
792
|
+
systemPrompt: systemPromptTokens,
|
|
793
|
+
tools: toolsTokens,
|
|
794
|
+
messages: messagesDisplay
|
|
795
|
+
},
|
|
796
|
+
stats: {
|
|
797
|
+
originalMessageCount: stats.originalCount,
|
|
798
|
+
filteredMessageCount: stats.filteredCount,
|
|
799
|
+
prunedToolCount: stats.prunedToolCount
|
|
800
|
+
},
|
|
801
|
+
calculationBasis
|
|
617
802
|
};
|
|
618
803
|
}
|
|
804
|
+
/**
|
|
805
|
+
* Estimates the next input token count using actual token data from the previous LLM call.
|
|
806
|
+
* This is a lightweight version for compaction pre-checks that only returns the total.
|
|
807
|
+
*
|
|
808
|
+
* ## Formula (when actuals are available):
|
|
809
|
+
* estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
|
|
810
|
+
*
|
|
811
|
+
* ## Why this formula works:
|
|
812
|
+
*
|
|
813
|
+
* Consider two consecutive LLM calls:
|
|
814
|
+
*
|
|
815
|
+
* ```
|
|
816
|
+
* Call N:
|
|
817
|
+
* Input sent: system + tools + [user1] = lastInput tokens
|
|
818
|
+
* Output received: assistant response = lastOutput tokens
|
|
819
|
+
*
|
|
820
|
+
* Call N+1:
|
|
821
|
+
* Input will be: system + tools + [user1, assistant1, user2, ...]
|
|
822
|
+
* ≈ lastInput + assistant1_as_input + new_messages
|
|
823
|
+
* ≈ lastInput + lastOutput + newMessagesEstimate
|
|
824
|
+
* ```
|
|
825
|
+
*
|
|
826
|
+
* The assistant's response (lastOutput) becomes part of the next input as conversation
|
|
827
|
+
* history. Text tokenizes similarly whether sent as input or received as output.
|
|
828
|
+
*
|
|
829
|
+
* ## No double-counting:
|
|
830
|
+
*
|
|
831
|
+
* The assistant message is added to history DURING streaming (before this method runs),
|
|
832
|
+
* and recordLastCallMessageCount() captures the count INCLUDING that message.
|
|
833
|
+
* Therefore, newMessages = history.slice(lastMsgCount) EXCLUDES the assistant message,
|
|
834
|
+
* so lastOutput and newMessages don't overlap.
|
|
835
|
+
*
|
|
836
|
+
* ## Pruning caveat:
|
|
837
|
+
*
|
|
838
|
+
* If tool output pruning occurs between calls, lastInput may be stale (higher than
|
|
839
|
+
* actual). This causes OVERESTIMATION, which is SAFE - we'd trigger compaction
|
|
840
|
+
* earlier rather than risk context overflow.
|
|
841
|
+
*
|
|
842
|
+
* @param systemPrompt The system prompt string
|
|
843
|
+
* @param preparedHistory Message history AFTER filterCompacted and pruning
|
|
844
|
+
* @param tools Tool definitions
|
|
845
|
+
* @returns Estimated total input tokens for the next LLM call
|
|
846
|
+
*/
|
|
847
|
+
async getEstimatedNextInputTokens(systemPrompt, preparedHistory, tools) {
|
|
848
|
+
const lastInput = this.lastActualInputTokens;
|
|
849
|
+
const lastOutput = this.lastActualOutputTokens;
|
|
850
|
+
const lastMsgCount = this.lastCallMessageCount;
|
|
851
|
+
const currentHistory = await this.historyProvider.getHistory();
|
|
852
|
+
if (lastInput !== null && lastOutput !== null && lastMsgCount !== null) {
|
|
853
|
+
const newMessages = currentHistory.slice(lastMsgCount);
|
|
854
|
+
const newMessagesEstimate = (0, import_utils.estimateMessagesTokens)(newMessages);
|
|
855
|
+
const total = lastInput + lastOutput + newMessagesEstimate;
|
|
856
|
+
this.logger.debug(
|
|
857
|
+
`Estimated next input (actuals-based): ${lastInput} + ${lastOutput} + ${newMessagesEstimate} = ${total}`
|
|
858
|
+
);
|
|
859
|
+
return total;
|
|
860
|
+
}
|
|
861
|
+
const pureEstimate = (0, import_utils.estimateContextTokens)(systemPrompt, preparedHistory, tools);
|
|
862
|
+
this.logger.debug(`Estimated next input (pure estimate): ${pureEstimate.total}`);
|
|
863
|
+
return pureEstimate.total;
|
|
864
|
+
}
|
|
619
865
|
/**
|
|
620
866
|
* Gets the system prompt formatted for the target LLM provider
|
|
621
867
|
* Some providers handle system prompts differently
|
|
@@ -632,6 +878,7 @@ ${prompt}`);
|
|
|
632
878
|
*/
|
|
633
879
|
async resetConversation() {
|
|
634
880
|
await this.historyProvider.clearHistory();
|
|
881
|
+
this.resetActualTokenTracking();
|
|
635
882
|
this.logger.debug(
|
|
636
883
|
`ContextManager: Conversation history cleared for session ${this.sessionId}`
|
|
637
884
|
);
|