@dexto/core 1.5.3 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/dist/agent/DextoAgent.cjs +284 -1
  2. package/dist/agent/DextoAgent.d.ts +114 -0
  3. package/dist/agent/DextoAgent.d.ts.map +1 -1
  4. package/dist/agent/DextoAgent.js +275 -1
  5. package/dist/agent/schemas.d.ts +51 -21
  6. package/dist/agent/schemas.d.ts.map +1 -1
  7. package/dist/context/compaction/overflow.cjs +6 -10
  8. package/dist/context/compaction/overflow.d.ts +14 -11
  9. package/dist/context/compaction/overflow.d.ts.map +1 -1
  10. package/dist/context/compaction/overflow.js +6 -10
  11. package/dist/context/compaction/providers/reactive-overflow-provider.cjs +15 -0
  12. package/dist/context/compaction/providers/reactive-overflow-provider.d.ts +15 -0
  13. package/dist/context/compaction/providers/reactive-overflow-provider.d.ts.map +1 -1
  14. package/dist/context/compaction/providers/reactive-overflow-provider.js +15 -0
  15. package/dist/context/compaction/schemas.cjs +22 -2
  16. package/dist/context/compaction/schemas.d.ts +45 -0
  17. package/dist/context/compaction/schemas.d.ts.map +1 -1
  18. package/dist/context/compaction/schemas.js +22 -2
  19. package/dist/context/compaction/strategies/reactive-overflow.cjs +166 -26
  20. package/dist/context/compaction/strategies/reactive-overflow.d.ts +21 -0
  21. package/dist/context/compaction/strategies/reactive-overflow.d.ts.map +1 -1
  22. package/dist/context/compaction/strategies/reactive-overflow.js +166 -26
  23. package/dist/context/manager.cjs +278 -31
  24. package/dist/context/manager.d.ts +192 -5
  25. package/dist/context/manager.d.ts.map +1 -1
  26. package/dist/context/manager.js +285 -32
  27. package/dist/context/types.d.ts +6 -0
  28. package/dist/context/types.d.ts.map +1 -1
  29. package/dist/context/utils.cjs +77 -11
  30. package/dist/context/utils.d.ts +86 -8
  31. package/dist/context/utils.d.ts.map +1 -1
  32. package/dist/context/utils.js +71 -11
  33. package/dist/events/index.cjs +4 -0
  34. package/dist/events/index.d.ts +41 -7
  35. package/dist/events/index.d.ts.map +1 -1
  36. package/dist/events/index.js +4 -0
  37. package/dist/llm/executor/stream-processor.cjs +19 -1
  38. package/dist/llm/executor/stream-processor.d.ts +3 -0
  39. package/dist/llm/executor/stream-processor.d.ts.map +1 -1
  40. package/dist/llm/executor/stream-processor.js +19 -1
  41. package/dist/llm/executor/turn-executor.cjs +219 -30
  42. package/dist/llm/executor/turn-executor.d.ts +62 -10
  43. package/dist/llm/executor/turn-executor.d.ts.map +1 -1
  44. package/dist/llm/executor/turn-executor.js +219 -30
  45. package/dist/llm/executor/types.d.ts +28 -0
  46. package/dist/llm/executor/types.d.ts.map +1 -1
  47. package/dist/llm/formatters/vercel.cjs +36 -28
  48. package/dist/llm/formatters/vercel.d.ts.map +1 -1
  49. package/dist/llm/formatters/vercel.js +36 -28
  50. package/dist/llm/services/factory.cjs +3 -2
  51. package/dist/llm/services/factory.d.ts +3 -1
  52. package/dist/llm/services/factory.d.ts.map +1 -1
  53. package/dist/llm/services/factory.js +3 -2
  54. package/dist/llm/services/vercel.cjs +34 -6
  55. package/dist/llm/services/vercel.d.ts +23 -3
  56. package/dist/llm/services/vercel.d.ts.map +1 -1
  57. package/dist/llm/services/vercel.js +34 -6
  58. package/dist/session/chat-session.cjs +20 -11
  59. package/dist/session/chat-session.d.ts +9 -4
  60. package/dist/session/chat-session.d.ts.map +1 -1
  61. package/dist/session/chat-session.js +20 -11
  62. package/dist/session/compaction-service.cjs +139 -0
  63. package/dist/session/compaction-service.d.ts +81 -0
  64. package/dist/session/compaction-service.d.ts.map +1 -0
  65. package/dist/session/compaction-service.js +106 -0
  66. package/dist/session/session-manager.cjs +146 -0
  67. package/dist/session/session-manager.d.ts +50 -0
  68. package/dist/session/session-manager.d.ts.map +1 -1
  69. package/dist/session/session-manager.js +146 -0
  70. package/dist/session/title-generator.cjs +2 -2
  71. package/dist/session/title-generator.js +2 -2
  72. package/dist/systemPrompt/in-built-prompts.cjs +36 -0
  73. package/dist/systemPrompt/in-built-prompts.d.ts +18 -1
  74. package/dist/systemPrompt/in-built-prompts.d.ts.map +1 -1
  75. package/dist/systemPrompt/in-built-prompts.js +25 -0
  76. package/dist/systemPrompt/manager.cjs +22 -0
  77. package/dist/systemPrompt/manager.d.ts +10 -0
  78. package/dist/systemPrompt/manager.d.ts.map +1 -1
  79. package/dist/systemPrompt/manager.js +22 -0
  80. package/dist/systemPrompt/registry.cjs +2 -1
  81. package/dist/systemPrompt/registry.d.ts +1 -1
  82. package/dist/systemPrompt/registry.d.ts.map +1 -1
  83. package/dist/systemPrompt/registry.js +2 -1
  84. package/dist/systemPrompt/schemas.cjs +7 -0
  85. package/dist/systemPrompt/schemas.d.ts +13 -13
  86. package/dist/systemPrompt/schemas.d.ts.map +1 -1
  87. package/dist/systemPrompt/schemas.js +7 -0
  88. package/dist/utils/index.cjs +3 -1
  89. package/dist/utils/index.d.ts +1 -0
  90. package/dist/utils/index.d.ts.map +1 -1
  91. package/dist/utils/index.js +1 -0
  92. package/package.json +1 -1
@@ -4,15 +4,36 @@ import { isAssistantMessage, isToolMessage } from "../../types.js";
4
4
  const DEFAULT_OPTIONS = {
5
5
  preserveLastNTurns: 2,
6
6
  maxSummaryTokens: 2e3,
7
- summaryPrompt: `You are a conversation summarizer. Summarize the following conversation history concisely, focusing on:
8
- - What tasks were attempted and their outcomes
9
- - Current state and context the assistant needs to remember
10
- - Any important decisions or information discovered
11
- - What the user was trying to accomplish
7
+ summaryPrompt: `You are a conversation summarizer creating a structured summary for session continuation.
12
8
 
13
- Be concise but preserve essential context. Output only the summary, no preamble.
9
+ Analyze the conversation and produce a summary in the following XML format:
14
10
 
15
- Conversation:
11
+ <session_compaction>
12
+ <conversation_history>
13
+ A concise summary of what happened in the conversation:
14
+ - Tasks attempted and their outcomes (success/failure/in-progress)
15
+ - Important decisions made
16
+ - Key information discovered (file paths, configurations, errors encountered)
17
+ - Tools used and their results
18
+ </conversation_history>
19
+
20
+ <current_task>
21
+ The most recent task or instruction the user requested that may still be in progress.
22
+ Be specific - include the exact request and current status.
23
+ </current_task>
24
+
25
+ <important_context>
26
+ Critical state that must be preserved:
27
+ - File paths being worked on
28
+ - Variable values or configurations
29
+ - Error messages that need addressing
30
+ - Any pending actions or next steps
31
+ </important_context>
32
+ </session_compaction>
33
+
34
+ IMPORTANT: The assistant will continue working based on this summary. Ensure the current_task section clearly states what needs to be done next.
35
+
36
+ Conversation to summarize:
16
37
  {conversation}`
17
38
  };
18
39
  class ReactiveOverflowStrategy {
@@ -41,15 +62,71 @@ class ReactiveOverflowStrategy {
41
62
  this.logger.debug("ReactiveOverflowStrategy: History too short, skipping compaction");
42
63
  return [];
43
64
  }
65
+ let existingSummaryIndex = -1;
66
+ for (let i = history.length - 1; i >= 0; i--) {
67
+ const msg = history[i];
68
+ if (msg?.metadata?.isSummary === true || msg?.metadata?.isSessionSummary === true) {
69
+ existingSummaryIndex = i;
70
+ break;
71
+ }
72
+ }
73
+ if (existingSummaryIndex !== -1) {
74
+ const messagesAfterSummary = history.slice(existingSummaryIndex + 1);
75
+ if (messagesAfterSummary.length <= 4) {
76
+ this.logger.debug(
77
+ `ReactiveOverflowStrategy: Only ${messagesAfterSummary.length} messages after existing summary, skipping re-compaction`
78
+ );
79
+ return [];
80
+ }
81
+ this.logger.info(
82
+ `ReactiveOverflowStrategy: Found existing summary at index ${existingSummaryIndex}, working with ${messagesAfterSummary.length} messages after it`
83
+ );
84
+ return this.compactSubset(messagesAfterSummary, history);
85
+ }
44
86
  const { toSummarize, toKeep } = this.splitHistory(history);
45
87
  if (toSummarize.length === 0) {
46
88
  this.logger.debug("ReactiveOverflowStrategy: No messages to summarize");
47
89
  return [];
48
90
  }
91
+ const currentTaskMessage = this.findCurrentTaskMessage(history);
49
92
  this.logger.info(
50
93
  `ReactiveOverflowStrategy: Summarizing ${toSummarize.length} messages, keeping ${toKeep.length}`
51
94
  );
52
- const summary = await this.generateSummary(toSummarize);
95
+ const summary = await this.generateSummary(toSummarize, currentTaskMessage);
96
+ const summaryMessage = {
97
+ role: "assistant",
98
+ content: [{ type: "text", text: summary }],
99
+ timestamp: Date.now(),
100
+ metadata: {
101
+ isSummary: true,
102
+ summarizedAt: Date.now(),
103
+ originalMessageCount: toSummarize.length,
104
+ originalFirstTimestamp: toSummarize[0]?.timestamp,
105
+ originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp
106
+ }
107
+ };
108
+ return [summaryMessage];
109
+ }
110
+ /**
111
+ * Handle re-compaction when there's already a summary in history.
112
+ * Only summarizes messages AFTER the existing summary, preventing
113
+ * cascading summaries of summaries.
114
+ *
115
+ * @param messagesAfterSummary Messages after the existing summary
116
+ * @param fullHistory The complete history (for current task detection)
117
+ * @returns Array with single summary message, or empty if nothing to summarize
118
+ */
119
+ async compactSubset(messagesAfterSummary, fullHistory) {
120
+ const { toSummarize, toKeep } = this.splitHistory(messagesAfterSummary);
121
+ if (toSummarize.length === 0) {
122
+ this.logger.debug("ReactiveOverflowStrategy: No messages to summarize in subset");
123
+ return [];
124
+ }
125
+ const currentTaskMessage = this.findCurrentTaskMessage(fullHistory);
126
+ this.logger.info(
127
+ `ReactiveOverflowStrategy (re-compact): Summarizing ${toSummarize.length} messages after existing summary, keeping ${toKeep.length}`
128
+ );
129
+ const summary = await this.generateSummary(toSummarize, currentTaskMessage);
53
130
  const summaryMessage = {
54
131
  role: "assistant",
55
132
  content: [{ type: "text", text: summary }],
@@ -57,16 +134,43 @@ class ReactiveOverflowStrategy {
57
134
  metadata: {
58
135
  isSummary: true,
59
136
  summarizedAt: Date.now(),
60
- summarizedMessageCount: toSummarize.length,
137
+ originalMessageCount: toSummarize.length,
138
+ isRecompaction: true,
139
+ // Mark that this is a re-compaction
61
140
  originalFirstTimestamp: toSummarize[0]?.timestamp,
62
141
  originalLastTimestamp: toSummarize[toSummarize.length - 1]?.timestamp
63
142
  }
64
143
  };
65
144
  return [summaryMessage];
66
145
  }
146
+ /**
147
+ * Find the most recent user message that represents the current task.
148
+ * This helps preserve context about what the user is currently asking for.
149
+ */
150
+ findCurrentTaskMessage(history) {
151
+ for (let i = history.length - 1; i >= 0; i--) {
152
+ const msg = history[i];
153
+ if (msg?.role === "user") {
154
+ if (typeof msg.content === "string") {
155
+ return msg.content;
156
+ } else if (Array.isArray(msg.content)) {
157
+ const textParts = msg.content.filter(
158
+ (part) => part.type === "text"
159
+ ).map((part) => part.text).join("\n");
160
+ if (textParts.length > 0) {
161
+ return textParts;
162
+ }
163
+ }
164
+ }
165
+ }
166
+ return null;
167
+ }
67
168
  /**
68
169
  * Split history into messages to summarize and messages to keep.
69
170
  * Keeps the last N turns (user + assistant pairs) intact.
171
+ *
172
+ * For long agentic conversations with many tool calls, this also ensures
173
+ * we don't try to keep too many messages even within preserved turns.
70
174
  */
71
175
  splitHistory(history) {
72
176
  const turnsToKeep = this.options.preserveLastNTurns;
@@ -81,20 +185,25 @@ class ReactiveOverflowStrategy {
81
185
  }
82
186
  if (userMessageIndices.length > 0) {
83
187
  const splitIndex = userMessageIndices[0];
84
- if (splitIndex !== void 0) {
85
- if (splitIndex === 0) {
86
- return {
87
- toSummarize: [],
88
- toKeep: history
89
- };
90
- }
188
+ if (splitIndex !== void 0 && splitIndex > 0) {
91
189
  return {
92
190
  toSummarize: history.slice(0, splitIndex),
93
191
  toKeep: history.slice(splitIndex)
94
192
  };
95
193
  }
96
194
  }
97
- const keepCount = Math.min(4, history.length);
195
+ const minKeep = 3;
196
+ const maxKeepPercent = 0.2;
197
+ const keepCount = Math.max(minKeep, Math.floor(history.length * maxKeepPercent));
198
+ if (keepCount >= history.length) {
199
+ return {
200
+ toSummarize: [],
201
+ toKeep: history
202
+ };
203
+ }
204
+ this.logger.debug(
205
+ `splitHistory: Using fallback - keeping last ${keepCount} of ${history.length} messages`
206
+ );
98
207
  return {
99
208
  toSummarize: history.slice(0, -keepCount),
100
209
  toKeep: history.slice(-keepCount)
@@ -102,21 +211,36 @@ class ReactiveOverflowStrategy {
102
211
  }
103
212
  /**
104
213
  * Generate an LLM summary of the messages.
214
+ *
215
+ * @param messages Messages to summarize
216
+ * @param currentTask The most recent user message (current task context)
105
217
  */
106
- async generateSummary(messages) {
218
+ async generateSummary(messages, currentTask) {
107
219
  const formattedConversation = this.formatMessagesForSummary(messages);
108
- const prompt = this.options.summaryPrompt.replace("{conversation}", formattedConversation);
220
+ let conversationWithContext = formattedConversation;
221
+ if (currentTask) {
222
+ conversationWithContext += `
223
+
224
+ --- CURRENT TASK (most recent user request) ---
225
+ ${currentTask}`;
226
+ }
227
+ const prompt = this.options.summaryPrompt.replace(
228
+ "{conversation}",
229
+ conversationWithContext
230
+ );
109
231
  try {
110
232
  const result = await generateText({
111
233
  model: this.model,
112
234
  prompt,
113
235
  maxOutputTokens: this.options.maxSummaryTokens
114
236
  });
115
- return `[Previous conversation summary]
237
+ return `[Session Compaction Summary]
116
238
  ${result.text}`;
117
239
  } catch (error) {
118
- this.logger.error("ReactiveOverflowStrategy: Failed to generate summary", { error });
119
- return this.createFallbackSummary(messages);
240
+ this.logger.error(
241
+ `ReactiveOverflowStrategy: Failed to generate summary - ${error instanceof Error ? error.message : String(error)}`
242
+ );
243
+ return this.createFallbackSummary(messages, currentTask);
120
244
  }
121
245
  }
122
246
  /**
@@ -152,7 +276,7 @@ ${result.text}`;
152
276
  /**
153
277
  * Create a fallback summary if LLM call fails.
154
278
  */
155
- createFallbackSummary(messages) {
279
+ createFallbackSummary(messages, currentTask) {
156
280
  const userMessages = messages.filter((m) => m.role === "user");
157
281
  const assistantWithTools = messages.filter(
158
282
  (m) => isAssistantMessage(m) && !!m.toolCalls && m.toolCalls.length > 0
@@ -168,9 +292,25 @@ ${result.text}`;
168
292
  assistantWithTools.flatMap((m) => m.toolCalls.map((tc) => tc.function.name))
169
293
  )
170
294
  ].join(", ");
171
- return `[Previous conversation summary - fallback]
172
- User discussed: ${userTopics || "various topics"}
173
- Tools used: ${toolsUsed || "none"}`;
295
+ let fallback = `[Session Compaction Summary - Fallback]
296
+ <session_compaction>
297
+ <conversation_history>
298
+ User discussed: ${userTopics || "various topics"}
299
+ Tools used: ${toolsUsed || "none"}
300
+ Messages summarized: ${messages.length}
301
+ </conversation_history>`;
302
+ if (currentTask) {
303
+ fallback += `
304
+ <current_task>
305
+ ${currentTask.slice(0, 500)}${currentTask.length > 500 ? "..." : ""}
306
+ </current_task>`;
307
+ }
308
+ fallback += `
309
+ <important_context>
310
+ Note: This is a fallback summary due to LLM error. Context may be incomplete.
311
+ </important_context>
312
+ </session_compaction>`;
313
+ return fallback;
174
314
  }
175
315
  }
176
316
  export {
@@ -54,6 +54,23 @@ class ContextManager {
54
54
  * Maximum number of tokens allowed in the conversation (if specified)
55
55
  */
56
56
  maxInputTokens;
57
+ /**
58
+ * Last known actual input token count from the LLM API response.
59
+ * Updated after each LLM call. Used by /context for accurate reporting.
60
+ */
61
+ lastActualInputTokens = null;
62
+ /**
63
+ * Last known actual output token count from the LLM API response.
64
+ * Updated after each LLM call. Used in the context estimation formula:
65
+ * estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
66
+ */
67
+ lastActualOutputTokens = null;
68
+ /**
69
+ * Message count at the time of the last LLM call.
70
+ * Used to identify which messages are "new" since the last call.
71
+ * Messages after this index are estimated with length/4 heuristic.
72
+ */
73
+ lastCallMessageCount = null;
57
74
  historyProvider;
58
75
  sessionId;
59
76
  /**
@@ -152,6 +169,119 @@ class ContextManager {
152
169
  getMaxInputTokens() {
153
170
  return this.maxInputTokens;
154
171
  }
172
+ /**
173
+ * Returns the last known actual input token count from the LLM API.
174
+ * Returns null if no LLM call has been made yet.
175
+ */
176
+ getLastActualInputTokens() {
177
+ return this.lastActualInputTokens;
178
+ }
179
+ /**
180
+ * Updates the last known actual input token count.
181
+ * Called after each LLM response with the actual usage from the API.
182
+ */
183
+ setLastActualInputTokens(tokens) {
184
+ this.lastActualInputTokens = tokens;
185
+ this.logger.debug(`Updated lastActualInputTokens: ${tokens}`);
186
+ }
187
+ /**
188
+ * Returns the last known actual output token count from the LLM API.
189
+ * Returns null if no LLM call has been made yet.
190
+ */
191
+ getLastActualOutputTokens() {
192
+ return this.lastActualOutputTokens;
193
+ }
194
+ /**
195
+ * Updates the last known actual output token count.
196
+ * Called after each LLM response with the actual usage from the API.
197
+ */
198
+ setLastActualOutputTokens(tokens) {
199
+ this.lastActualOutputTokens = tokens;
200
+ this.logger.debug(`Updated lastActualOutputTokens: ${tokens}`);
201
+ }
202
+ /**
203
+ * Returns the message count at the time of the last LLM call.
204
+ * Returns null if no LLM call has been made yet.
205
+ */
206
+ getLastCallMessageCount() {
207
+ return this.lastCallMessageCount;
208
+ }
209
+ /**
210
+ * Records the current message count after an LLM call completes.
211
+ * This marks the boundary for "new messages" calculation.
212
+ */
213
+ async recordLastCallMessageCount() {
214
+ const history = await this.historyProvider.getHistory();
215
+ this.lastCallMessageCount = history.length;
216
+ this.logger.debug(`Recorded lastCallMessageCount: ${this.lastCallMessageCount}`);
217
+ }
218
+ /**
219
+ * Resets the actual token tracking state.
220
+ * Called after compaction since the context has fundamentally changed.
221
+ */
222
+ resetActualTokenTracking() {
223
+ this.lastActualInputTokens = null;
224
+ this.lastActualOutputTokens = null;
225
+ this.lastCallMessageCount = null;
226
+ this.logger.debug("Reset actual token tracking state (after compaction)");
227
+ }
228
+ // ============= HISTORY PREPARATION =============
229
+ /**
230
+ * Placeholder text used when tool outputs are pruned.
231
+ * Shared constant to ensure consistency between preparation and estimation.
232
+ */
233
+ static PRUNED_TOOL_PLACEHOLDER = "[Old tool result content cleared]";
234
+ /**
235
+ * Prepares conversation history for LLM consumption.
236
+ * This is the single source of truth for history transformation logic.
237
+ *
238
+ * Transformations applied:
239
+ * 1. filterCompacted - Remove pre-summary messages (messages before the most recent summary)
240
+ * 2. Transform pruned tool messages - Replace compactedAt messages with placeholder text
241
+ *
242
+ * Used by both:
243
+ * - getFormattedMessagesForLLM() - For actual LLM calls
244
+ * - getContextTokenEstimate() - For /context command estimation
245
+ *
246
+ * @returns Prepared history and statistics about the transformations
247
+ */
248
+ async prepareHistory() {
249
+ const fullHistory = await this.historyProvider.getHistory();
250
+ const originalCount = fullHistory.length;
251
+ let history = (0, import_utils.filterCompacted)(fullHistory);
252
+ const filteredCount = history.length;
253
+ if (filteredCount < originalCount) {
254
+ this.logger.debug(
255
+ `prepareHistory: filterCompacted reduced from ${originalCount} to ${filteredCount} messages`
256
+ );
257
+ }
258
+ let prunedToolCount = 0;
259
+ history = history.map((msg) => {
260
+ if (msg.role === "tool" && msg.compactedAt) {
261
+ prunedToolCount++;
262
+ return {
263
+ ...msg,
264
+ content: [
265
+ { type: "text", text: ContextManager.PRUNED_TOOL_PLACEHOLDER }
266
+ ]
267
+ };
268
+ }
269
+ return msg;
270
+ });
271
+ if (prunedToolCount > 0) {
272
+ this.logger.debug(
273
+ `prepareHistory: Transformed ${prunedToolCount} pruned tool messages to placeholders`
274
+ );
275
+ }
276
+ return {
277
+ preparedHistory: history,
278
+ stats: {
279
+ originalCount,
280
+ filteredCount,
281
+ prunedToolCount
282
+ }
283
+ };
284
+ }
155
285
  /**
156
286
  * Assembles and returns the current system prompt by invoking the SystemPromptManager.
157
287
  */
@@ -200,6 +330,7 @@ ${prompt}`);
200
330
  }
201
331
  };
202
332
  await this.addMessage(clearMarker);
333
+ this.resetActualTokenTracking();
203
334
  this.logger.debug(`Context cleared for session: ${this.sessionId}`);
204
335
  }
205
336
  /**
@@ -571,51 +702,166 @@ ${prompt}`);
571
702
  /**
572
703
  * Gets the conversation ready for LLM consumption with proper flow:
573
704
  * 1. Get system prompt
574
- * 2. Get history and filter (exclude pre-summary messages)
575
- * 3. Format messages
576
- * This method implements the correct ordering to avoid circular dependencies.
705
+ * 2. Prepare history (filter + transform pruned messages)
706
+ * 3. Format messages for LLM API
577
707
  *
578
708
  * @param contributorContext The DynamicContributorContext for system prompt contributors and formatting
579
709
  * @param llmContext The llmContext for the formatter to decide which messages to include based on the model's capabilities
580
- * @returns Object containing formatted messages and system prompt
710
+ * @returns Object containing formatted messages, system prompt, and prepared history
581
711
  */
582
- async getFormattedMessagesWithCompression(contributorContext, llmContext) {
712
+ async getFormattedMessagesForLLM(contributorContext, llmContext) {
583
713
  const systemPrompt = await this.getSystemPrompt(contributorContext);
584
- const fullHistory = await this.historyProvider.getHistory();
585
- let history = (0, import_utils.filterCompacted)(fullHistory);
586
- if (history.length < fullHistory.length) {
587
- this.logger.debug(
588
- `filterCompacted: Reduced history from ${fullHistory.length} to ${history.length} messages (summary present)`
589
- );
590
- }
591
- const compactedCount = history.filter((m) => m.role === "tool" && m.compactedAt).length;
592
- if (compactedCount > 0) {
593
- history = history.map((msg) => {
594
- if (msg.role === "tool" && msg.compactedAt) {
595
- return {
596
- ...msg,
597
- content: [
598
- { type: "text", text: "[Old tool result content cleared]" }
599
- ]
600
- };
601
- }
602
- return msg;
603
- });
604
- this.logger.debug(
605
- `Transformed ${compactedCount} compacted tool messages to placeholders`
606
- );
607
- }
714
+ const { preparedHistory } = await this.prepareHistory();
608
715
  const formattedMessages = await this.getFormattedMessages(
609
716
  contributorContext,
610
717
  llmContext,
611
718
  systemPrompt,
612
- history
719
+ preparedHistory
613
720
  );
614
721
  return {
615
722
  formattedMessages,
616
- systemPrompt
723
+ systemPrompt,
724
+ preparedHistory
725
+ };
726
+ }
727
+ /**
728
+ * Estimates context token usage for the /context command and compaction decisions.
729
+ * Uses the same prepareHistory() logic as getFormattedMessagesForLLM() to ensure consistency.
730
+ *
731
+ * When actuals are available from previous LLM calls:
732
+ * estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
733
+ *
734
+ * This formula is more accurate because:
735
+ * - lastInputTokens: exactly what the API processed (ground truth)
736
+ * - lastOutputTokens: exactly what the LLM returned (ground truth)
737
+ * - newMessagesEstimate: only estimate the delta (tool results, new user messages)
738
+ *
739
+ * When no LLM call has been made yet (or after compaction), falls back to pure estimation.
740
+ *
741
+ * @param contributorContext Context for building the system prompt
742
+ * @param tools Tool definitions to include in the estimate
743
+ * @returns Token estimates with breakdown and comparison to actual (if available)
744
+ */
745
+ async getContextTokenEstimate(contributorContext, tools) {
746
+ const systemPrompt = await this.getSystemPrompt(contributorContext);
747
+ const { preparedHistory, stats } = await this.prepareHistory();
748
+ const lastInput = this.lastActualInputTokens;
749
+ const lastOutput = this.lastActualOutputTokens;
750
+ const lastMsgCount = this.lastCallMessageCount;
751
+ const currentHistory = await this.historyProvider.getHistory();
752
+ const pureEstimate = (0, import_utils.estimateContextTokens)(systemPrompt, preparedHistory, tools);
753
+ let total;
754
+ let calculationBasis;
755
+ if (lastInput !== null && lastOutput !== null && lastMsgCount !== null) {
756
+ const newMessages = currentHistory.slice(lastMsgCount);
757
+ const newMessagesEstimate = (0, import_utils.estimateMessagesTokens)(newMessages);
758
+ total = lastInput + lastOutput + newMessagesEstimate;
759
+ calculationBasis = {
760
+ method: "actuals",
761
+ lastInputTokens: lastInput,
762
+ lastOutputTokens: lastOutput,
763
+ newMessagesEstimate
764
+ };
765
+ this.logger.info(
766
+ `Context estimate (actuals-based): lastInput=${lastInput}, lastOutput=${lastOutput}, newMsgs=${newMessagesEstimate} (${newMessages.length} messages), total=${total}`
767
+ );
768
+ } else {
769
+ total = pureEstimate.total;
770
+ calculationBasis = {
771
+ method: "estimate"
772
+ };
773
+ this.logger.debug(
774
+ `Context estimate (pure estimate): total=${total} (no actuals available yet)`
775
+ );
776
+ }
777
+ const systemPromptTokens = pureEstimate.breakdown.systemPrompt;
778
+ const toolsTokens = pureEstimate.breakdown.tools;
779
+ const messagesDisplay = Math.max(0, total - systemPromptTokens - toolsTokens.total);
780
+ if (lastInput !== null) {
781
+ const pureTotal = pureEstimate.total;
782
+ const diff = pureTotal - lastInput;
783
+ const diffPercent = lastInput > 0 ? (diff / lastInput * 100).toFixed(1) : "0.0";
784
+ this.logger.info(
785
+ `Context token calibration: pureEstimate=${pureTotal}, lastActual=${lastInput}, diff=${diff} (${diffPercent}%)`
786
+ );
787
+ }
788
+ return {
789
+ estimated: total,
790
+ actual: lastInput,
791
+ breakdown: {
792
+ systemPrompt: systemPromptTokens,
793
+ tools: toolsTokens,
794
+ messages: messagesDisplay
795
+ },
796
+ stats: {
797
+ originalMessageCount: stats.originalCount,
798
+ filteredMessageCount: stats.filteredCount,
799
+ prunedToolCount: stats.prunedToolCount
800
+ },
801
+ calculationBasis
617
802
  };
618
803
  }
804
+ /**
805
+ * Estimates the next input token count using actual token data from the previous LLM call.
806
+ * This is a lightweight version for compaction pre-checks that only returns the total.
807
+ *
808
+ * ## Formula (when actuals are available):
809
+ * estimatedNextInput = lastInputTokens + lastOutputTokens + newMessagesEstimate
810
+ *
811
+ * ## Why this formula works:
812
+ *
813
+ * Consider two consecutive LLM calls:
814
+ *
815
+ * ```
816
+ * Call N:
817
+ * Input sent: system + tools + [user1] = lastInput tokens
818
+ * Output received: assistant response = lastOutput tokens
819
+ *
820
+ * Call N+1:
821
+ * Input will be: system + tools + [user1, assistant1, user2, ...]
822
+ * ≈ lastInput + assistant1_as_input + new_messages
823
+ * ≈ lastInput + lastOutput + newMessagesEstimate
824
+ * ```
825
+ *
826
+ * The assistant's response (lastOutput) becomes part of the next input as conversation
827
+ * history. Text tokenizes similarly whether sent as input or received as output.
828
+ *
829
+ * ## No double-counting:
830
+ *
831
+ * The assistant message is added to history DURING streaming (before this method runs),
832
+ * and recordLastCallMessageCount() captures the count INCLUDING that message.
833
+ * Therefore, newMessages = history.slice(lastMsgCount) EXCLUDES the assistant message,
834
+ * so lastOutput and newMessages don't overlap.
835
+ *
836
+ * ## Pruning caveat:
837
+ *
838
+ * If tool output pruning occurs between calls, lastInput may be stale (higher than
839
+ * actual). This causes OVERESTIMATION, which is SAFE - we'd trigger compaction
840
+ * earlier rather than risk context overflow.
841
+ *
842
+ * @param systemPrompt The system prompt string
843
+ * @param preparedHistory Message history AFTER filterCompacted and pruning
844
+ * @param tools Tool definitions
845
+ * @returns Estimated total input tokens for the next LLM call
846
+ */
847
+ async getEstimatedNextInputTokens(systemPrompt, preparedHistory, tools) {
848
+ const lastInput = this.lastActualInputTokens;
849
+ const lastOutput = this.lastActualOutputTokens;
850
+ const lastMsgCount = this.lastCallMessageCount;
851
+ const currentHistory = await this.historyProvider.getHistory();
852
+ if (lastInput !== null && lastOutput !== null && lastMsgCount !== null) {
853
+ const newMessages = currentHistory.slice(lastMsgCount);
854
+ const newMessagesEstimate = (0, import_utils.estimateMessagesTokens)(newMessages);
855
+ const total = lastInput + lastOutput + newMessagesEstimate;
856
+ this.logger.debug(
857
+ `Estimated next input (actuals-based): ${lastInput} + ${lastOutput} + ${newMessagesEstimate} = ${total}`
858
+ );
859
+ return total;
860
+ }
861
+ const pureEstimate = (0, import_utils.estimateContextTokens)(systemPrompt, preparedHistory, tools);
862
+ this.logger.debug(`Estimated next input (pure estimate): ${pureEstimate.total}`);
863
+ return pureEstimate.total;
864
+ }
619
865
  /**
620
866
  * Gets the system prompt formatted for the target LLM provider
621
867
  * Some providers handle system prompts differently
@@ -632,6 +878,7 @@ ${prompt}`);
632
878
  */
633
879
  async resetConversation() {
634
880
  await this.historyProvider.clearHistory();
881
+ this.resetActualTokenTracking();
635
882
  this.logger.debug(
636
883
  `ContextManager: Conversation history cleared for session ${this.sessionId}`
637
884
  );