@witqq/agent-sdk 0.6.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +539 -6
- package/dist/{types-BvwNzZCj.d.cts → agent-CW9XbmG_.d.ts} +148 -95
- package/dist/{types-BvwNzZCj.d.ts → agent-DxY68NZL.d.cts} +148 -95
- package/dist/auth/index.cjs +260 -2
- package/dist/auth/index.cjs.map +1 -1
- package/dist/auth/index.d.cts +21 -138
- package/dist/auth/index.d.ts +21 -138
- package/dist/auth/index.js +260 -3
- package/dist/auth/index.js.map +1 -1
- package/dist/backends/claude.cjs +653 -140
- package/dist/backends/claude.cjs.map +1 -1
- package/dist/backends/claude.d.cts +4 -1
- package/dist/backends/claude.d.ts +4 -1
- package/dist/backends/claude.js +653 -140
- package/dist/backends/claude.js.map +1 -1
- package/dist/backends/copilot.cjs +428 -88
- package/dist/backends/copilot.cjs.map +1 -1
- package/dist/backends/copilot.d.cts +13 -4
- package/dist/backends/copilot.d.ts +13 -4
- package/dist/backends/copilot.js +428 -88
- package/dist/backends/copilot.js.map +1 -1
- package/dist/backends/vercel-ai.cjs +349 -77
- package/dist/backends/vercel-ai.cjs.map +1 -1
- package/dist/backends/vercel-ai.d.cts +3 -1
- package/dist/backends/vercel-ai.d.ts +3 -1
- package/dist/backends/vercel-ai.js +349 -77
- package/dist/backends/vercel-ai.js.map +1 -1
- package/dist/backends-BSrsBYFn.d.cts +39 -0
- package/dist/backends-BSrsBYFn.d.ts +39 -0
- package/dist/chat/accumulator.cjs +147 -0
- package/dist/chat/accumulator.cjs.map +1 -0
- package/dist/chat/accumulator.d.cts +64 -0
- package/dist/chat/accumulator.d.ts +64 -0
- package/dist/chat/accumulator.js +145 -0
- package/dist/chat/accumulator.js.map +1 -0
- package/dist/chat/backends.cjs +3524 -0
- package/dist/chat/backends.cjs.map +1 -0
- package/dist/chat/backends.d.cts +66 -0
- package/dist/chat/backends.d.ts +66 -0
- package/dist/chat/backends.js +3512 -0
- package/dist/chat/backends.js.map +1 -0
- package/dist/chat/context.cjs +280 -0
- package/dist/chat/context.cjs.map +1 -0
- package/dist/chat/context.d.cts +191 -0
- package/dist/chat/context.d.ts +191 -0
- package/dist/chat/context.js +277 -0
- package/dist/chat/context.js.map +1 -0
- package/dist/chat/core.cjs +305 -0
- package/dist/chat/core.cjs.map +1 -0
- package/dist/chat/core.d.cts +84 -0
- package/dist/chat/core.d.ts +84 -0
- package/dist/chat/core.js +282 -0
- package/dist/chat/core.js.map +1 -0
- package/dist/chat/errors.cjs +273 -0
- package/dist/chat/errors.cjs.map +1 -0
- package/dist/chat/errors.d.cts +97 -0
- package/dist/chat/errors.d.ts +97 -0
- package/dist/chat/errors.js +266 -0
- package/dist/chat/errors.js.map +1 -0
- package/dist/chat/events.cjs +203 -0
- package/dist/chat/events.cjs.map +1 -0
- package/dist/chat/events.d.cts +245 -0
- package/dist/chat/events.d.ts +245 -0
- package/dist/chat/events.js +196 -0
- package/dist/chat/events.js.map +1 -0
- package/dist/chat/index.cjs +5550 -0
- package/dist/chat/index.cjs.map +1 -0
- package/dist/chat/index.d.cts +77 -0
- package/dist/chat/index.d.ts +77 -0
- package/dist/chat/index.js +5505 -0
- package/dist/chat/index.js.map +1 -0
- package/dist/chat/react/theme.css +2517 -0
- package/dist/chat/react.cjs +3589 -0
- package/dist/chat/react.cjs.map +1 -0
- package/dist/chat/react.d.cts +1088 -0
- package/dist/chat/react.d.ts +1088 -0
- package/dist/chat/react.js +3547 -0
- package/dist/chat/react.js.map +1 -0
- package/dist/chat/runtime.cjs +1245 -0
- package/dist/chat/runtime.cjs.map +1 -0
- package/dist/chat/runtime.d.cts +182 -0
- package/dist/chat/runtime.d.ts +182 -0
- package/dist/chat/runtime.js +1243 -0
- package/dist/chat/runtime.js.map +1 -0
- package/dist/chat/server.cjs +2668 -0
- package/dist/chat/server.cjs.map +1 -0
- package/dist/chat/server.d.cts +648 -0
- package/dist/chat/server.d.ts +648 -0
- package/dist/chat/server.js +2628 -0
- package/dist/chat/server.js.map +1 -0
- package/dist/chat/sessions.cjs +380 -0
- package/dist/chat/sessions.cjs.map +1 -0
- package/dist/chat/sessions.d.cts +158 -0
- package/dist/chat/sessions.d.ts +158 -0
- package/dist/chat/sessions.js +376 -0
- package/dist/chat/sessions.js.map +1 -0
- package/dist/chat/sqlite.cjs +441 -0
- package/dist/chat/sqlite.cjs.map +1 -0
- package/dist/chat/sqlite.d.cts +128 -0
- package/dist/chat/sqlite.d.ts +128 -0
- package/dist/chat/sqlite.js +435 -0
- package/dist/chat/sqlite.js.map +1 -0
- package/dist/chat/state.cjs +190 -0
- package/dist/chat/state.cjs.map +1 -0
- package/dist/chat/state.d.cts +95 -0
- package/dist/chat/state.d.ts +95 -0
- package/dist/chat/state.js +180 -0
- package/dist/chat/state.js.map +1 -0
- package/dist/chat/storage.cjs +249 -0
- package/dist/chat/storage.cjs.map +1 -0
- package/dist/chat/storage.d.cts +197 -0
- package/dist/chat/storage.d.ts +197 -0
- package/dist/chat/storage.js +245 -0
- package/dist/chat/storage.js.map +1 -0
- package/dist/errors-C-so0M4t.d.cts +33 -0
- package/dist/errors-C-so0M4t.d.ts +33 -0
- package/dist/errors-CmVvczxZ.d.cts +28 -0
- package/dist/errors-CmVvczxZ.d.ts +28 -0
- package/dist/in-process-transport-C1JnJGVR.d.ts +228 -0
- package/dist/in-process-transport-C7DSqPyX.d.cts +228 -0
- package/dist/index.cjs +365 -59
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +322 -125
- package/dist/index.d.ts +322 -125
- package/dist/index.js +359 -60
- package/dist/index.js.map +1 -1
- package/dist/provider-types-PTSlRPNB.d.cts +39 -0
- package/dist/provider-types-PTSlRPNB.d.ts +39 -0
- package/dist/refresh-manager-B81PpYBr.d.cts +153 -0
- package/dist/refresh-manager-Dlv_iNZi.d.ts +153 -0
- package/dist/testing.cjs +383 -0
- package/dist/testing.cjs.map +1 -0
- package/dist/testing.d.cts +132 -0
- package/dist/testing.d.ts +132 -0
- package/dist/testing.js +377 -0
- package/dist/testing.js.map +1 -0
- package/dist/token-store-CSUBgYwn.d.ts +48 -0
- package/dist/token-store-CuC4hB9Z.d.cts +48 -0
- package/dist/transport-Cdh3M0tS.d.cts +68 -0
- package/dist/transport-Ciap4PWK.d.ts +68 -0
- package/dist/types-4vbcmPTp.d.cts +143 -0
- package/dist/types-BxggH0Yh.d.ts +143 -0
- package/dist/types-DRgd_9R7.d.cts +363 -0
- package/dist/types-ajANVzf7.d.ts +363 -0
- package/package.json +178 -6
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { c as ChatMessage } from '../types-ajANVzf7.js';
|
|
2
|
+
import '../agent-CW9XbmG_.js';
|
|
3
|
+
import 'zod';
|
|
4
|
+
import '../errors-C-so0M4t.js';
|
|
5
|
+
import '../types-BxggH0Yh.js';
|
|
6
|
+
import '../errors-CmVvczxZ.js';
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* @witqq/agent-sdk/chat/context
|
|
10
|
+
*
|
|
11
|
+
* Context window manager for selecting which messages fit within a token budget.
|
|
12
|
+
* Stateless: takes messages in, returns trimmed messages out.
|
|
13
|
+
* Three overflow strategies: truncate-oldest, sliding-window, summarize-placeholder.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * Tuning knobs for the character-based token estimate.
 */
interface TokenEstimationOptions {
    /**
     * Number of characters assumed to make up one token.
     * The estimate is the character count divided by this value, so a smaller
     * value produces larger estimates and fewer messages fit a given budget.
     * @default 4
     */
    charsPerToken?: number;
}
|
|
27
|
+
/**
 * Approximate the number of tokens one chat message occupies.
 *
 * The estimate is purely character based:
 * `Math.ceil(charCount / charsPerToken)`, where `charCount` is the sum of
 * - the length of the role name plus a fixed 4 characters of overhead,
 * - the text length of every `text` and `reasoning` part,
 * - for `tool_call` parts: the JSON-serialized arguments, the tool name,
 *   20 characters of overhead and, when present, the JSON-serialized result,
 * - for `source` parts: the title (if any), the URL and 10 characters of overhead,
 * - for `file` parts: the name, the data length and 20 characters of overhead.
 *
 * @param message - Chat message whose size is estimated
 * @param options - Optional estimation settings (characters per token)
 * @returns Estimated token count
 *
 * @example
 * ```typescript
 * const tokens = estimateTokens(message);
 * const conservative = estimateTokens(message, { charsPerToken: 3 });
 * ```
 */
declare function estimateTokens(message: ChatMessage, options?: TokenEstimationOptions): number;
|
|
48
|
+
/**
 * Name of the policy applied when the messages do not fit the token budget.
 */
type OverflowStrategy =
    | "truncate-oldest"
    | "sliding-window"
    | "summarize-placeholder";
|
|
50
|
+
/**
 * Asynchronous callback used with the "summarize-placeholder" strategy.
 * It is handed the messages that were dropped from the context and resolves
 * to a summary string, which takes the place of the static placeholder text.
 */
type ContextSummarizer = (removedMessages: ReadonlyArray<ChatMessage>) => Promise<string>;
|
|
56
|
+
/**
 * Settings accepted by the context window manager.
 */
interface ContextWindowConfig {
    /** Upper bound, in tokens, for the whole context window */
    maxTokens: number;
    /**
     * Tokens set aside for the system prompt and the generated response.
     * The usable budget is `maxTokens` minus this value.
     * @default 0
     */
    reservedTokens?: number;
    /**
     * Policy applied when the messages exceed the usable budget.
     * @default "truncate-oldest"
     */
    strategy?: OverflowStrategy;
    /**
     * Settings forwarded to the token estimator.
     */
    estimation?: TokenEstimationOptions;
    /**
     * Optional asynchronous summarizer for the "summarize-placeholder" strategy.
     * When set, the async fitting path substitutes its result for the static
     * placeholder text; if the summarizer throws, the static placeholder stays.
     */
    summarizer?: ContextSummarizer;
}
|
|
84
|
+
/**
 * Outcome of fitting a message list to the context window.
 */
interface ContextWindowResult {
    /** The messages retained after trimming */
    messages: Array<ChatMessage>;
    /** Estimated token total of the retained messages */
    totalTokens: number;
    /** How many messages were dropped */
    removedCount: number;
    /** Whether trimming took place */
    wasTruncated: boolean;
}
|
|
97
|
+
/**
 * Per-session context usage figures.
 * Documented as the return value of `IChatRuntime.getContextStats()`.
 *
 * Once an API response has been received, `realPromptTokens` and
 * `realCompletionTokens` hold the actual token counts it reported.
 * `modelContextWindow` is the model's context window as given by `listModels()`.
 */
interface ContextStats {
    /** Heuristic token total of the trimmed context (retained for backward compatibility) */
    totalTokens: number;
    /** How many messages trimming dropped */
    removedCount: number;
    /** Whether the context was truncated */
    wasTruncated: boolean;
    /** Usable token budget (maxTokens − reservedTokens) */
    availableBudget: number;
    /** Actual prompt tokens of the last API response (undefined until a response arrives) */
    realPromptTokens?: number;
    /** Actual completion tokens of the last API response (undefined until a response arrives) */
    realCompletionTokens?: number;
    /** Context window size of the model, in tokens, from listModels() (undefined when unknown) */
    modelContextWindow?: number;
}
|
|
121
|
+
/**
 * Context window manager that holds only its configuration.
 * Given a list of messages it returns the subset that fits a token budget.
 *
 * @example
 * ```typescript
 * const manager = new ContextWindowManager({
 *   maxTokens: 4096,
 *   reservedTokens: 500,
 *   strategy: "sliding-window",
 * });
 *
 * const result = manager.fitMessages(messages);
 * // result.messages — trimmed to fit budget
 * // result.totalTokens — estimated token usage
 * // result.wasTruncated — whether messages were removed
 * ```
 */
declare class ContextWindowManager {
    private readonly config;
    constructor(config: ContextWindowConfig);
    /** Token budget left once the reserve is subtracted (never below zero) */
    get availableBudget(): number;
    /**
     * Estimate the token count of one message using the configured
     * estimation options.
     * @param message - Message to estimate
     * @returns Estimated token count
     */
    estimateMessageTokens(message: ChatMessage): number;
    /**
     * Fit the messages into the available budget.
     * When the estimated total already fits, a copy of the input is returned;
     * otherwise the configured overflow strategy decides what is kept.
     * @param messages - All messages to consider
     * @returns Retained messages plus trimming metadata
     */
    fitMessages(messages: ReadonlyArray<ChatMessage>): ContextWindowResult;
    /**
     * Asynchronous counterpart of fitMessages().
     * With the "summarize-placeholder" strategy and a configured summarizer,
     * the removed messages are passed to the summarizer and its result
     * replaces the placeholder text. If the summarizer throws, the static
     * placeholder is kept. Every other strategy yields the fitMessages() result.
     */
    fitMessagesAsync(messages: ReadonlyArray<ChatMessage>): Promise<ContextWindowResult>;
    /**
     * Trim using the real prompt size reported by the previous API call
     * instead of the character heuristic.
     *
     * The budget is `modelContextWindow` minus the configured reserve. When
     * that budget is not positive, or `lastPromptTokens` already fits, the
     * messages are returned unchanged. Otherwise the average
     * `lastPromptTokens / messageCount` determines how many of the oldest
     * non-system messages are dropped.
     *
     * @param messages - All messages in the session
     * @param lastPromptTokens - Real prompt tokens from the last API response
     * @param modelContextWindow - Model's total context window size in tokens
     * @returns Retained messages plus trimming metadata
     */
    fitMessagesWithUsage(messages: ReadonlyArray<ChatMessage>, lastPromptTokens: number, modelContextWindow: number): ContextWindowResult;
    /**
     * Truncate oldest: every system message is kept; non-system messages are
     * then considered from newest to oldest and kept while they still fit.
     * A message that does not fit is skipped, so the most recent message is
     * kept only when it fits the remaining budget.
     */
    private truncateOldest;
    /**
     * Sliding window: keeps the longest run of most recent messages that fits
     * the budget; system messages get no special treatment.
     */
    private slidingWindow;
    /**
     * Summarize placeholder: keeps system messages and the most recent
     * non-system messages, and stands a placeholder message in for what was
     * dropped.
     */
    private summarizePlaceholder;
}
|
|
190
|
+
|
|
191
|
+
export { type ContextStats, type ContextSummarizer, type ContextWindowConfig, ContextWindowManager, type ContextWindowResult, type OverflowStrategy, type TokenEstimationOptions, estimateTokens };
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
// src/chat/context.ts
|
|
2
|
+
function estimateTokens(message, options) {
|
|
3
|
+
const ratio = options?.charsPerToken ?? 4;
|
|
4
|
+
let charCount = 0;
|
|
5
|
+
charCount += message.role.length + 4;
|
|
6
|
+
for (const part of message.parts) {
|
|
7
|
+
charCount += estimatePartChars(part);
|
|
8
|
+
}
|
|
9
|
+
return Math.ceil(charCount / ratio);
|
|
10
|
+
}
|
|
11
|
+
function estimatePartChars(part) {
|
|
12
|
+
switch (part.type) {
|
|
13
|
+
case "text":
|
|
14
|
+
return part.text.length;
|
|
15
|
+
case "reasoning":
|
|
16
|
+
return part.text.length;
|
|
17
|
+
case "tool_call":
|
|
18
|
+
return JSON.stringify(part.args).length + part.name.length + 20 + (part.result !== void 0 ? JSON.stringify(part.result).length : 0);
|
|
19
|
+
case "source":
|
|
20
|
+
return (part.title?.length ?? 0) + part.url.length + 10;
|
|
21
|
+
case "file":
|
|
22
|
+
return part.name.length + part.data.length + 20;
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
var ContextWindowManager = class {
|
|
26
|
+
config;
|
|
27
|
+
constructor(config) {
|
|
28
|
+
this.config = {
|
|
29
|
+
maxTokens: config.maxTokens,
|
|
30
|
+
reservedTokens: config.reservedTokens ?? 0,
|
|
31
|
+
strategy: config.strategy ?? "truncate-oldest",
|
|
32
|
+
estimation: config.estimation,
|
|
33
|
+
summarizer: config.summarizer
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
/** Available token budget after reserving tokens */
|
|
37
|
+
get availableBudget() {
|
|
38
|
+
return Math.max(0, this.config.maxTokens - this.config.reservedTokens);
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Estimate tokens for a single message.
|
|
42
|
+
* @param message - Message to estimate
|
|
43
|
+
* @returns Estimated token count
|
|
44
|
+
*/
|
|
45
|
+
estimateMessageTokens(message) {
|
|
46
|
+
return estimateTokens(message, this.config.estimation);
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Fit messages within the token budget using the configured strategy.
|
|
50
|
+
* @param messages - All messages to consider
|
|
51
|
+
* @returns Result with fitted messages and metadata
|
|
52
|
+
*/
|
|
53
|
+
fitMessages(messages) {
|
|
54
|
+
if (messages.length === 0) {
|
|
55
|
+
return { messages: [], totalTokens: 0, removedCount: 0, wasTruncated: false };
|
|
56
|
+
}
|
|
57
|
+
const budget = this.availableBudget;
|
|
58
|
+
const tokenCounts = messages.map((m) => this.estimateMessageTokens(m));
|
|
59
|
+
const totalTokens = tokenCounts.reduce((a, b) => a + b, 0);
|
|
60
|
+
if (totalTokens <= budget) {
|
|
61
|
+
return {
|
|
62
|
+
messages: [...messages],
|
|
63
|
+
totalTokens,
|
|
64
|
+
removedCount: 0,
|
|
65
|
+
wasTruncated: false
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
switch (this.config.strategy) {
|
|
69
|
+
case "truncate-oldest":
|
|
70
|
+
return this.truncateOldest(messages, tokenCounts, budget);
|
|
71
|
+
case "sliding-window":
|
|
72
|
+
return this.slidingWindow(messages, tokenCounts, budget);
|
|
73
|
+
case "summarize-placeholder":
|
|
74
|
+
return this.summarizePlaceholder(messages, tokenCounts, budget);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Async variant of fitMessages that supports async summarization.
|
|
79
|
+
* When strategy is "summarize-placeholder" and a summarizer is configured,
|
|
80
|
+
* calls the summarizer with removed messages and replaces the placeholder text.
|
|
81
|
+
* Falls back to static placeholder if summarizer throws.
|
|
82
|
+
* For other strategies, behaves identically to fitMessages().
|
|
83
|
+
*/
|
|
84
|
+
async fitMessagesAsync(messages) {
|
|
85
|
+
const result = this.fitMessages(messages);
|
|
86
|
+
if (this.config.strategy !== "summarize-placeholder" || !result.wasTruncated || !this.config.summarizer) {
|
|
87
|
+
return result;
|
|
88
|
+
}
|
|
89
|
+
const keptIds = new Set(result.messages.map((m) => m.id));
|
|
90
|
+
const removed = messages.filter((m) => !keptIds.has(m.id));
|
|
91
|
+
if (removed.length === 0) return result;
|
|
92
|
+
let summaryText;
|
|
93
|
+
try {
|
|
94
|
+
summaryText = await this.config.summarizer(removed);
|
|
95
|
+
} catch {
|
|
96
|
+
return result;
|
|
97
|
+
}
|
|
98
|
+
const updatedMessages = result.messages.map((m) => {
|
|
99
|
+
if (m.metadata?.isSummary === true) {
|
|
100
|
+
return {
|
|
101
|
+
...m,
|
|
102
|
+
parts: [{ type: "text", text: summaryText, status: "complete" }]
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
return m;
|
|
106
|
+
});
|
|
107
|
+
return { ...result, messages: updatedMessages };
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Trim messages using real token usage data from the previous API call.
|
|
111
|
+
* Uses average-based algorithm: `avgTokensPerMessage = lastPromptTokens / messageCount`.
|
|
112
|
+
* Removes oldest non-system messages until freed budget brings usage under modelContextWindow.
|
|
113
|
+
*
|
|
114
|
+
* @param messages - All messages in the session
|
|
115
|
+
* @param lastPromptTokens - Real prompt tokens from the last API response
|
|
116
|
+
* @param modelContextWindow - Model's total context window size in tokens
|
|
117
|
+
* @returns Result with fitted messages and metadata
|
|
118
|
+
*/
|
|
119
|
+
fitMessagesWithUsage(messages, lastPromptTokens, modelContextWindow) {
|
|
120
|
+
if (messages.length === 0) {
|
|
121
|
+
return { messages: [], totalTokens: 0, removedCount: 0, wasTruncated: false };
|
|
122
|
+
}
|
|
123
|
+
const budget = modelContextWindow - this.config.reservedTokens;
|
|
124
|
+
if (budget <= 0 || lastPromptTokens <= budget) {
|
|
125
|
+
return {
|
|
126
|
+
messages: [...messages],
|
|
127
|
+
totalTokens: lastPromptTokens,
|
|
128
|
+
removedCount: 0,
|
|
129
|
+
wasTruncated: false
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
const avgTokensPerMessage = lastPromptTokens / messages.length;
|
|
133
|
+
const tokensToFree = lastPromptTokens - budget;
|
|
134
|
+
const messagesToRemove = Math.ceil(tokensToFree / avgTokensPerMessage);
|
|
135
|
+
const nonSystemIndices = [];
|
|
136
|
+
for (let i = 0; i < messages.length; i++) {
|
|
137
|
+
if (messages[i].role === "system") ; else {
|
|
138
|
+
nonSystemIndices.push(i);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
const removableCount = Math.min(messagesToRemove, nonSystemIndices.length);
|
|
142
|
+
const removedIndices = new Set(nonSystemIndices.slice(0, removableCount));
|
|
143
|
+
const result = [];
|
|
144
|
+
for (let i = 0; i < messages.length; i++) {
|
|
145
|
+
if (!removedIndices.has(i)) {
|
|
146
|
+
result.push(messages[i]);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
const estimatedTokens = Math.round(
|
|
150
|
+
lastPromptTokens * (result.length / messages.length)
|
|
151
|
+
);
|
|
152
|
+
return {
|
|
153
|
+
messages: result,
|
|
154
|
+
totalTokens: estimatedTokens,
|
|
155
|
+
removedCount: removableCount,
|
|
156
|
+
wasTruncated: removableCount > 0
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Truncate oldest: keeps system messages, removes oldest non-system messages first.
|
|
161
|
+
* Always keeps the most recent user message.
|
|
162
|
+
*/
|
|
163
|
+
truncateOldest(messages, tokenCounts, budget) {
|
|
164
|
+
const systemIndices = [];
|
|
165
|
+
const nonSystemIndices = [];
|
|
166
|
+
for (let i = 0; i < messages.length; i++) {
|
|
167
|
+
if (messages[i].role === "system") {
|
|
168
|
+
systemIndices.push(i);
|
|
169
|
+
} else {
|
|
170
|
+
nonSystemIndices.push(i);
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
let usedTokens = systemIndices.reduce(
|
|
174
|
+
(sum, i) => sum + tokenCounts[i],
|
|
175
|
+
0
|
|
176
|
+
);
|
|
177
|
+
const includedNonSystem = [];
|
|
178
|
+
for (let i = nonSystemIndices.length - 1; i >= 0; i--) {
|
|
179
|
+
const idx = nonSystemIndices[i];
|
|
180
|
+
if (usedTokens + tokenCounts[idx] <= budget) {
|
|
181
|
+
includedNonSystem.unshift(idx);
|
|
182
|
+
usedTokens += tokenCounts[idx];
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
const includedSet = /* @__PURE__ */ new Set([...systemIndices, ...includedNonSystem]);
|
|
186
|
+
const result = [];
|
|
187
|
+
let resultTokens = 0;
|
|
188
|
+
for (let i = 0; i < messages.length; i++) {
|
|
189
|
+
if (includedSet.has(i)) {
|
|
190
|
+
result.push(messages[i]);
|
|
191
|
+
resultTokens += tokenCounts[i];
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
return {
|
|
195
|
+
messages: result,
|
|
196
|
+
totalTokens: resultTokens,
|
|
197
|
+
removedCount: messages.length - result.length,
|
|
198
|
+
wasTruncated: true
|
|
199
|
+
};
|
|
200
|
+
}
|
|
201
|
+
/**
|
|
202
|
+
* Sliding window: keeps the most recent messages that fit within budget.
|
|
203
|
+
*/
|
|
204
|
+
slidingWindow(messages, tokenCounts, budget) {
|
|
205
|
+
const result = [];
|
|
206
|
+
let usedTokens = 0;
|
|
207
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
208
|
+
if (usedTokens + tokenCounts[i] <= budget) {
|
|
209
|
+
result.unshift(messages[i]);
|
|
210
|
+
usedTokens += tokenCounts[i];
|
|
211
|
+
} else {
|
|
212
|
+
break;
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
return {
|
|
216
|
+
messages: result,
|
|
217
|
+
totalTokens: usedTokens,
|
|
218
|
+
removedCount: messages.length - result.length,
|
|
219
|
+
wasTruncated: true
|
|
220
|
+
};
|
|
221
|
+
}
|
|
222
|
+
/**
|
|
223
|
+
* Summarize placeholder: replaces truncated messages with a placeholder,
|
|
224
|
+
* preserving system messages and recent context.
|
|
225
|
+
*/
|
|
226
|
+
summarizePlaceholder(messages, tokenCounts, budget) {
|
|
227
|
+
const systemMessages = [];
|
|
228
|
+
const nonSystem = [];
|
|
229
|
+
for (let i = 0; i < messages.length; i++) {
|
|
230
|
+
if (messages[i].role === "system") {
|
|
231
|
+
systemMessages.push({ msg: messages[i], tokens: tokenCounts[i] });
|
|
232
|
+
} else {
|
|
233
|
+
nonSystem.push({ msg: messages[i], tokens: tokenCounts[i], idx: i });
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
let usedTokens = systemMessages.reduce((s, m) => s + m.tokens, 0);
|
|
237
|
+
const placeholderTokens = 20;
|
|
238
|
+
usedTokens += placeholderTokens;
|
|
239
|
+
const recentKept = [];
|
|
240
|
+
for (let i = nonSystem.length - 1; i >= 0; i--) {
|
|
241
|
+
if (usedTokens + nonSystem[i].tokens <= budget) {
|
|
242
|
+
recentKept.unshift(nonSystem[i]);
|
|
243
|
+
usedTokens += nonSystem[i].tokens;
|
|
244
|
+
} else {
|
|
245
|
+
break;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
const removedCount = messages.length - systemMessages.length - recentKept.length;
|
|
249
|
+
const result = [];
|
|
250
|
+
for (const sm of systemMessages) {
|
|
251
|
+
result.push(sm.msg);
|
|
252
|
+
}
|
|
253
|
+
if (removedCount > 0) {
|
|
254
|
+
result.push({
|
|
255
|
+
id: "context-placeholder",
|
|
256
|
+
role: "system",
|
|
257
|
+
parts: [{ type: "text", text: `[${removedCount} earlier message${removedCount === 1 ? "" : "s"} omitted for context window]`, status: "complete" }],
|
|
258
|
+
metadata: { isSummary: true },
|
|
259
|
+
createdAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
260
|
+
status: "complete"
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
for (const m of recentKept) {
|
|
264
|
+
result.push(m.msg);
|
|
265
|
+
}
|
|
266
|
+
return {
|
|
267
|
+
messages: result,
|
|
268
|
+
totalTokens: usedTokens,
|
|
269
|
+
removedCount,
|
|
270
|
+
wasTruncated: true
|
|
271
|
+
};
|
|
272
|
+
}
|
|
273
|
+
};
|
|
274
|
+
|
|
275
|
+
export { ContextWindowManager, estimateTokens };
|
|
276
|
+
//# sourceMappingURL=context.js.map
|
|
277
|
+
//# sourceMappingURL=context.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/chat/context.ts"],"names":[],"mappings":";AA4CO,SAAS,cAAA,CACd,SACA,OAAA,EACQ;AACR,EAAA,MAAM,KAAA,GAAQ,SAAS,aAAA,IAAiB,CAAA;AACxC,EAAA,IAAI,SAAA,GAAY,CAAA;AAGhB,EAAA,SAAA,IAAa,OAAA,CAAQ,KAAK,MAAA,GAAS,CAAA;AAGnC,EAAA,KAAA,MAAW,IAAA,IAAQ,QAAQ,KAAA,EAAO;AAChC,IAAA,SAAA,IAAa,kBAAkB,IAAI,CAAA;AAAA,EACrC;AAEA,EAAA,OAAO,IAAA,CAAK,IAAA,CAAK,SAAA,GAAY,KAAK,CAAA;AACpC;AAEA,SAAS,kBAAkB,IAAA,EAA2B;AACpD,EAAA,QAAQ,KAAK,IAAA;AAAM,IACjB,KAAK,MAAA;AACH,MAAA,OAAO,KAAK,IAAA,CAAK,MAAA;AAAA,IACnB,KAAK,WAAA;AACH,MAAA,OAAO,KAAK,IAAA,CAAK,MAAA;AAAA,IACnB,KAAK,WAAA;AACH,MAAA,OAAO,KAAK,SAAA,CAAU,IAAA,CAAK,IAAI,CAAA,CAAE,MAAA,GAAS,KAAK,IAAA,CAAK,MAAA,GAAS,EAAA,IAC1D,IAAA,CAAK,WAAW,MAAA,GAAY,IAAA,CAAK,UAAU,IAAA,CAAK,MAAM,EAAE,MAAA,GAAS,CAAA,CAAA;AAAA,IACtE,KAAK,QAAA;AACH,MAAA,OAAA,CAAQ,KAAK,KAAA,EAAO,MAAA,IAAU,CAAA,IAAK,IAAA,CAAK,IAAI,MAAA,GAAS,EAAA;AAAA,IACvD,KAAK,MAAA;AACH,MAAA,OAAO,IAAA,CAAK,IAAA,CAAK,MAAA,GAAS,IAAA,CAAK,KAAK,MAAA,GAAS,EAAA;AAAA;AAEnD;AAmHO,IAAM,uBAAN,MAA2B;AAAA,EACf,MAAA;AAAA,EAKjB,YAAY,MAAA,EAA6B;AACvC,IAAA,IAAA,CAAK,MAAA,GAAS;AAAA,MACZ,WAAW,MAAA,CAAO,SAAA;AAAA,MAClB,cAAA,EAAgB,OAAO,cAAA,IAAkB,CAAA;AAAA,MACzC,QAAA,EAAU,OAAO,QAAA,IAAY,iBAAA;AAAA,MAC7B,YAAY,MAAA,CAAO,UAAA;AAAA,MACnB,YAAY,MAAA,CAAO;AAAA,KACrB;AAAA,EACF;AAAA;AAAA,EAGA,IAAI,eAAA,GAA0B;AAC5B,IAAA,OAAO,IAAA,CAAK,IAAI,CAAA,EAAG,IAAA,CAAK,OAAO,SAAA,GAAY,IAAA,CAAK,OAAO,cAAc,CAAA;AAAA,EACvE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,sBAAsB,OAAA,EAA8B;AAClD,IAAA,OAAO,cAAA,CAAe,OAAA,EAAS,IAAA,CAAK,MAAA,CAAO,UAAU,CAAA;AAAA,EACvD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,YAAY,QAAA,EAAuD;AACjE,IAAA,IAAI,QAAA,CAAS,WAAW,CAAA,EAAG;AACzB,MAAA,OAAO,EAAE,UAAU,EAAC,EAAG,aAAa,CAAA,EAAG,YAAA,EAAc,CAAA,EAAG,YAAA,EAAc,KAAA,EAAM;AAAA,IAC9E;AAEA,IAAA,MAAM,SAAS,IAAA,CAAK,eAAA;AAGpB,IAAA,MAAM,WAAA,GAAc,SAAS,GAAA,CAAI,CAAC,MAAM,IAAA,CAAK,qBAAA,CAAsB,CAAC,CAAC,CAAA;AACrE,IAAA,MAAM,WAAA,GAAc,YAAY,MAAA,CAAO,CAAC,GAAG,CAAA,KAAM,CAAA,GAAI,GAAG,CAAC,CAAA;AAGzD,IAAA,IAAI,eAAe,MAAA,EAAQ;AACzB,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,
CAAC,GAAG,QAAQ,CAAA;AAAA,QACtB,WAAA;AAAA,QACA,YAAA,EAAc,CAAA;AAAA,QACd,YAAA,EAAc;AAAA,OAChB;AAAA,IACF;AAEA,IAAA,QAAQ,IAAA,CAAK,OAAO,QAAA;AAAU,MAC5B,KAAK,iBAAA;AACH,QAAA,OAAO,IAAA,CAAK,cAAA,CAAe,QAAA,EAAU,WAAA,EAAa,MAAM,CAAA;AAAA,MAC1D,KAAK,gBAAA;AACH,QAAA,OAAO,IAAA,CAAK,aAAA,CAAc,QAAA,EAAU,WAAA,EAAa,MAAM,CAAA;AAAA,MACzD,KAAK,uBAAA;AACH,QAAA,OAAO,IAAA,CAAK,oBAAA,CAAqB,QAAA,EAAU,WAAA,EAAa,MAAM,CAAA;AAAA;AAClE,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,iBAAiB,QAAA,EAAgE;AACrF,IAAA,MAAM,MAAA,GAAS,IAAA,CAAK,WAAA,CAAY,QAAQ,CAAA;AAGxC,IAAA,IACE,IAAA,CAAK,MAAA,CAAO,QAAA,KAAa,uBAAA,IACzB,CAAC,OAAO,YAAA,IACR,CAAC,IAAA,CAAK,MAAA,CAAO,UAAA,EACb;AACA,MAAA,OAAO,MAAA;AAAA,IACT;AAGA,IAAA,MAAM,OAAA,GAAU,IAAI,GAAA,CAAI,MAAA,CAAO,SAAS,GAAA,CAAI,CAAA,CAAA,KAAK,CAAA,CAAE,EAAE,CAAC,CAAA;AACtD,IAAA,MAAM,OAAA,GAAU,SAAS,MAAA,CAAO,CAAA,CAAA,KAAK,CAAC,OAAA,CAAQ,GAAA,CAAI,CAAA,CAAE,EAAE,CAAC,CAAA;AACvD,IAAA,IAAI,OAAA,CAAQ,MAAA,KAAW,CAAA,EAAG,OAAO,MAAA;AAGjC,IAAA,IAAI,WAAA;AACJ,IAAA,IAAI;AACF,MAAA,WAAA,GAAc,MAAM,IAAA,CAAK,MAAA,CAAO,UAAA,CAAW,OAAO,CAAA;AAAA,IACpD,CAAA,CAAA,MAAQ;AACN,MAAA,OAAO,MAAA;AAAA,IACT;AAGA,IAAA,MAAM,eAAA,GAAkB,MAAA,CAAO,QAAA,CAAS,GAAA,CAAI,CAAA,CAAA,KAAK;AAC/C,MAAA,IAAK,CAAA,CAAE,QAAA,EAAsC,SAAA,KAAc,IAAA,EAAM;AAC/D,QAAA,OAAO;AAAA,UACL,GAAG,CAAA;AAAA,UACH,KAAA,EAAO,CAAC,EAAE,IAAA,EAAM,QAAiB,IAAA,EAAM,WAAA,EAAa,MAAA,EAAQ,UAAA,EAAqB;AAAA,SACnF;AAAA,MACF;AACA,MAAA,OAAO,CAAA;AAAA,IACT,CAAC,CAAA;AAED,IAAA,OAAO,EAAE,GAAG,MAAA,EAAQ,QAAA,EAAU,eAAA,EAAgB;AAAA,EAChD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,oBAAA,CACE,QAAA,EACA,gBAAA,EACA,kBAAA,EACqB;AACrB,IAAA,IAAI,QAAA,CAAS,WAAW,CAAA,EAAG;AACzB,MAAA,OAAO,EAAE,UAAU,EAAC,EAAG,aAAa,CAAA,EAAG,YAAA,EAAc,CAAA,EAAG,YAAA,EAAc,KAAA,EAAM;AAAA,IAC9E;AAEA,IAAA,MAAM,MAAA,GAAS,kBAAA,GAAqB,IAAA,CAAK,MAAA,CAAO,cAAA;AAChD,IAAA,IAAI,MAAA,IAAU,CAAA,IAAK,gBAAA,IAAoB,MAAA,EAAQ;AAC7C,MAAA,OAAO;AAAA,QACL,QAAA,EAAU,CAAC,GAAG,QAAQ,CAAA;AAAA,QACtB,WAAA,EAAa,gBAAA;AAAA,QACb,YAAA,EAAc,CAAA;AAAA,QACd,YAAA,EAAc;AAAA,OAChB;AAAA,IAC
F;AAGA,IAAA,MAAM,mBAAA,GAAsB,mBAAmB,QAAA,CAAS,MAAA;AAGxD,IAAA,MAAM,eAAe,gBAAA,GAAmB,MAAA;AAExC,IAAA,MAAM,gBAAA,GAAmB,IAAA,CAAK,IAAA,CAAK,YAAA,GAAe,mBAAmB,CAAA;AAIrE,IAAA,MAAM,mBAA6B,EAAC;AACpC,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,QAAA,CAAS,QAAQ,CAAA,EAAA,EAAK;AACxC,MAAA,IAAI,QAAA,CAAS,CAAC,CAAA,CAAE,IAAA,KAAS,QAAA,EAAU,CAEnC,MAAO;AACL,QAAA,gBAAA,CAAiB,KAAK,CAAC,CAAA;AAAA,MACzB;AAAA,IACF;AAGA,IAAA,MAAM,cAAA,GAAiB,IAAA,CAAK,GAAA,CAAI,gBAAA,EAAkB,iBAAiB,MAAM,CAAA;AACzE,IAAA,MAAM,iBAAiB,IAAI,GAAA,CAAI,iBAAiB,KAAA,CAAM,CAAA,EAAG,cAAc,CAAC,CAAA;AAExE,IAAA,MAAM,SAAwB,EAAC;AAC/B,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,QAAA,CAAS,QAAQ,CAAA,EAAA,EAAK;AACxC,MAAA,IAAI,CAAC,cAAA,CAAe,GAAA,CAAI,CAAC,CAAA,EAAG;AAC1B,QAAA,MAAA,CAAO,IAAA,CAAK,QAAA,CAAS,CAAC,CAAC,CAAA;AAAA,MACzB;AAAA,IACF;AAGA,IAAA,MAAM,kBAAkB,IAAA,CAAK,KAAA;AAAA,MAC3B,gBAAA,IAAoB,MAAA,CAAO,MAAA,GAAS,QAAA,CAAS,MAAA;AAAA,KAC/C;AAEA,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,WAAA,EAAa,eAAA;AAAA,MACb,YAAA,EAAc,cAAA;AAAA,MACd,cAAc,cAAA,GAAiB;AAAA,KACjC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,cAAA,CACN,QAAA,EACA,WAAA,EACA,MAAA,EACqB;AAErB,IAAA,MAAM,gBAA0B,EAAC;AACjC,IAAA,MAAM,mBAA6B,EAAC;AAEpC,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,QAAA,CAAS,QAAQ,CAAA,EAAA,EAAK;AACxC,MAAA,IAAI,QAAA,CAAS,CAAC,CAAA,CAAE,IAAA,KAAS,QAAA,EAAU;AACjC,QAAA,aAAA,CAAc,KAAK,CAAC,CAAA;AAAA,MACtB,CAAA,MAAO;AACL,QAAA,gBAAA,CAAiB,KAAK,CAAC,CAAA;AAAA,MACzB;AAAA,IACF;AAGA,IAAA,IAAI,aAAa,aAAA,CAAc,MAAA;AAAA,MAC7B,CAAC,GAAA,EAAK,CAAA,KAAM,GAAA,GAAM,YAAY,CAAC,CAAA;AAAA,MAC/B;AAAA,KACF;AAMA,IAAA,MAAM,oBAA8B,EAAC;AACrC,IAAA,KAAA,IAAS,IAAI,gBAAA,CAAiB,MAAA,GAAS,CAAA,EAAG,CAAA,IAAK,GAAG,CAAA,EAAA,EAAK;AACrD,MAAA,MAAM,GAAA,GAAM,iBAAiB,CAAC,CAAA;AAC9B,MAAA,IAAI,UAAA,GAAa,WAAA,CAAY,GAAG,CAAA,IAAK,MAAA,EAAQ;AAC3C,QAAA,iBAAA,CAAkB,QAAQ,GAAG,CAAA;AAC7B,QAAA,UAAA,IAAc,YAAY,GAAG,CAAA;AAAA,MAC/B;AAAA,IACF;AAGA,IAAA,MAAM,WAAA,uBAAkB,GAAA,CAAI,CAAC,GAAG,aAAA,EAAe,GAAG,iBAAiB,CAAC,CAAA;AACpE,IAAA,MAAM,SAAwB,EAAC;AAC/B,IAAA,IAAI,YAAA,GAAe,CAAA;AACnB,IAAA,
KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,QAAA,CAAS,QAAQ,CAAA,EAAA,EAAK;AACxC,MAAA,IAAI,WAAA,CAAY,GAAA,CAAI,CAAC,CAAA,EAAG;AACtB,QAAA,MAAA,CAAO,IAAA,CAAK,QAAA,CAAS,CAAC,CAAC,CAAA;AACvB,QAAA,YAAA,IAAgB,YAAY,CAAC,CAAA;AAAA,MAC/B;AAAA,IACF;AAEA,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,WAAA,EAAa,YAAA;AAAA,MACb,YAAA,EAAc,QAAA,CAAS,MAAA,GAAS,MAAA,CAAO,MAAA;AAAA,MACvC,YAAA,EAAc;AAAA,KAChB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKQ,aAAA,CACN,QAAA,EACA,WAAA,EACA,MAAA,EACqB;AACrB,IAAA,MAAM,SAAwB,EAAC;AAC/B,IAAA,IAAI,UAAA,GAAa,CAAA;AAGjB,IAAA,KAAA,IAAS,IAAI,QAAA,CAAS,MAAA,GAAS,CAAA,EAAG,CAAA,IAAK,GAAG,CAAA,EAAA,EAAK;AAC7C,MAAA,IAAI,UAAA,GAAa,WAAA,CAAY,CAAC,CAAA,IAAK,MAAA,EAAQ;AACzC,QAAA,MAAA,CAAO,OAAA,CAAQ,QAAA,CAAS,CAAC,CAAC,CAAA;AAC1B,QAAA,UAAA,IAAc,YAAY,CAAC,CAAA;AAAA,MAC7B,CAAA,MAAO;AACL,QAAA;AAAA,MACF;AAAA,IACF;AAEA,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,WAAA,EAAa,UAAA;AAAA,MACb,YAAA,EAAc,QAAA,CAAS,MAAA,GAAS,MAAA,CAAO,MAAA;AAAA,MACvC,YAAA,EAAc;AAAA,KAChB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,oBAAA,CACN,QAAA,EACA,WAAA,EACA,MAAA,EACqB;AAErB,IAAA,MAAM,iBAAyD,EAAC;AAChE,IAAA,MAAM,YAAiE,EAAC;AAExE,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,QAAA,CAAS,QAAQ,CAAA,EAAA,EAAK;AACxC,MAAA,IAAI,QAAA,CAAS,CAAC,CAAA,CAAE,IAAA,KAAS,QAAA,EAAU;AACjC,QAAA,cAAA,CAAe,IAAA,CAAK,EAAE,GAAA,EAAK,QAAA,CAAS,CAAC,GAAG,MAAA,EAAQ,WAAA,CAAY,CAAC,CAAA,EAAG,CAAA;AAAA,MAClE,CAAA,MAAO;AACL,QAAA,SAAA,CAAU,IAAA,CAAK,EAAE,GAAA,EAAK,QAAA,CAAS,CAAC,CAAA,EAAG,MAAA,EAAQ,WAAA,CAAY,CAAC,CAAA,EAAG,GAAA,EAAK,CAAA,EAAG,CAAA;AAAA,MACrE;AAAA,IACF;AAGA,IAAA,IAAI,UAAA,GAAa,eAAe,MAAA,CAAO,CAAC,GAAG,CAAA,KAAM,CAAA,GAAI,CAAA,CAAE,MAAA,EAAQ,CAAC,CAAA;AAGhE,IAAA,MAAM,iBAAA,GAAoB,EAAA;AAC1B,IAAA,UAAA,IAAc,iBAAA;AAGd,IAAA,MAAM,aAA+B,EAAC;AACtC,IAAA,KAAA,IAAS,IAAI,SAAA,CAAU,MAAA,GAAS,CAAA,EAAG,CAAA,IAAK,GAAG,CAAA,EAAA,EAAK;AAC9C,MAAA,IAAI,UAAA,GAAa,SAAA,CAAU,CAAC,CAAA,CAAE,UAAU,MAAA,EAAQ;AAC9C,QAAA,UAAA,CAAW,OAAA,CAAQ,SAAA,CAAU,CAAC,CAAC,CAAA;AAC/B,QAAA,UAAA,IAAc,SAAA,CAAU,CAAC,CAAA,CAAE,MAAA;AAAA,MAC7B,CAAA,MAAO;AACL,QAAA;AAAA
,MACF;AAAA,IACF;AAEA,IAAA,MAAM,YAAA,GACJ,QAAA,CAAS,MAAA,GACT,cAAA,CAAe,SACf,UAAA,CAAW,MAAA;AAGb,IAAA,MAAM,SAAwB,EAAC;AAG/B,IAAA,KAAA,MAAW,MAAM,cAAA,EAAgB;AAC/B,MAAA,MAAA,CAAO,IAAA,CAAK,GAAG,GAAG,CAAA;AAAA,IACpB;AAGA,IAAA,IAAI,eAAe,CAAA,EAAG;AACpB,MAAA,MAAA,CAAO,IAAA,CAAK;AAAA,QACV,EAAA,EAAI,qBAAA;AAAA,QACJ,IAAA,EAAM,QAAA;AAAA,QACN,OAAO,CAAC,EAAE,IAAA,EAAM,MAAA,EAAQ,MAAM,CAAA,CAAA,EAAI,YAAY,CAAA,gBAAA,EAAmB,YAAA,KAAiB,IAAI,EAAA,GAAK,GAAG,CAAA,4BAAA,CAAA,EAAgC,MAAA,EAAQ,YAAqB,CAAA;AAAA,QAC3J,QAAA,EAAU,EAAE,SAAA,EAAW,IAAA,EAAK;AAAA,QAC5B,SAAA,EAAA,iBAAW,IAAI,IAAA,EAAK,EAAE,WAAA,EAAY;AAAA,QAClC,MAAA,EAAQ;AAAA,OACT,CAAA;AAAA,IACH;AAGA,IAAA,KAAA,MAAW,KAAK,UAAA,EAAY;AAC1B,MAAA,MAAA,CAAO,IAAA,CAAK,EAAE,GAAG,CAAA;AAAA,IACnB;AAEA,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,WAAA,EAAa,UAAA;AAAA,MACb,YAAA;AAAA,MACA,YAAA,EAAc;AAAA,KAChB;AAAA,EACF;AACF","file":"context.js","sourcesContent":["/**\n * @witqq/agent-sdk/chat/context\n *\n * Context window manager for selecting which messages fit within a token budget.\n * Stateless: takes messages in, returns trimmed messages out.\n * Three overflow strategies: truncate-oldest, sliding-window, summarize-placeholder.\n */\n\nimport type { ChatMessage, MessagePart } from \"./core.js\";\n\n// ─── Token Estimation ──────────────────────────────────────────\n\n/**\n * Options for token estimation.\n */\nexport interface TokenEstimationOptions {\n /**\n * Characters per token ratio.\n * Lower = more conservative (fewer messages fit).\n * @default 4\n */\n charsPerToken?: number;\n}\n\n/**\n * Estimate token count for a single chat message.\n * Uses character-based heuristic: `Math.ceil(charCount / charsPerToken)`.\n *\n * Counts:\n * - Text content (string or text parts)\n * - Serialized tool calls and tool results\n * - Thinking blocks\n * - Role overhead (~4 tokens)\n *\n * @param message - Chat message to estimate\n * @param options - Estimation options\n * @returns Estimated token count\n *\n * @example\n * ```typescript\n * const 
tokens = estimateTokens(message);\n * const conservative = estimateTokens(message, { charsPerToken: 3 });\n * ```\n */\nexport function estimateTokens(\n message: ChatMessage,\n options?: TokenEstimationOptions,\n): number {\n const ratio = options?.charsPerToken ?? 4;\n let charCount = 0;\n\n // Role overhead\n charCount += message.role.length + 4;\n\n // Parts\n for (const part of message.parts) {\n charCount += estimatePartChars(part);\n }\n\n return Math.ceil(charCount / ratio);\n}\n\nfunction estimatePartChars(part: MessagePart): number {\n switch (part.type) {\n case \"text\":\n return part.text.length;\n case \"reasoning\":\n return part.text.length;\n case \"tool_call\":\n return JSON.stringify(part.args).length + part.name.length + 20 +\n (part.result !== undefined ? JSON.stringify(part.result).length : 0);\n case \"source\":\n return (part.title?.length ?? 0) + part.url.length + 10;\n case \"file\":\n return part.name.length + part.data.length + 20;\n }\n}\n\n// ─── Overflow Strategies ───────────────────────────────────────\n\n/** Overflow strategy type */\nexport type OverflowStrategy =\n | \"truncate-oldest\"\n | \"sliding-window\"\n | \"summarize-placeholder\";\n\n// ─── Context Window Configuration ──────────────────────────────\n\n/**\n * Async summarizer function for the summarize-placeholder strategy.\n * Receives removed messages and returns a summary string.\n * When configured, replaces the static placeholder text with actual summary.\n */\nexport type ContextSummarizer = (removedMessages: readonly ChatMessage[]) => Promise<string>;\n\n/**\n * Configuration for the context window manager.\n */\nexport interface ContextWindowConfig {\n /** Maximum token budget for the context window */\n maxTokens: number;\n\n /**\n * Tokens reserved for system prompt and response generation.\n * Subtracted from maxTokens to get available budget.\n * @default 0\n */\n reservedTokens?: number;\n\n /**\n * Strategy for handling overflow when messages exceed 
budget.\n * @default \"truncate-oldest\"\n */\n strategy?: OverflowStrategy;\n\n /**\n * Token estimation options.\n */\n estimation?: TokenEstimationOptions;\n\n /**\n * Optional async summarizer for the summarize-placeholder strategy.\n * When provided, replaces the static placeholder with a generated summary.\n * Falls back to static placeholder if summarizer throws.\n */\n summarizer?: ContextSummarizer;\n}\n\n// ─── Context Window Result ─────────────────────────────────────\n\n/**\n * Result of context window trimming.\n */\nexport interface ContextWindowResult {\n /** Messages that fit within the budget */\n messages: ChatMessage[];\n /** Total estimated tokens for included messages */\n totalTokens: number;\n /** Number of messages removed */\n removedCount: number;\n /** Whether any messages were truncated */\n wasTruncated: boolean;\n}\n\n// ─── Context Stats ─────────────────────────────────────────────\n\n/**\n * Context usage statistics for a session.\n * Returned by `IChatRuntime.getContextStats()`.\n *\n * When real usage data is available (after the first API response),\n * `realPromptTokens` and `realCompletionTokens` contain actual token counts.\n * `modelContextWindow` is the model's context window from `listModels()`.\n */\nexport interface ContextStats {\n /** Estimated total tokens in the trimmed context (heuristic, kept for backward compat) */\n totalTokens: number;\n /** Number of messages removed by trimming */\n removedCount: number;\n /** Whether context was truncated */\n wasTruncated: boolean;\n /** Available token budget (maxTokens − reservedTokens) */\n availableBudget: number;\n /** Real prompt tokens from the last API response (undefined before first response) */\n realPromptTokens?: number;\n /** Real completion tokens from the last API response (undefined before first response) */\n realCompletionTokens?: number;\n /** Model's context window in tokens from listModels() (undefined if not available) */\n modelContextWindow?: 
number;\n}\n\n// ─── Context Window Manager ────────────────────────────────────\n\n/**\n * Stateless context window manager.\n * Takes messages and returns the subset that fits within a token budget.\n *\n * @example\n * ```typescript\n * const manager = new ContextWindowManager({\n * maxTokens: 4096,\n * reservedTokens: 500,\n * strategy: \"sliding-window\",\n * });\n *\n * const result = manager.fitMessages(messages);\n * // result.messages — trimmed to fit budget\n * // result.totalTokens — estimated token usage\n * // result.wasTruncated — whether messages were removed\n * ```\n */\nexport class ContextWindowManager {\n private readonly config: Required<\n Pick<ContextWindowConfig, \"maxTokens\" | \"reservedTokens\" | \"strategy\">\n > &\n Pick<ContextWindowConfig, \"estimation\" | \"summarizer\">;\n\n constructor(config: ContextWindowConfig) {\n this.config = {\n maxTokens: config.maxTokens,\n reservedTokens: config.reservedTokens ?? 0,\n strategy: config.strategy ?? \"truncate-oldest\",\n estimation: config.estimation,\n summarizer: config.summarizer,\n };\n }\n\n /** Available token budget after reserving tokens */\n get availableBudget(): number {\n return Math.max(0, this.config.maxTokens - this.config.reservedTokens);\n }\n\n /**\n * Estimate tokens for a single message.\n * @param message - Message to estimate\n * @returns Estimated token count\n */\n estimateMessageTokens(message: ChatMessage): number {\n return estimateTokens(message, this.config.estimation);\n }\n\n /**\n * Fit messages within the token budget using the configured strategy.\n * @param messages - All messages to consider\n * @returns Result with fitted messages and metadata\n */\n fitMessages(messages: readonly ChatMessage[]): ContextWindowResult {\n if (messages.length === 0) {\n return { messages: [], totalTokens: 0, removedCount: 0, wasTruncated: false };\n }\n\n const budget = this.availableBudget;\n\n // Calculate tokens for each message\n const tokenCounts = messages.map((m) => 
this.estimateMessageTokens(m));\n const totalTokens = tokenCounts.reduce((a, b) => a + b, 0);\n\n // All messages fit\n if (totalTokens <= budget) {\n return {\n messages: [...messages],\n totalTokens,\n removedCount: 0,\n wasTruncated: false,\n };\n }\n\n switch (this.config.strategy) {\n case \"truncate-oldest\":\n return this.truncateOldest(messages, tokenCounts, budget);\n case \"sliding-window\":\n return this.slidingWindow(messages, tokenCounts, budget);\n case \"summarize-placeholder\":\n return this.summarizePlaceholder(messages, tokenCounts, budget);\n }\n }\n\n /**\n * Async variant of fitMessages that supports async summarization.\n * When strategy is \"summarize-placeholder\" and a summarizer is configured,\n * calls the summarizer with removed messages and replaces the placeholder text.\n * Falls back to static placeholder if summarizer throws.\n * For other strategies, behaves identically to fitMessages().\n */\n async fitMessagesAsync(messages: readonly ChatMessage[]): Promise<ContextWindowResult> {\n const result = this.fitMessages(messages);\n\n // Only enhance if summarize-placeholder strategy, messages were removed, and summarizer is configured\n if (\n this.config.strategy !== \"summarize-placeholder\" ||\n !result.wasTruncated ||\n !this.config.summarizer\n ) {\n return result;\n }\n\n // Find removed messages (those in original but not in result)\n const keptIds = new Set(result.messages.map(m => m.id));\n const removed = messages.filter(m => !keptIds.has(m.id));\n if (removed.length === 0) return result;\n\n // Call async summarizer, fall back to static placeholder on error\n let summaryText: string;\n try {\n summaryText = await this.config.summarizer(removed);\n } catch {\n return result; // Keep static placeholder on summarizer failure\n }\n\n // Replace placeholder text with summarizer output\n const updatedMessages = result.messages.map(m => {\n if ((m.metadata as Record<string, unknown>)?.isSummary === true) {\n return {\n ...m,\n 
parts: [{ type: \"text\" as const, text: summaryText, status: \"complete\" as const }],\n };\n }\n return m;\n });\n\n return { ...result, messages: updatedMessages };\n }\n\n /**\n * Trim messages using real token usage data from the previous API call.\n * Uses average-based algorithm: `avgTokensPerMessage = lastPromptTokens / messageCount`.\n * Removes oldest non-system messages until freed budget brings usage under modelContextWindow.\n *\n * @param messages - All messages in the session\n * @param lastPromptTokens - Real prompt tokens from the last API response\n * @param modelContextWindow - Model's total context window size in tokens\n * @returns Result with fitted messages and metadata\n */\n fitMessagesWithUsage(\n messages: readonly ChatMessage[],\n lastPromptTokens: number,\n modelContextWindow: number,\n ): ContextWindowResult {\n if (messages.length === 0) {\n return { messages: [], totalTokens: 0, removedCount: 0, wasTruncated: false };\n }\n\n const budget = modelContextWindow - this.config.reservedTokens;\n if (budget <= 0 || lastPromptTokens <= budget) {\n return {\n messages: [...messages],\n totalTokens: lastPromptTokens,\n removedCount: 0,\n wasTruncated: false,\n };\n }\n\n // Average tokens per message from real data\n const avgTokensPerMessage = lastPromptTokens / messages.length;\n\n // How many tokens we need to free\n const tokensToFree = lastPromptTokens - budget;\n // How many messages to remove (ceil to be safe)\n const messagesToRemove = Math.ceil(tokensToFree / avgTokensPerMessage);\n\n // Separate system and non-system messages\n const systemIndices: number[] = [];\n const nonSystemIndices: number[] = [];\n for (let i = 0; i < messages.length; i++) {\n if (messages[i].role === \"system\") {\n systemIndices.push(i);\n } else {\n nonSystemIndices.push(i);\n }\n }\n\n // Remove oldest non-system messages (from the beginning of conversation)\n const removableCount = Math.min(messagesToRemove, nonSystemIndices.length);\n const 
removedIndices = new Set(nonSystemIndices.slice(0, removableCount));\n\n const result: ChatMessage[] = [];\n for (let i = 0; i < messages.length; i++) {\n if (!removedIndices.has(i)) {\n result.push(messages[i]);\n }\n }\n\n // Estimate new total: proportional reduction\n const estimatedTokens = Math.round(\n lastPromptTokens * (result.length / messages.length),\n );\n\n return {\n messages: result,\n totalTokens: estimatedTokens,\n removedCount: removableCount,\n wasTruncated: removableCount > 0,\n };\n }\n\n /**\n * Truncate oldest: keeps system messages, removes oldest non-system messages first.\n * Always keeps the most recent user message.\n */\n private truncateOldest(\n messages: readonly ChatMessage[],\n tokenCounts: number[],\n budget: number,\n ): ContextWindowResult {\n // Separate system messages (always kept) and non-system\n const systemIndices: number[] = [];\n const nonSystemIndices: number[] = [];\n\n for (let i = 0; i < messages.length; i++) {\n if (messages[i].role === \"system\") {\n systemIndices.push(i);\n } else {\n nonSystemIndices.push(i);\n }\n }\n\n // System messages cost\n let usedTokens = systemIndices.reduce(\n (sum, i) => sum + tokenCounts[i],\n 0,\n );\n\n // If system messages alone exceed budget, still include them\n // (caller should configure reservedTokens properly)\n\n // Try to fit non-system from newest to oldest\n const includedNonSystem: number[] = [];\n for (let i = nonSystemIndices.length - 1; i >= 0; i--) {\n const idx = nonSystemIndices[i];\n if (usedTokens + tokenCounts[idx] <= budget) {\n includedNonSystem.unshift(idx);\n usedTokens += tokenCounts[idx];\n }\n }\n\n // Build result preserving original order\n const includedSet = new Set([...systemIndices, ...includedNonSystem]);\n const result: ChatMessage[] = [];\n let resultTokens = 0;\n for (let i = 0; i < messages.length; i++) {\n if (includedSet.has(i)) {\n result.push(messages[i]);\n resultTokens += tokenCounts[i];\n }\n }\n\n return {\n messages: result,\n 
totalTokens: resultTokens,\n removedCount: messages.length - result.length,\n wasTruncated: true,\n };\n }\n\n /**\n * Sliding window: keeps the most recent messages that fit within budget.\n */\n private slidingWindow(\n messages: readonly ChatMessage[],\n tokenCounts: number[],\n budget: number,\n ): ContextWindowResult {\n const result: ChatMessage[] = [];\n let usedTokens = 0;\n\n // Walk from newest to oldest\n for (let i = messages.length - 1; i >= 0; i--) {\n if (usedTokens + tokenCounts[i] <= budget) {\n result.unshift(messages[i]);\n usedTokens += tokenCounts[i];\n } else {\n break;\n }\n }\n\n return {\n messages: result,\n totalTokens: usedTokens,\n removedCount: messages.length - result.length,\n wasTruncated: true,\n };\n }\n\n /**\n * Summarize placeholder: replaces truncated messages with a placeholder,\n * preserving system messages and recent context.\n */\n private summarizePlaceholder(\n messages: readonly ChatMessage[],\n tokenCounts: number[],\n budget: number,\n ): ContextWindowResult {\n // First, identify system messages and recent messages\n const systemMessages: { msg: ChatMessage; tokens: number }[] = [];\n const nonSystem: { msg: ChatMessage; tokens: number; idx: number }[] = [];\n\n for (let i = 0; i < messages.length; i++) {\n if (messages[i].role === \"system\") {\n systemMessages.push({ msg: messages[i], tokens: tokenCounts[i] });\n } else {\n nonSystem.push({ msg: messages[i], tokens: tokenCounts[i], idx: i });\n }\n }\n\n // System message cost\n let usedTokens = systemMessages.reduce((s, m) => s + m.tokens, 0);\n\n // Placeholder costs ~20 tokens\n const placeholderTokens = 20;\n usedTokens += placeholderTokens;\n\n // Fit recent non-system messages from newest\n const recentKept: typeof nonSystem = [];\n for (let i = nonSystem.length - 1; i >= 0; i--) {\n if (usedTokens + nonSystem[i].tokens <= budget) {\n recentKept.unshift(nonSystem[i]);\n usedTokens += nonSystem[i].tokens;\n } else {\n break;\n }\n }\n\n const removedCount =\n 
messages.length -\n systemMessages.length -\n recentKept.length;\n\n // Build result: system messages, placeholder, recent messages\n const result: ChatMessage[] = [];\n\n // System messages first\n for (const sm of systemMessages) {\n result.push(sm.msg);\n }\n\n // Placeholder if messages were removed\n if (removedCount > 0) {\n result.push({\n id: \"context-placeholder\" as ChatMessage[\"id\"],\n role: \"system\",\n parts: [{ type: \"text\", text: `[${removedCount} earlier message${removedCount === 1 ? \"\" : \"s\"} omitted for context window]`, status: \"complete\" as const }],\n metadata: { isSummary: true },\n createdAt: new Date().toISOString(),\n status: \"complete\",\n });\n }\n\n // Recent messages\n for (const m of recentKept) {\n result.push(m.msg);\n }\n\n return {\n messages: result,\n totalTokens: usedTokens,\n removedCount,\n wasTruncated: true,\n };\n }\n}\n"]}
|