@m6d/cortex-server 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/adapters/database.d.ts +3 -0
- package/dist/src/ai/active-streams.d.ts +14 -0
- package/dist/src/ai/context/builder.d.ts +24 -0
- package/dist/src/ai/context/compressor.d.ts +7 -0
- package/dist/src/ai/context/index.d.ts +15 -0
- package/dist/src/ai/context/summarizer.d.ts +5 -0
- package/dist/src/ai/context/token-estimator.d.ts +20 -0
- package/dist/src/ai/context/types.d.ts +20 -0
- package/dist/src/ai/index.d.ts +1 -1
- package/dist/src/ai/prompt.d.ts +6 -1
- package/dist/src/config.d.ts +4 -0
- package/dist/src/db/schema.d.ts +19 -1
- package/dist/src/graph/expand-domains.d.ts +2 -0
- package/dist/src/graph/helpers.d.ts +5 -0
- package/dist/src/graph/resolver.d.ts +2 -0
- package/dist/src/graph/types.d.ts +6 -0
- package/dist/src/index.d.ts +1 -0
- package/dist/src/routes/ws.d.ts +5 -1
- package/dist/src/types.d.ts +32 -14
- package/dist/src/ws/connections.d.ts +3 -3
- package/dist/src/ws/events.d.ts +28 -3
- package/dist/src/ws/index.d.ts +1 -1
- package/dist/src/ws/notify.d.ts +1 -1
- package/package.json +1 -1
- package/src/adapters/database.ts +3 -0
- package/src/adapters/mssql.ts +26 -6
- package/src/ai/active-streams.ts +123 -0
- package/src/ai/context/builder.ts +94 -0
- package/src/ai/context/compressor.ts +47 -0
- package/src/ai/context/index.ts +75 -0
- package/src/ai/context/summarizer.ts +50 -0
- package/src/ai/context/token-estimator.ts +60 -0
- package/src/ai/context/types.ts +28 -0
- package/src/ai/index.ts +124 -29
- package/src/ai/prompt.ts +27 -18
- package/src/ai/tools/query-graph.tool.ts +1 -1
- package/src/cli/extract-endpoints.ts +18 -18
- package/src/config.ts +4 -0
- package/src/db/migrations/20260315000000_add_context_meta/migration.sql +1 -0
- package/src/db/schema.ts +6 -1
- package/src/factory.ts +11 -1
- package/src/graph/expand-domains.ts +276 -0
- package/src/graph/generate-cypher.ts +18 -5
- package/src/graph/helpers.ts +1 -0
- package/src/graph/resolver.ts +10 -0
- package/src/graph/seed.ts +5 -2
- package/src/graph/types.ts +6 -0
- package/src/index.ts +2 -0
- package/src/routes/chat.ts +47 -2
- package/src/routes/threads.ts +46 -9
- package/src/routes/ws.ts +37 -23
- package/src/types.ts +37 -13
- package/src/ws/connections.ts +15 -9
- package/src/ws/events.ts +31 -3
- package/src/ws/index.ts +9 -1
- package/src/ws/notify.ts +2 -2
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
type ActiveStream = {
|
|
2
|
+
id: string;
|
|
3
|
+
abortController: AbortController;
|
|
4
|
+
buffer: string[];
|
|
5
|
+
subscribers: Set<ReadableStreamDefaultController<string>>;
|
|
6
|
+
isComplete: boolean;
|
|
7
|
+
};
|
|
8
|
+
|
|
9
|
+
const streams = new Map<string, ActiveStream>();
|
|
10
|
+
|
|
11
|
+
export function registerStream(threadId: string, abortController: AbortController) {
|
|
12
|
+
const existing = streams.get(threadId);
|
|
13
|
+
if (existing) {
|
|
14
|
+
existing.abortController.abort();
|
|
15
|
+
for (const controller of existing.subscribers) {
|
|
16
|
+
try {
|
|
17
|
+
controller.close();
|
|
18
|
+
} catch {
|
|
19
|
+
/* already closed */
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
streams.delete(threadId);
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
const entry: ActiveStream = {
|
|
26
|
+
id: crypto.randomUUID(),
|
|
27
|
+
abortController,
|
|
28
|
+
buffer: [],
|
|
29
|
+
subscribers: new Set(),
|
|
30
|
+
isComplete: false,
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
streams.set(threadId, entry);
|
|
34
|
+
return entry;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export function attachSseStream(threadId: string, sseStream: ReadableStream<string>) {
|
|
38
|
+
const entry = streams.get(threadId);
|
|
39
|
+
if (!entry) return;
|
|
40
|
+
|
|
41
|
+
const reader = sseStream.getReader();
|
|
42
|
+
(async function consume() {
|
|
43
|
+
try {
|
|
44
|
+
while (true) {
|
|
45
|
+
const { done, value } = await reader.read();
|
|
46
|
+
if (done) break;
|
|
47
|
+
entry.buffer.push(value);
|
|
48
|
+
for (const controller of entry.subscribers) {
|
|
49
|
+
try {
|
|
50
|
+
controller.enqueue(value);
|
|
51
|
+
} catch {
|
|
52
|
+
/* subscriber cancelled */
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
} catch {
|
|
57
|
+
/* stream aborted or errored */
|
|
58
|
+
} finally {
|
|
59
|
+
entry.isComplete = true;
|
|
60
|
+
for (const controller of entry.subscribers) {
|
|
61
|
+
try {
|
|
62
|
+
controller.close();
|
|
63
|
+
} catch {
|
|
64
|
+
/* already closed */
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
})();
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
export function subscribe(threadId: string) {
|
|
72
|
+
const entry = streams.get(threadId);
|
|
73
|
+
if (!entry) return null;
|
|
74
|
+
|
|
75
|
+
let savedController: ReadableStreamDefaultController<string>;
|
|
76
|
+
|
|
77
|
+
return new ReadableStream<string>({
|
|
78
|
+
start(controller) {
|
|
79
|
+
savedController = controller;
|
|
80
|
+
controller.enqueue("[START]"); // XXX: we need to add this flag for an immediate response for the subscriber, otherwise we'd have to wait for the first token from the llm.
|
|
81
|
+
for (const chunk of entry.buffer) {
|
|
82
|
+
controller.enqueue(chunk);
|
|
83
|
+
}
|
|
84
|
+
if (entry.isComplete) {
|
|
85
|
+
controller.close();
|
|
86
|
+
return;
|
|
87
|
+
}
|
|
88
|
+
entry.subscribers.add(controller);
|
|
89
|
+
},
|
|
90
|
+
cancel() {
|
|
91
|
+
entry.subscribers.delete(savedController);
|
|
92
|
+
},
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
export function abortStream(threadId: string) {
|
|
97
|
+
const entry = streams.get(threadId);
|
|
98
|
+
if (!entry) return false;
|
|
99
|
+
queueMicrotask(() => entry.abortController.abort());
|
|
100
|
+
return true;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
export function isStreamRunning(threadId: string) {
|
|
104
|
+
const entry = streams.get(threadId);
|
|
105
|
+
return entry ? !entry.isComplete : false;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
export function removeStream(threadId: string, streamId?: string) {
|
|
109
|
+
const entry = streams.get(threadId);
|
|
110
|
+
if (!entry) return;
|
|
111
|
+
if (streamId && entry.id !== streamId) return;
|
|
112
|
+
|
|
113
|
+
if (entry) {
|
|
114
|
+
for (const controller of entry.subscribers) {
|
|
115
|
+
try {
|
|
116
|
+
controller.close();
|
|
117
|
+
} catch {
|
|
118
|
+
/* already closed */
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
streams.delete(threadId);
|
|
123
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import type { UIMessage } from "ai";
|
|
2
|
+
import { generateId } from "ai";
|
|
3
|
+
import type { DatabaseAdapter } from "../../adapters/database.ts";
|
|
4
|
+
import type { MessageMetadata, Thread } from "../../types.ts";
|
|
5
|
+
import type { ContextConfig, ThreadContextMeta } from "./types.ts";
|
|
6
|
+
import { compressToolResults } from "./compressor.ts";
|
|
7
|
+
import { estimateMessageTokens } from "./token-estimator.ts";
|
|
8
|
+
|
|
9
|
+
type ContextBuildResult = {
|
|
10
|
+
messages: UIMessage<MessageMetadata>[];
|
|
11
|
+
allMessages: UIMessage<MessageMetadata>[];
|
|
12
|
+
summary: string | null;
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Builds a token-aware context window from stored messages.
|
|
17
|
+
*
|
|
18
|
+
* 1. Loads messages from DB with generous limit
|
|
19
|
+
* 2. Reads existing summary from thread.contextMeta
|
|
20
|
+
* 3. Compresses large tool results
|
|
21
|
+
* 4. Walks messages newest-to-oldest, accumulating token estimates
|
|
22
|
+
* 5. Stops when adding the next message would exceed the budget
|
|
23
|
+
* 6. Prepends summary as synthetic message if older messages were trimmed
|
|
24
|
+
*/
|
|
25
|
+
export async function buildContextMessages(
|
|
26
|
+
userId: string,
|
|
27
|
+
thread: Thread,
|
|
28
|
+
db: DatabaseAdapter,
|
|
29
|
+
contextConfig: ContextConfig,
|
|
30
|
+
) {
|
|
31
|
+
// 1. Load messages with generous limit
|
|
32
|
+
const storedMessages = await db.messages.list(userId, thread.id, { limit: 50 });
|
|
33
|
+
const allMessages = storedMessages.map((m) => m.content);
|
|
34
|
+
|
|
35
|
+
// 2. Read existing summary
|
|
36
|
+
const contextMeta = thread.contextMeta;
|
|
37
|
+
const summary = contextMeta?.summary ?? null;
|
|
38
|
+
|
|
39
|
+
// 3. Compress large tool results
|
|
40
|
+
const compressed = compressToolResults(allMessages, contextConfig.toolResultMaxTokens);
|
|
41
|
+
|
|
42
|
+
// 4. Walk newest-to-oldest, accumulating token estimates.
|
|
43
|
+
// Reserve space for the system prompt + tool definitions (they share the context window).
|
|
44
|
+
const budget = contextConfig.maxContextTokens - contextConfig.reservedTokenBudget;
|
|
45
|
+
const selected: UIMessage<MessageMetadata>[] = [];
|
|
46
|
+
let accumulated = 0;
|
|
47
|
+
|
|
48
|
+
for (let i = compressed.length - 1; i >= 0; i--) {
|
|
49
|
+
const msgTokens = estimateMessageTokens(compressed[i]!);
|
|
50
|
+
|
|
51
|
+
if (accumulated + msgTokens > budget && selected.length > 0) {
|
|
52
|
+
break;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
accumulated += msgTokens;
|
|
56
|
+
selected.unshift(compressed[i]!);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// 5. If we trimmed messages and a summary exists, prepend it.
|
|
60
|
+
// Make room for the summary by evicting the oldest messages if needed.
|
|
61
|
+
if (summary && selected.length < allMessages.length) {
|
|
62
|
+
const summaryMessage = {
|
|
63
|
+
id: generateId(),
|
|
64
|
+
role: "user",
|
|
65
|
+
parts: [{ type: "text", text: `[Previous conversation summary]: ${summary}` }],
|
|
66
|
+
} satisfies UIMessage;
|
|
67
|
+
|
|
68
|
+
const summaryTokens = estimateMessageTokens(summaryMessage);
|
|
69
|
+
const trimmed = trimMessagesToFit(selected, budget - summaryTokens);
|
|
70
|
+
trimmed.unshift(summaryMessage);
|
|
71
|
+
return { messages: trimmed, allMessages, summary };
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
return { messages: selected, allMessages, summary } satisfies ContextBuildResult;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Drops the oldest messages until the total estimated tokens fit within `budget`.
|
|
79
|
+
* Always keeps at least the most recent message.
|
|
80
|
+
*/
|
|
81
|
+
export function trimMessagesToFit(messages: UIMessage<MessageMetadata>[], budget: number) {
|
|
82
|
+
let total = 0;
|
|
83
|
+
for (const msg of messages) {
|
|
84
|
+
total += estimateMessageTokens(msg);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
const trimmed = [...messages];
|
|
88
|
+
while (total > budget && trimmed.length > 1) {
|
|
89
|
+
const evicted = trimmed.shift()!;
|
|
90
|
+
total -= estimateMessageTokens(evicted);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
return trimmed;
|
|
94
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { UIMessage } from "ai";
|
|
2
|
+
import { estimateTokens, CHARS_PER_TOKEN } from "./token-estimator.ts";
|
|
3
|
+
import type { MessageMetadata } from "src/types.ts";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Returns a new array of messages with large tool outputs truncated
|
|
7
|
+
* to `maxTokensPerResult`. Does not mutate the input messages.
|
|
8
|
+
*/
|
|
9
|
+
export function compressToolResults(
|
|
10
|
+
messages: UIMessage<MessageMetadata>[],
|
|
11
|
+
maxTokensPerResult: number,
|
|
12
|
+
) {
|
|
13
|
+
return messages.map((message) => {
|
|
14
|
+
let hasLargeToolOutput = false;
|
|
15
|
+
|
|
16
|
+
for (const part of message.parts) {
|
|
17
|
+
if ("toolCallId" in part && "output" in part && part.output != null) {
|
|
18
|
+
const outputTokens = estimateTokens(JSON.stringify(part.output));
|
|
19
|
+
if (outputTokens > maxTokensPerResult) {
|
|
20
|
+
hasLargeToolOutput = true;
|
|
21
|
+
break;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
if (!hasLargeToolOutput) return message;
|
|
27
|
+
|
|
28
|
+
const compressedParts = message.parts.map((part) => {
|
|
29
|
+
if (!("toolCallId" in part) || !("output" in part) || part.output == null) {
|
|
30
|
+
return part;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const outputStr = JSON.stringify(part.output);
|
|
34
|
+
const outputTokens = estimateTokens(outputStr);
|
|
35
|
+
|
|
36
|
+
if (outputTokens <= maxTokensPerResult) return part;
|
|
37
|
+
|
|
38
|
+
// Convert token budget back to character budget
|
|
39
|
+
const charBudget = maxTokensPerResult * CHARS_PER_TOKEN;
|
|
40
|
+
const truncatedOutput = outputStr.slice(0, charBudget) + "\n[...truncated]";
|
|
41
|
+
|
|
42
|
+
return { ...part, output: truncatedOutput } as typeof part;
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
return { ...message, parts: compressedParts };
|
|
46
|
+
});
|
|
47
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
export type { ContextConfig, ThreadContextMeta } from "./types.ts";
|
|
2
|
+
export { DEFAULT_CONTEXT_CONFIG } from "./types.ts";
|
|
3
|
+
export {
|
|
4
|
+
CHARS_PER_TOKEN,
|
|
5
|
+
estimateTokens,
|
|
6
|
+
estimateMessageTokens,
|
|
7
|
+
estimateMessagesTokens,
|
|
8
|
+
} from "./token-estimator.ts";
|
|
9
|
+
export { compressToolResults } from "./compressor.ts";
|
|
10
|
+
export { summarizeMessages } from "./summarizer.ts";
|
|
11
|
+
export { buildContextMessages, trimMessagesToFit } from "./builder.ts";
|
|
12
|
+
|
|
13
|
+
import type { UIMessage } from "ai";
|
|
14
|
+
import type { ResolvedCortexAgentConfig } from "../../config.ts";
|
|
15
|
+
import type { Thread } from "../../types.ts";
|
|
16
|
+
import type { ThreadContextMeta } from "./types.ts";
|
|
17
|
+
import { estimateMessagesTokens } from "./token-estimator.ts";
|
|
18
|
+
import { summarizeMessages } from "./summarizer.ts";
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Post-response context optimization.
|
|
22
|
+
* Called fire-and-forget from onFinish — summarizes older messages
|
|
23
|
+
* when token usage exceeds the configured threshold.
|
|
24
|
+
*/
|
|
25
|
+
export async function optimizeThreadContext(
|
|
26
|
+
thread: Thread,
|
|
27
|
+
messages: UIMessage[],
|
|
28
|
+
config: ResolvedCortexAgentConfig,
|
|
29
|
+
) {
|
|
30
|
+
const contextConfig = config.context;
|
|
31
|
+
|
|
32
|
+
// 1. Estimate tokens for all messages
|
|
33
|
+
const estimates = estimateMessagesTokens(messages);
|
|
34
|
+
const totalEstimatedTokens = estimates.reduce((sum, e) => sum + e.tokens, 0);
|
|
35
|
+
|
|
36
|
+
// 2. Check if over summarization threshold
|
|
37
|
+
const threshold = contextConfig.maxContextTokens * contextConfig.summarizationThreshold;
|
|
38
|
+
|
|
39
|
+
if (totalEstimatedTokens <= threshold) {
|
|
40
|
+
// Update token estimate but skip summarization
|
|
41
|
+
const meta: ThreadContextMeta = {
|
|
42
|
+
summary: thread.contextMeta?.summary ?? null,
|
|
43
|
+
summaryUpToMessageId: thread.contextMeta?.summaryUpToMessageId ?? null,
|
|
44
|
+
totalEstimatedTokens,
|
|
45
|
+
lastOptimizedAt: new Date().toISOString(),
|
|
46
|
+
};
|
|
47
|
+
await config.db.threads.updateContextMeta(thread.id, meta);
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// 3. Keep the most recent messages unsummarized (they're the hot context)
|
|
52
|
+
const recentCount = Math.min(contextConfig.recentMessagesToKeep, messages.length);
|
|
53
|
+
const messagesToSummarize = messages.slice(0, messages.length - recentCount);
|
|
54
|
+
|
|
55
|
+
if (messagesToSummarize.length === 0) return;
|
|
56
|
+
|
|
57
|
+
// 4. Determine model config for summarization
|
|
58
|
+
const modelConfig = contextConfig.summarizationModel ?? config.model;
|
|
59
|
+
|
|
60
|
+
// 5. Generate summary incorporating any existing summary
|
|
61
|
+
const existingSummary = thread.contextMeta?.summary ?? null;
|
|
62
|
+
|
|
63
|
+
const summary = await summarizeMessages(messagesToSummarize, existingSummary, modelConfig);
|
|
64
|
+
|
|
65
|
+
// 6. Update thread context meta
|
|
66
|
+
const lastSummarizedMessage = messagesToSummarize.at(-1);
|
|
67
|
+
const meta = {
|
|
68
|
+
summary,
|
|
69
|
+
summaryUpToMessageId: lastSummarizedMessage?.id ?? null,
|
|
70
|
+
totalEstimatedTokens,
|
|
71
|
+
lastOptimizedAt: new Date().toISOString(),
|
|
72
|
+
} satisfies ThreadContextMeta;
|
|
73
|
+
|
|
74
|
+
await config.db.threads.updateContextMeta(thread.id, meta);
|
|
75
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { generateText } from "ai";
|
|
2
|
+
import type { UIMessage } from "ai";
|
|
3
|
+
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
4
|
+
import type { ContextConfig } from "./types.ts";
|
|
5
|
+
|
|
6
|
+
type SummarizationModelConfig = NonNullable<ContextConfig["summarizationModel"]>;
|
|
7
|
+
|
|
8
|
+
export async function summarizeMessages(
|
|
9
|
+
messages: UIMessage[],
|
|
10
|
+
existingSummary: string | null,
|
|
11
|
+
modelConfig: SummarizationModelConfig,
|
|
12
|
+
) {
|
|
13
|
+
const provider = createOpenAICompatible({
|
|
14
|
+
name: modelConfig.providerName ?? "summarization-provider",
|
|
15
|
+
baseURL: modelConfig.baseURL,
|
|
16
|
+
apiKey: modelConfig.apiKey,
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
const model = provider.chatModel(modelConfig.modelName);
|
|
20
|
+
|
|
21
|
+
const conversationText = messages
|
|
22
|
+
.map(function (msg) {
|
|
23
|
+
const textParts = msg.parts
|
|
24
|
+
.filter((p): p is Extract<typeof p, { type: "text" }> => p.type === "text")
|
|
25
|
+
.map((p) => p.text);
|
|
26
|
+
return `[${msg.role}]: ${textParts.join(" ")}`;
|
|
27
|
+
})
|
|
28
|
+
.join("\n");
|
|
29
|
+
|
|
30
|
+
const summaryContext = existingSummary
|
|
31
|
+
? `\nPrior summary of earlier messages:\n${existingSummary}\n`
|
|
32
|
+
: "";
|
|
33
|
+
|
|
34
|
+
const { text } = await generateText({
|
|
35
|
+
model,
|
|
36
|
+
system: `You are a precise conversation summarizer. Produce a concise summary that preserves:
|
|
37
|
+
- Key decisions and conclusions
|
|
38
|
+
- Important entities (names, IDs, URLs, values)
|
|
39
|
+
- User intent and goals
|
|
40
|
+
- Any unresolved questions or next steps
|
|
41
|
+
|
|
42
|
+
Maximum 500 tokens. Use bullet points. Do not include preamble.`,
|
|
43
|
+
prompt: `${summaryContext}
|
|
44
|
+
Summarize the following conversation:
|
|
45
|
+
|
|
46
|
+
${conversationText}`,
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
return text;
|
|
50
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import type { UIMessage } from "ai";
|
|
2
|
+
|
|
3
|
+
/** Average characters per token for English text. Used by the heuristic estimator. */
|
|
4
|
+
export const CHARS_PER_TOKEN = 4;
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Estimates token count for a string using the chars/4 heuristic.
|
|
8
|
+
* ~10% accuracy for English text — good enough for budget decisions.
|
|
9
|
+
*/
|
|
10
|
+
export function estimateTokens(text: string) {
|
|
11
|
+
return Math.ceil(text.length / CHARS_PER_TOKEN);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Estimates token count for a single UIMessage by walking its parts.
|
|
16
|
+
*/
|
|
17
|
+
export function estimateMessageTokens(message: UIMessage) {
|
|
18
|
+
let tokens = 0;
|
|
19
|
+
|
|
20
|
+
for (const part of message.parts) {
|
|
21
|
+
if (part.type === "text" || part.type === "reasoning") {
|
|
22
|
+
tokens += estimateTokens(part.text);
|
|
23
|
+
} else if ("toolCallId" in part) {
|
|
24
|
+
// Tool invocation parts (tool-${name})
|
|
25
|
+
if ("input" in part && part.input != null) {
|
|
26
|
+
tokens += estimateTokens(JSON.stringify(part.input));
|
|
27
|
+
}
|
|
28
|
+
if ("output" in part && part.output != null) {
|
|
29
|
+
tokens += estimateTokens(JSON.stringify(part.output));
|
|
30
|
+
}
|
|
31
|
+
} else if (part.type === "source-url") {
|
|
32
|
+
tokens += estimateTokens(part.url);
|
|
33
|
+
} else if (part.type === "source-document") {
|
|
34
|
+
tokens += estimateTokens(part.title);
|
|
35
|
+
} else if (part.type === "file") {
|
|
36
|
+
tokens += estimateTokens(part.url);
|
|
37
|
+
} else {
|
|
38
|
+
// step-start, data parts, etc.
|
|
39
|
+
tokens += 5;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Per-message overhead (role, metadata framing)
|
|
44
|
+
tokens += 4;
|
|
45
|
+
|
|
46
|
+
return tokens;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Estimates token counts for an array of UIMessages.
|
|
51
|
+
* Returns per-message estimates in the same order.
|
|
52
|
+
*/
|
|
53
|
+
export function estimateMessagesTokens(messages: UIMessage[]) {
|
|
54
|
+
return messages.map(function (message) {
|
|
55
|
+
return {
|
|
56
|
+
message,
|
|
57
|
+
tokens: estimateMessageTokens(message),
|
|
58
|
+
};
|
|
59
|
+
});
|
|
60
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/** Tuning knobs for the token-aware context window. */
export type ContextConfig = {
  /** Hard ceiling on estimated tokens the model context may hold. */
  maxContextTokens: number;
  /** Tokens held back from the budget for the system prompt + tool definitions. */
  reservedTokenBudget: number;
  /** Fraction of maxContextTokens above which post-response summarization runs. */
  summarizationThreshold: number;
  /** Optional dedicated OpenAI-compatible endpoint used for summarization. */
  summarizationModel?: {
    baseURL: string;
    apiKey: string;
    modelName: string;
    /** Provider label; defaults to "summarization-provider" when omitted. */
    providerName?: string;
  };
  /** Per-tool-result token cap; larger outputs get truncated. */
  toolResultMaxTokens: number;
  /** Count of newest messages that are never summarized away. */
  recentMessagesToKeep: number;
};

/** Summarization bookkeeping persisted per thread (thread.contextMeta). */
export type ThreadContextMeta = {
  /** Rolling summary of older messages, or null before the first pass. */
  summary: string | null;
  /** Id of the newest message covered by `summary`. */
  summaryUpToMessageId: string | null;
  /** Heuristic token total for the thread at the last optimization. */
  totalEstimatedTokens: number;
  /** ISO-8601 timestamp of the last optimization pass. */
  lastOptimizedAt: string | null;
};

export const DEFAULT_CONTEXT_CONFIG: ContextConfig = {
  maxContextTokens: 120_000,
  reservedTokenBudget: 8_000,
  summarizationThreshold: 0.75,
  toolResultMaxTokens: 2_000,
  recentMessagesToKeep: 6,
};
|