@m6d/cortex-server 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/adapters/database.d.ts +3 -0
- package/dist/src/ai/active-streams.d.ts +14 -0
- package/dist/src/ai/active-streams.test.d.ts +1 -0
- package/dist/src/ai/context/builder.d.ts +24 -0
- package/dist/src/ai/context/compressor.d.ts +7 -0
- package/dist/src/ai/context/index.d.ts +15 -0
- package/dist/src/ai/context/summarizer.d.ts +5 -0
- package/dist/src/ai/context/token-estimator.d.ts +20 -0
- package/dist/src/ai/context/types.d.ts +20 -0
- package/dist/src/ai/prompt.d.ts +6 -1
- package/dist/src/config.d.ts +4 -0
- package/dist/src/db/schema.d.ts +19 -1
- package/dist/src/index.d.ts +1 -0
- package/dist/src/routes/ws.d.ts +5 -1
- package/dist/src/types.d.ts +32 -14
- package/dist/src/ws/connections.d.ts +3 -3
- package/dist/src/ws/events.d.ts +33 -2
- package/dist/src/ws/index.d.ts +1 -1
- package/dist/src/ws/notify.d.ts +1 -1
- package/package.json +3 -2
- package/src/adapters/database.ts +3 -0
- package/src/adapters/mssql.ts +26 -6
- package/src/ai/active-streams.test.ts +21 -0
- package/src/ai/active-streams.ts +123 -0
- package/src/ai/context/builder.ts +94 -0
- package/src/ai/context/compressor.ts +47 -0
- package/src/ai/context/index.ts +75 -0
- package/src/ai/context/summarizer.ts +50 -0
- package/src/ai/context/token-estimator.ts +60 -0
- package/src/ai/context/types.ts +28 -0
- package/src/ai/index.ts +158 -22
- package/src/ai/prompt.ts +21 -15
- package/src/ai/tools/execute-code.tool.ts +79 -27
- package/src/ai/tools/query-graph.tool.ts +1 -1
- package/src/cli/extract-endpoints.ts +18 -18
- package/src/config.ts +4 -0
- package/src/db/migrations/20260315000000_add_context_meta/migration.sql +1 -0
- package/src/db/schema.ts +6 -1
- package/src/factory.ts +11 -1
- package/src/index.ts +2 -0
- package/src/routes/chat.ts +46 -1
- package/src/routes/threads.ts +46 -9
- package/src/routes/ws.ts +37 -23
- package/src/types.ts +37 -13
- package/src/ws/connections.ts +15 -9
- package/src/ws/events.ts +35 -2
- package/src/ws/index.ts +9 -1
- package/src/ws/notify.ts +2 -2
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import type { UIMessage } from "ai";
|
|
2
|
+
import { generateId } from "ai";
|
|
3
|
+
import type { DatabaseAdapter } from "../../adapters/database.ts";
|
|
4
|
+
import type { MessageMetadata, Thread } from "../../types.ts";
|
|
5
|
+
import type { ContextConfig, ThreadContextMeta } from "./types.ts";
|
|
6
|
+
import { compressToolResults } from "./compressor.ts";
|
|
7
|
+
import { estimateMessageTokens } from "./token-estimator.ts";
|
|
8
|
+
|
|
9
|
+
type ContextBuildResult = {
|
|
10
|
+
messages: UIMessage<MessageMetadata>[];
|
|
11
|
+
allMessages: UIMessage<MessageMetadata>[];
|
|
12
|
+
summary: string | null;
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Builds a token-aware context window from stored messages.
|
|
17
|
+
*
|
|
18
|
+
* 1. Loads messages from DB with generous limit
|
|
19
|
+
* 2. Reads existing summary from thread.contextMeta
|
|
20
|
+
* 3. Compresses large tool results
|
|
21
|
+
* 4. Walks messages newest-to-oldest, accumulating token estimates
|
|
22
|
+
* 5. Stops when adding the next message would exceed the budget
|
|
23
|
+
* 6. Prepends summary as synthetic message if older messages were trimmed
|
|
24
|
+
*/
|
|
25
|
+
export async function buildContextMessages(
|
|
26
|
+
userId: string,
|
|
27
|
+
thread: Thread,
|
|
28
|
+
db: DatabaseAdapter,
|
|
29
|
+
contextConfig: ContextConfig,
|
|
30
|
+
) {
|
|
31
|
+
// 1. Load messages with generous limit
|
|
32
|
+
const storedMessages = await db.messages.list(userId, thread.id, { limit: 50 });
|
|
33
|
+
const allMessages = storedMessages.map((m) => m.content);
|
|
34
|
+
|
|
35
|
+
// 2. Read existing summary
|
|
36
|
+
const contextMeta = thread.contextMeta;
|
|
37
|
+
const summary = contextMeta?.summary ?? null;
|
|
38
|
+
|
|
39
|
+
// 3. Compress large tool results
|
|
40
|
+
const compressed = compressToolResults(allMessages, contextConfig.toolResultMaxTokens);
|
|
41
|
+
|
|
42
|
+
// 4. Walk newest-to-oldest, accumulating token estimates.
|
|
43
|
+
// Reserve space for the system prompt + tool definitions (they share the context window).
|
|
44
|
+
const budget = contextConfig.maxContextTokens - contextConfig.reservedTokenBudget;
|
|
45
|
+
const selected: UIMessage<MessageMetadata>[] = [];
|
|
46
|
+
let accumulated = 0;
|
|
47
|
+
|
|
48
|
+
for (let i = compressed.length - 1; i >= 0; i--) {
|
|
49
|
+
const msgTokens = estimateMessageTokens(compressed[i]!);
|
|
50
|
+
|
|
51
|
+
if (accumulated + msgTokens > budget && selected.length > 0) {
|
|
52
|
+
break;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
accumulated += msgTokens;
|
|
56
|
+
selected.unshift(compressed[i]!);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// 5. If we trimmed messages and a summary exists, prepend it.
|
|
60
|
+
// Make room for the summary by evicting the oldest messages if needed.
|
|
61
|
+
if (summary && selected.length < allMessages.length) {
|
|
62
|
+
const summaryMessage = {
|
|
63
|
+
id: generateId(),
|
|
64
|
+
role: "user",
|
|
65
|
+
parts: [{ type: "text", text: `[Previous conversation summary]: ${summary}` }],
|
|
66
|
+
} satisfies UIMessage;
|
|
67
|
+
|
|
68
|
+
const summaryTokens = estimateMessageTokens(summaryMessage);
|
|
69
|
+
const trimmed = trimMessagesToFit(selected, budget - summaryTokens);
|
|
70
|
+
trimmed.unshift(summaryMessage);
|
|
71
|
+
return { messages: trimmed, allMessages, summary };
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
return { messages: selected, allMessages, summary } satisfies ContextBuildResult;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Drops the oldest messages until the total estimated tokens fit within `budget`.
|
|
79
|
+
* Always keeps at least the most recent message.
|
|
80
|
+
*/
|
|
81
|
+
export function trimMessagesToFit(messages: UIMessage<MessageMetadata>[], budget: number) {
|
|
82
|
+
let total = 0;
|
|
83
|
+
for (const msg of messages) {
|
|
84
|
+
total += estimateMessageTokens(msg);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
const trimmed = [...messages];
|
|
88
|
+
while (total > budget && trimmed.length > 1) {
|
|
89
|
+
const evicted = trimmed.shift()!;
|
|
90
|
+
total -= estimateMessageTokens(evicted);
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
return trimmed;
|
|
94
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { UIMessage } from "ai";
|
|
2
|
+
import { estimateTokens, CHARS_PER_TOKEN } from "./token-estimator.ts";
|
|
3
|
+
import type { MessageMetadata } from "src/types.ts";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Returns a new array of messages with large tool outputs truncated
|
|
7
|
+
* to `maxTokensPerResult`. Does not mutate the input messages.
|
|
8
|
+
*/
|
|
9
|
+
export function compressToolResults(
|
|
10
|
+
messages: UIMessage<MessageMetadata>[],
|
|
11
|
+
maxTokensPerResult: number,
|
|
12
|
+
) {
|
|
13
|
+
return messages.map((message) => {
|
|
14
|
+
let hasLargeToolOutput = false;
|
|
15
|
+
|
|
16
|
+
for (const part of message.parts) {
|
|
17
|
+
if ("toolCallId" in part && "output" in part && part.output != null) {
|
|
18
|
+
const outputTokens = estimateTokens(JSON.stringify(part.output));
|
|
19
|
+
if (outputTokens > maxTokensPerResult) {
|
|
20
|
+
hasLargeToolOutput = true;
|
|
21
|
+
break;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
if (!hasLargeToolOutput) return message;
|
|
27
|
+
|
|
28
|
+
const compressedParts = message.parts.map((part) => {
|
|
29
|
+
if (!("toolCallId" in part) || !("output" in part) || part.output == null) {
|
|
30
|
+
return part;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const outputStr = JSON.stringify(part.output);
|
|
34
|
+
const outputTokens = estimateTokens(outputStr);
|
|
35
|
+
|
|
36
|
+
if (outputTokens <= maxTokensPerResult) return part;
|
|
37
|
+
|
|
38
|
+
// Convert token budget back to character budget
|
|
39
|
+
const charBudget = maxTokensPerResult * CHARS_PER_TOKEN;
|
|
40
|
+
const truncatedOutput = outputStr.slice(0, charBudget) + "\n[...truncated]";
|
|
41
|
+
|
|
42
|
+
return { ...part, output: truncatedOutput } as typeof part;
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
return { ...message, parts: compressedParts };
|
|
46
|
+
});
|
|
47
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
export type { ContextConfig, ThreadContextMeta } from "./types.ts";
|
|
2
|
+
export { DEFAULT_CONTEXT_CONFIG } from "./types.ts";
|
|
3
|
+
export {
|
|
4
|
+
CHARS_PER_TOKEN,
|
|
5
|
+
estimateTokens,
|
|
6
|
+
estimateMessageTokens,
|
|
7
|
+
estimateMessagesTokens,
|
|
8
|
+
} from "./token-estimator.ts";
|
|
9
|
+
export { compressToolResults } from "./compressor.ts";
|
|
10
|
+
export { summarizeMessages } from "./summarizer.ts";
|
|
11
|
+
export { buildContextMessages, trimMessagesToFit } from "./builder.ts";
|
|
12
|
+
|
|
13
|
+
import type { UIMessage } from "ai";
|
|
14
|
+
import type { ResolvedCortexAgentConfig } from "../../config.ts";
|
|
15
|
+
import type { Thread } from "../../types.ts";
|
|
16
|
+
import type { ThreadContextMeta } from "./types.ts";
|
|
17
|
+
import { estimateMessagesTokens } from "./token-estimator.ts";
|
|
18
|
+
import { summarizeMessages } from "./summarizer.ts";
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Post-response context optimization.
|
|
22
|
+
* Called fire-and-forget from onFinish — summarizes older messages
|
|
23
|
+
* when token usage exceeds the configured threshold.
|
|
24
|
+
*/
|
|
25
|
+
export async function optimizeThreadContext(
|
|
26
|
+
thread: Thread,
|
|
27
|
+
messages: UIMessage[],
|
|
28
|
+
config: ResolvedCortexAgentConfig,
|
|
29
|
+
) {
|
|
30
|
+
const contextConfig = config.context;
|
|
31
|
+
|
|
32
|
+
// 1. Estimate tokens for all messages
|
|
33
|
+
const estimates = estimateMessagesTokens(messages);
|
|
34
|
+
const totalEstimatedTokens = estimates.reduce((sum, e) => sum + e.tokens, 0);
|
|
35
|
+
|
|
36
|
+
// 2. Check if over summarization threshold
|
|
37
|
+
const threshold = contextConfig.maxContextTokens * contextConfig.summarizationThreshold;
|
|
38
|
+
|
|
39
|
+
if (totalEstimatedTokens <= threshold) {
|
|
40
|
+
// Update token estimate but skip summarization
|
|
41
|
+
const meta: ThreadContextMeta = {
|
|
42
|
+
summary: thread.contextMeta?.summary ?? null,
|
|
43
|
+
summaryUpToMessageId: thread.contextMeta?.summaryUpToMessageId ?? null,
|
|
44
|
+
totalEstimatedTokens,
|
|
45
|
+
lastOptimizedAt: new Date().toISOString(),
|
|
46
|
+
};
|
|
47
|
+
await config.db.threads.updateContextMeta(thread.id, meta);
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
// 3. Keep the most recent messages unsummarized (they're the hot context)
|
|
52
|
+
const recentCount = Math.min(contextConfig.recentMessagesToKeep, messages.length);
|
|
53
|
+
const messagesToSummarize = messages.slice(0, messages.length - recentCount);
|
|
54
|
+
|
|
55
|
+
if (messagesToSummarize.length === 0) return;
|
|
56
|
+
|
|
57
|
+
// 4. Determine model config for summarization
|
|
58
|
+
const modelConfig = contextConfig.summarizationModel ?? config.model;
|
|
59
|
+
|
|
60
|
+
// 5. Generate summary incorporating any existing summary
|
|
61
|
+
const existingSummary = thread.contextMeta?.summary ?? null;
|
|
62
|
+
|
|
63
|
+
const summary = await summarizeMessages(messagesToSummarize, existingSummary, modelConfig);
|
|
64
|
+
|
|
65
|
+
// 6. Update thread context meta
|
|
66
|
+
const lastSummarizedMessage = messagesToSummarize.at(-1);
|
|
67
|
+
const meta = {
|
|
68
|
+
summary,
|
|
69
|
+
summaryUpToMessageId: lastSummarizedMessage?.id ?? null,
|
|
70
|
+
totalEstimatedTokens,
|
|
71
|
+
lastOptimizedAt: new Date().toISOString(),
|
|
72
|
+
} satisfies ThreadContextMeta;
|
|
73
|
+
|
|
74
|
+
await config.db.threads.updateContextMeta(thread.id, meta);
|
|
75
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { generateText } from "ai";
|
|
2
|
+
import type { UIMessage } from "ai";
|
|
3
|
+
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
4
|
+
import type { ContextConfig } from "./types.ts";
|
|
5
|
+
|
|
6
|
+
type SummarizationModelConfig = NonNullable<ContextConfig["summarizationModel"]>;
|
|
7
|
+
|
|
8
|
+
export async function summarizeMessages(
|
|
9
|
+
messages: UIMessage[],
|
|
10
|
+
existingSummary: string | null,
|
|
11
|
+
modelConfig: SummarizationModelConfig,
|
|
12
|
+
) {
|
|
13
|
+
const provider = createOpenAICompatible({
|
|
14
|
+
name: modelConfig.providerName ?? "summarization-provider",
|
|
15
|
+
baseURL: modelConfig.baseURL,
|
|
16
|
+
apiKey: modelConfig.apiKey,
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
const model = provider.chatModel(modelConfig.modelName);
|
|
20
|
+
|
|
21
|
+
const conversationText = messages
|
|
22
|
+
.map(function (msg) {
|
|
23
|
+
const textParts = msg.parts
|
|
24
|
+
.filter((p): p is Extract<typeof p, { type: "text" }> => p.type === "text")
|
|
25
|
+
.map((p) => p.text);
|
|
26
|
+
return `[${msg.role}]: ${textParts.join(" ")}`;
|
|
27
|
+
})
|
|
28
|
+
.join("\n");
|
|
29
|
+
|
|
30
|
+
const summaryContext = existingSummary
|
|
31
|
+
? `\nPrior summary of earlier messages:\n${existingSummary}\n`
|
|
32
|
+
: "";
|
|
33
|
+
|
|
34
|
+
const { text } = await generateText({
|
|
35
|
+
model,
|
|
36
|
+
system: `You are a precise conversation summarizer. Produce a concise summary that preserves:
|
|
37
|
+
- Key decisions and conclusions
|
|
38
|
+
- Important entities (names, IDs, URLs, values)
|
|
39
|
+
- User intent and goals
|
|
40
|
+
- Any unresolved questions or next steps
|
|
41
|
+
|
|
42
|
+
Maximum 500 tokens. Use bullet points. Do not include preamble.`,
|
|
43
|
+
prompt: `${summaryContext}
|
|
44
|
+
Summarize the following conversation:
|
|
45
|
+
|
|
46
|
+
${conversationText}`,
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
return text;
|
|
50
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import type { UIMessage } from "ai";
|
|
2
|
+
|
|
3
|
+
/** Average characters per token for English text. Used by the heuristic estimator. */
|
|
4
|
+
export const CHARS_PER_TOKEN = 4;
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Estimates token count for a string using the chars/4 heuristic.
|
|
8
|
+
* ~10% accuracy for English text — good enough for budget decisions.
|
|
9
|
+
*/
|
|
10
|
+
export function estimateTokens(text: string) {
|
|
11
|
+
return Math.ceil(text.length / CHARS_PER_TOKEN);
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Estimates token count for a single UIMessage by walking its parts.
|
|
16
|
+
*/
|
|
17
|
+
export function estimateMessageTokens(message: UIMessage) {
|
|
18
|
+
let tokens = 0;
|
|
19
|
+
|
|
20
|
+
for (const part of message.parts) {
|
|
21
|
+
if (part.type === "text" || part.type === "reasoning") {
|
|
22
|
+
tokens += estimateTokens(part.text);
|
|
23
|
+
} else if ("toolCallId" in part) {
|
|
24
|
+
// Tool invocation parts (tool-${name})
|
|
25
|
+
if ("input" in part && part.input != null) {
|
|
26
|
+
tokens += estimateTokens(JSON.stringify(part.input));
|
|
27
|
+
}
|
|
28
|
+
if ("output" in part && part.output != null) {
|
|
29
|
+
tokens += estimateTokens(JSON.stringify(part.output));
|
|
30
|
+
}
|
|
31
|
+
} else if (part.type === "source-url") {
|
|
32
|
+
tokens += estimateTokens(part.url);
|
|
33
|
+
} else if (part.type === "source-document") {
|
|
34
|
+
tokens += estimateTokens(part.title);
|
|
35
|
+
} else if (part.type === "file") {
|
|
36
|
+
tokens += estimateTokens(part.url);
|
|
37
|
+
} else {
|
|
38
|
+
// step-start, data parts, etc.
|
|
39
|
+
tokens += 5;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Per-message overhead (role, metadata framing)
|
|
44
|
+
tokens += 4;
|
|
45
|
+
|
|
46
|
+
return tokens;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Estimates token counts for an array of UIMessages.
|
|
51
|
+
* Returns per-message estimates in the same order.
|
|
52
|
+
*/
|
|
53
|
+
export function estimateMessagesTokens(messages: UIMessage[]) {
|
|
54
|
+
return messages.map(function (message) {
|
|
55
|
+
return {
|
|
56
|
+
message,
|
|
57
|
+
tokens: estimateMessageTokens(message),
|
|
58
|
+
};
|
|
59
|
+
});
|
|
60
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/** Tunable budgets and settings for token-aware context building. */
export type ContextConfig = {
  // Hard cap on tokens the context window may hold.
  maxContextTokens: number;
  // Tokens held back for the system prompt + tool definitions
  // (subtracted from maxContextTokens when building the window).
  reservedTokenBudget: number;
  // Fraction of maxContextTokens above which summarization kicks in.
  summarizationThreshold: number;
  // Optional dedicated model for summarization; when omitted the caller
  // falls back to the main model.
  summarizationModel?: {
    baseURL: string;
    apiKey: string;
    modelName: string;
    providerName?: string;
  };
  // Per-tool-result token cap; larger serialized outputs are truncated.
  toolResultMaxTokens: number;
  // Number of newest messages always kept out of summarization.
  recentMessagesToKeep: number;
};

/** Per-thread context bookkeeping persisted on the thread. */
export type ThreadContextMeta = {
  // Rolling summary of older messages, or null if none generated yet.
  summary: string | null;
  // ID of the last message covered by the summary, or null.
  summaryUpToMessageId: string | null;
  // Heuristic token estimate for the full message history.
  totalEstimatedTokens: number;
  // ISO-8601 timestamp of the last optimization pass, or null.
  lastOptimizedAt: string | null;
};

/** Default context configuration. */
export const DEFAULT_CONTEXT_CONFIG: ContextConfig = {
  maxContextTokens: 120_000,
  reservedTokenBudget: 8_000,
  summarizationThreshold: 0.75,
  toolResultMaxTokens: 2_000,
  recentMessagesToKeep: 6,
};
|
package/src/ai/index.ts
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
import {
|
|
2
|
+
type UIMessage,
|
|
2
3
|
type ToolSet,
|
|
3
4
|
convertToModelMessages,
|
|
4
5
|
generateId,
|
|
5
6
|
generateText,
|
|
6
7
|
safeValidateUIMessages,
|
|
8
|
+
stepCountIs,
|
|
7
9
|
streamText,
|
|
8
10
|
} from "ai";
|
|
9
11
|
import { HTTPException } from "hono/http-exception";
|
|
10
12
|
import type { ResolvedCortexAgentConfig } from "../config.ts";
|
|
11
|
-
import type { Thread } from "../types.ts";
|
|
13
|
+
import type { MessageMetadata, Thread } from "../types.ts";
|
|
12
14
|
import { createModel, createEmbeddingModel } from "./helpers.ts";
|
|
13
|
-
import { buildSystemPrompt } from "./prompt.ts";
|
|
15
|
+
import { buildSystemPrompt, resolveSession } from "./prompt.ts";
|
|
14
16
|
import { createQueryGraphTool } from "./tools/query-graph.tool.ts";
|
|
15
17
|
import { createCallEndpointTool } from "./tools/call-endpoint.tool.ts";
|
|
16
18
|
import { createExecuteCodeTool } from "./tools/execute-code.tool.ts";
|
|
@@ -19,6 +21,15 @@ import { createRequestInterceptor } from "./interceptors/request-interceptor.ts"
|
|
|
19
21
|
import { createNeo4jClient } from "../graph/neo4j.ts";
|
|
20
22
|
import { resolveFromGraph } from "../graph/resolver.ts";
|
|
21
23
|
import { notify } from "../ws/index.ts";
|
|
24
|
+
import { buildContextMessages } from "./context/builder.ts";
|
|
25
|
+
import { optimizeThreadContext, estimateTokens, trimMessagesToFit } from "./context/index.ts";
|
|
26
|
+
import {
|
|
27
|
+
registerStream,
|
|
28
|
+
attachSseStream,
|
|
29
|
+
removeStream,
|
|
30
|
+
isStreamRunning,
|
|
31
|
+
} from "./active-streams.ts";
|
|
32
|
+
import { toThreadSummary } from "../types.ts";
|
|
22
33
|
|
|
23
34
|
export async function stream(
|
|
24
35
|
messages: unknown[],
|
|
@@ -27,6 +38,8 @@ export async function stream(
|
|
|
27
38
|
token: string,
|
|
28
39
|
config: ResolvedCortexAgentConfig,
|
|
29
40
|
) {
|
|
41
|
+
const abortController = new AbortController();
|
|
42
|
+
|
|
30
43
|
const validationResult = await safeValidateUIMessages({ messages });
|
|
31
44
|
if (!validationResult.success) {
|
|
32
45
|
throw new HTTPException(423, { message: "Invalid messages format" });
|
|
@@ -34,15 +47,19 @@ export async function stream(
|
|
|
34
47
|
|
|
35
48
|
const validatedMessages = validationResult.data;
|
|
36
49
|
await config.db.messages.upsert(thread.id, validatedMessages);
|
|
50
|
+
const updatedThread = await config.db.threads.touch(thread.id);
|
|
37
51
|
|
|
38
|
-
const
|
|
39
|
-
.list(userId, thread.id, { limit: 20 })
|
|
40
|
-
.then((x) => x.map((y) => y.content));
|
|
52
|
+
const activeStream = registerStream(thread.id, abortController);
|
|
41
53
|
|
|
42
|
-
|
|
54
|
+
notify(userId, thread.agentId, {
|
|
55
|
+
type: "thread:run-started",
|
|
56
|
+
payload: { thread: toThreadSummary(updatedThread, true) },
|
|
57
|
+
});
|
|
43
58
|
|
|
59
|
+
// Extract prompt from the just-upserted messages (last user message)
|
|
60
|
+
// so we can start graph resolution without waiting for history fetch
|
|
44
61
|
const prompt =
|
|
45
|
-
|
|
62
|
+
validatedMessages
|
|
46
63
|
.filter((x) => x.role === "user")
|
|
47
64
|
.at(-1)
|
|
48
65
|
?.parts.find((x) => x.type === "text")?.text ?? "";
|
|
@@ -51,15 +68,24 @@ export async function stream(
|
|
|
51
68
|
const embeddingModel = createEmbeddingModel(config.embedding);
|
|
52
69
|
const neo4j = createNeo4jClient(config.neo4j, embeddingModel);
|
|
53
70
|
|
|
54
|
-
//
|
|
55
|
-
const resolved = await
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
71
|
+
// Run independent operations in parallel
|
|
72
|
+
const [contextResult, resolved, session] = await Promise.all([
|
|
73
|
+
// Branch A: Load messages + build token-aware context window
|
|
74
|
+
buildContextMessages(userId, thread, config.db, config.context),
|
|
75
|
+
// Branch B: Resolve graph context (400-2000ms, the bottleneck)
|
|
76
|
+
resolveFromGraph(prompt, {
|
|
77
|
+
neo4j,
|
|
78
|
+
embeddingModel,
|
|
79
|
+
reranker: config.reranker,
|
|
80
|
+
}),
|
|
81
|
+
// Branch C: Resolve session data
|
|
82
|
+
resolveSession(config, thread, token),
|
|
83
|
+
]);
|
|
84
|
+
|
|
85
|
+
const { messages: contextMessages, allMessages: originalMessages } = contextResult;
|
|
60
86
|
|
|
61
87
|
// Build tools
|
|
62
|
-
const builtInTools:
|
|
88
|
+
const builtInTools: ToolSet = {
|
|
63
89
|
captureFiles: captureFilesTool,
|
|
64
90
|
queryGraph: createQueryGraphTool(neo4j),
|
|
65
91
|
};
|
|
@@ -85,27 +111,106 @@ export async function stream(
|
|
|
85
111
|
...config.tools,
|
|
86
112
|
} as ToolSet;
|
|
87
113
|
|
|
88
|
-
const systemPrompt = await buildSystemPrompt(config,
|
|
114
|
+
const systemPrompt = await buildSystemPrompt(config, resolved, session);
|
|
115
|
+
|
|
116
|
+
// The context builder reserved a static token budget for the system prompt + tools.
|
|
117
|
+
// Now that we have the actual values, verify the reserve was sufficient and trim
|
|
118
|
+
// the oldest messages if it wasn't.
|
|
119
|
+
const actualFixedCost = estimateTokens(systemPrompt) + estimateTokens(JSON.stringify(tools));
|
|
120
|
+
const { reservedTokenBudget, maxContextTokens } = config.context;
|
|
121
|
+
const trimmedMessages =
|
|
122
|
+
actualFixedCost > reservedTokenBudget
|
|
123
|
+
? trimMessagesToFit(contextMessages, maxContextTokens - actualFixedCost)
|
|
124
|
+
: contextMessages;
|
|
125
|
+
|
|
126
|
+
const recentMessages = await convertToModelMessages(trimmedMessages);
|
|
89
127
|
|
|
90
128
|
const result = streamText({
|
|
91
129
|
model,
|
|
92
130
|
system: systemPrompt,
|
|
93
131
|
tools,
|
|
94
132
|
messages: recentMessages,
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
},
|
|
133
|
+
abortSignal: abortController.signal,
|
|
134
|
+
stopWhen: stepCountIs(50),
|
|
98
135
|
});
|
|
99
136
|
|
|
100
|
-
return result.toUIMessageStreamResponse({
|
|
137
|
+
return result.toUIMessageStreamResponse<UIMessage<MessageMetadata>>({
|
|
101
138
|
originalMessages,
|
|
102
139
|
generateMessageId: generateId,
|
|
140
|
+
consumeSseStream: ({ stream: sseStream }) => {
|
|
141
|
+
attachSseStream(thread.id, sseStream);
|
|
142
|
+
},
|
|
103
143
|
onFinish: async ({ messages: finishedMessages, isAborted }) => {
|
|
104
144
|
if (isAborted) {
|
|
105
|
-
|
|
145
|
+
finalizeAbortedMessages(finishedMessages);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
// Record token usage (result promises reject on abort, so skip)
|
|
149
|
+
const lastAssistantMessage = finishedMessages
|
|
150
|
+
.filter((x) => x.role === "assistant")
|
|
151
|
+
.at(-1);
|
|
152
|
+
if (lastAssistantMessage && !isAborted) {
|
|
153
|
+
const providerMetadata = await result.providerMetadata;
|
|
154
|
+
const response = await result.response;
|
|
155
|
+
const usage = await result.totalUsage;
|
|
156
|
+
let metadata: MessageMetadata = {
|
|
157
|
+
isAborted,
|
|
158
|
+
providerMetadata,
|
|
159
|
+
modelId: response.modelId,
|
|
160
|
+
tokenUsage: {
|
|
161
|
+
input: {
|
|
162
|
+
noCache: usage.inputTokenDetails.noCacheTokens ?? 0,
|
|
163
|
+
cacheRead: usage.inputTokenDetails.cacheReadTokens ?? 0,
|
|
164
|
+
cacheWrite: usage.inputTokenDetails.cacheWriteTokens ?? 0,
|
|
165
|
+
total: usage.inputTokens ?? 0,
|
|
166
|
+
},
|
|
167
|
+
output: {
|
|
168
|
+
reasoning: usage.outputTokenDetails.reasoningTokens ?? 0,
|
|
169
|
+
text: usage.outputTokenDetails.textTokens ?? 0,
|
|
170
|
+
total: usage.outputTokens ?? 0,
|
|
171
|
+
},
|
|
172
|
+
total: usage.totalTokens ?? 0,
|
|
173
|
+
},
|
|
174
|
+
};
|
|
175
|
+
|
|
176
|
+
lastAssistantMessage.metadata = metadata;
|
|
177
|
+
} else if (lastAssistantMessage) {
|
|
178
|
+
lastAssistantMessage.metadata = {
|
|
179
|
+
isAborted,
|
|
180
|
+
modelId: "",
|
|
181
|
+
providerMetadata: undefined,
|
|
182
|
+
};
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
const persistedThread = await config.db.threads.getById(userId, thread.id);
|
|
186
|
+
if (!persistedThread) {
|
|
187
|
+
removeStream(thread.id);
|
|
188
|
+
return;
|
|
106
189
|
}
|
|
190
|
+
|
|
107
191
|
await config.db.messages.upsert(thread.id, finishedMessages);
|
|
108
192
|
config.onStreamFinish?.({ messages: finishedMessages, isAborted });
|
|
193
|
+
|
|
194
|
+
// XXX: we need to notify the user so that the client can
|
|
195
|
+
// fetch new messages. The client can't fetch messages
|
|
196
|
+
// immediately because messages may not have been saved yet.
|
|
197
|
+
notify(userId, persistedThread.agentId, {
|
|
198
|
+
type: "thread:messages-updated",
|
|
199
|
+
payload: {
|
|
200
|
+
threadId: thread.id,
|
|
201
|
+
thread: toThreadSummary(persistedThread, false),
|
|
202
|
+
},
|
|
203
|
+
});
|
|
204
|
+
|
|
205
|
+
setTimeout(() => removeStream(thread.id, activeStream.id), 10_000);
|
|
206
|
+
|
|
207
|
+
// Fire-and-forget: optimize context for next request
|
|
208
|
+
// Runs after response is delivered — no perceived latency
|
|
209
|
+
try {
|
|
210
|
+
optimizeThreadContext(thread, finishedMessages, config);
|
|
211
|
+
} catch (err) {
|
|
212
|
+
console.error("[cortex-server] Context optimization failed:", err);
|
|
213
|
+
}
|
|
109
214
|
},
|
|
110
215
|
});
|
|
111
216
|
}
|
|
@@ -130,8 +235,39 @@ going to do so or any other speech. Spit out only the title.`,
|
|
|
130
235
|
|
|
131
236
|
await config.db.threads.updateTitle(threadId, output ?? "");
|
|
132
237
|
|
|
133
|
-
|
|
238
|
+
const thread = await config.db.threads.getById(userId, threadId);
|
|
239
|
+
if (!thread) return;
|
|
240
|
+
|
|
241
|
+
notify(userId, thread.agentId, {
|
|
134
242
|
type: "thread:title-updated",
|
|
135
|
-
payload: {
|
|
243
|
+
payload: { thread: toThreadSummary(thread, isStreamRunning(thread.id)) },
|
|
244
|
+
});
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
// Tool part states representing a completed invocation; anything else is still in flight.
const TERMINAL_TOOL_STATES = new Set(["output-available", "output-error", "output-denied"]);

/**
 * Normalizes the trailing assistant message after an aborted generation:
 * still-streaming text/reasoning parts are marked done, and any tool call
 * that never reached a terminal state is converted to an error part.
 * Mutates the last message (reassigns its `parts` array) in place.
 */
function finalizeAbortedMessages(messages: UIMessage[]) {
  const lastMessage = messages.at(-1);
  // Only an in-progress assistant message needs finalizing.
  if (!lastMessage || lastMessage.role !== "assistant") return;

  lastMessage.parts = lastMessage.parts.map((part) => {
    // Close out text/reasoning parts that were cut off mid-stream.
    if ((part.type === "text" || part.type === "reasoning") && part.state === "streaming") {
      return { ...part, state: "done" as const };
    }

    if ("toolCallId" in part && "state" in part) {
      const toolState = part.state;
      if (!TERMINAL_TOOL_STATES.has(toolState)) {
        // Drop any pending approval and record the abort as a tool error.
        const { approval: _, ...rest } = part;
        return {
          ...rest,
          state: "output-error" as const,
          errorText: "Generation was aborted",
          output: undefined,
        };
      }
    }

    return part;
  });
}
|
package/src/ai/prompt.ts
CHANGED
|
@@ -2,29 +2,35 @@ import type { ResolvedContext } from "../graph/resolver.ts";
|
|
|
2
2
|
import type { ResolvedCortexAgentConfig } from "../config.ts";
|
|
3
3
|
import type { Thread } from "../types.ts";
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
/**
|
|
6
|
+
* Resolves session data for the thread, loading from the configured
|
|
7
|
+
* session loader if not already cached on the thread.
|
|
8
|
+
*/
|
|
9
|
+
export async function resolveSession(
|
|
6
10
|
config: ResolvedCortexAgentConfig,
|
|
7
|
-
prompt: string,
|
|
8
11
|
thread: Thread,
|
|
9
12
|
token: string,
|
|
13
|
+
) {
|
|
14
|
+
let session = thread.session;
|
|
15
|
+
|
|
16
|
+
if (!session && config.loadSessionData) {
|
|
17
|
+
session = await config.loadSessionData(token);
|
|
18
|
+
// Persist to DB for future cache hits
|
|
19
|
+
await config.db.threads.updateSession(thread.id, session);
|
|
20
|
+
thread.session = session;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
return session;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export async function buildSystemPrompt(
|
|
27
|
+
config: ResolvedCortexAgentConfig,
|
|
10
28
|
resolved: ResolvedContext | null,
|
|
29
|
+
session: Record<string, unknown> | null,
|
|
11
30
|
) {
|
|
12
31
|
// Resolve the consumer's base system prompt
|
|
13
32
|
let basePrompt: string;
|
|
14
33
|
if (typeof config.systemPrompt === "function") {
|
|
15
|
-
// Resolve session data with caching
|
|
16
|
-
let session: Record<string, unknown> | null = thread.session as Record<
|
|
17
|
-
string,
|
|
18
|
-
unknown
|
|
19
|
-
> | null;
|
|
20
|
-
|
|
21
|
-
if (!session && config.loadSessionData) {
|
|
22
|
-
session = await config.loadSessionData(token);
|
|
23
|
-
// Persist to DB for future cache hits
|
|
24
|
-
await config.db.threads.updateSession(thread.id, session);
|
|
25
|
-
thread.session = session;
|
|
26
|
-
}
|
|
27
|
-
|
|
28
34
|
basePrompt = await config.systemPrompt(session);
|
|
29
35
|
} else {
|
|
30
36
|
basePrompt = config.systemPrompt;
|