@m6d/cortex-server 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/src/adapters/database.d.ts +3 -0
  2. package/dist/src/ai/active-streams.d.ts +14 -0
  3. package/dist/src/ai/context/builder.d.ts +24 -0
  4. package/dist/src/ai/context/compressor.d.ts +7 -0
  5. package/dist/src/ai/context/index.d.ts +15 -0
  6. package/dist/src/ai/context/summarizer.d.ts +5 -0
  7. package/dist/src/ai/context/token-estimator.d.ts +20 -0
  8. package/dist/src/ai/context/types.d.ts +20 -0
  9. package/dist/src/ai/index.d.ts +1 -1
  10. package/dist/src/ai/prompt.d.ts +6 -1
  11. package/dist/src/config.d.ts +4 -0
  12. package/dist/src/db/schema.d.ts +19 -1
  13. package/dist/src/graph/expand-domains.d.ts +2 -0
  14. package/dist/src/graph/helpers.d.ts +5 -0
  15. package/dist/src/graph/resolver.d.ts +2 -0
  16. package/dist/src/graph/types.d.ts +6 -0
  17. package/dist/src/index.d.ts +1 -0
  18. package/dist/src/routes/ws.d.ts +5 -1
  19. package/dist/src/types.d.ts +32 -14
  20. package/dist/src/ws/connections.d.ts +3 -3
  21. package/dist/src/ws/events.d.ts +28 -3
  22. package/dist/src/ws/index.d.ts +1 -1
  23. package/dist/src/ws/notify.d.ts +1 -1
  24. package/package.json +1 -1
  25. package/src/adapters/database.ts +3 -0
  26. package/src/adapters/mssql.ts +26 -6
  27. package/src/ai/active-streams.ts +123 -0
  28. package/src/ai/context/builder.ts +94 -0
  29. package/src/ai/context/compressor.ts +47 -0
  30. package/src/ai/context/index.ts +75 -0
  31. package/src/ai/context/summarizer.ts +50 -0
  32. package/src/ai/context/token-estimator.ts +60 -0
  33. package/src/ai/context/types.ts +28 -0
  34. package/src/ai/index.ts +124 -29
  35. package/src/ai/prompt.ts +27 -18
  36. package/src/ai/tools/query-graph.tool.ts +1 -1
  37. package/src/cli/extract-endpoints.ts +18 -18
  38. package/src/config.ts +4 -0
  39. package/src/db/migrations/20260315000000_add_context_meta/migration.sql +1 -0
  40. package/src/db/schema.ts +6 -1
  41. package/src/factory.ts +11 -1
  42. package/src/graph/expand-domains.ts +276 -0
  43. package/src/graph/generate-cypher.ts +18 -5
  44. package/src/graph/helpers.ts +1 -0
  45. package/src/graph/resolver.ts +10 -0
  46. package/src/graph/seed.ts +5 -2
  47. package/src/graph/types.ts +6 -0
  48. package/src/index.ts +2 -0
  49. package/src/routes/chat.ts +47 -2
  50. package/src/routes/threads.ts +46 -9
  51. package/src/routes/ws.ts +37 -23
  52. package/src/types.ts +37 -13
  53. package/src/ws/connections.ts +15 -9
  54. package/src/ws/events.ts +31 -3
  55. package/src/ws/index.ts +9 -1
  56. package/src/ws/notify.ts +2 -2
@@ -0,0 +1,123 @@
1
+ type ActiveStream = {
2
+ id: string;
3
+ abortController: AbortController;
4
+ buffer: string[];
5
+ subscribers: Set<ReadableStreamDefaultController<string>>;
6
+ isComplete: boolean;
7
+ };
8
+
9
+ const streams = new Map<string, ActiveStream>();
10
+
11
+ export function registerStream(threadId: string, abortController: AbortController) {
12
+ const existing = streams.get(threadId);
13
+ if (existing) {
14
+ existing.abortController.abort();
15
+ for (const controller of existing.subscribers) {
16
+ try {
17
+ controller.close();
18
+ } catch {
19
+ /* already closed */
20
+ }
21
+ }
22
+ streams.delete(threadId);
23
+ }
24
+
25
+ const entry: ActiveStream = {
26
+ id: crypto.randomUUID(),
27
+ abortController,
28
+ buffer: [],
29
+ subscribers: new Set(),
30
+ isComplete: false,
31
+ };
32
+
33
+ streams.set(threadId, entry);
34
+ return entry;
35
+ }
36
+
37
+ export function attachSseStream(threadId: string, sseStream: ReadableStream<string>) {
38
+ const entry = streams.get(threadId);
39
+ if (!entry) return;
40
+
41
+ const reader = sseStream.getReader();
42
+ (async function consume() {
43
+ try {
44
+ while (true) {
45
+ const { done, value } = await reader.read();
46
+ if (done) break;
47
+ entry.buffer.push(value);
48
+ for (const controller of entry.subscribers) {
49
+ try {
50
+ controller.enqueue(value);
51
+ } catch {
52
+ /* subscriber cancelled */
53
+ }
54
+ }
55
+ }
56
+ } catch {
57
+ /* stream aborted or errored */
58
+ } finally {
59
+ entry.isComplete = true;
60
+ for (const controller of entry.subscribers) {
61
+ try {
62
+ controller.close();
63
+ } catch {
64
+ /* already closed */
65
+ }
66
+ }
67
+ }
68
+ })();
69
+ }
70
+
71
+ export function subscribe(threadId: string) {
72
+ const entry = streams.get(threadId);
73
+ if (!entry) return null;
74
+
75
+ let savedController: ReadableStreamDefaultController<string>;
76
+
77
+ return new ReadableStream<string>({
78
+ start(controller) {
79
+ savedController = controller;
80
+ controller.enqueue("[START]"); // XXX: we need to add this flag for an immediate response for the subscriber, otherwise we'd have to wait for the first token from the llm.
81
+ for (const chunk of entry.buffer) {
82
+ controller.enqueue(chunk);
83
+ }
84
+ if (entry.isComplete) {
85
+ controller.close();
86
+ return;
87
+ }
88
+ entry.subscribers.add(controller);
89
+ },
90
+ cancel() {
91
+ entry.subscribers.delete(savedController);
92
+ },
93
+ });
94
+ }
95
+
96
+ export function abortStream(threadId: string) {
97
+ const entry = streams.get(threadId);
98
+ if (!entry) return false;
99
+ queueMicrotask(() => entry.abortController.abort());
100
+ return true;
101
+ }
102
+
103
+ export function isStreamRunning(threadId: string) {
104
+ const entry = streams.get(threadId);
105
+ return entry ? !entry.isComplete : false;
106
+ }
107
+
108
+ export function removeStream(threadId: string, streamId?: string) {
109
+ const entry = streams.get(threadId);
110
+ if (!entry) return;
111
+ if (streamId && entry.id !== streamId) return;
112
+
113
+ if (entry) {
114
+ for (const controller of entry.subscribers) {
115
+ try {
116
+ controller.close();
117
+ } catch {
118
+ /* already closed */
119
+ }
120
+ }
121
+ }
122
+ streams.delete(threadId);
123
+ }
@@ -0,0 +1,94 @@
1
+ import type { UIMessage } from "ai";
2
+ import { generateId } from "ai";
3
+ import type { DatabaseAdapter } from "../../adapters/database.ts";
4
+ import type { MessageMetadata, Thread } from "../../types.ts";
5
+ import type { ContextConfig, ThreadContextMeta } from "./types.ts";
6
+ import { compressToolResults } from "./compressor.ts";
7
+ import { estimateMessageTokens } from "./token-estimator.ts";
8
+
9
+ type ContextBuildResult = {
10
+ messages: UIMessage<MessageMetadata>[];
11
+ allMessages: UIMessage<MessageMetadata>[];
12
+ summary: string | null;
13
+ };
14
+
15
+ /**
16
+ * Builds a token-aware context window from stored messages.
17
+ *
18
+ * 1. Loads messages from DB with generous limit
19
+ * 2. Reads existing summary from thread.contextMeta
20
+ * 3. Compresses large tool results
21
+ * 4. Walks messages newest-to-oldest, accumulating token estimates
22
+ * 5. Stops when adding the next message would exceed the budget
23
+ * 6. Prepends summary as synthetic message if older messages were trimmed
24
+ */
25
+ export async function buildContextMessages(
26
+ userId: string,
27
+ thread: Thread,
28
+ db: DatabaseAdapter,
29
+ contextConfig: ContextConfig,
30
+ ) {
31
+ // 1. Load messages with generous limit
32
+ const storedMessages = await db.messages.list(userId, thread.id, { limit: 50 });
33
+ const allMessages = storedMessages.map((m) => m.content);
34
+
35
+ // 2. Read existing summary
36
+ const contextMeta = thread.contextMeta;
37
+ const summary = contextMeta?.summary ?? null;
38
+
39
+ // 3. Compress large tool results
40
+ const compressed = compressToolResults(allMessages, contextConfig.toolResultMaxTokens);
41
+
42
+ // 4. Walk newest-to-oldest, accumulating token estimates.
43
+ // Reserve space for the system prompt + tool definitions (they share the context window).
44
+ const budget = contextConfig.maxContextTokens - contextConfig.reservedTokenBudget;
45
+ const selected: UIMessage<MessageMetadata>[] = [];
46
+ let accumulated = 0;
47
+
48
+ for (let i = compressed.length - 1; i >= 0; i--) {
49
+ const msgTokens = estimateMessageTokens(compressed[i]!);
50
+
51
+ if (accumulated + msgTokens > budget && selected.length > 0) {
52
+ break;
53
+ }
54
+
55
+ accumulated += msgTokens;
56
+ selected.unshift(compressed[i]!);
57
+ }
58
+
59
+ // 5. If we trimmed messages and a summary exists, prepend it.
60
+ // Make room for the summary by evicting the oldest messages if needed.
61
+ if (summary && selected.length < allMessages.length) {
62
+ const summaryMessage = {
63
+ id: generateId(),
64
+ role: "user",
65
+ parts: [{ type: "text", text: `[Previous conversation summary]: ${summary}` }],
66
+ } satisfies UIMessage;
67
+
68
+ const summaryTokens = estimateMessageTokens(summaryMessage);
69
+ const trimmed = trimMessagesToFit(selected, budget - summaryTokens);
70
+ trimmed.unshift(summaryMessage);
71
+ return { messages: trimmed, allMessages, summary };
72
+ }
73
+
74
+ return { messages: selected, allMessages, summary } satisfies ContextBuildResult;
75
+ }
76
+
77
+ /**
78
+ * Drops the oldest messages until the total estimated tokens fit within `budget`.
79
+ * Always keeps at least the most recent message.
80
+ */
81
+ export function trimMessagesToFit(messages: UIMessage<MessageMetadata>[], budget: number) {
82
+ let total = 0;
83
+ for (const msg of messages) {
84
+ total += estimateMessageTokens(msg);
85
+ }
86
+
87
+ const trimmed = [...messages];
88
+ while (total > budget && trimmed.length > 1) {
89
+ const evicted = trimmed.shift()!;
90
+ total -= estimateMessageTokens(evicted);
91
+ }
92
+
93
+ return trimmed;
94
+ }
@@ -0,0 +1,47 @@
1
+ import type { UIMessage } from "ai";
2
+ import { estimateTokens, CHARS_PER_TOKEN } from "./token-estimator.ts";
3
+ import type { MessageMetadata } from "src/types.ts";
4
+
5
+ /**
6
+ * Returns a new array of messages with large tool outputs truncated
7
+ * to `maxTokensPerResult`. Does not mutate the input messages.
8
+ */
9
+ export function compressToolResults(
10
+ messages: UIMessage<MessageMetadata>[],
11
+ maxTokensPerResult: number,
12
+ ) {
13
+ return messages.map((message) => {
14
+ let hasLargeToolOutput = false;
15
+
16
+ for (const part of message.parts) {
17
+ if ("toolCallId" in part && "output" in part && part.output != null) {
18
+ const outputTokens = estimateTokens(JSON.stringify(part.output));
19
+ if (outputTokens > maxTokensPerResult) {
20
+ hasLargeToolOutput = true;
21
+ break;
22
+ }
23
+ }
24
+ }
25
+
26
+ if (!hasLargeToolOutput) return message;
27
+
28
+ const compressedParts = message.parts.map((part) => {
29
+ if (!("toolCallId" in part) || !("output" in part) || part.output == null) {
30
+ return part;
31
+ }
32
+
33
+ const outputStr = JSON.stringify(part.output);
34
+ const outputTokens = estimateTokens(outputStr);
35
+
36
+ if (outputTokens <= maxTokensPerResult) return part;
37
+
38
+ // Convert token budget back to character budget
39
+ const charBudget = maxTokensPerResult * CHARS_PER_TOKEN;
40
+ const truncatedOutput = outputStr.slice(0, charBudget) + "\n[...truncated]";
41
+
42
+ return { ...part, output: truncatedOutput } as typeof part;
43
+ });
44
+
45
+ return { ...message, parts: compressedParts };
46
+ });
47
+ }
@@ -0,0 +1,75 @@
1
+ export type { ContextConfig, ThreadContextMeta } from "./types.ts";
2
+ export { DEFAULT_CONTEXT_CONFIG } from "./types.ts";
3
+ export {
4
+ CHARS_PER_TOKEN,
5
+ estimateTokens,
6
+ estimateMessageTokens,
7
+ estimateMessagesTokens,
8
+ } from "./token-estimator.ts";
9
+ export { compressToolResults } from "./compressor.ts";
10
+ export { summarizeMessages } from "./summarizer.ts";
11
+ export { buildContextMessages, trimMessagesToFit } from "./builder.ts";
12
+
13
+ import type { UIMessage } from "ai";
14
+ import type { ResolvedCortexAgentConfig } from "../../config.ts";
15
+ import type { Thread } from "../../types.ts";
16
+ import type { ThreadContextMeta } from "./types.ts";
17
+ import { estimateMessagesTokens } from "./token-estimator.ts";
18
+ import { summarizeMessages } from "./summarizer.ts";
19
+
20
+ /**
21
+ * Post-response context optimization.
22
+ * Called fire-and-forget from onFinish — summarizes older messages
23
+ * when token usage exceeds the configured threshold.
24
+ */
25
+ export async function optimizeThreadContext(
26
+ thread: Thread,
27
+ messages: UIMessage[],
28
+ config: ResolvedCortexAgentConfig,
29
+ ) {
30
+ const contextConfig = config.context;
31
+
32
+ // 1. Estimate tokens for all messages
33
+ const estimates = estimateMessagesTokens(messages);
34
+ const totalEstimatedTokens = estimates.reduce((sum, e) => sum + e.tokens, 0);
35
+
36
+ // 2. Check if over summarization threshold
37
+ const threshold = contextConfig.maxContextTokens * contextConfig.summarizationThreshold;
38
+
39
+ if (totalEstimatedTokens <= threshold) {
40
+ // Update token estimate but skip summarization
41
+ const meta: ThreadContextMeta = {
42
+ summary: thread.contextMeta?.summary ?? null,
43
+ summaryUpToMessageId: thread.contextMeta?.summaryUpToMessageId ?? null,
44
+ totalEstimatedTokens,
45
+ lastOptimizedAt: new Date().toISOString(),
46
+ };
47
+ await config.db.threads.updateContextMeta(thread.id, meta);
48
+ return;
49
+ }
50
+
51
+ // 3. Keep the most recent messages unsummarized (they're the hot context)
52
+ const recentCount = Math.min(contextConfig.recentMessagesToKeep, messages.length);
53
+ const messagesToSummarize = messages.slice(0, messages.length - recentCount);
54
+
55
+ if (messagesToSummarize.length === 0) return;
56
+
57
+ // 4. Determine model config for summarization
58
+ const modelConfig = contextConfig.summarizationModel ?? config.model;
59
+
60
+ // 5. Generate summary incorporating any existing summary
61
+ const existingSummary = thread.contextMeta?.summary ?? null;
62
+
63
+ const summary = await summarizeMessages(messagesToSummarize, existingSummary, modelConfig);
64
+
65
+ // 6. Update thread context meta
66
+ const lastSummarizedMessage = messagesToSummarize.at(-1);
67
+ const meta = {
68
+ summary,
69
+ summaryUpToMessageId: lastSummarizedMessage?.id ?? null,
70
+ totalEstimatedTokens,
71
+ lastOptimizedAt: new Date().toISOString(),
72
+ } satisfies ThreadContextMeta;
73
+
74
+ await config.db.threads.updateContextMeta(thread.id, meta);
75
+ }
@@ -0,0 +1,50 @@
1
+ import { generateText } from "ai";
2
+ import type { UIMessage } from "ai";
3
+ import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
4
+ import type { ContextConfig } from "./types.ts";
5
+
6
// Model settings for the dedicated summarization endpoint (same shape as
// ContextConfig.summarizationModel, with the optionality stripped).
+ type SummarizationModelConfig = NonNullable<ContextConfig["summarizationModel"]>;
7
+
8
// Summarizes a slice of conversation into a compact bullet-point digest,
// folding in `existingSummary` (the prior rolling summary) when present.
// Makes one LLM call via an OpenAI-compatible endpoint; returns the raw
// summary text. Note: only `text` parts are summarized — tool and
// reasoning parts are dropped from the transcript sent to the model.
8
+ export async function summarizeMessages(
9
+ messages: UIMessage[],
10
+ existingSummary: string | null,
11
+ modelConfig: SummarizationModelConfig,
12
+ ) {
13
// Build a throwaway provider for the configured endpoint; falls back to a
// generic provider name when none is configured.
+ const provider = createOpenAICompatible({
14
+ name: modelConfig.providerName ?? "summarization-provider",
15
+ baseURL: modelConfig.baseURL,
16
+ apiKey: modelConfig.apiKey,
17
+ });
18
+
19
+ const model = provider.chatModel(modelConfig.modelName);
20
+
21
// Flatten each message's text parts into one "[role]: text" line.
+ const conversationText = messages
22
+ .map(function (msg) {
23
+ const textParts = msg.parts
24
+ .filter((p): p is Extract<typeof p, { type: "text" }> => p.type === "text")
25
+ .map((p) => p.text);
26
+ return `[${msg.role}]: ${textParts.join(" ")}`;
27
+ })
28
+ .join("\n");
29
+
30
// Prepend the previous rolling summary so the model extends, not replaces, it.
+ const summaryContext = existingSummary
31
+ ? `\nPrior summary of earlier messages:\n${existingSummary}\n`
32
+ : "";
33
+
34
// Single non-streaming completion; the prompt templates below are runtime
// strings and must be kept byte-for-byte.
+ const { text } = await generateText({
35
+ model,
36
+ system: `You are a precise conversation summarizer. Produce a concise summary that preserves:
37
+ - Key decisions and conclusions
38
+ - Important entities (names, IDs, URLs, values)
39
+ - User intent and goals
40
+ - Any unresolved questions or next steps
41
+
42
+ Maximum 500 tokens. Use bullet points. Do not include preamble.`,
43
+ prompt: `${summaryContext}
44
+ Summarize the following conversation:
45
+
46
+ ${conversationText}`,
47
+ });
48
+
49
+ return text;
50
+ }
@@ -0,0 +1,60 @@
1
+ import type { UIMessage } from "ai";
2
+
3
+ /** Average characters per token for English text. Used by the heuristic estimator. */
4
+ export const CHARS_PER_TOKEN = 4;
5
+
6
+ /**
7
+ * Estimates token count for a string using the chars/4 heuristic.
8
+ * ~10% accuracy for English text — good enough for budget decisions.
9
+ */
10
+ export function estimateTokens(text: string) {
11
+ return Math.ceil(text.length / CHARS_PER_TOKEN);
12
+ }
13
+
14
+ /**
15
+ * Estimates token count for a single UIMessage by walking its parts.
16
+ */
17
+ export function estimateMessageTokens(message: UIMessage) {
18
+ let tokens = 0;
19
+
20
+ for (const part of message.parts) {
21
+ if (part.type === "text" || part.type === "reasoning") {
22
+ tokens += estimateTokens(part.text);
23
+ } else if ("toolCallId" in part) {
24
+ // Tool invocation parts (tool-${name})
25
+ if ("input" in part && part.input != null) {
26
+ tokens += estimateTokens(JSON.stringify(part.input));
27
+ }
28
+ if ("output" in part && part.output != null) {
29
+ tokens += estimateTokens(JSON.stringify(part.output));
30
+ }
31
+ } else if (part.type === "source-url") {
32
+ tokens += estimateTokens(part.url);
33
+ } else if (part.type === "source-document") {
34
+ tokens += estimateTokens(part.title);
35
+ } else if (part.type === "file") {
36
+ tokens += estimateTokens(part.url);
37
+ } else {
38
+ // step-start, data parts, etc.
39
+ tokens += 5;
40
+ }
41
+ }
42
+
43
+ // Per-message overhead (role, metadata framing)
44
+ tokens += 4;
45
+
46
+ return tokens;
47
+ }
48
+
49
+ /**
50
+ * Estimates token counts for an array of UIMessages.
51
+ * Returns per-message estimates in the same order.
52
+ */
53
+ export function estimateMessagesTokens(messages: UIMessage[]) {
54
+ return messages.map(function (message) {
55
+ return {
56
+ message,
57
+ tokens: estimateMessageTokens(message),
58
+ };
59
+ });
60
+ }
@@ -0,0 +1,28 @@
1
+ export type ContextConfig = {
2
+ maxContextTokens: number;
3
+ reservedTokenBudget: number;
4
+ summarizationThreshold: number;
5
+ summarizationModel?: {
6
+ baseURL: string;
7
+ apiKey: string;
8
+ modelName: string;
9
+ providerName?: string;
10
+ };
11
+ toolResultMaxTokens: number;
12
+ recentMessagesToKeep: number;
13
+ };
14
+
15
+ export type ThreadContextMeta = {
16
+ summary: string | null;
17
+ summaryUpToMessageId: string | null;
18
+ totalEstimatedTokens: number;
19
+ lastOptimizedAt: string | null;
20
+ };
21
+
22
+ export const DEFAULT_CONTEXT_CONFIG: ContextConfig = {
23
+ maxContextTokens: 120_000,
24
+ reservedTokenBudget: 8_000,
25
+ summarizationThreshold: 0.75,
26
+ toolResultMaxTokens: 2_000,
27
+ recentMessagesToKeep: 6,
28
+ };