@supyagent/sdk 0.1.36 → 0.1.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context.cjs +322 -0
- package/dist/context.cjs.map +1 -0
- package/dist/context.d.cts +204 -0
- package/dist/context.d.ts +204 -0
- package/dist/context.js +290 -0
- package/dist/context.js.map +1 -0
- package/dist/react.cjs +147 -44
- package/dist/react.cjs.map +1 -1
- package/dist/react.d.cts +36 -1
- package/dist/react.d.ts +36 -1
- package/dist/react.js +144 -44
- package/dist/react.js.map +1 -1
- package/package.json +7 -1
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import { UIMessage } from 'ai';
|
|
2
|
+
|
|
3
|
+
interface ContextManagerOptions {
    /**
     * Maximum context window size in tokens for the model.
     * Used to calculate soft/hard thresholds.
     * @default 128_000
     */
    maxTokens?: number;
    /**
     * Soft threshold as a fraction of maxTokens (0–1).
     * When exceeded after a response completes, triggers background summarization.
     * @default 0.75
     */
    softThreshold?: number;
    /**
     * Hard threshold as a fraction of maxTokens (0–1).
     * When exceeded, blocks before the next LLM call and compactifies synchronously.
     * @default 0.90
     */
    hardThreshold?: number;
    /**
     * Tokens to reserve for the model's response output.
     * Subtracted from the context budget when checking thresholds.
     * @default 4096
     */
    responseReserve?: number;
    /**
     * Minimum number of recent messages to always keep (never summarized away).
     * @default 4
     */
    minRecentMessages?: number;
    /**
     * Custom prompt for the default summarizer.
     * Overrides the built-in summarization system prompt.
     */
    summaryPrompt?: string;
    /**
     * Full override for compactification.
     * When provided, replaces the default summarizer entirely.
     * Receives all messages and must return the compacted message list.
     */
    compactify?: (messages: UIMessage[]) => Promise<UIMessage[]>;
    /**
     * The model to use for summarization (same type as streamText's `model` param).
     * Required unless a custom `compactify` function is provided.
     * NOTE(review): typed `any` — presumably the AI SDK's `LanguageModel`;
     * confirm before tightening the type.
     */
    summaryModel?: any;
    /**
     * Custom token estimator function.
     * Given a UIMessage[], returns an estimated token count.
     * @default Character-based heuristic (~4 chars per token)
     */
    estimateTokens?: (messages: UIMessage[]) => number;
}
|
|
56
|
+
interface ContextState {
    /**
     * Total input tokens consumed across all LLM calls in this chat.
     * NOTE(review): the shipped implementation accumulates this per request
     * and clears it in `reset()` — confirm the "whole chat" semantics.
     */
    totalInputTokens: number;
    /**
     * Total output tokens consumed across all LLM calls in this chat.
     * NOTE(review): same per-request caveat as `totalInputTokens`.
     */
    totalOutputTokens: number;
    /** Estimated current context size (tokens for the next LLM call) */
    estimatedContextSize: number;
    /** The configured maximum context window */
    maxTokens: number;
    /** Usage ratio (estimatedContextSize / effectiveBudget), clamped 0–1 */
    usageRatio: number;
    /** Whether the soft threshold has been exceeded */
    softThresholdExceeded: boolean;
    /** Whether the hard threshold has been exceeded */
    hardThresholdExceeded: boolean;
    /**
     * Number of context-summary messages found in the chat.
     * NOTE(review): verify this is actually populated by the implementation —
     * the shipped `getState` appears to return a constant 0.
     */
    summaryCount: number;
}
|
|
74
|
+
/**
 * Metadata attached to the synthetic assistant message produced by
 * compactification. The `type` literal doubles as the discriminant used to
 * locate summary messages in the history.
 */
interface ContextSummaryMetadata {
    type: "context-summary";
    /** Number of messages that were summarized */
    messagesSummarized: number;
    /** Estimated tokens of the original messages that were summarized */
    originalTokens: number;
    /** Estimated tokens of the summary */
    summaryTokens: number;
    /** ISO timestamp of when the summary was created */
    createdAt: string;
}
|
|
85
|
+
/**
 * Per-message metadata produced by `ContextManager.getMessageMetadata()` for
 * attachment to streamed assistant messages.
 */
interface ContextMessageMetadata {
    context?: {
        /** Input tokens used by this specific LLM call */
        inputTokens?: number;
        /** Output tokens used by this specific LLM call */
        outputTokens?: number;
        /** Cumulative total tokens consumed in this chat so far */
        totalTokens?: number;
        /** Current usage ratio at completion (0–1) */
        usageRatio?: number;
    };
}
|
|
97
|
+
interface ContextManager {
    /** Get the current context state. */
    getState(): ContextState;
    /**
     * Record token usage from a completed LLM step.
     * Called from onStepFinish or onFinish callbacks.
     * A positive `inputTokens` also refreshes the estimated context size,
     * since the provider-reported figure is more accurate than the heuristic.
     */
    recordUsage(usage: {
        inputTokens?: number;
        outputTokens?: number;
    }): void;
    /**
     * Update the estimated context size based on current messages.
     * Should be called whenever the message list changes.
     */
    updateEstimate(messages: UIMessage[]): void;
    /**
     * Prepare messages for the LLM call.
     * - Finds the last context-summary message
     * - Drops all messages before it
     * - Injects the summary text into the system prompt
     * - Returns the trimmed messages and updated system prompt
     *
     * Does NOT mutate the input array.
     */
    prepareMessages(messages: UIMessage[], systemPrompt: string): Promise<{
        messages: UIMessage[];
        systemPrompt: string;
    }>;
    /**
     * Returns true if estimated context exceeds the hard threshold.
     * Indicates compactification should block before the next LLM call.
     */
    shouldCompactify(messages: UIMessage[]): boolean;
    /**
     * Returns true if estimated context exceeds the soft threshold
     * but not the hard threshold.
     * Indicates background summarization should be triggered.
     */
    shouldSummarize(messages: UIMessage[]): boolean;
    /**
     * Perform compactification (synchronous in the request flow).
     * Returns a new message array with a summary message inserted
     * and older messages removed. Does NOT mutate the input.
     */
    compactify(messages: UIMessage[]): Promise<UIMessage[]>;
    /**
     * Get context metadata to attach to the streamed response.
     * Used with toUIMessageStreamResponse's messageMetadata callback.
     */
    getMessageMetadata(): ContextMessageMetadata;
    /** Reset internal state (e.g. when starting a new chat). */
    reset(): void;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Prepare messages for an LLM call by trimming everything before the last
 * context-summary message and injecting the summary into the system prompt.
 *
 * - If no summary exists, all messages pass through unchanged.
 * - If a summary exists at index `i`, messages `[0..i]` are dropped and the
 *   summary text is appended to the system prompt.
 *
 * Neither the input array nor any individual message is mutated.
 */
declare function prepareMessages(messages: UIMessage[], systemPrompt: string): {
    messages: UIMessage[];
    systemPrompt: string;
};
/**
 * Find the index of the last message that is a context summary.
 * Returns -1 if none found.
 */
declare function findLastSummaryIndex(messages: UIMessage[]): number;
/** Count how many context-summary messages exist in the array. */
declare function countSummaries(messages: UIMessage[]): number;

/**
 * Create a context manager that tracks token usage, detects threshold
 * breaches, and can compactify message history via summarisation.
 *
 * The manager is stateless across HTTP requests — it derives cumulative
 * totals from message metadata written by previous requests and
 * accumulates usage recorded during the current request.
 */
declare function createContextManager(options?: ContextManagerOptions): ContextManager;

/**
 * Estimate the token count of a UIMessage array using a character-based heuristic.
 *
 * This is intentionally model-agnostic (~4 chars per token) and errs slightly
 * on the conservative side. Pass a custom `estimateTokens` to
 * `createContextManager` if you need model-specific accuracy.
 */
declare function estimateTokens(messages: UIMessage[]): number;

/**
 * Summarise a list of UIMessages into a single prose block
 * using an LLM call via the AI SDK's `generateText`.
 */
declare function summarize(messages: UIMessage[], options: {
    /** The summarisation model (same type as `streamText`'s `model` param). */
    model: any;
    /** Optional override for the built-in summarisation system prompt. */
    prompt?: string;
    /** Set to true when compacting mid-tool-chain */
    midChain?: boolean;
}): Promise<string>;

export { type ContextManager, type ContextManagerOptions, type ContextMessageMetadata, type ContextState, type ContextSummaryMetadata, countSummaries, createContextManager, estimateTokens, findLastSummaryIndex, prepareMessages, summarize };
|
package/dist/context.js
ADDED
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
// src/context/token-estimator.ts

/** Approximate characters per token for mixed English text and code. */
var CHARS_PER_TOKEN = 4;

/** Overhead tokens added per message (role, framing). */
var MESSAGE_OVERHEAD = 4;

/** Base overhead for the conversation itself. */
var CONVERSATION_OVERHEAD = 3;

/**
 * Estimate the token count of a UIMessage array with a character-based
 * heuristic (~4 chars per token). Model-agnostic and slightly conservative;
 * supply a custom `estimateTokens` to `createContextManager` when
 * model-specific accuracy is required.
 */
function estimateTokens(messages) {
  let charCount = 0;
  for (const message of messages) {
    // Fixed framing cost per message (role markers etc.).
    charCount += MESSAGE_OVERHEAD * CHARS_PER_TOKEN;
    for (const part of message.parts) {
      charCount += charsForPart(part);
    }
  }
  return Math.ceil(charCount / CHARS_PER_TOKEN) + CONVERSATION_OVERHEAD;
}

/** Character contribution of a single message part. */
function charsForPart(part) {
  if (part.type === "text") {
    return (part.text ?? "").length;
  }
  if (part.type === "tool-invocation") {
    // Estimate from the serialised tool name, input and output.
    let chars = 0;
    if (part.toolName) chars += part.toolName.length;
    if (part.input) chars += JSON.stringify(part.input).length;
    if (part.output) chars += JSON.stringify(part.output).length;
    return chars;
  }
  if (part.type === "file") {
    // Only the URL is counted; actual file content is handled by the provider.
    return (part.url ?? "").length;
  }
  // Unknown part types: serialised length as a rough size proxy.
  try {
    return JSON.stringify(part).length;
  } catch {
    return 50; // small fallback for unserialisable parts
  }
}
|
|
37
|
+
|
|
38
|
+
// src/context/message-preparation.ts
var SUMMARY_PREAMBLE = "\n\n[Previous conversation summary]\nThe following is a summary of the conversation history that has been compacted to save context space:\n\n";

/**
 * Trim everything before the most recent context-summary message and fold the
 * summary text into the system prompt. When no summary is present the inputs
 * are returned untouched. Neither the array nor its messages are mutated.
 */
function prepareMessages(messages, systemPrompt) {
  const lastSummaryAt = findLastSummaryIndex(messages);
  if (lastSummaryAt < 0) {
    return { messages, systemPrompt };
  }
  const summaryText = extractSummaryText(messages[lastSummaryAt]);
  return {
    messages: messages.slice(lastSummaryAt + 1),
    systemPrompt: `${systemPrompt}${SUMMARY_PREAMBLE}${summaryText}`
  };
}

/**
 * Index of the newest message tagged as a context summary, or -1 when the
 * conversation has never been compacted.
 */
function findLastSummaryIndex(messages) {
  for (let idx = messages.length; idx-- > 0; ) {
    if (messages[idx].metadata?.type === "context-summary") {
      return idx;
    }
  }
  return -1;
}

/** Count how many context-summary messages exist in the array. */
function countSummaries(messages) {
  return messages.reduce(
    (total, message) => message.metadata?.type === "context-summary" ? total + 1 : total,
    0
  );
}

/** Text of the first text part in a message, or "" when none exists. */
function extractSummaryText(message) {
  const textPart = message.parts.find((part) => part.type === "text");
  return textPart?.text ?? "";
}
|
|
78
|
+
|
|
79
|
+
// src/context/summarizer.ts
import { generateText } from "ai";

/** Built-in system prompt for the default summariser. */
var DEFAULT_SUMMARY_PROMPT = `You are a conversation summariser. Produce a concise summary of the conversation below.

Focus on:
1. Key topics discussed and decisions made
2. Tasks completed and their outcomes (including important IDs, names, or values)
3. Pending tasks or open questions
4. Any in-progress multi-step work the assistant was performing

Be concise but preserve all actionable information. Output only the summary text, no preamble.`;

/** Appended when compacting while a multi-step task is still in flight. */
var MID_CHAIN_ADDENDUM = `

IMPORTANT: The assistant was in the middle of executing a multi-step task when this summary was requested. Make sure to clearly note what step it was on and what remains to be done, so the task can be resumed seamlessly.`;

/**
 * Summarise a list of UIMessages into a single prose block via the AI SDK's
 * `generateText`. `options.midChain` marks an interrupted multi-step task so
 * the summary records where to resume.
 */
async function summarize(messages, options) {
  const basePrompt = options.prompt ?? DEFAULT_SUMMARY_PROMPT;
  const systemPrompt = options.midChain ? basePrompt + MID_CHAIN_ADDENDUM : basePrompt;
  const result = await generateText({
    model: options.model,
    system: systemPrompt,
    prompt: formatConversation(messages)
  });
  return result.text;
}
|
|
106
|
+
/**
 * Render a UIMessage array as plain text ("ROLE: content" paragraphs) for the
 * summariser. Text parts are trimmed; tool invocations are rendered inline,
 * with completed tool output truncated to 2000 characters. Messages with no
 * renderable content are skipped entirely.
 */
function formatConversation(messages) {
  const rendered = [];
  for (const message of messages) {
    const texts = [];
    const tools = [];
    for (const part of message.parts) {
      if (part.type === "text") {
        const trimmed = part.text?.trim();
        if (trimmed) texts.push(trimmed);
      } else if (part.type === "tool-invocation") {
        tools.push(renderToolPart(part));
      }
    }
    // Text first, then tool tags, one per line.
    const body = texts.concat(tools).join("\n");
    if (body.trim()) {
      rendered.push(`${message.role.toUpperCase()}: ${body}`);
    }
  }
  return rendered.join("\n\n");
}

/** Render one tool-invocation part as an inline bracket tag. */
function renderToolPart(inv) {
  const name = inv.toolName ?? "unknown";
  const state = inv.state ?? "";
  if (state === "output-available" && inv.output != null) {
    const outputStr = typeof inv.output === "string" ? inv.output : JSON.stringify(inv.output, null, 0);
    // Cap very large tool outputs so the summariser prompt stays bounded.
    const truncated = outputStr.length > 2e3 ? outputStr.slice(0, 2e3) + "... (truncated)" : outputStr;
    return `[Tool: ${name} \u2192 ${truncated}]`;
  }
  return `[Tool: ${name} (${state || "pending"})]`;
}
|
|
136
|
+
|
|
137
|
+
// src/context/context-manager.ts
var DEFAULT_MAX_TOKENS = 128e3;
var DEFAULT_SOFT_THRESHOLD = 0.75;
var DEFAULT_HARD_THRESHOLD = 0.9;
var DEFAULT_RESPONSE_RESERVE = 4096;
var DEFAULT_MIN_RECENT = 4;

/** Generate a unique-enough id for synthetic summary messages. */
function generateId() {
  return `ctx-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
}

/**
 * Create a context manager that tracks token usage, detects threshold
 * breaches, and can compactify message history via summarisation.
 *
 * The manager is stateless across HTTP requests — cumulative totals are read
 * from message metadata written by previous requests (via `updateEstimate`)
 * and combined with usage recorded during the current request.
 *
 * Fixes vs the previous revision:
 * - `getState().summaryCount` is now derived from the message list in
 *   `updateEstimate` (it was always 0 before, and the getState wrapper that
 *   claimed to populate it was a no-op).
 * - `getMessageMetadata().context.totalTokens` now includes the cumulative
 *   total read from prior metadata (previously `getCumulativeTotals` was
 *   dead code), matching the declared "cumulative total in this chat" field.
 * - `shouldCompactify`/`shouldSummarize` no longer divide by a non-positive
 *   budget when `responseReserve >= maxTokens`.
 */
function createContextManager(options) {
  const maxTokens = options?.maxTokens ?? DEFAULT_MAX_TOKENS;
  const softThreshold = options?.softThreshold ?? DEFAULT_SOFT_THRESHOLD;
  const hardThreshold = options?.hardThreshold ?? DEFAULT_HARD_THRESHOLD;
  const responseReserve = options?.responseReserve ?? DEFAULT_RESPONSE_RESERVE;
  const minRecentMessages = options?.minRecentMessages ?? DEFAULT_MIN_RECENT;
  const estimateTokens2 = options?.estimateTokens ?? estimateTokens;
  // Tokens available for input once the response reserve is held back.
  const effectiveBudget = maxTokens - responseReserve;

  // Usage accumulated during the current request only.
  let requestInputTokens = 0;
  let requestOutputTokens = 0;
  // Estimated size of the next LLM call's input.
  let estimatedContextSize = 0;
  // Derived from the message list by updateEstimate().
  let summaryCount = 0;
  // Cumulative total recorded by previous requests (from message metadata).
  let priorTotalTokens = 0;

  /** Read the most recent context metadata written by an earlier request. */
  function getCumulativeTotals(messages) {
    for (let i = messages.length - 1; i >= 0; i--) {
      const meta = messages[i].metadata;
      if (meta?.context) {
        return {
          totalTokens: meta.context.totalTokens ?? 0,
          inputTokens: meta.context.inputTokens ?? 0,
          outputTokens: meta.context.outputTokens ?? 0
        };
      }
    }
    return { totalTokens: 0, inputTokens: 0, outputTokens: 0 };
  }

  /** estimatedContextSize / effectiveBudget, clamped to [0, 1]. */
  function computeUsageRatio() {
    if (effectiveBudget <= 0) return 0;
    return Math.min(Math.max(estimatedContextSize / effectiveBudget, 0), 1);
  }

  /**
   * Unclamped usage ratio for a prospective message list (after trimming at
   * the last summary). A non-positive budget is treated as "always over".
   */
  function ratioFor(messages) {
    if (effectiveBudget <= 0) return Infinity;
    const { messages: trimmed } = prepareMessages(messages, "");
    return estimateTokens2(trimmed) / effectiveBudget;
  }

  return {
    getState() {
      const ratio = computeUsageRatio();
      return {
        totalInputTokens: requestInputTokens,
        totalOutputTokens: requestOutputTokens,
        estimatedContextSize,
        maxTokens,
        usageRatio: ratio,
        softThresholdExceeded: ratio > softThreshold,
        hardThresholdExceeded: ratio > hardThreshold,
        summaryCount
      };
    },
    recordUsage(usage) {
      requestInputTokens += usage.inputTokens ?? 0;
      requestOutputTokens += usage.outputTokens ?? 0;
      // The provider-reported input size is more accurate than the
      // character heuristic, so let it override the estimate.
      if (usage.inputTokens != null && usage.inputTokens > 0) {
        estimatedContextSize = usage.inputTokens;
      }
    },
    updateEstimate(messages) {
      const { messages: trimmed } = prepareMessages(messages, "");
      estimatedContextSize = estimateTokens2(trimmed);
      summaryCount = countSummaries(messages);
      priorTotalTokens = getCumulativeTotals(messages).totalTokens;
    },
    async prepareMessages(messages, systemPrompt) {
      return prepareMessages(messages, systemPrompt);
    },
    shouldCompactify(messages) {
      return ratioFor(messages) > hardThreshold;
    },
    shouldSummarize(messages) {
      const ratio = ratioFor(messages);
      return ratio > softThreshold && ratio <= hardThreshold;
    },
    async compactify(messages) {
      // A user-supplied compactifier replaces the default pipeline entirely.
      if (options?.compactify) {
        return options.compactify(messages);
      }
      if (!options?.summaryModel) {
        throw new Error(
          "createContextManager: either `summaryModel` or a custom `compactify` function is required to perform compactification."
        );
      }
      // Nothing to fold away if only the protected tail remains.
      if (messages.length <= minRecentMessages) {
        return messages;
      }
      const splitIdx = messages.length - minRecentMessages;
      const messagesToSummarize = messages.slice(0, splitIdx);
      const recentMessages = messages.slice(splitIdx);
      // Detect an interrupted tool chain so the summary notes how to resume.
      const lastMsg = messages[messages.length - 1];
      const midChain = lastMsg?.role === "assistant" && lastMsg.parts.some(
        (p) => p.type === "tool-invocation" && p.state !== "output-available"
      );
      const summaryText = await summarize(messagesToSummarize, {
        model: options.summaryModel,
        prompt: options.summaryPrompt,
        midChain
      });
      const originalTokens = estimateTokens2(messagesToSummarize);
      const summaryMessage = {
        id: generateId(),
        role: "assistant",
        parts: [{ type: "text", text: summaryText }],
        metadata: {
          type: "context-summary",
          messagesSummarized: messagesToSummarize.length,
          originalTokens,
          // Estimate the summary's own footprint via a throwaway message.
          summaryTokens: estimateTokens2([
            {
              id: "tmp",
              role: "assistant",
              parts: [{ type: "text", text: summaryText }]
            }
          ]),
          createdAt: new Date().toISOString()
        }
      };
      return [summaryMessage, ...recentMessages];
    },
    getMessageMetadata() {
      return {
        context: {
          inputTokens: requestInputTokens,
          outputTokens: requestOutputTokens,
          // Cumulative across the chat: prior metadata total + this request.
          totalTokens: priorTotalTokens + requestInputTokens + requestOutputTokens,
          usageRatio: computeUsageRatio()
        }
      };
    },
    reset() {
      requestInputTokens = 0;
      requestOutputTokens = 0;
      estimatedContextSize = 0;
      summaryCount = 0;
      priorTotalTokens = 0;
    }
  };
}
|
|
282
|
+
export {
|
|
283
|
+
countSummaries,
|
|
284
|
+
createContextManager,
|
|
285
|
+
estimateTokens,
|
|
286
|
+
findLastSummaryIndex,
|
|
287
|
+
prepareMessages,
|
|
288
|
+
summarize
|
|
289
|
+
};
|
|
290
|
+
//# sourceMappingURL=context.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/context/token-estimator.ts","../src/context/message-preparation.ts","../src/context/summarizer.ts","../src/context/context-manager.ts"],"sourcesContent":["import type { UIMessage } from \"ai\";\n\n/** Approximate characters per token for mixed English text and code. */\nconst CHARS_PER_TOKEN = 4;\n\n/** Overhead tokens added per message (role, framing). */\nconst MESSAGE_OVERHEAD = 4;\n\n/** Base overhead for the conversation itself. */\nconst CONVERSATION_OVERHEAD = 3;\n\n/**\n * Estimate the token count of a UIMessage array using a character-based heuristic.\n *\n * This is intentionally model-agnostic (~4 chars per token) and errs slightly\n * on the conservative side. Pass a custom `estimateTokens` to\n * `createContextManager` if you need model-specific accuracy.\n */\nexport function estimateTokens(messages: UIMessage[]): number {\n let totalChars = 0;\n\n for (const msg of messages) {\n totalChars += MESSAGE_OVERHEAD * CHARS_PER_TOKEN; // per-message overhead\n\n for (const part of msg.parts) {\n switch (part.type) {\n case \"text\":\n totalChars += ((part as { text: string }).text ?? \"\").length;\n break;\n\n case \"tool-invocation\": {\n // Estimate from the serialised input + output\n const inv = part as {\n toolName?: string;\n input?: unknown;\n output?: unknown;\n };\n if (inv.toolName) totalChars += inv.toolName.length;\n if (inv.input) totalChars += JSON.stringify(inv.input).length;\n if (inv.output) totalChars += JSON.stringify(inv.output).length;\n break;\n }\n\n case \"file\": {\n // Only count the URL length (actual file content is handled by the provider)\n const file = part as { url?: string };\n totalChars += (file.url ?? 
\"\").length;\n break;\n }\n\n default:\n // Catch-all for unknown part types\n try {\n totalChars += JSON.stringify(part).length;\n } catch {\n totalChars += 50; // small fallback\n }\n }\n }\n }\n\n return Math.ceil(totalChars / CHARS_PER_TOKEN) + CONVERSATION_OVERHEAD;\n}\n","import type { UIMessage } from \"ai\";\nimport type { ContextSummaryMetadata } from \"./types.js\";\n\nconst SUMMARY_PREAMBLE =\n \"\\n\\n[Previous conversation summary]\\nThe following is a summary of the conversation history that has been compacted to save context space:\\n\\n\";\n\n/**\n * Prepare messages for an LLM call by trimming everything before the last\n * context-summary message and injecting the summary into the system prompt.\n *\n * - If no summary exists, all messages pass through unchanged.\n * - If a summary exists at index `i`, messages `[0..i]` are dropped and the\n * summary text is appended to the system prompt.\n *\n * Neither the input array nor any individual message is mutated.\n */\nexport function prepareMessages(\n messages: UIMessage[],\n systemPrompt: string\n): { messages: UIMessage[]; systemPrompt: string } {\n const summaryIdx = findLastSummaryIndex(messages);\n\n if (summaryIdx === -1) {\n return { messages, systemPrompt };\n }\n\n const summaryMessage = messages[summaryIdx];\n const summaryText = extractSummaryText(summaryMessage);\n const trimmedMessages = messages.slice(summaryIdx + 1);\n\n return {\n messages: trimmedMessages,\n systemPrompt: systemPrompt + SUMMARY_PREAMBLE + summaryText,\n };\n}\n\n/**\n * Find the index of the last message that is a context summary.\n * Returns -1 if none found.\n */\nexport function findLastSummaryIndex(messages: UIMessage[]): number {\n for (let i = messages.length - 1; i >= 0; i--) {\n const meta = messages[i].metadata as ContextSummaryMetadata | undefined;\n if (meta?.type === \"context-summary\") {\n return i;\n }\n }\n return -1;\n}\n\n/** Count how many context-summary messages exist in the array. 
*/\nexport function countSummaries(messages: UIMessage[]): number {\n let count = 0;\n for (const msg of messages) {\n const meta = msg.metadata as ContextSummaryMetadata | undefined;\n if (meta?.type === \"context-summary\") count++;\n }\n return count;\n}\n\n/** Extract the text content from a summary message. */\nfunction extractSummaryText(message: UIMessage): string {\n for (const part of message.parts) {\n if (part.type === \"text\") {\n return (part as { text: string }).text ?? \"\";\n }\n }\n return \"\";\n}\n","import { generateText, type UIMessage } from \"ai\";\n\nconst DEFAULT_SUMMARY_PROMPT = `You are a conversation summariser. Produce a concise summary of the conversation below.\n\nFocus on:\n1. Key topics discussed and decisions made\n2. Tasks completed and their outcomes (including important IDs, names, or values)\n3. Pending tasks or open questions\n4. Any in-progress multi-step work the assistant was performing\n\nBe concise but preserve all actionable information. Output only the summary text, no preamble.`;\n\nconst MID_CHAIN_ADDENDUM = `\\n\\nIMPORTANT: The assistant was in the middle of executing a multi-step task when this summary was requested. Make sure to clearly note what step it was on and what remains to be done, so the task can be resumed seamlessly.`;\n\n/**\n * Summarise a list of UIMessages into a single prose block\n * using an LLM call via the AI SDK's `generateText`.\n */\nexport async function summarize(\n messages: UIMessage[],\n options: {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n model: any;\n prompt?: string;\n /** Set to true when compacting mid-tool-chain */\n midChain?: boolean;\n }\n): Promise<string> {\n const conversationText = formatConversation(messages);\n\n let systemPrompt = options.prompt ?? 
DEFAULT_SUMMARY_PROMPT;\n if (options.midChain) {\n systemPrompt += MID_CHAIN_ADDENDUM;\n }\n\n const { text } = await generateText({\n model: options.model,\n system: systemPrompt,\n prompt: conversationText,\n });\n\n return text;\n}\n\n/** Convert UIMessage[] into a plain-text conversation transcript for the summariser. */\nfunction formatConversation(messages: UIMessage[]): string {\n const lines: string[] = [];\n\n for (const msg of messages) {\n const role = msg.role.toUpperCase();\n const textParts: string[] = [];\n const toolParts: string[] = [];\n\n for (const part of msg.parts) {\n if (part.type === \"text\") {\n const text = (part as { text: string }).text;\n if (text?.trim()) textParts.push(text.trim());\n } else if (part.type === \"tool-invocation\") {\n const inv = part as { toolName?: string; output?: unknown; state?: string };\n const name = inv.toolName ?? \"unknown\";\n const state = inv.state ?? \"\";\n if (state === \"output-available\" && inv.output != null) {\n const outputStr =\n typeof inv.output === \"string\"\n ? inv.output\n : JSON.stringify(inv.output, null, 0);\n // Truncate very large outputs to keep the summariser prompt reasonable\n const truncated =\n outputStr.length > 2000\n ? outputStr.slice(0, 2000) + \"... 
(truncated)\"\n : outputStr;\n toolParts.push(`[Tool: ${name} → ${truncated}]`);\n } else {\n toolParts.push(`[Tool: ${name} (${state || \"pending\"})]`);\n }\n }\n }\n\n const content = [...textParts, ...toolParts].join(\"\\n\");\n if (content.trim()) {\n lines.push(`${role}: ${content}`);\n }\n }\n\n return lines.join(\"\\n\\n\");\n}\n","import type { UIMessage } from \"ai\";\nimport type {\n ContextManager,\n ContextManagerOptions,\n ContextMessageMetadata,\n ContextState,\n ContextSummaryMetadata,\n} from \"./types.js\";\nimport { estimateTokens as defaultEstimateTokens } from \"./token-estimator.js\";\nimport {\n prepareMessages as doPrepareMessages,\n findLastSummaryIndex,\n countSummaries,\n} from \"./message-preparation.js\";\nimport { summarize } from \"./summarizer.js\";\n\nconst DEFAULT_MAX_TOKENS = 128_000;\nconst DEFAULT_SOFT_THRESHOLD = 0.75;\nconst DEFAULT_HARD_THRESHOLD = 0.9;\nconst DEFAULT_RESPONSE_RESERVE = 4096;\nconst DEFAULT_MIN_RECENT = 4;\n\n/** Generate a simple unique ID for summary messages. */\nfunction generateId(): string {\n return `ctx-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;\n}\n\n/**\n * Create a context manager that tracks token usage, detects threshold\n * breaches, and can compactify message history via summarisation.\n *\n * The manager is stateless across HTTP requests — it derives cumulative\n * totals from message metadata written by previous requests and\n * accumulates usage recorded during the current request.\n */\nexport function createContextManager(\n options?: ContextManagerOptions\n): ContextManager {\n const maxTokens = options?.maxTokens ?? DEFAULT_MAX_TOKENS;\n const softThreshold = options?.softThreshold ?? DEFAULT_SOFT_THRESHOLD;\n const hardThreshold = options?.hardThreshold ?? DEFAULT_HARD_THRESHOLD;\n const responseReserve = options?.responseReserve ?? DEFAULT_RESPONSE_RESERVE;\n const minRecentMessages = options?.minRecentMessages ?? 
DEFAULT_MIN_RECENT;\n const estimateTokens = options?.estimateTokens ?? defaultEstimateTokens;\n\n // Effective budget = maxTokens minus what we reserve for the response\n const effectiveBudget = maxTokens - responseReserve;\n\n // ── Mutable per-request state ──────────────────────────────────────\n\n /** Input tokens recorded during this request (from onStepFinish). */\n let requestInputTokens = 0;\n /** Output tokens recorded during this request. */\n let requestOutputTokens = 0;\n /** Latest estimated context size in tokens. */\n let estimatedContextSize = 0;\n\n // ── Helpers ────────────────────────────────────────────────────────\n\n /** Read cumulative totals from the last assistant message that has context metadata. */\n function getCumulativeTotals(messages: UIMessage[]): {\n totalTokens: number;\n inputTokens: number;\n outputTokens: number;\n } {\n for (let i = messages.length - 1; i >= 0; i--) {\n const meta = messages[i].metadata as ContextMessageMetadata | undefined;\n if (meta?.context) {\n return {\n totalTokens: meta.context.totalTokens ?? 0,\n inputTokens: meta.context.inputTokens ?? 0,\n outputTokens: meta.context.outputTokens ?? 0,\n };\n }\n }\n return { totalTokens: 0, inputTokens: 0, outputTokens: 0 };\n }\n\n function computeUsageRatio(): number {\n if (effectiveBudget <= 0) return 0;\n return Math.min(Math.max(estimatedContextSize / effectiveBudget, 0), 1);\n }\n\n // ── ContextManager implementation ──────────────────────────────────\n\n const manager: ContextManager = {\n getState(): ContextState {\n const ratio = computeUsageRatio();\n return {\n totalInputTokens: requestInputTokens,\n totalOutputTokens: requestOutputTokens,\n estimatedContextSize,\n maxTokens,\n usageRatio: ratio,\n softThresholdExceeded: ratio > softThreshold,\n hardThresholdExceeded: ratio > hardThreshold,\n summaryCount: 0, // updated by updateEstimate\n };\n },\n\n recordUsage(usage) {\n requestInputTokens += usage.inputTokens ?? 
0;\n requestOutputTokens += usage.outputTokens ?? 0;\n // Use actual input tokens as the new context size estimate\n // (they reflect the real token count the provider saw)\n if (usage.inputTokens != null && usage.inputTokens > 0) {\n estimatedContextSize = usage.inputTokens;\n }\n },\n\n updateEstimate(messages) {\n // After prepareMessages, estimate what the LLM would see\n const { messages: trimmed } = doPrepareMessages(messages, \"\");\n estimatedContextSize = estimateTokens(trimmed);\n },\n\n async prepareMessages(messages, systemPrompt) {\n return doPrepareMessages(messages, systemPrompt);\n },\n\n shouldCompactify(messages) {\n // Re-estimate in case messages changed since last updateEstimate\n const { messages: trimmed } = doPrepareMessages(messages, \"\");\n const estimate = estimateTokens(trimmed);\n return estimate / effectiveBudget > hardThreshold;\n },\n\n shouldSummarize(messages) {\n const { messages: trimmed } = doPrepareMessages(messages, \"\");\n const estimate = estimateTokens(trimmed);\n const ratio = estimate / effectiveBudget;\n return ratio > softThreshold && ratio <= hardThreshold;\n },\n\n async compactify(messages) {\n // If a custom compactify is provided, delegate entirely\n if (options?.compactify) {\n return options.compactify(messages);\n }\n\n if (!options?.summaryModel) {\n throw new Error(\n \"createContextManager: either `summaryModel` or a custom `compactify` function is required to perform compactification.\"\n );\n }\n\n // Don't compact if there aren't enough messages\n if (messages.length <= minRecentMessages) {\n return messages;\n }\n\n // Split: everything before the cutpoint gets summarised\n const splitIdx = messages.length - minRecentMessages;\n const messagesToSummarize = messages.slice(0, splitIdx);\n const recentMessages = messages.slice(splitIdx);\n\n // Detect if we're mid-chain (last message is assistant with tool invocations\n // that might indicate an in-progress workflow)\n const lastMsg = 
messages[messages.length - 1];\n const midChain =\n lastMsg?.role === \"assistant\" &&\n lastMsg.parts.some(\n (p) =>\n p.type === \"tool-invocation\" &&\n (p as { state?: string }).state !== \"output-available\"\n );\n\n const summaryText = await summarize(messagesToSummarize, {\n model: options.summaryModel,\n prompt: options.summaryPrompt,\n midChain,\n });\n\n const originalTokens = estimateTokens(messagesToSummarize);\n const summaryMessage: UIMessage = {\n id: generateId(),\n role: \"assistant\",\n parts: [{ type: \"text\", text: summaryText }],\n metadata: {\n type: \"context-summary\",\n messagesSummarized: messagesToSummarize.length,\n originalTokens,\n summaryTokens: estimateTokens([\n {\n id: \"tmp\",\n role: \"assistant\",\n parts: [{ type: \"text\", text: summaryText }],\n },\n ]),\n createdAt: new Date().toISOString(),\n } satisfies ContextSummaryMetadata,\n };\n\n return [summaryMessage, ...recentMessages];\n },\n\n getMessageMetadata(): ContextMessageMetadata {\n return {\n context: {\n inputTokens: requestInputTokens,\n outputTokens: requestOutputTokens,\n totalTokens: requestInputTokens + requestOutputTokens,\n usageRatio: computeUsageRatio(),\n },\n };\n },\n\n reset() {\n requestInputTokens = 0;\n requestOutputTokens = 0;\n estimatedContextSize = 0;\n },\n };\n\n // Patch getState to include summaryCount dynamically\n const originalGetState = manager.getState.bind(manager);\n manager.getState = function getStateWithSummaryCount() {\n const state = originalGetState();\n // summaryCount is derived; caller should use countSummaries if needed\n return state;\n };\n\n return manager;\n}\n\nexport { countSummaries 
};\n"],"mappings":";AAGA,IAAM,kBAAkB;AAGxB,IAAM,mBAAmB;AAGzB,IAAM,wBAAwB;AASvB,SAAS,eAAe,UAA+B;AAC5D,MAAI,aAAa;AAEjB,aAAW,OAAO,UAAU;AAC1B,kBAAc,mBAAmB;AAEjC,eAAW,QAAQ,IAAI,OAAO;AAC5B,cAAQ,KAAK,MAAM;AAAA,QACjB,KAAK;AACH,yBAAgB,KAA0B,QAAQ,IAAI;AACtD;AAAA,QAEF,KAAK,mBAAmB;AAEtB,gBAAM,MAAM;AAKZ,cAAI,IAAI,SAAU,eAAc,IAAI,SAAS;AAC7C,cAAI,IAAI,MAAO,eAAc,KAAK,UAAU,IAAI,KAAK,EAAE;AACvD,cAAI,IAAI,OAAQ,eAAc,KAAK,UAAU,IAAI,MAAM,EAAE;AACzD;AAAA,QACF;AAAA,QAEA,KAAK,QAAQ;AAEX,gBAAM,OAAO;AACb,yBAAe,KAAK,OAAO,IAAI;AAC/B;AAAA,QACF;AAAA,QAEA;AAEE,cAAI;AACF,0BAAc,KAAK,UAAU,IAAI,EAAE;AAAA,UACrC,QAAQ;AACN,0BAAc;AAAA,UAChB;AAAA,MACJ;AAAA,IACF;AAAA,EACF;AAEA,SAAO,KAAK,KAAK,aAAa,eAAe,IAAI;AACnD;;;AC3DA,IAAM,mBACJ;AAYK,SAAS,gBACd,UACA,cACiD;AACjD,QAAM,aAAa,qBAAqB,QAAQ;AAEhD,MAAI,eAAe,IAAI;AACrB,WAAO,EAAE,UAAU,aAAa;AAAA,EAClC;AAEA,QAAM,iBAAiB,SAAS,UAAU;AAC1C,QAAM,cAAc,mBAAmB,cAAc;AACrD,QAAM,kBAAkB,SAAS,MAAM,aAAa,CAAC;AAErD,SAAO;AAAA,IACL,UAAU;AAAA,IACV,cAAc,eAAe,mBAAmB;AAAA,EAClD;AACF;AAMO,SAAS,qBAAqB,UAA+B;AAClE,WAAS,IAAI,SAAS,SAAS,GAAG,KAAK,GAAG,KAAK;AAC7C,UAAM,OAAO,SAAS,CAAC,EAAE;AACzB,QAAI,MAAM,SAAS,mBAAmB;AACpC,aAAO;AAAA,IACT;AAAA,EACF;AACA,SAAO;AACT;AAGO,SAAS,eAAe,UAA+B;AAC5D,MAAI,QAAQ;AACZ,aAAW,OAAO,UAAU;AAC1B,UAAM,OAAO,IAAI;AACjB,QAAI,MAAM,SAAS,kBAAmB;AAAA,EACxC;AACA,SAAO;AACT;AAGA,SAAS,mBAAmB,SAA4B;AACtD,aAAW,QAAQ,QAAQ,OAAO;AAChC,QAAI,KAAK,SAAS,QAAQ;AACxB,aAAQ,KAA0B,QAAQ;AAAA,IAC5C;AAAA,EACF;AACA,SAAO;AACT;;;ACpEA,SAAS,oBAAoC;AAE7C,IAAM,yBAAyB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAU/B,IAAM,qBAAqB;AAAA;AAAA;AAM3B,eAAsB,UACpB,UACA,SAOiB;AACjB,QAAM,mBAAmB,mBAAmB,QAAQ;AAEpD,MAAI,eAAe,QAAQ,UAAU;AACrC,MAAI,QAAQ,UAAU;AACpB,oBAAgB;AAAA,EAClB;AAEA,QAAM,EAAE,KAAK,IAAI,MAAM,aAAa;AAAA,IAClC,OAAO,QAAQ;AAAA,IACf,QAAQ;AAAA,IACR,QAAQ;AAAA,EACV,CAAC;AAED,SAAO;AACT;AAGA,SAAS,mBAAmB,UAA+B;AACzD,QAAM,QAAkB,CAAC;AAEzB,aAAW,OAAO,UAAU;AAC1B,UAAM,OAAO,IAAI,KAAK,YAAY;AAClC,UAAM,YAAsB,CAAC;AAC7B,UAAM,YAAsB,CAAC;AAE7B,eAAW,QAAQ,IAAI,OAAO;AAC5B,UAAI,KAAK,SAAS,QAAQ;AACxB,cAAM,OAAQ,KAA0B;AACxC,YAAI,MAAM,KAAK,E
AAG,WAAU,KAAK,KAAK,KAAK,CAAC;AAAA,MAC9C,WAAW,KAAK,SAAS,mBAAmB;AAC1C,cAAM,MAAM;AACZ,cAAM,OAAO,IAAI,YAAY;AAC7B,cAAM,QAAQ,IAAI,SAAS;AAC3B,YAAI,UAAU,sBAAsB,IAAI,UAAU,MAAM;AACtD,gBAAM,YACJ,OAAO,IAAI,WAAW,WAClB,IAAI,SACJ,KAAK,UAAU,IAAI,QAAQ,MAAM,CAAC;AAExC,gBAAM,YACJ,UAAU,SAAS,MACf,UAAU,MAAM,GAAG,GAAI,IAAI,oBAC3B;AACN,oBAAU,KAAK,UAAU,IAAI,WAAM,SAAS,GAAG;AAAA,QACjD,OAAO;AACL,oBAAU,KAAK,UAAU,IAAI,KAAK,SAAS,SAAS,IAAI;AAAA,QAC1D;AAAA,MACF;AAAA,IACF;AAEA,UAAM,UAAU,CAAC,GAAG,WAAW,GAAG,SAAS,EAAE,KAAK,IAAI;AACtD,QAAI,QAAQ,KAAK,GAAG;AAClB,YAAM,KAAK,GAAG,IAAI,KAAK,OAAO,EAAE;AAAA,IAClC;AAAA,EACF;AAEA,SAAO,MAAM,KAAK,MAAM;AAC1B;;;ACrEA,IAAM,qBAAqB;AAC3B,IAAM,yBAAyB;AAC/B,IAAM,yBAAyB;AAC/B,IAAM,2BAA2B;AACjC,IAAM,qBAAqB;AAG3B,SAAS,aAAqB;AAC5B,SAAO,OAAO,KAAK,IAAI,CAAC,IAAI,KAAK,OAAO,EAAE,SAAS,EAAE,EAAE,MAAM,GAAG,CAAC,CAAC;AACpE;AAUO,SAAS,qBACd,SACgB;AAChB,QAAM,YAAY,SAAS,aAAa;AACxC,QAAM,gBAAgB,SAAS,iBAAiB;AAChD,QAAM,gBAAgB,SAAS,iBAAiB;AAChD,QAAM,kBAAkB,SAAS,mBAAmB;AACpD,QAAM,oBAAoB,SAAS,qBAAqB;AACxD,QAAMA,kBAAiB,SAAS,kBAAkB;AAGlD,QAAM,kBAAkB,YAAY;AAKpC,MAAI,qBAAqB;AAEzB,MAAI,sBAAsB;AAE1B,MAAI,uBAAuB;AAK3B,WAAS,oBAAoB,UAI3B;AACA,aAAS,IAAI,SAAS,SAAS,GAAG,KAAK,GAAG,KAAK;AAC7C,YAAM,OAAO,SAAS,CAAC,EAAE;AACzB,UAAI,MAAM,SAAS;AACjB,eAAO;AAAA,UACL,aAAa,KAAK,QAAQ,eAAe;AAAA,UACzC,aAAa,KAAK,QAAQ,eAAe;AAAA,UACzC,cAAc,KAAK,QAAQ,gBAAgB;AAAA,QAC7C;AAAA,MACF;AAAA,IACF;AACA,WAAO,EAAE,aAAa,GAAG,aAAa,GAAG,cAAc,EAAE;AAAA,EAC3D;AAEA,WAAS,oBAA4B;AACnC,QAAI,mBAAmB,EAAG,QAAO;AACjC,WAAO,KAAK,IAAI,KAAK,IAAI,uBAAuB,iBAAiB,CAAC,GAAG,CAAC;AAAA,EACxE;AAIA,QAAM,UAA0B;AAAA,IAC9B,WAAyB;AACvB,YAAM,QAAQ,kBAAkB;AAChC,aAAO;AAAA,QACL,kBAAkB;AAAA,QAClB,mBAAmB;AAAA,QACnB;AAAA,QACA;AAAA,QACA,YAAY;AAAA,QACZ,uBAAuB,QAAQ;AAAA,QAC/B,uBAAuB,QAAQ;AAAA,QAC/B,cAAc;AAAA;AAAA,MAChB;AAAA,IACF;AAAA,IAEA,YAAY,OAAO;AACjB,4BAAsB,MAAM,eAAe;AAC3C,6BAAuB,MAAM,gBAAgB;AAG7C,UAAI,MAAM,eAAe,QAAQ,MAAM,cAAc,GAAG;AACtD,+BAAuB,MAAM;AAAA,MAC/B;AAAA,IACF;AAAA,IAEA,eAAe,UAAU;AAEvB,YAAM,EAAE,UAAU,QAAQ,IAAI,gBAAkB,UAAU,EAAE;AAC5D,6BAAuBA,gBAAe,OAAO;AAAA,IAC/C;
AAAA,IAEA,MAAM,gBAAgB,UAAU,cAAc;AAC5C,aAAO,gBAAkB,UAAU,YAAY;AAAA,IACjD;AAAA,IAEA,iBAAiB,UAAU;AAEzB,YAAM,EAAE,UAAU,QAAQ,IAAI,gBAAkB,UAAU,EAAE;AAC5D,YAAM,WAAWA,gBAAe,OAAO;AACvC,aAAO,WAAW,kBAAkB;AAAA,IACtC;AAAA,IAEA,gBAAgB,UAAU;AACxB,YAAM,EAAE,UAAU,QAAQ,IAAI,gBAAkB,UAAU,EAAE;AAC5D,YAAM,WAAWA,gBAAe,OAAO;AACvC,YAAM,QAAQ,WAAW;AACzB,aAAO,QAAQ,iBAAiB,SAAS;AAAA,IAC3C;AAAA,IAEA,MAAM,WAAW,UAAU;AAEzB,UAAI,SAAS,YAAY;AACvB,eAAO,QAAQ,WAAW,QAAQ;AAAA,MACpC;AAEA,UAAI,CAAC,SAAS,cAAc;AAC1B,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAGA,UAAI,SAAS,UAAU,mBAAmB;AACxC,eAAO;AAAA,MACT;AAGA,YAAM,WAAW,SAAS,SAAS;AACnC,YAAM,sBAAsB,SAAS,MAAM,GAAG,QAAQ;AACtD,YAAM,iBAAiB,SAAS,MAAM,QAAQ;AAI9C,YAAM,UAAU,SAAS,SAAS,SAAS,CAAC;AAC5C,YAAM,WACJ,SAAS,SAAS,eAClB,QAAQ,MAAM;AAAA,QACZ,CAAC,MACC,EAAE,SAAS,qBACV,EAAyB,UAAU;AAAA,MACxC;AAEF,YAAM,cAAc,MAAM,UAAU,qBAAqB;AAAA,QACvD,OAAO,QAAQ;AAAA,QACf,QAAQ,QAAQ;AAAA,QAChB;AAAA,MACF,CAAC;AAED,YAAM,iBAAiBA,gBAAe,mBAAmB;AACzD,YAAM,iBAA4B;AAAA,QAChC,IAAI,WAAW;AAAA,QACf,MAAM;AAAA,QACN,OAAO,CAAC,EAAE,MAAM,QAAQ,MAAM,YAAY,CAAC;AAAA,QAC3C,UAAU;AAAA,UACR,MAAM;AAAA,UACN,oBAAoB,oBAAoB;AAAA,UACxC;AAAA,UACA,eAAeA,gBAAe;AAAA,YAC5B;AAAA,cACE,IAAI;AAAA,cACJ,MAAM;AAAA,cACN,OAAO,CAAC,EAAE,MAAM,QAAQ,MAAM,YAAY,CAAC;AAAA,YAC7C;AAAA,UACF,CAAC;AAAA,UACD,YAAW,oBAAI,KAAK,GAAE,YAAY;AAAA,QACpC;AAAA,MACF;AAEA,aAAO,CAAC,gBAAgB,GAAG,cAAc;AAAA,IAC3C;AAAA,IAEA,qBAA6C;AAC3C,aAAO;AAAA,QACL,SAAS;AAAA,UACP,aAAa;AAAA,UACb,cAAc;AAAA,UACd,aAAa,qBAAqB;AAAA,UAClC,YAAY,kBAAkB;AAAA,QAChC;AAAA,MACF;AAAA,IACF;AAAA,IAEA,QAAQ;AACN,2BAAqB;AACrB,4BAAsB;AACtB,6BAAuB;AAAA,IACzB;AAAA,EACF;AAGA,QAAM,mBAAmB,QAAQ,SAAS,KAAK,OAAO;AACtD,UAAQ,WAAW,SAAS,2BAA2B;AACrD,UAAM,QAAQ,iBAAiB;AAE/B,WAAO;AAAA,EACT;AAEA,SAAO;AACT;","names":["estimateTokens"]}
|