agents 0.13.1 → 0.13.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"file":"compaction-helpers-fJyf8j4m.js","names":[],"sources":["../src/experimental/memory/utils/tokens.ts","../src/experimental/memory/utils/compaction-helpers.ts"],"sourcesContent":["/**\n * Token Estimation Utilities\n *\n * IMPORTANT: These are heuristic estimates, not actual tokenizer counts.\n *\n * We intentionally avoid real tokenizers (e.g. tiktoken, sentencepiece) because:\n * - A single tiktoken instance costs ~80-120MB of heap\n * - Cloudflare Workers have tight memory limits (128MB)\n * - For compaction thresholds, a conservative estimate is sufficient\n *\n * The hybrid approach (max of character-based and word-based estimates) handles\n * both dense token content (JSON, code) and natural language reasonably well.\n *\n * Calibration notes:\n * - Character-based: ~4 chars per token (conservative, from OpenAI guidance)\n * - Word-based: ~1.3 tokens per word (empirical, from Mastra's memory system)\n * - Per-message overhead: ~4 tokens for role/framing (empirical)\n *\n * These ratios are tuned for English. CJK, emoji-heavy, or highly technical\n * content may have different ratios. The conservative estimates help ensure\n * compaction triggers before context windows are actually exceeded.\n */\n\nimport type { SessionMessage } from \"../session/types\";\n\n/** Approximate characters per token for English text */\nexport const CHARS_PER_TOKEN = 4;\n\n/** Approximate token multiplier per whitespace-separated word */\nexport const WORDS_TOKEN_MULTIPLIER = 1.3;\n\n/** Approximate overhead tokens per message (role, framing) */\nexport const TOKENS_PER_MESSAGE = 4;\n\n/**\n * Estimate token count for a string using a hybrid heuristic.\n *\n * Takes the max of two estimates:\n * - Character-based: `length / 4` — better for dense content (JSON, code, URLs)\n * - Word-based: `words * 1.3` — better for natural language prose\n *\n * This is a heuristic. Do not use where exact counts are required.\n */\nexport function estimateStringTokens(text: string): number {\n if (!text) return 0;\n const charEstimate = text.length / CHARS_PER_TOKEN;\n const wordEstimate =\n text.split(/\\s+/).filter(Boolean).length * WORDS_TOKEN_MULTIPLIER;\n return Math.ceil(Math.max(charEstimate, wordEstimate));\n}\n\nfunction estimateUnknownTokens(value: unknown): number {\n if (value === null || value === undefined) return 0;\n if (typeof value === \"string\") return estimateStringTokens(value);\n\n try {\n return estimateStringTokens(JSON.stringify(value));\n } catch {\n return estimateStringTokens(String(value));\n }\n}\n\n/**\n * Estimate total token count for an array of UIMessages.\n *\n * Walks each message's parts (text, reasoning, tool invocations, tool results)\n * and applies per-message overhead.\n *\n * This is a heuristic. Do not use where exact counts are required.\n */\nexport function estimateMessageTokens(messages: SessionMessage[]): number {\n let tokens = 0;\n for (const msg of messages) {\n tokens += TOKENS_PER_MESSAGE;\n for (const part of msg.parts) {\n if (part.type === \"text\" || part.type === \"reasoning\") {\n tokens += estimateUnknownTokens(part.text ?? part.reasoning);\n } else if (\n part.type.startsWith(\"tool-\") ||\n part.type === \"dynamic-tool\"\n ) {\n tokens += estimateUnknownTokens(part.input);\n tokens += estimateUnknownTokens(part.output ?? part.result);\n } else if (part.text !== undefined) {\n tokens += estimateUnknownTokens(part.text);\n } else if (part.result !== undefined) {\n tokens += estimateUnknownTokens(part.result);\n }\n }\n }\n return tokens;\n}\n","/**\n * Compaction Helpers\n *\n * Utilities for full compaction (LLM-based summarization).\n * Used by the reference compaction implementation and available\n * for custom CompactFunction implementations.\n */\n\nimport type { SessionMessage } from \"../session/types\";\nimport { estimateMessageTokens } from \"./tokens\";\n\n// ── Compaction ID constants ─────────────────────────────────────────\n\n/** Prefix for all compaction messages (overlays and summaries) */\nexport const COMPACTION_PREFIX = \"compaction_\";\n\n/** Check if a message is a compaction message */\nexport function isCompactionMessage(msg: SessionMessage): boolean {\n return msg.id.startsWith(COMPACTION_PREFIX);\n}\n\n// ── Tool Pair Alignment ──────────────────────────────────────────────\n\n/**\n * Check if a message contains tool invocations.\n */\nfunction hasToolCalls(msg: SessionMessage): boolean {\n return msg.parts.some(\n (p) => p.type.startsWith(\"tool-\") || p.type === \"dynamic-tool\"\n );\n}\n\n/**\n * Get tool call IDs from a message's parts.\n */\nfunction getToolCallIds(msg: SessionMessage): Set<string> {\n const ids = new Set<string>();\n for (const part of msg.parts) {\n if (\n (part.type.startsWith(\"tool-\") || part.type === \"dynamic-tool\") &&\n \"toolCallId\" in part\n ) {\n ids.add((part as { toolCallId: string }).toolCallId);\n }\n }\n return ids;\n}\n\n/**\n * Check if a message is a tool result referencing a specific call ID.\n */\nfunction isToolResultFor(msg: SessionMessage, callIds: Set<string>): boolean {\n return msg.parts.some(\n (p) =>\n (p.type.startsWith(\"tool-\") || p.type === \"dynamic-tool\") &&\n \"toolCallId\" in p &&\n callIds.has((p as { toolCallId: string }).toolCallId)\n );\n}\n\n/**\n * Align a boundary index forward to avoid splitting tool call/result groups.\n * If the boundary falls between an assistant message with tool calls and its\n * tool results, move it forward past the results.\n */\nexport function alignBoundaryForward(\n messages: SessionMessage[],\n idx: number\n): number {\n if (idx <= 0 || idx >= messages.length) return idx;\n\n // Check if the message before the boundary has tool calls\n const prev = messages[idx - 1];\n if (prev.role === \"assistant\" && hasToolCalls(prev)) {\n const callIds = getToolCallIds(prev);\n // Skip forward past any tool results for these calls\n while (idx < messages.length && isToolResultFor(messages[idx], callIds)) {\n idx++;\n }\n }\n\n return idx;\n}\n\n/**\n * Align a boundary index backward to avoid splitting tool call/result groups.\n * If the boundary falls in the middle of tool results, move it backward to\n * include the assistant message that made the calls.\n */\nexport function alignBoundaryBackward(\n messages: SessionMessage[],\n idx: number\n): number {\n if (idx <= 0 || idx >= messages.length) return idx;\n\n // If the message at idx is a tool result, walk backward to find the call\n while (idx > 0) {\n const msg = messages[idx];\n if (msg.role === \"assistant\" && hasToolCalls(msg)) {\n break; // This is a tool call message — include it\n }\n // Check if this looks like a tool result (assistant message following another)\n const prev = messages[idx - 1];\n if (prev.role === \"assistant\" && hasToolCalls(prev)) {\n const callIds = getToolCallIds(prev);\n if (isToolResultFor(msg, callIds)) {\n idx--; // Move back to include the call\n continue;\n }\n }\n break;\n }\n\n return idx;\n}\n\n// ── Token-Budget Tail Protection ─────────────────────────────────────\n\n/**\n * Find the compression end boundary using a token budget for the tail.\n * Walks backward from the end, accumulating tokens until budget is reached.\n * Returns the index where compression should stop (everything from this\n * index onward is protected).\n *\n * @param messages All messages\n * @param headEnd Index where the protected head ends (compression starts here)\n * @param tailTokenBudget Maximum tokens to keep in the tail\n * @param minTailMessages Minimum messages to protect in the tail (fallback)\n */\nexport function findTailCutByTokens(\n messages: SessionMessage[],\n headEnd: number,\n tailTokenBudget = 20000,\n minTailMessages = 2\n): number {\n const n = messages.length;\n let accumulated = 0;\n let tokenCut = n;\n\n for (let i = n - 1; i >= headEnd; i--) {\n const msgTokens = estimateMessageTokens([messages[i]]);\n\n if (accumulated + msgTokens > tailTokenBudget && tokenCut < n) {\n // Budget exceeded and we already have at least one tail message\n break;\n }\n accumulated += msgTokens;\n tokenCut = i;\n }\n\n // Protect whichever is larger: token-based tail or minTailMessages\n const minCut = n - minTailMessages;\n const cutIdx = minCut >= headEnd ? Math.min(tokenCut, minCut) : tokenCut;\n\n // Align to avoid splitting tool groups\n return alignBoundaryBackward(messages, cutIdx);\n}\n\n// ── Tool Pair Sanitization ───────────────────────────────────────────\n\n/**\n * Fix orphaned tool call/result pairs after compaction.\n *\n * Two failure modes:\n * 1. Tool result references a call_id whose assistant tool_call was removed\n * → Remove the orphaned result\n * 2. Assistant has tool_calls whose results were dropped\n * → Add stub results so the API doesn't error\n *\n * @param messages Messages after compaction\n * @returns Sanitized messages with no orphaned pairs\n */\nexport function sanitizeToolPairs(\n messages: SessionMessage[]\n): SessionMessage[] {\n // Build set of surviving tool call IDs (from assistant messages)\n const survivingCallIds = new Set<string>();\n for (const msg of messages) {\n if (msg.role === \"assistant\") {\n for (const id of getToolCallIds(msg)) {\n survivingCallIds.add(id);\n }\n }\n }\n\n // Build set of tool result IDs\n const resultCallIds = new Set<string>();\n for (const msg of messages) {\n for (const part of msg.parts) {\n if (\n (part.type.startsWith(\"tool-\") || part.type === \"dynamic-tool\") &&\n \"toolCallId\" in part &&\n \"output\" in part\n ) {\n resultCallIds.add((part as { toolCallId: string }).toolCallId);\n }\n }\n }\n\n // Remove orphaned results (results whose calls were dropped)\n const orphanedResults = new Set<string>();\n for (const id of resultCallIds) {\n if (!survivingCallIds.has(id)) {\n orphanedResults.add(id);\n }\n }\n\n let result = messages;\n if (orphanedResults.size > 0) {\n result = result.map((msg) => {\n const filteredParts = msg.parts.filter((part) => {\n if (\n (part.type.startsWith(\"tool-\") || part.type === \"dynamic-tool\") &&\n \"toolCallId\" in part &&\n \"output\" in part\n ) {\n return !orphanedResults.has(\n (part as { toolCallId: string }).toolCallId\n );\n }\n return true;\n });\n if (filteredParts.length !== msg.parts.length) {\n return { ...msg, parts: filteredParts } as SessionMessage;\n }\n return msg;\n });\n }\n\n // Add stub results for calls whose results were dropped\n const missingResults = new Set<string>();\n for (const id of survivingCallIds) {\n if (!resultCallIds.has(id) && !orphanedResults.has(id)) {\n missingResults.add(id);\n }\n }\n\n if (missingResults.size > 0) {\n const patched: SessionMessage[] = [];\n for (const msg of result) {\n patched.push(msg);\n if (msg.role === \"assistant\") {\n for (const id of getToolCallIds(msg)) {\n if (missingResults.has(id)) {\n // Find the tool name from the call\n const callPart = msg.parts.find(\n (p) =>\n \"toolCallId\" in p &&\n (p as { toolCallId: string }).toolCallId === id\n ) as { toolName?: string } | undefined;\n\n patched.push({\n id: `stub-${id}`,\n role: \"assistant\",\n parts: [\n {\n type: \"tool-result\" as const,\n toolCallId: id,\n toolName: callPart?.toolName ?? \"unknown\",\n result:\n \"[Result from earlier conversation — see context summary above]\"\n } as unknown as SessionMessage[\"parts\"][number]\n ],\n createdAt: new Date()\n } as SessionMessage);\n }\n }\n }\n }\n result = patched;\n }\n\n // Remove empty messages (all parts filtered out)\n return result.filter((msg) => msg.parts.length > 0);\n}\n\n// ── Summary Budget ───────────────────────────────────────────────────\n\n/**\n * Compute a summary token budget based on the content being compressed.\n * 20% of the compressed content, clamped to 2K-8K tokens.\n */\nexport function computeSummaryBudget(messages: SessionMessage[]): number {\n const contentTokens = estimateMessageTokens(messages);\n // Summary is ~20% of the content being compressed.\n // The summary replaces the compressed middle, so it's sized relative\n // to what it's replacing — not the tail budget (they occupy different\n // slots in the context window).\n const budget = Math.floor(contentTokens * 0.2);\n return Math.max(100, budget);\n}\n\n// ── Structured Summary Prompt ────────────────────────────────────────\n\n/**\n * Build a prompt for LLM summarization of compressed messages.\n *\n * @param messages Messages to summarize\n * @param previousSummary Previous summary for iterative updates (or null for first compaction)\n * @param budget Target token count for the summary\n */\nexport function buildSummaryPrompt(\n messages: SessionMessage[],\n previousSummary: string | null,\n budget: number\n): string {\n const content = messages\n .map((msg) => {\n const textParts = msg.parts\n .filter((p) => p.type === \"text\")\n .map((p) => (p as { text: string }).text)\n .join(\"\\n\");\n\n const toolParts = msg.parts\n .filter((p) => p.type.startsWith(\"tool-\") || p.type === \"dynamic-tool\")\n .map((p) => {\n const tp = p as {\n toolName?: string;\n input?: unknown;\n output?: unknown;\n };\n const parts = [`[Tool: ${tp.toolName ?? \"unknown\"}]`];\n if (tp.input)\n parts.push(`Input: ${JSON.stringify(tp.input).slice(0, 500)}`);\n if (tp.output)\n parts.push(`Output: ${String(tp.output).slice(0, 500)}`);\n return parts.join(\"\\n\");\n })\n .join(\"\\n\");\n\n return `[${msg.role}]\\n${textParts}${toolParts ? \"\\n\" + toolParts : \"\"}`;\n })\n .join(\"\\n\\n---\\n\\n\");\n\n if (previousSummary) {\n return `You are updating a conversation summary. A previous summary exists below. New conversation turns have occurred since then and need to be incorporated.\n\nPREVIOUS SUMMARY:\n${previousSummary}\n\nNEW TURNS TO INCORPORATE:\n${content}\n\nUpdate the summary. PRESERVE existing information that is still relevant. ADD new information. Remove information only if it is clearly obsolete.\n\n## Topic\n[What the conversation is about]\n\n## Key Points\n[Important information, decisions, and conclusions from the conversation]\n\n## Current State\n[Where things stand now — what has been done, what is in progress]\n\n## Open Items\n[Unresolved questions, pending tasks, or next steps discussed]\n\nTarget ~${budget} tokens. Be factual — only include information that was explicitly discussed in the conversation. Do NOT invent file paths, commands, or details that were not mentioned. Write only the summary body.`;\n }\n\n return `Create a concise summary of this conversation that preserves the important information for future context.\n\nCONVERSATION TO SUMMARIZE:\n${content}\n\nUse this structure:\n\n## Topic\n[What the conversation is about]\n\n## Key Points\n[Important information, decisions, and conclusions from the conversation]\n\n## Current State\n[Where things stand now — what has been done, what is in progress]\n\n## Open Items\n[Unresolved questions, pending tasks, or next steps discussed]\n\nTarget ~${budget} tokens. Be factual — only include information that was explicitly discussed in the conversation. Do NOT invent file paths, commands, or details that were not mentioned. Write only the summary body.`;\n}\n\n// ── Reference Compaction Implementation ──────────────────────────────\n\n/**\n * Result of a compaction function — describes the overlay to store.\n */\nexport interface CompactResult {\n /** First message ID in the compacted range */\n fromMessageId: string;\n /** Last message ID in the compacted range */\n toMessageId: string;\n /** Summary text to store as the overlay */\n summary: string;\n}\n\nexport interface CompactOptions {\n /**\n * Function to call the LLM for summarization.\n * Takes a user prompt string, returns the LLM's text response.\n */\n summarize: (prompt: string) => Promise<string>;\n\n /** Number of head messages to protect (default: 2) */\n protectHead?: number;\n\n /** Token budget for tail protection (default: 20000) */\n tailTokenBudget?: number;\n\n /** Minimum tail messages to protect (default: 2) */\n minTailMessages?: number;\n}\n\n/**\n * Reference compaction implementation.\n *\n * Implements the full hermes-style compaction algorithm:\n * 1. Protect head messages (first N)\n * 2. Protect tail by token budget (walk backward)\n * 3. Align boundaries to tool call groups\n * 4. Summarize middle section with LLM (structured format)\n * 5. Sanitize orphaned tool pairs\n * 6. Iterative summary updates on subsequent compactions\n *\n * @example\n * ```typescript\n * import { createCompactFunction } from \"agents/experimental/memory/utils\";\n *\n * const session = new Session(provider, {\n * compaction: {\n * tokenThreshold: 100000,\n * fn: createCompactFunction({\n * summarize: (prompt) => generateText({ model, prompt }).then(r => r.text)\n * })\n * }\n * });\n * ```\n */\nexport function createCompactFunction(opts: CompactOptions) {\n const protectHead = opts.protectHead ?? 3;\n const tailTokenBudget = opts.tailTokenBudget ?? 20000;\n const minTailMessages = opts.minTailMessages ?? 2;\n\n return async (messages: SessionMessage[]): Promise<CompactResult | null> => {\n if (messages.length <= protectHead + minTailMessages) {\n return null;\n }\n\n // 1. Find compression boundaries\n let compressStart = protectHead;\n compressStart = alignBoundaryForward(messages, compressStart);\n\n let compressEnd = findTailCutByTokens(\n messages,\n compressStart,\n tailTokenBudget,\n minTailMessages\n );\n\n if (compressEnd <= compressStart) {\n return null;\n }\n\n // Filter out compaction overlay messages — they have virtual IDs\n // and should not be included in the summary prompt or used as range IDs\n const middleMessages = messages\n .slice(compressStart, compressEnd)\n .filter((m) => !isCompactionMessage(m));\n\n if (middleMessages.length === 0) return null;\n\n // 2. Generate summary — extract previous summary from compaction overlays\n const existingCompaction = messages.find(isCompactionMessage);\n const previousSummary = existingCompaction\n ? existingCompaction.parts\n .filter((p) => p.type === \"text\")\n .map((p) => (p as { text: string }).text)\n .join(\"\\n\")\n : null;\n\n const budget = computeSummaryBudget(middleMessages);\n const prompt = buildSummaryPrompt(middleMessages, previousSummary, budget);\n const summary = await opts.summarize(prompt);\n\n if (!summary.trim()) return null;\n\n return {\n fromMessageId: middleMessages[0].id,\n toMessageId: middleMessages[middleMessages.length - 1].id,\n summary\n };\n };\n}\n"],"mappings":";;AA0BA,MAAa,kBAAkB;;AAG/B,MAAa,yBAAyB;;AAGtC,MAAa,qBAAqB;;;;;;;;;;AAWlC,SAAgB,qBAAqB,MAAsB;CACzD,IAAI,CAAC,MAAM,OAAO;CAClB,MAAM,eAAe,KAAK,SAAA;CAC1B,MAAM,eACJ,KAAK,MAAM,KAAK,EAAE,OAAO,OAAO,EAAE,SAAS;CAC7C,OAAO,KAAK,KAAK,KAAK,IAAI,cAAc,YAAY,CAAC;AACvD;AAEA,SAAS,sBAAsB,OAAwB;CACrD,IAAI,UAAU,QAAQ,UAAU,KAAA,GAAW,OAAO;CAClD,IAAI,OAAO,UAAU,UAAU,OAAO,qBAAqB,KAAK;CAEhE,IAAI;EACF,OAAO,qBAAqB,KAAK,UAAU,KAAK,CAAC;CACnD,QAAQ;EACN,OAAO,qBAAqB,OAAO,KAAK,CAAC;CAC3C;AACF;;;;;;;;;AAUA,SAAgB,sBAAsB,UAAoC;CACxE,IAAI,SAAS;CACb,KAAK,MAAM,OAAO,UAAU;EAC1B,UAAA;EACA,KAAK,MAAM,QAAQ,IAAI,OACrB,IAAI,KAAK,SAAS,UAAU,KAAK,SAAS,aACxC,UAAU,sBAAsB,KAAK,QAAQ,KAAK,SAAS;OACtD,IACL,KAAK,KAAK,WAAW,OAAO,KAC5B,KAAK,SAAS,gBACd;GACA,UAAU,sBAAsB,KAAK,KAAK;GAC1C,UAAU,sBAAsB,KAAK,UAAU,KAAK,MAAM;EAC5D,OAAO,IAAI,KAAK,SAAS,KAAA,GACvB,UAAU,sBAAsB,KAAK,IAAI;OACpC,IAAI,KAAK,WAAW,KAAA,GACzB,UAAU,sBAAsB,KAAK,MAAM;CAGjD;CACA,OAAO;AACT;;;;AC7EA,MAAa,oBAAoB;;AAGjC,SAAgB,oBAAoB,KAA8B;CAChE,OAAO,IAAI,GAAG,WAAW,iBAAiB;AAC5C;;;;AAOA,SAAS,aAAa,KAA8B;CAClD,OAAO,IAAI,MAAM,MACd,MAAM,EAAE,KAAK,WAAW,OAAO,KAAK,EAAE,SAAS,cAClD;AACF;;;;AAKA,SAAS,eAAe,KAAkC;CACxD,MAAM,sBAAM,IAAI,IAAY;CAC5B,KAAK,MAAM,QAAQ,IAAI,OACrB,KACG,KAAK,KAAK,WAAW,OAAO,KAAK,KAAK,SAAS,mBAChD,gBAAgB,MAEhB,IAAI,IAAK,KAAgC,UAAU;CAGvD,OAAO;AACT;;;;AAKA,SAAS,gBAAgB,KAAqB,SAA+B;CAC3E,OAAO,IAAI,MAAM,MACd,OACE,EAAE,KAAK,WAAW,OAAO,KAAK,EAAE,SAAS,mBAC1C,gBAAgB,KAChB,QAAQ,IAAK,EAA6B,UAAU,CACxD;AACF;;;;;;AAOA,SAAgB,qBACd,UACA,KACQ;CACR,IAAI,OAAO,KAAK,OAAO,SAAS,QAAQ,OAAO;CAG/C,MAAM,OAAO,SAAS,MAAM;CAC5B,IAAI,KAAK,SAAS,eAAe,aAAa,IAAI,GAAG;EACnD,MAAM,UAAU,eAAe,IAAI;EAEnC,OAAO,MAAM,SAAS,UAAU,gBAAgB,SAAS,MAAM,OAAO,GACpE;CAEJ;CAEA,OAAO;AACT;;;;;;AAOA,SAAgB,sBACd,UACA,KACQ;CACR,IAAI,OAAO,KAAK,OAAO,SAAS,QAAQ,OAAO;CAG/C,OAAO,MAAM,GAAG;EACd,MAAM,MAAM,SAAS;EACrB,IAAI,IAAI,SAAS,eAAe,aAAa,GAAG,GAC9C;EAGF,MAAM,OAAO,SAAS,MAAM;EAC5B,IAAI,KAAK,SAAS,eAAe,aAAa,IAAI;OAE5C,gBAAgB,KADJ,eAAe,IACA,CAAC,GAAG;IACjC;IACA;GACF;;EAEF;CACF;CAEA,OAAO;AACT;;;;;;;;;;;;AAeA,SAAgB,oBACd,UACA,SACA,kBAAkB,KAClB,kBAAkB,GACV;CACR,MAAM,IAAI,SAAS;CACnB,IAAI,cAAc;CAClB,IAAI,WAAW;CAEf,KAAK,IAAI,IAAI,IAAI,GAAG,KAAK,SAAS,KAAK;EACrC,MAAM,YAAY,sBAAsB,CAAC,SAAS,EAAE,CAAC;EAErD,IAAI,cAAc,YAAY,mBAAmB,WAAW,GAE1D;EAEF,eAAe;EACf,WAAW;CACb;CAGA,MAAM,SAAS,IAAI;CAInB,OAAO,sBAAsB,UAHd,UAAU,UAAU,KAAK,IAAI,UAAU,MAAM,IAAI,QAGnB;AAC/C;;;;;;;;;;;;;AAgBA,SAAgB,kBACd,UACkB;CAElB,MAAM,mCAAmB,IAAI,IAAY;CACzC,KAAK,MAAM,OAAO,UAChB,IAAI,IAAI,SAAS,aACf,KAAK,MAAM,MAAM,eAAe,GAAG,GACjC,iBAAiB,IAAI,EAAE;CAM7B,MAAM,gCAAgB,IAAI,IAAY;CACtC,KAAK,MAAM,OAAO,UAChB,KAAK,MAAM,QAAQ,IAAI,OACrB,KACG,KAAK,KAAK,WAAW,OAAO,KAAK,KAAK,SAAS,mBAChD,gBAAgB,QAChB,YAAY,MAEZ,cAAc,IAAK,KAAgC,UAAU;CAMnE,MAAM,kCAAkB,IAAI,IAAY;CACxC,KAAK,MAAM,MAAM,eACf,IAAI,CAAC,iBAAiB,IAAI,EAAE,GAC1B,gBAAgB,IAAI,EAAE;CAI1B,IAAI,SAAS;CACb,IAAI,gBAAgB,OAAO,GACzB,SAAS,OAAO,KAAK,QAAQ;EAC3B,MAAM,gBAAgB,IAAI,MAAM,QAAQ,SAAS;GAC/C,KACG,KAAK,KAAK,WAAW,OAAO,KAAK,KAAK,SAAS,mBAChD,gBAAgB,QAChB,YAAY,MAEZ,OAAO,CAAC,gBAAgB,IACrB,KAAgC,UACnC;GAEF,OAAO;EACT,CAAC;EACD,IAAI,cAAc,WAAW,IAAI,MAAM,QACrC,OAAO;GAAE,GAAG;GAAK,OAAO;EAAc;EAExC,OAAO;CACT,CAAC;CAIH,MAAM,iCAAiB,IAAI,IAAY;CACvC,KAAK,MAAM,MAAM,kBACf,IAAI,CAAC,cAAc,IAAI,EAAE,KAAK,CAAC,gBAAgB,IAAI,EAAE,GACnD,eAAe,IAAI,EAAE;CAIzB,IAAI,eAAe,OAAO,GAAG;EAC3B,MAAM,UAA4B,CAAC;EACnC,KAAK,MAAM,OAAO,QAAQ;GACxB,QAAQ,KAAK,GAAG;GAChB,IAAI,IAAI,SAAS;SACV,MAAM,MAAM,eAAe,GAAG,GACjC,IAAI,eAAe,IAAI,EAAE,GAAG;KAE1B,MAAM,WAAW,IAAI,MAAM,MACxB,MACC,gBAAgB,KACf,EAA6B,eAAe,EACjD;KAEA,QAAQ,KAAK;MACX,IAAI,QAAQ;MACZ,MAAM;MACN,OAAO,CACL;OACE,MAAM;OACN,YAAY;OACZ,UAAU,UAAU,YAAY;OAChC,QACE;MACJ,CACF;MACA,2BAAW,IAAI,KAAK;KACtB,CAAmB;IACrB;;EAGN;EACA,SAAS;CACX;CAGA,OAAO,OAAO,QAAQ,QAAQ,IAAI,MAAM,SAAS,CAAC;AACpD;;;;;AAQA,SAAgB,qBAAqB,UAAoC;CACvE,MAAM,gBAAgB,sBAAsB,QAAQ;CAKpD,MAAM,SAAS,KAAK,MAAM,gBAAgB,EAAG;CAC7C,OAAO,KAAK,IAAI,KAAK,MAAM;AAC7B;;;;;;;;AAWA,SAAgB,mBACd,UACA,iBACA,QACQ;CACR,MAAM,UAAU,SACb,KAAK,QAAQ;EACZ,MAAM,YAAY,IAAI,MACnB,QAAQ,MAAM,EAAE,SAAS,MAAM,EAC/B,KAAK,MAAO,EAAuB,IAAI,EACvC,KAAK,IAAI;EAEZ,MAAM,YAAY,IAAI,MACnB,QAAQ,MAAM,EAAE,KAAK,WAAW,OAAO,KAAK,EAAE,SAAS,cAAc,EACrE,KAAK,MAAM;GACV,MAAM,KAAK;GAKX,MAAM,QAAQ,CAAC,UAAU,GAAG,YAAY,UAAU,EAAE;GACpD,IAAI,GAAG,OACL,MAAM,KAAK,UAAU,KAAK,UAAU,GAAG,KAAK,EAAE,MAAM,GAAG,GAAG,GAAG;GAC/D,IAAI,GAAG,QACL,MAAM,KAAK,WAAW,OAAO,GAAG,MAAM,EAAE,MAAM,GAAG,GAAG,GAAG;GACzD,OAAO,MAAM,KAAK,IAAI;EACxB,CAAC,EACA,KAAK,IAAI;EAEZ,OAAO,IAAI,IAAI,KAAK,KAAK,YAAY,YAAY,OAAO,YAAY;CACtE,CAAC,EACA,KAAK,aAAa;CAErB,IAAI,iBACF,OAAO;;;EAGT,gBAAgB;;;EAGhB,QAAQ;;;;;;;;;;;;;;;;UAgBA,OAAO;CAGf,OAAO;;;EAGP,QAAQ;;;;;;;;;;;;;;;;UAgBA,OAAO;AACjB;;;;;;;;;;;;;;;;;;;;;;;;;;AA0DA,SAAgB,sBAAsB,MAAsB;CAC1D,MAAM,cAAc,KAAK,eAAe;CACxC,MAAM,kBAAkB,KAAK,mBAAmB;CAChD,MAAM,kBAAkB,KAAK,mBAAmB;CAEhD,OAAO,OAAO,aAA8D;EAC1E,IAAI,SAAS,UAAU,cAAc,iBACnC,OAAO;EAIT,IAAI,gBAAgB;EACpB,gBAAgB,qBAAqB,UAAU,aAAa;EAE5D,IAAI,cAAc,oBAChB,UACA,eACA,iBACA,eACF;EAEA,IAAI,eAAe,eACjB,OAAO;EAKT,MAAM,iBAAiB,SACpB,MAAM,eAAe,WAAW,EAChC,QAAQ,MAAM,CAAC,oBAAoB,CAAC,CAAC;EAExC,IAAI,eAAe,WAAW,GAAG,OAAO;EAGxC,MAAM,qBAAqB,SAAS,KAAK,mBAAmB;EAS5D,MAAM,SAAS,mBAAmB,gBARV,qBACpB,mBAAmB,MAChB,QAAQ,MAAM,EAAE,SAAS,MAAM,EAC/B,KAAK,MAAO,EAAuB,IAAI,EACvC,KAAK,IAAI,IACZ,MAEW,qBAAqB,cACoC,CAAC;EACzE,MAAM,UAAU,MAAM,KAAK,UAAU,MAAM;EAE3C,IAAI,CAAC,QAAQ,KAAK,GAAG,OAAO;EAE5B,OAAO;GACL,eAAe,eAAe,GAAG;GACjC,aAAa,eAAe,eAAe,SAAS,GAAG;GACvD;EACF;CACF;AACF"}
@@ -1,9 +1,12 @@
1
1
  import {
2
- A as SessionOptions,
2
+ A as SessionMessage,
3
3
  C as AgentSessionProvider,
4
4
  D as StoredCompaction,
5
5
  E as SessionProvider,
6
- O as SessionMessage,
6
+ M as SessionOptions,
7
+ N as SessionTokenCounter,
8
+ O as CompactAfterOptions,
9
+ P as SessionTokenCounterInput,
7
10
  S as isSearchProvider,
8
11
  T as SearchResult,
9
12
  _ as R2SkillProvider,
@@ -11,7 +14,8 @@ import {
11
14
  f as ContextBlock,
12
15
  g as isWritableProvider,
13
16
  h as WritableContextProvider,
14
- k as SessionMessagePart,
17
+ j as SessionMessagePart,
18
+ k as CompactionErrorHandler,
15
19
  m as ContextProvider,
16
20
  p as ContextConfig,
17
21
  r as CompactResult,
@@ -19,7 +23,7 @@ import {
19
23
  w as SqlProvider,
20
24
  x as SearchProvider,
21
25
  y as isSkillProvider
22
- } from "../../../compaction-helpers-DAe-xiVY.js";
26
+ } from "../../../compaction-helpers-B-pG5J22.js";
23
27
  import { ToolSet } from "ai";
24
28
 
25
29
  //#region src/experimental/memory/session/session.d.ts
@@ -56,6 +60,8 @@ declare class Session {
56
60
  private _cachedPrompt?;
57
61
  private _compactionFn?;
58
62
  private _tokenThreshold?;
63
+ private _tokenCounter?;
64
+ private _compactionErrorHandler?;
59
65
  private _ready;
60
66
  private _restorePromise?;
61
67
  private _messageChangeListener?;
@@ -104,8 +110,19 @@ declare class Session {
104
110
  /**
105
111
  * Auto-compact when estimated token count exceeds the threshold.
106
112
  * Checked after each `appendMessage`. Requires `onCompaction()`.
113
+ *
114
+ * By default this uses a Workers-safe heuristic over stored messages plus
115
+ * the Session-managed frozen system prompt. Provide `tokenCounter` when you
116
+ * have model-reported usage or a tokenizer and need a stricter budget.
117
+ */
118
+ compactAfter(tokenThreshold: number, options?: CompactAfterOptions): this;
119
+ /**
120
+ * Handle failures from the automatic `compactAfter()` trigger.
121
+ *
122
+ * Manual `compact()` still reports errors through the existing session error
123
+ * broadcast path.
107
124
  */
108
- compactAfter(tokenThreshold: number): this;
125
+ onCompactionError(handler: CompactionErrorHandler): this;
109
126
  /**
110
127
  * @internal
111
128
  * Framework hook for cache-owning callers that need to mirror message
@@ -153,6 +170,9 @@ declare class Session {
153
170
  getBranches(messageId: string): Promise<SessionMessage[]>;
154
171
  getPathLength(leafId?: string | null): Promise<number>;
155
172
  private _broadcast;
173
+ private _shouldEstimateTokens;
174
+ private _estimateTokenCount;
175
+ private _handleAutoCompactionError;
156
176
  private _emitStatus;
157
177
  private _emitError;
158
178
  appendMessage(
@@ -263,6 +283,8 @@ declare class SessionManager {
263
283
  private _cachedPrompt?;
264
284
  private _compactionFn?;
265
285
  private _tokenThreshold?;
286
+ private _tokenCounter?;
287
+ private _compactionErrorHandler?;
266
288
  private _sessions;
267
289
  private _historyLabel?;
268
290
  private _tableReady;
@@ -299,7 +321,11 @@ declare class SessionManager {
299
321
  * Auto-compact when estimated token count exceeds the threshold.
300
322
  * Propagated to all sessions. Requires `onCompaction()`.
301
323
  */
302
- compactAfter(tokenThreshold: number): this;
324
+ compactAfter(tokenThreshold: number, options?: CompactAfterOptions): this;
325
+ /**
326
+ * Handle failures from automatic compaction in managed sessions.
327
+ */
328
+ onCompactionError(handler: CompactionErrorHandler): this;
303
329
  /**
304
330
  * Add a searchable context block that searches conversation history
305
331
  * across all sessions managed by this manager.
@@ -540,6 +566,8 @@ export {
540
566
  AgentContextProvider,
541
567
  AgentSearchProvider,
542
568
  AgentSessionProvider,
569
+ type CompactAfterOptions,
570
+ type CompactionErrorHandler,
543
571
  type ContextBlock,
544
572
  type ContextConfig,
545
573
  type ContextProvider,
@@ -561,6 +589,8 @@ export {
561
589
  type SessionMessagePart,
562
590
  type SessionOptions,
563
591
  type SessionProvider,
592
+ type SessionTokenCounter,
593
+ type SessionTokenCounterInput,
564
594
  type SkillProvider,
565
595
  type SqlProvider,
566
596
  type StoredCompaction,
@@ -1,5 +1,5 @@
1
1
  import "../../../types.js";
2
- import { m as estimateStringTokens, p as estimateMessageTokens, t as COMPACTION_PREFIX } from "../../../compaction-helpers-DvcZnvQ1.js";
2
+ import { m as estimateStringTokens, p as estimateMessageTokens, t as COMPACTION_PREFIX } from "../../../compaction-helpers-fJyf8j4m.js";
3
3
  import { z } from "zod";
4
4
  //#region src/experimental/memory/session/search.ts
5
5
  /**
@@ -446,7 +446,7 @@ var ContextBlocks = class {
446
446
  refreshSnapshot() {
447
447
  return this.captureSnapshot();
448
448
  }
449
- captureSnapshot() {
449
+ renderPrompt() {
450
450
  const parts = [];
451
451
  const sep = "═".repeat(46);
452
452
  for (const block of this.blocks.values()) {
@@ -463,7 +463,10 @@ var ContextBlocks = class {
463
463
  else header += " [writable]";
464
464
  parts.push(`${sep}\n${header}\n${sep}\n${block.content}`);
465
465
  }
466
- this.snapshot = parts.join("\n\n");
466
+ return parts.join("\n\n");
467
+ }
468
+ captureSnapshot() {
469
+ this.snapshot = this.renderPrompt();
467
470
  return this.snapshot;
468
471
  }
469
472
  /**
@@ -512,6 +515,23 @@ var ContextBlocks = class {
512
515
  return prompt;
513
516
  }
514
517
  /**
518
+ * Return the prompt text used for token estimation without persisting a new
519
+ * frozen prompt to the prompt store.
520
+ *
521
+ * This still reads an existing cached prompt when present, so estimates match
522
+ * the prompt that inference would reuse. If no cached prompt exists, it loads
523
+ * providers and renders the current blocks without freezing the snapshot.
524
+ */
525
+ async getSystemPromptForEstimate() {
526
+ if (this.snapshot !== null) return this.snapshot;
527
+ if (this.promptStore) {
528
+ const stored = await this.promptStore.get();
529
+ if (stored !== null) return stored;
530
+ }
531
+ if (!this.loaded) await this.load();
532
+ return this.renderPrompt();
533
+ }
534
+ /**
515
535
  * Force reload blocks from providers, re-render the system prompt,
516
536
  * and persist to the store. Use this after block content has changed
517
537
  * or to invalidate the cached prompt.
@@ -1004,6 +1024,8 @@ var Session = class Session {
1004
1024
  this._ready = false;
1005
1025
  this.storage = storage;
1006
1026
  this.context = new ContextBlocks(options?.context ?? [], options?.promptStore);
1027
+ this._tokenCounter = options?.tokenCounter;
1028
+ this._compactionErrorHandler = options?.onCompactionError;
1007
1029
  this._ready = true;
1008
1030
  }
1009
1031
  /**
@@ -1072,9 +1094,24 @@ var Session = class Session {
1072
1094
  /**
1073
1095
  * Auto-compact when estimated token count exceeds the threshold.
1074
1096
  * Checked after each `appendMessage`. Requires `onCompaction()`.
1097
+ *
1098
+ * By default this uses a Workers-safe heuristic over stored messages plus
1099
+ * the Session-managed frozen system prompt. Provide `tokenCounter` when you
1100
+ * have model-reported usage or a tokenizer and need a stricter budget.
1075
1101
  */
1076
- compactAfter(tokenThreshold) {
1102
+ compactAfter(tokenThreshold, options) {
1077
1103
  this._tokenThreshold = tokenThreshold;
1104
+ if (options?.tokenCounter) this._tokenCounter = options.tokenCounter;
1105
+ return this;
1106
+ }
1107
+ /**
1108
+ * Handle failures from the automatic `compactAfter()` trigger.
1109
+ *
1110
+ * Manual `compact()` still reports errors through the existing session error
1111
+ * broadcast path.
1112
+ */
1113
+ onCompactionError(handler) {
1114
+ this._compactionErrorHandler = handler;
1078
1115
  return this;
1079
1116
  }
1080
1117
  /**
@@ -1223,8 +1260,42 @@ var Session = class Session {
1223
1260
  ...data
1224
1261
  }));
1225
1262
  }
1263
+ _shouldEstimateTokens() {
1264
+ return Boolean(this._broadcaster || this._tokenThreshold != null && this._compactionFn);
1265
+ }
1266
+ async _estimateTokenCount() {
1267
+ const messages = await this.getHistory();
1268
+ const systemPrompt = await this.context.getSystemPromptForEstimate();
1269
+ if (this._tokenCounter) {
1270
+ if (!this.context.isLoaded()) await this.context.load();
1271
+ const contextBlocks = this.context.getBlocks();
1272
+ const estimate = await this._tokenCounter({
1273
+ messages,
1274
+ systemPrompt,
1275
+ contextBlocks
1276
+ });
1277
+ return Number.isFinite(estimate) ? Math.max(0, Math.ceil(estimate)) : 0;
1278
+ }
1279
+ return estimateMessageTokens(messages) + estimateStringTokens(systemPrompt);
1280
+ }
1281
+ async _handleAutoCompactionError(error) {
1282
+ const message = error instanceof Error ? error.message : String(error);
1283
+ if (this._compactionErrorHandler) try {
1284
+ await this._compactionErrorHandler(error);
1285
+ } catch (handlerError) {
1286
+ const handlerMessage = handlerError instanceof Error ? handlerError.message : String(handlerError);
1287
+ console.warn(`Session auto-compaction error handler failed: ${handlerMessage}`);
1288
+ }
1289
+ else console.warn(`Session auto-compaction failed: ${message}`);
1290
+ this._emitError(message);
1291
+ }
1226
1292
  async _emitStatus(phase, extra) {
1227
- const tokenEstimate = estimateMessageTokens(await this.getHistory());
1293
+ let tokenEstimate = 0;
1294
+ if (this._shouldEstimateTokens()) try {
1295
+ tokenEstimate = await this._estimateTokenCount();
1296
+ } catch (err) {
1297
+ await this._handleAutoCompactionError(err);
1298
+ }
1228
1299
  this._broadcast("cf_agent_session", {
1229
1300
  phase,
1230
1301
  tokenEstimate,
@@ -1256,7 +1327,9 @@ var Session = class Session {
1256
1327
  let compacted = false;
1257
1328
  if (this._tokenThreshold != null && this._compactionFn && tokenEstimate > this._tokenThreshold) try {
1258
1329
  compacted = Boolean(await this.compact());
1259
- } catch {}
1330
+ } catch (err) {
1331
+ await this._handleAutoCompactionError(err);
1332
+ }
1260
1333
  if (!compacted) await this._notifyMessagesChanged({
1261
1334
  type: "append",
1262
1335
  message,
@@ -1468,6 +1541,8 @@ var SessionManager = class SessionManager {
1468
1541
  mgr._pending = [];
1469
1542
  mgr._compactionFn = null;
1470
1543
  mgr._tokenThreshold = void 0;
1544
+ mgr._tokenCounter = void 0;
1545
+ mgr._compactionErrorHandler = void 0;
1471
1546
  mgr._sessions = /* @__PURE__ */ new Map();
1472
1547
  mgr._tableReady = false;
1473
1548
  mgr._ready = false;
@@ -1496,8 +1571,16 @@ var SessionManager = class SessionManager {
1496
1571
  * Auto-compact when estimated token count exceeds the threshold.
1497
1572
  * Propagated to all sessions. Requires `onCompaction()`.
1498
1573
  */
1499
- compactAfter(tokenThreshold) {
1574
+ compactAfter(tokenThreshold, options) {
1500
1575
  this._tokenThreshold = tokenThreshold;
1576
+ if (options?.tokenCounter) this._tokenCounter = options.tokenCounter;
1577
+ return this;
1578
+ }
1579
+ /**
1580
+ * Handle failures from automatic compaction in managed sessions.
1581
+ */
1582
+ onCompactionError(handler) {
1583
+ this._compactionErrorHandler = handler;
1501
1584
  return this;
1502
1585
  }
1503
1586
  /**
@@ -1576,7 +1659,8 @@ var SessionManager = class SessionManager {
1576
1659
  provider: this._createHistoryProvider()
1577
1660
  });
1578
1661
  if (this._compactionFn) s.onCompaction(this._compactionFn);
1579
- if (this._tokenThreshold != null) s.compactAfter(this._tokenThreshold);
1662
+ if (this._tokenThreshold != null) s.compactAfter(this._tokenThreshold, { tokenCounter: this._tokenCounter });
1663
+ if (this._compactionErrorHandler) s.onCompactionError(this._compactionErrorHandler);
1580
1664
  session = s;
1581
1665
  this._sessions.set(sessionId, session);
1582
1666
  }