npm - botholomew - Versions diffs - 0.18.7 → 0.19.3 - Mend

botholomew 0.18.7 → 0.19.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (32)

package/README.md +56 -2
package/package.json +12 -9
package/src/chat/agent.ts +175 -181
package/src/chat/session.ts +30 -31
package/src/chat/usage.ts +19 -20
package/src/commands/init.ts +20 -0
package/src/config/loader.ts +50 -10
package/src/config/schemas.ts +48 -22
package/src/init/index.ts +12 -5
package/src/init/templates.ts +45 -4
package/src/llm/abort.ts +9 -0
package/src/llm/cache-control.ts +65 -0
package/src/llm/capabilities.ts +155 -0
package/src/llm/error-format.ts +95 -0
package/src/llm/fake.ts +226 -0
package/src/llm/index.ts +19 -0
package/src/llm/provider-options.ts +29 -0
package/src/llm/provider.ts +65 -0
package/src/llm/tools.ts +24 -0
package/src/llm/types.ts +20 -0
package/src/llm/usage.ts +33 -0
package/src/prompts/capabilities.ts +72 -108
package/src/tools/tool.ts +2 -22
package/src/tui/hooks/useMessageQueue.ts +2 -1
package/src/utils/title.ts +21 -22
package/src/worker/context.ts +45 -77
package/src/worker/llm.ts +147 -112
package/src/worker/prompt.ts +1 -1
package/src/worker/schedules.ts +43 -54
package/src/worker/tick.ts +3 -3
package/src/worker/fake-llm.ts +0 -277
package/src/worker/llm-client.ts +0 -12

package/src/chat/session.ts CHANGED Viewed

@@ -1,7 +1,8 @@
-import type { MessageStream } from "@anthropic-ai/sdk/lib/MessageStream";
-import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
+import type { ModelMessage } from "ai";
 import { loadConfig } from "../config/loader.ts";
 import type { BotholomewConfig } from "../config/schemas.ts";
+import type { AbortHandle } from "../llm/abort.ts";
+import { BotholomewLlmError } from "../llm/types.ts";
 import { createMcpxClient, resolveMcpxDir } from "../mcpx/client.ts";
 import { loadSkills } from "../skills/loader.ts";
 import type { SkillDefinition } from "../skills/parser.ts";
@@ -19,51 +20,62 @@ import { type ChatTurnCallbacks, runChatTurn } from "./agent.ts";
 export interface ChatSession {
   threadId: string;
   projectDir: string;
-  config: Required<BotholomewConfig>;
-  messages: MessageParam[];
+  config: BotholomewConfig;
+  messages: ModelMessage[];
   skills: Map<string, SkillDefinition>;
   // biome-ignore lint/suspicious/noExplicitAny: mcpx client
   mcpxClient: any;
   cleanup: () => Promise<void>;
-  /** Set by `runChatTurn` while a `messages.stream(...)` is in flight. */
-  activeStream: MessageStream | null;
+  /** Set by `runChatTurn` while a `streamText(...)` is in flight. */
+  activeAbort: AbortHandle | null;
   /** Esc-driven steer signal — checked at safe points in the chat agent loop. */
   aborted: boolean;
 }
 /**
  * Abort the in-flight LLM stream (if any) and set the steer flag so the chat
- * agent loop short-circuits before issuing another `messages.stream(...)` call.
+ * agent loop short-circuits before issuing another `streamText(...)` call.
  * Safe to call when no stream is active. Returns true if a live stream was aborted.
  */
 export function abortActiveStream(session: ChatSession): boolean {
   session.aborted = true;
-  if (session.activeStream && !session.activeStream.aborted) {
-    session.activeStream.abort();
+  if (session.activeAbort && !session.activeAbort.signal.aborted) {
+    session.activeAbort.controller.abort();
     return true;
   }
   return false;
 }
+function requireProviderCreds(config: BotholomewConfig): void {
+  const { llm } = config;
+  if (llm.provider === "anthropic" && !llm.api_key) {
+    throw new BotholomewLlmError(
+      "no_credentials",
+      "Anthropic provider requires `llm.api_key` (or set ANTHROPIC_API_KEY). Update config/config.json.",
+    );
+  }
+  if (llm.provider === "openai-compatible" && !llm.base_url) {
+    throw new BotholomewLlmError(
+      "no_credentials",
+      "OpenAI-compatible provider requires `llm.base_url`. Update config/config.json.",
+    );
+  }
+}
 export async function startChatSession(
   projectDir: string,
   existingThreadId?: string,
 ): Promise<ChatSession> {
   const config = await loadConfig(projectDir);
-  if (!config.anthropic_api_key) {
-    throw new Error(
-      "no API key found. add anthropic_api_key to config/config.json",
-    );
-  }
+  requireProviderCreds(config);
   await ensureThreadsDir(projectDir);
   let threadId: string;
-  const messages: MessageParam[] = [];
+  const messages: ModelMessage[] = [];
   if (existingThreadId) {
-    // Resume existing thread
     const result = await getThread(projectDir, existingThreadId);
     if (!result) {
       throw new Error(`Thread not found: ${existingThreadId}`);
@@ -71,7 +83,6 @@ export async function startChatSession(
     threadId = existingThreadId;
     await reopenThread(projectDir, threadId);
-    // Rebuild message history from interactions
     let firstUserMessage: string | undefined;
     for (const interaction of result.interactions) {
       if (interaction.kind !== "message") continue;
@@ -83,7 +94,6 @@ export async function startChatSession(
       }
     }
-    // Backfill title for threads that still have the default
     if (result.thread.title === "New chat" && firstUserMessage) {
       void generateThreadTitle(config, projectDir, threadId, firstUserMessage);
     }
@@ -111,7 +121,7 @@ export async function startChatSession(
     skills,
     mcpxClient,
     cleanup,
-    activeStream: null,
+    activeAbort: null,
     aborted: false,
   };
 }
@@ -121,14 +131,10 @@ export async function sendMessage(
   userMessage: string,
   callbacks: ChatTurnCallbacks,
 ): Promise<void> {
-  // Reset steer flag so a previous turn's Esc doesn't poison this one.
   session.aborted = false;
-  // Hot-reload skills so any skill the agent created/edited last turn (or any
-  // out-of-band edit) is visible to slash-command dispatch this turn.
   session.skills = await loadSkills(session.projectDir);
-  // Log and append user message
   await logInteraction(session.projectDir, session.threadId, {
     role: "user",
     kind: "message",
@@ -137,7 +143,6 @@ export async function sendMessage(
   session.messages.push({ role: "user", content: userMessage });
-  // Auto-generate title after first user message in a new thread
   if (session.messages.length === 1) {
     void generateThreadTitle(
       session.config,
@@ -165,16 +170,10 @@ export async function endChatSession(session: ChatSession): Promise<void> {
 /**
  * End the current thread and start a fresh one on the same session.
- * The old thread is persisted (marked ended) and can still be resumed
- * via `botholomew chat --thread-id <id>`. Returns the previous thread
- * ID so callers can display it to the user.
  */
 export async function clearChatSession(
   session: ChatSession,
 ): Promise<{ previousThreadId: string; newThreadId: string }> {
-  // Abort any in-flight stream up front so its callbacks don't continue to
-  // fire into the new thread (caused #190 — old messages reappearing on the
-  // next user submission).
   abortActiveStream(session);
   const previousThreadId = session.threadId;
   await endThread(session.projectDir, previousThreadId);
@@ -186,7 +185,7 @@ export async function clearChatSession(
   );
   session.threadId = newThreadId;
   session.messages.length = 0;
-  session.activeStream = null;
+  session.activeAbort = null;
   session.aborted = false;
   return { previousThreadId, newThreadId };
 }

package/src/chat/usage.ts CHANGED Viewed

@@ -1,31 +1,30 @@
-import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
+import type { ModelMessage } from "ai";
-/** Rough Anthropic-style estimate: ~4 characters per token. */
+/** Rough estimate: ~4 characters per token. */
 const CHARS_PER_TOKEN = 4;
 /**
  * Estimate of where the prompt's bytes went on the most recent assistant
  * turn. The five categories sum to roughly the server-billed input-tokens
- * total — they're estimates derived from string length / 4, so they don't
- * line up exactly with the API's count.
+ * total — they're estimates derived from string length / 4.
  */
 export interface ContextBreakdown {
   /** Files loaded from `prompts/` (always-on plus any contextual matches). */
   prompts: number;
   /** Chat instructions block + MCP guidance + style rules + meta header. */
   instructions: number;
-  /** Anthropic tool schemas (chat-allowed tools + MCP meta-tools). */
+  /** Tool schemas (chat-allowed tools + MCP meta-tools). */
   tools: number;
   /** User and assistant text in the conversation history. */
   messages: number;
-  /** `tool_use` and `tool_result` blocks accumulated during the conversation. */
+  /** `tool-call` and `tool-result` parts accumulated during the conversation. */
   toolIo: number;
 }
 export interface ContextUsage {
   /** Prompt tokens billed by the server (input + cache_read + cache_creation). */
   used: number;
-  /** Model's max input tokens (from the Anthropic Models API). */
+  /** Model's max input tokens. */
   max: number;
   /** Local estimates per section. */
   breakdown: ContextBreakdown;
@@ -36,7 +35,7 @@ export function estimateTokens(chars: number): number {
 }
 /** Walk a `messages` array and split chars into plain text vs. tool I/O. */
-export function partitionMessages(messages: MessageParam[]): {
+export function partitionMessages(messages: ModelMessage[]): {
   textChars: number;
   toolIoChars: number;
 } {
@@ -48,20 +47,20 @@ export function partitionMessages(messages: MessageParam[]): {
       continue;
     }
     if (!Array.isArray(msg.content)) continue;
-    for (const block of msg.content) {
-      if (!("type" in block)) continue;
-      if (block.type === "text") {
-        textChars += block.text.length;
-      } else if (block.type === "tool_use") {
-        toolIoChars += JSON.stringify(block).length;
-      } else if (block.type === "tool_result") {
+    for (const part of msg.content) {
+      const p = part as Record<string, unknown>;
+      if (p.type === "text" && typeof p.text === "string") {
+        textChars += p.text.length;
+      } else if (p.type === "tool-call") {
+        toolIoChars += JSON.stringify(p).length;
+      } else if (p.type === "tool-result") {
+        const out = p.output as { value?: unknown } | undefined;
         toolIoChars +=
-          typeof block.content === "string"
-            ? block.content.length
-            : JSON.stringify(block.content).length;
+          typeof out?.value === "string"
+            ? out.value.length
+            : JSON.stringify(out ?? "").length;
       } else {
-        // image, document, etc. — count under text for now.
-        textChars += JSON.stringify(block).length;
+        textChars += JSON.stringify(p).length;
       }
     }
   }

package/src/commands/init.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { Command } from "commander";
+import type { LlmProvider } from "../config/schemas.ts";
 import { initProject } from "../init/index.ts";
 import { logger } from "../utils/logger.ts";
@@ -9,6 +10,19 @@ function parseScope(value: string): "global" | "project" {
   return value;
 }
+function parseProvider(value: string): LlmProvider {
+  if (
+    value !== "anthropic" &&
+    value !== "ollama" &&
+    value !== "openai-compatible"
+  ) {
+    throw new Error(
+      `provider must be one of: anthropic, ollama, openai-compatible (got "${value}")`,
+    );
+  }
+  return value;
+}
 export function registerInitCommand(program: Command) {
   program
     .command("init")
@@ -27,6 +41,11 @@ export function registerInitCommand(program: Command) {
       'where this project reads its MCPX config: "global" (default; shared ~/.mcpx) or "project" (per-project mcpx/)',
       parseScope,
     )
+    .option(
+      "--provider <provider>",
+      'LLM provider to preconfigure: "anthropic" (default), "ollama" (local), or "openai-compatible" (LM Studio, OpenRouter, etc.)',
+      parseProvider,
+    )
     .action(async (opts) => {
       const dir = program.opts().dir;
       try {
@@ -34,6 +53,7 @@ export function registerInitCommand(program: Command) {
           force: opts.force,
           membotScope: opts.membotScope,
           mcpxScope: opts.mcpxScope,
+          provider: opts.provider,
         });
       } catch (err) {
         logger.error(String(err instanceof Error ? err.message : err));

package/src/config/loader.ts CHANGED Viewed

@@ -1,24 +1,64 @@
 import { getConfigPath } from "../constants.ts";
 import { setLogLevel } from "../utils/logger.ts";
-import { type BotholomewConfig, DEFAULT_CONFIG } from "./schemas.ts";
+import {
+  type BotholomewConfig,
+  DEFAULT_CHUNKER_LLM,
+  DEFAULT_CONFIG,
+  DEFAULT_LLM,
+  type LlmBlock,
+} from "./schemas.ts";
+type DeepPartial<T> = {
+  [K in keyof T]?: T[K] extends object ? Partial<T[K]> : T[K];
+};
+function mergeLlmBlock(
+  defaults: LlmBlock,
+  override: Partial<LlmBlock> | undefined,
+): LlmBlock {
+  return { ...defaults, ...(override ?? {}) };
+}
+function applyEnvOverrides(config: BotholomewConfig): BotholomewConfig {
+  const applyTo = (block: LlmBlock): LlmBlock => {
+    const next = { ...block };
+    if (next.provider === "anthropic" && process.env.ANTHROPIC_API_KEY) {
+      next.api_key = process.env.ANTHROPIC_API_KEY;
+    }
+    if (next.provider === "openai-compatible" && process.env.OPENAI_API_KEY) {
+      if (!next.api_key) next.api_key = process.env.OPENAI_API_KEY;
+    }
+    if (next.provider === "ollama" && process.env.OLLAMA_HOST) {
+      if (!next.base_url) next.base_url = process.env.OLLAMA_HOST;
+    }
+    return next;
+  };
+  return {
+    ...config,
+    llm: applyTo(config.llm),
+    chunker_llm: applyTo(config.chunker_llm),
+  };
+}
 export async function loadConfig(
   projectDir: string,
-): Promise<Required<BotholomewConfig>> {
+): Promise<BotholomewConfig> {
   const configPath = getConfigPath(projectDir);
   const file = Bun.file(configPath);
-  let userConfig: Partial<BotholomewConfig> = {};
+  let userConfig: DeepPartial<BotholomewConfig> = {};
   if (await file.exists()) {
-    userConfig = JSON.parse(await file.text());
+    userConfig = JSON.parse(await file.text()) as DeepPartial<BotholomewConfig>;
   }
-  const config = { ...DEFAULT_CONFIG, ...userConfig };
+  const merged: BotholomewConfig = {
+    ...DEFAULT_CONFIG,
+    ...userConfig,
+    llm: mergeLlmBlock(DEFAULT_LLM, userConfig.llm),
+    chunker_llm: mergeLlmBlock(DEFAULT_CHUNKER_LLM, userConfig.chunker_llm),
+  };
-  // env var overrides take precedence
-  if (process.env.ANTHROPIC_API_KEY) {
-    config.anthropic_api_key = process.env.ANTHROPIC_API_KEY;
-  }
+  const config = applyEnvOverrides(merged);
   setLogLevel(config.log_level);
@@ -27,7 +67,7 @@ export async function loadConfig(
 export async function saveConfig(
   projectDir: string,
-  config: Partial<BotholomewConfig>,
+  config: DeepPartial<BotholomewConfig>,
 ): Promise<void> {
   const configPath = getConfigPath(projectDir);
   await Bun.write(configPath, `${JSON.stringify(config, null, 2)}\n`);

package/src/config/schemas.ts CHANGED Viewed

@@ -1,31 +1,57 @@
 export type Scope = "global" | "project";
+export type LlmProvider = "anthropic" | "ollama" | "openai-compatible";
+export interface LlmBlock {
+  provider: LlmProvider;
+  model: string;
+  /** Base URL for the provider. Required for `openai-compatible`; optional for `ollama` (defaults to `http://localhost:11434`); ignored for `anthropic`. */
+  base_url: string;
+  api_key: string;
+  /** Manual override for the model's max input tokens. `0` means "look it up". */
+  max_input_tokens: number;
+  /** Manual override for tool-calling support; only honored by `openai-compatible` (which has no portable capability probe). */
+  supports_tools: boolean;
+}
 export interface BotholomewConfig {
-  anthropic_api_key?: string;
-  model?: string;
-  chunker_model?: string;
-  embedding_model?: string;
-  embedding_dimension?: number;
-  tick_interval_seconds?: number;
-  max_tick_duration_seconds?: number;
-  system_prompt_override?: string;
-  max_turns?: number;
-  worker_heartbeat_interval_seconds?: number;
-  worker_dead_after_seconds?: number;
-  worker_reap_interval_seconds?: number;
-  worker_stopped_retention_seconds?: number;
-  schedule_min_interval_seconds?: number;
-  schedule_claim_stale_seconds?: number;
-  tui_idle_timeout_seconds?: number;
-  log_level?: string;
-  membot_scope?: Scope;
-  mcpx_scope?: Scope;
+  llm: LlmBlock;
+  chunker_llm: LlmBlock;
+  embedding_model: string;
+  embedding_dimension: number;
+  tick_interval_seconds: number;
+  max_tick_duration_seconds: number;
+  system_prompt_override: string;
+  max_turns: number;
+  worker_heartbeat_interval_seconds: number;
+  worker_dead_after_seconds: number;
+  worker_reap_interval_seconds: number;
+  worker_stopped_retention_seconds: number;
+  schedule_min_interval_seconds: number;
+  schedule_claim_stale_seconds: number;
+  tui_idle_timeout_seconds: number;
+  log_level: string;
+  membot_scope: Scope;
+  mcpx_scope: Scope;
 }
-export const DEFAULT_CONFIG: Required<BotholomewConfig> = {
-  anthropic_api_key: "",
+export const DEFAULT_LLM: LlmBlock = {
+  provider: "anthropic",
   model: "claude-opus-4-6",
-  chunker_model: "claude-haiku-4-5-20251001",
+  base_url: "",
+  api_key: "",
+  max_input_tokens: 0,
+  supports_tools: true,
+};
+export const DEFAULT_CHUNKER_LLM: LlmBlock = {
+  ...DEFAULT_LLM,
+  model: "claude-haiku-4-5-20251001",
+};
+export const DEFAULT_CONFIG: BotholomewConfig = {
+  llm: DEFAULT_LLM,
+  chunker_llm: DEFAULT_CHUNKER_LLM,
   embedding_model: "Xenova/bge-small-en-v1.5",
   embedding_dimension: 384,
   tick_interval_seconds: 300,

package/src/init/index.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { mkdir } from "node:fs/promises";
 import { join } from "node:path";
 import { loadConfig } from "../config/loader.ts";
+import type { LlmProvider } from "../config/schemas.ts";
 import {
   CONFIG_DIR,
   CONFIG_FILENAME,
@@ -28,9 +29,9 @@ import { registerAllTools } from "../tools/registry.ts";
 import { logger } from "../utils/logger.ts";
 import {
   BELIEFS_MD,
+  buildDefaultConfig,
   CAPABILITIES_MD,
   CAPABILITIES_SKILL,
-  DEFAULT_CONFIG,
   DEFAULT_MCPX_SERVERS,
   GOALS_MD,
   STANDUP_SKILL,
@@ -43,6 +44,8 @@ export interface InitOptions {
   membotScope?: "global" | "project";
   /** Override the default `mcpx_scope` written into config/config.json. */
   mcpxScope?: "global" | "project";
+  /** LLM provider to preconfigure the new project against. Default `anthropic`. */
+  provider?: LlmProvider;
 }
 export async function initProject(
@@ -91,7 +94,7 @@ export async function initProject(
   // the seeded defaults so tests and `botholomew init --membot-scope=project`
   // can pick a per-project layout up front.
   const initialConfig = {
-    ...DEFAULT_CONFIG,
+    ...buildDefaultConfig(opts.provider ?? "anthropic"),
     ...(opts.membotScope ? { membot_scope: opts.membotScope } : {}),
     ...(opts.mcpxScope ? { mcpx_scope: opts.mcpxScope } : {}),
   };
@@ -151,10 +154,14 @@ export async function initProject(
   logger.dim(`  workers/         one JSON pidfile per worker (heartbeats)`);
   logger.dim(`  skills/, mcpx/, logs/`);
   logger.dim("");
+  const providerLine =
+    config.llm.provider === "anthropic"
+      ? `  1. Set ANTHROPIC_API_KEY or add \`llm.api_key\` to ${CONFIG_DIR}/${CONFIG_FILENAME}`
+      : config.llm.provider === "ollama"
+        ? `  1. Make sure \`ollama serve\` is running and you've pulled \`${config.llm.model}\``
+        : `  1. Set \`llm.base_url\` (and \`llm.api_key\` if needed) in ${CONFIG_DIR}/${CONFIG_FILENAME}`;
   logger.dim("Next steps:");
-  logger.dim(
-    `  1. Set ANTHROPIC_API_KEY or add it to ${CONFIG_DIR}/${CONFIG_FILENAME}`,
-  );
+  logger.dim(providerLine);
   logger.dim("  2. Run 'botholomew task add' to create your first task");
   logger.dim(
     "  3. Run 'botholomew worker start --persist' to start a background worker",

package/src/init/templates.ts CHANGED Viewed

@@ -1,4 +1,7 @@
-import { DEFAULT_CONFIG as SCHEMA_DEFAULT_CONFIG } from "../config/schemas.ts";
+import {
+  type LlmProvider,
+  DEFAULT_CONFIG as SCHEMA_DEFAULT_CONFIG,
+} from "../config/schemas.ts";
 export const GOALS_MD = `---
 title: Goals
@@ -80,11 +83,49 @@ and currently in progress) and format a brief standup-style update with:
 - Any blockers or waiting items
 `;
-export const DEFAULT_CONFIG = {
-  ...SCHEMA_DEFAULT_CONFIG,
-  anthropic_api_key: "your-api-key-here",
+const PROVIDER_PRESETS: Record<
+  LlmProvider,
+  { llm: { model: string }; chunker_llm: { model: string } }
+> = {
+  anthropic: {
+    llm: { model: "claude-opus-4-6" },
+    chunker_llm: { model: "claude-haiku-4-5-20251001" },
+  },
+  ollama: {
+    llm: { model: "llama3.1:8b" },
+    chunker_llm: { model: "qwen2.5:3b" },
+  },
+  "openai-compatible": {
+    llm: { model: "gpt-4o" },
+    chunker_llm: { model: "gpt-4o-mini" },
+  },
 };
+export function buildDefaultConfig(provider: LlmProvider = "anthropic") {
+  const preset = PROVIDER_PRESETS[provider];
+  const apiKeyPlaceholder = provider === "anthropic" ? "your-api-key-here" : "";
+  const baseUrl = provider === "ollama" ? "http://localhost:11434" : "";
+  return {
+    ...SCHEMA_DEFAULT_CONFIG,
+    llm: {
+      ...SCHEMA_DEFAULT_CONFIG.llm,
+      provider,
+      model: preset.llm.model,
+      base_url: baseUrl,
+      api_key: apiKeyPlaceholder,
+    },
+    chunker_llm: {
+      ...SCHEMA_DEFAULT_CONFIG.chunker_llm,
+      provider,
+      model: preset.chunker_llm.model,
+      base_url: baseUrl,
+      api_key: apiKeyPlaceholder,
+    },
+  };
+}
+export const DEFAULT_CONFIG = buildDefaultConfig("anthropic");
 export const DEFAULT_MCPX_SERVERS = {
   mcpServers: {},
 };

package/src/llm/abort.ts ADDED Viewed

@@ -0,0 +1,9 @@
+export interface AbortHandle {
+  controller: AbortController;
+  signal: AbortSignal;
+}
+export function createAbortHandle(): AbortHandle {
+  const controller = new AbortController();
+  return { controller, signal: controller.signal };
+}

package/src/llm/cache-control.ts ADDED Viewed

@@ -0,0 +1,65 @@
+import type { ModelMessage, SystemModelMessage, ToolSet } from "ai";
+import type { LlmBlock } from "../config/schemas.ts";
+const EPHEMERAL = { type: "ephemeral" as const };
+/**
+ * On Anthropic, mark stable parts of the request with `cacheControl: ephemeral`
+ * so the server can cache the prompt prefix between turns. No-op for other
+ * providers — they receive unchanged inputs.
+ *
+ * - System prompt: passed as a single SystemModelMessage with cacheControl.
+ * - Messages: the last assistant message is marked as a cache breakpoint so the
+ *   conversation prefix up to (and including) it is cached on the next turn.
+ */
+export function withAnthropicCacheBreakpoints<T extends ToolSet>(args: {
+  provider: LlmBlock["provider"];
+  system: string;
+  messages: ModelMessage[];
+  tools: T;
+}): {
+  system: string | SystemModelMessage;
+  messages: ModelMessage[];
+  tools: T;
+} {
+  if (args.provider !== "anthropic") {
+    return {
+      system: args.system,
+      messages: args.messages,
+      tools: args.tools,
+    };
+  }
+  const systemMessage: SystemModelMessage = {
+    role: "system",
+    content: args.system,
+    providerOptions: { anthropic: { cacheControl: EPHEMERAL } },
+  };
+  // Find the index of the last assistant message; mark it as the cache
+  // breakpoint. The Anthropic API caches up to and including that block.
+  let lastAssistantIdx = -1;
+  for (let i = args.messages.length - 1; i >= 0; i--) {
+    if (args.messages[i]?.role === "assistant") {
+      lastAssistantIdx = i;
+      break;
+    }
+  }
+  const messages = args.messages.map((m, i) => {
+    if (i !== lastAssistantIdx) return m;
+    return {
+      ...m,
+      providerOptions: {
+        ...(m.providerOptions ?? {}),
+        anthropic: { cacheControl: EPHEMERAL },
+      },
+    };
+  });
+  return {
+    system: systemMessage,
+    messages,
+    tools: args.tools,
+  };
+}