botholomew 0.18.7 → 0.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,8 @@
1
- import type { MessageStream } from "@anthropic-ai/sdk/lib/MessageStream";
2
- import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
1
+ import type { ModelMessage } from "ai";
3
2
  import { loadConfig } from "../config/loader.ts";
4
3
  import type { BotholomewConfig } from "../config/schemas.ts";
4
+ import type { AbortHandle } from "../llm/abort.ts";
5
+ import { BotholomewLlmError } from "../llm/types.ts";
5
6
  import { createMcpxClient, resolveMcpxDir } from "../mcpx/client.ts";
6
7
  import { loadSkills } from "../skills/loader.ts";
7
8
  import type { SkillDefinition } from "../skills/parser.ts";
@@ -19,51 +20,62 @@ import { type ChatTurnCallbacks, runChatTurn } from "./agent.ts";
19
20
  export interface ChatSession {
20
21
  threadId: string;
21
22
  projectDir: string;
22
- config: Required<BotholomewConfig>;
23
- messages: MessageParam[];
23
+ config: BotholomewConfig;
24
+ messages: ModelMessage[];
24
25
  skills: Map<string, SkillDefinition>;
25
26
  // biome-ignore lint/suspicious/noExplicitAny: mcpx client
26
27
  mcpxClient: any;
27
28
  cleanup: () => Promise<void>;
28
- /** Set by `runChatTurn` while a `messages.stream(...)` is in flight. */
29
- activeStream: MessageStream | null;
29
+ /** Set by `runChatTurn` while a `streamText(...)` is in flight. */
30
+ activeAbort: AbortHandle | null;
30
31
  /** Esc-driven steer signal — checked at safe points in the chat agent loop. */
31
32
  aborted: boolean;
32
33
  }
33
34
 
34
35
  /**
35
36
  * Abort the in-flight LLM stream (if any) and set the steer flag so the chat
36
- * agent loop short-circuits before issuing another `messages.stream(...)` call.
37
+ * agent loop short-circuits before issuing another `streamText(...)` call.
37
38
  * Safe to call when no stream is active. Returns true if a live stream was aborted.
38
39
  */
39
40
  export function abortActiveStream(session: ChatSession): boolean {
40
41
  session.aborted = true;
41
- if (session.activeStream && !session.activeStream.aborted) {
42
- session.activeStream.abort();
42
+ if (session.activeAbort && !session.activeAbort.signal.aborted) {
43
+ session.activeAbort.controller.abort();
43
44
  return true;
44
45
  }
45
46
  return false;
46
47
  }
47
48
 
49
/**
 * Fail fast when the chat provider is missing the one setting it cannot run
 * without, instead of letting the first LLM request fail later.
 *
 * - `anthropic` needs a usable `llm.api_key`.
 * - `openai-compatible` needs `llm.base_url`.
 * - Any other provider is not checked here.
 *
 * Blank / whitespace-only values count as missing, and so does the
 * `"your-api-key-here"` placeholder that the init template seeds for Anthropic.
 *
 * @throws BotholomewLlmError with code `"no_credentials"`.
 */
function requireProviderCreds(config: BotholomewConfig): void {
  const { llm } = config;

  // Config comes from hand-edited JSON, so tolerate non-string values and
  // stray whitespace rather than trusting the declared type.
  const text = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  const apiKey = text(llm.api_key);
  const baseUrl = text(llm.base_url);

  // Placeholder written into freshly initialised Anthropic projects; it is
  // truthy, so a plain `!api_key` check would let it through.
  const API_KEY_PLACEHOLDER = "your-api-key-here";

  if (
    llm.provider === "anthropic" &&
    (apiKey === "" || apiKey === API_KEY_PLACEHOLDER)
  ) {
    throw new BotholomewLlmError(
      "no_credentials",
      "Anthropic provider requires `llm.api_key` (or set ANTHROPIC_API_KEY). Update config/config.json.",
    );
  }

  if (llm.provider === "openai-compatible" && baseUrl === "") {
    throw new BotholomewLlmError(
      "no_credentials",
      "OpenAI-compatible provider requires `llm.base_url`. Update config/config.json.",
    );
  }
}
64
+
48
65
  export async function startChatSession(
49
66
  projectDir: string,
50
67
  existingThreadId?: string,
51
68
  ): Promise<ChatSession> {
52
69
  const config = await loadConfig(projectDir);
53
70
 
54
- if (!config.anthropic_api_key) {
55
- throw new Error(
56
- "no API key found. add anthropic_api_key to config/config.json",
57
- );
58
- }
71
+ requireProviderCreds(config);
59
72
 
60
73
  await ensureThreadsDir(projectDir);
61
74
 
62
75
  let threadId: string;
63
- const messages: MessageParam[] = [];
76
+ const messages: ModelMessage[] = [];
64
77
 
65
78
  if (existingThreadId) {
66
- // Resume existing thread
67
79
  const result = await getThread(projectDir, existingThreadId);
68
80
  if (!result) {
69
81
  throw new Error(`Thread not found: ${existingThreadId}`);
@@ -71,7 +83,6 @@ export async function startChatSession(
71
83
  threadId = existingThreadId;
72
84
  await reopenThread(projectDir, threadId);
73
85
 
74
- // Rebuild message history from interactions
75
86
  let firstUserMessage: string | undefined;
76
87
  for (const interaction of result.interactions) {
77
88
  if (interaction.kind !== "message") continue;
@@ -83,7 +94,6 @@ export async function startChatSession(
83
94
  }
84
95
  }
85
96
 
86
- // Backfill title for threads that still have the default
87
97
  if (result.thread.title === "New chat" && firstUserMessage) {
88
98
  void generateThreadTitle(config, projectDir, threadId, firstUserMessage);
89
99
  }
@@ -111,7 +121,7 @@ export async function startChatSession(
111
121
  skills,
112
122
  mcpxClient,
113
123
  cleanup,
114
- activeStream: null,
124
+ activeAbort: null,
115
125
  aborted: false,
116
126
  };
117
127
  }
@@ -121,14 +131,10 @@ export async function sendMessage(
121
131
  userMessage: string,
122
132
  callbacks: ChatTurnCallbacks,
123
133
  ): Promise<void> {
124
- // Reset steer flag so a previous turn's Esc doesn't poison this one.
125
134
  session.aborted = false;
126
135
 
127
- // Hot-reload skills so any skill the agent created/edited last turn (or any
128
- // out-of-band edit) is visible to slash-command dispatch this turn.
129
136
  session.skills = await loadSkills(session.projectDir);
130
137
 
131
- // Log and append user message
132
138
  await logInteraction(session.projectDir, session.threadId, {
133
139
  role: "user",
134
140
  kind: "message",
@@ -137,7 +143,6 @@ export async function sendMessage(
137
143
 
138
144
  session.messages.push({ role: "user", content: userMessage });
139
145
 
140
- // Auto-generate title after first user message in a new thread
141
146
  if (session.messages.length === 1) {
142
147
  void generateThreadTitle(
143
148
  session.config,
@@ -165,16 +170,10 @@ export async function endChatSession(session: ChatSession): Promise<void> {
165
170
 
166
171
  /**
167
172
  * End the current thread and start a fresh one on the same session.
168
- * The old thread is persisted (marked ended) and can still be resumed
169
- * via `botholomew chat --thread-id <id>`. Returns the previous thread
170
- * ID so callers can display it to the user.
171
173
  */
172
174
  export async function clearChatSession(
173
175
  session: ChatSession,
174
176
  ): Promise<{ previousThreadId: string; newThreadId: string }> {
175
- // Abort any in-flight stream up front so its callbacks don't continue to
176
- // fire into the new thread (caused #190 — old messages reappearing on the
177
- // next user submission).
178
177
  abortActiveStream(session);
179
178
  const previousThreadId = session.threadId;
180
179
  await endThread(session.projectDir, previousThreadId);
@@ -186,7 +185,7 @@ export async function clearChatSession(
186
185
  );
187
186
  session.threadId = newThreadId;
188
187
  session.messages.length = 0;
189
- session.activeStream = null;
188
+ session.activeAbort = null;
190
189
  session.aborted = false;
191
190
  return { previousThreadId, newThreadId };
192
191
  }
package/src/chat/usage.ts CHANGED
@@ -1,31 +1,30 @@
1
- import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
1
+ import type { ModelMessage } from "ai";
2
2
 
3
- /** Rough Anthropic-style estimate: ~4 characters per token. */
3
+ /** Rough estimate: ~4 characters per token. */
4
4
  const CHARS_PER_TOKEN = 4;
5
5
 
6
6
  /**
7
7
  * Estimate of where the prompt's bytes went on the most recent assistant
8
8
  * turn. The five categories sum to roughly the server-billed input-tokens
9
- * total — they're estimates derived from string length / 4, so they don't
10
- * line up exactly with the API's count.
9
+ * total — they're estimates derived from string length / 4.
11
10
  */
12
11
  export interface ContextBreakdown {
13
12
  /** Files loaded from `prompts/` (always-on plus any contextual matches). */
14
13
  prompts: number;
15
14
  /** Chat instructions block + MCP guidance + style rules + meta header. */
16
15
  instructions: number;
17
- /** Anthropic tool schemas (chat-allowed tools + MCP meta-tools). */
16
+ /** Tool schemas (chat-allowed tools + MCP meta-tools). */
18
17
  tools: number;
19
18
  /** User and assistant text in the conversation history. */
20
19
  messages: number;
21
- /** `tool_use` and `tool_result` blocks accumulated during the conversation. */
20
+ /** `tool-call` and `tool-result` parts accumulated during the conversation. */
22
21
  toolIo: number;
23
22
  }
24
23
 
25
24
  export interface ContextUsage {
26
25
  /** Prompt tokens billed by the server (input + cache_read + cache_creation). */
27
26
  used: number;
28
- /** Model's max input tokens (from the Anthropic Models API). */
27
+ /** Model's max input tokens. */
29
28
  max: number;
30
29
  /** Local estimates per section. */
31
30
  breakdown: ContextBreakdown;
@@ -36,7 +35,7 @@ export function estimateTokens(chars: number): number {
36
35
  }
37
36
 
38
37
  /** Walk a `messages` array and split chars into plain text vs. tool I/O. */
39
- export function partitionMessages(messages: MessageParam[]): {
38
+ export function partitionMessages(messages: ModelMessage[]): {
40
39
  textChars: number;
41
40
  toolIoChars: number;
42
41
  } {
@@ -48,20 +47,20 @@ export function partitionMessages(messages: MessageParam[]): {
48
47
  continue;
49
48
  }
50
49
  if (!Array.isArray(msg.content)) continue;
51
- for (const block of msg.content) {
52
- if (!("type" in block)) continue;
53
- if (block.type === "text") {
54
- textChars += block.text.length;
55
- } else if (block.type === "tool_use") {
56
- toolIoChars += JSON.stringify(block).length;
57
- } else if (block.type === "tool_result") {
50
+ for (const part of msg.content) {
51
+ const p = part as Record<string, unknown>;
52
+ if (p.type === "text" && typeof p.text === "string") {
53
+ textChars += p.text.length;
54
+ } else if (p.type === "tool-call") {
55
+ toolIoChars += JSON.stringify(p).length;
56
+ } else if (p.type === "tool-result") {
57
+ const out = p.output as { value?: unknown } | undefined;
58
58
  toolIoChars +=
59
- typeof block.content === "string"
60
- ? block.content.length
61
- : JSON.stringify(block.content).length;
59
+ typeof out?.value === "string"
60
+ ? out.value.length
61
+ : JSON.stringify(out ?? "").length;
62
62
  } else {
63
- // image, document, etc. — count under text for now.
64
- textChars += JSON.stringify(block).length;
63
+ textChars += JSON.stringify(p).length;
65
64
  }
66
65
  }
67
66
  }
@@ -1,4 +1,5 @@
1
1
  import type { Command } from "commander";
2
+ import type { LlmProvider } from "../config/schemas.ts";
2
3
  import { initProject } from "../init/index.ts";
3
4
  import { logger } from "../utils/logger.ts";
4
5
 
@@ -9,6 +10,19 @@ function parseScope(value: string): "global" | "project" {
9
10
  return value;
10
11
  }
11
12
 
13
/**
 * Option parser for `--provider`: accepts exactly one of the supported
 * provider ids and returns it unchanged.
 *
 * @throws Error naming the allowed values and echoing the rejected input.
 */
function parseProvider(value: string): LlmProvider {
  switch (value) {
    case "anthropic":
    case "ollama":
    case "openai-compatible":
      return value;
    default:
      throw new Error(
        `provider must be one of: anthropic, ollama, openai-compatible (got "${value}")`,
      );
  }
}
25
+
12
26
  export function registerInitCommand(program: Command) {
13
27
  program
14
28
  .command("init")
@@ -27,6 +41,11 @@ export function registerInitCommand(program: Command) {
27
41
  'where this project reads its MCPX config: "global" (default; shared ~/.mcpx) or "project" (per-project mcpx/)',
28
42
  parseScope,
29
43
  )
44
+ .option(
45
+ "--provider <provider>",
46
+ 'LLM provider to preconfigure: "anthropic" (default), "ollama" (local), or "openai-compatible" (LM Studio, OpenRouter, etc.)',
47
+ parseProvider,
48
+ )
30
49
  .action(async (opts) => {
31
50
  const dir = program.opts().dir;
32
51
  try {
@@ -34,6 +53,7 @@ export function registerInitCommand(program: Command) {
34
53
  force: opts.force,
35
54
  membotScope: opts.membotScope,
36
55
  mcpxScope: opts.mcpxScope,
56
+ provider: opts.provider,
37
57
  });
38
58
  } catch (err) {
39
59
  logger.error(String(err instanceof Error ? err.message : err));
@@ -1,24 +1,64 @@
1
1
  import { getConfigPath } from "../constants.ts";
2
2
  import { setLogLevel } from "../utils/logger.ts";
3
- import { type BotholomewConfig, DEFAULT_CONFIG } from "./schemas.ts";
3
+ import {
4
+ type BotholomewConfig,
5
+ DEFAULT_CHUNKER_LLM,
6
+ DEFAULT_CONFIG,
7
+ DEFAULT_LLM,
8
+ type LlmBlock,
9
+ } from "./schemas.ts";
10
+
11
/**
 * Loosened view of `T` for user-supplied config: every top-level key may be
 * omitted, and an object-valued key becomes a `Partial` of that object.
 * Only one level of nesting is relaxed; anything deeper keeps its required
 * fields.
 */
type DeepPartial<T> = {
  [Key in keyof T]?: T[Key] extends object ? Partial<T[Key]> : T[Key];
};
14
+
15
/**
 * Layer a user-supplied LLM block over the defaults and return a new object
 * (neither argument is mutated).
 *
 * Fields whose override value is `undefined` or `null` are ignored, so an
 * explicit `"model": null` in the JSON config (or an `undefined` passed
 * programmatically) falls back to the default instead of erasing it.
 *
 * @param defaults Complete block providing every field.
 * @param override Partial block from user config, or `undefined` if absent.
 * @returns A complete block with the defined override fields applied.
 */
function mergeLlmBlock(
  defaults: LlmBlock,
  override: Partial<LlmBlock> | undefined,
): LlmBlock {
  // A plain spread would copy `undefined` / `null` over the defaults, so keep
  // only the fields that actually carry a value.
  const provided = Object.fromEntries(
    Object.entries(override ?? {}).filter(([, value]) => value != null),
  ) as Partial<LlmBlock>;
  return { ...defaults, ...provided };
}
21
+
22
/**
 * Return a copy of `config` whose `llm` and `chunker_llm` blocks have had
 * environment variables applied. The input is not mutated.
 *
 * Per block, depending on its `provider`:
 * - `anthropic`: `ANTHROPIC_API_KEY` replaces `api_key` whenever it is set.
 * - `openai-compatible`: `OPENAI_API_KEY` fills `api_key` only when the block
 *   has none.
 * - `ollama`: `OLLAMA_HOST` fills `base_url` only when the block has none.
 *   The value is trimmed, and `http://` is prefixed when it carries no scheme.
 */
function applyEnvOverrides(config: BotholomewConfig): BotholomewConfig {
  const applyTo = (block: LlmBlock): LlmBlock => {
    const next = { ...block };
    const { ANTHROPIC_API_KEY, OPENAI_API_KEY, OLLAMA_HOST } = process.env;

    if (next.provider === "anthropic" && ANTHROPIC_API_KEY) {
      // The environment wins over the file for Anthropic.
      next.api_key = ANTHROPIC_API_KEY;
    }

    if (next.provider === "openai-compatible" && OPENAI_API_KEY) {
      if (!next.api_key) next.api_key = OPENAI_API_KEY;
    }

    if (next.provider === "ollama" && !next.base_url) {
      const host = OLLAMA_HOST?.trim();
      if (host) {
        // OLLAMA_HOST is conventionally a bare `host:port`; `base_url` is
        // documented as a URL, so add the scheme when it is missing.
        const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(host);
        next.base_url = hasScheme ? host : `http://${host}`;
      }
    }

    return next;
  };

  return {
    ...config,
    llm: applyTo(config.llm),
    chunker_llm: applyTo(config.chunker_llm),
  };
}
4
42
 
5
43
  export async function loadConfig(
6
44
  projectDir: string,
7
- ): Promise<Required<BotholomewConfig>> {
45
+ ): Promise<BotholomewConfig> {
8
46
  const configPath = getConfigPath(projectDir);
9
47
  const file = Bun.file(configPath);
10
48
 
11
- let userConfig: Partial<BotholomewConfig> = {};
49
+ let userConfig: DeepPartial<BotholomewConfig> = {};
12
50
  if (await file.exists()) {
13
- userConfig = JSON.parse(await file.text());
51
+ userConfig = JSON.parse(await file.text()) as DeepPartial<BotholomewConfig>;
14
52
  }
15
53
 
16
- const config = { ...DEFAULT_CONFIG, ...userConfig };
54
+ const merged: BotholomewConfig = {
55
+ ...DEFAULT_CONFIG,
56
+ ...userConfig,
57
+ llm: mergeLlmBlock(DEFAULT_LLM, userConfig.llm),
58
+ chunker_llm: mergeLlmBlock(DEFAULT_CHUNKER_LLM, userConfig.chunker_llm),
59
+ };
17
60
 
18
- // env var overrides take precedence
19
- if (process.env.ANTHROPIC_API_KEY) {
20
- config.anthropic_api_key = process.env.ANTHROPIC_API_KEY;
21
- }
61
+ const config = applyEnvOverrides(merged);
22
62
 
23
63
  setLogLevel(config.log_level);
24
64
 
@@ -27,7 +67,7 @@ export async function loadConfig(
27
67
 
28
68
  export async function saveConfig(
29
69
  projectDir: string,
30
- config: Partial<BotholomewConfig>,
70
+ config: DeepPartial<BotholomewConfig>,
31
71
  ): Promise<void> {
32
72
  const configPath = getConfigPath(projectDir);
33
73
  await Bun.write(configPath, `${JSON.stringify(config, null, 2)}\n`);
@@ -1,31 +1,57 @@
1
1
  export type Scope = "global" | "project";
2
2
 
3
/** Identifiers of the LLM backends a config block can select. */
export type LlmProvider = "anthropic" | "ollama" | "openai-compatible";

/** Settings for a single LLM endpoint (used for both `llm` and `chunker_llm`). */
export interface LlmBlock {
  /** Which backend this block talks to. */
  provider: LlmProvider;
  /** Model identifier as understood by the selected provider. */
  model: string;
  /**
   * Provider base URL.
   * - `openai-compatible`: required.
   * - `ollama`: optional; defaults to `http://localhost:11434`.
   * - `anthropic`: ignored.
   */
  base_url: string;
  /** API key for the provider; empty string when none is configured. */
  api_key: string;
  /** Manual cap on the model's input tokens; `0` means "look it up". */
  max_input_tokens: number;
  /**
   * Manual tool-calling switch. Only `openai-compatible` honors it, because
   * that provider has no portable capability probe.
   */
  supports_tools: boolean;
}
16
+
3
17
  export interface BotholomewConfig {
4
- anthropic_api_key?: string;
5
- model?: string;
6
- chunker_model?: string;
7
- embedding_model?: string;
8
- embedding_dimension?: number;
9
- tick_interval_seconds?: number;
10
- max_tick_duration_seconds?: number;
11
- system_prompt_override?: string;
12
- max_turns?: number;
13
- worker_heartbeat_interval_seconds?: number;
14
- worker_dead_after_seconds?: number;
15
- worker_reap_interval_seconds?: number;
16
- worker_stopped_retention_seconds?: number;
17
- schedule_min_interval_seconds?: number;
18
- schedule_claim_stale_seconds?: number;
19
- tui_idle_timeout_seconds?: number;
20
- log_level?: string;
21
- membot_scope?: Scope;
22
- mcpx_scope?: Scope;
18
+ llm: LlmBlock;
19
+ chunker_llm: LlmBlock;
20
+ embedding_model: string;
21
+ embedding_dimension: number;
22
+ tick_interval_seconds: number;
23
+ max_tick_duration_seconds: number;
24
+ system_prompt_override: string;
25
+ max_turns: number;
26
+ worker_heartbeat_interval_seconds: number;
27
+ worker_dead_after_seconds: number;
28
+ worker_reap_interval_seconds: number;
29
+ worker_stopped_retention_seconds: number;
30
+ schedule_min_interval_seconds: number;
31
+ schedule_claim_stale_seconds: number;
32
+ tui_idle_timeout_seconds: number;
33
+ log_level: string;
34
+ membot_scope: Scope;
35
+ mcpx_scope: Scope;
23
36
  }
24
37
 
25
- export const DEFAULT_CONFIG: Required<BotholomewConfig> = {
26
- anthropic_api_key: "",
38
+ export const DEFAULT_LLM: LlmBlock = {
39
+ provider: "anthropic",
27
40
  model: "claude-opus-4-6",
28
- chunker_model: "claude-haiku-4-5-20251001",
41
+ base_url: "",
42
+ api_key: "",
43
+ max_input_tokens: 0,
44
+ supports_tools: true,
45
+ };
46
+
47
+ export const DEFAULT_CHUNKER_LLM: LlmBlock = {
48
+ ...DEFAULT_LLM,
49
+ model: "claude-haiku-4-5-20251001",
50
+ };
51
+
52
+ export const DEFAULT_CONFIG: BotholomewConfig = {
53
+ llm: DEFAULT_LLM,
54
+ chunker_llm: DEFAULT_CHUNKER_LLM,
29
55
  embedding_model: "Xenova/bge-small-en-v1.5",
30
56
  embedding_dimension: 384,
31
57
  tick_interval_seconds: 300,
package/src/init/index.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  import { mkdir } from "node:fs/promises";
2
2
  import { join } from "node:path";
3
3
  import { loadConfig } from "../config/loader.ts";
4
+ import type { LlmProvider } from "../config/schemas.ts";
4
5
  import {
5
6
  CONFIG_DIR,
6
7
  CONFIG_FILENAME,
@@ -28,9 +29,9 @@ import { registerAllTools } from "../tools/registry.ts";
28
29
  import { logger } from "../utils/logger.ts";
29
30
  import {
30
31
  BELIEFS_MD,
32
+ buildDefaultConfig,
31
33
  CAPABILITIES_MD,
32
34
  CAPABILITIES_SKILL,
33
- DEFAULT_CONFIG,
34
35
  DEFAULT_MCPX_SERVERS,
35
36
  GOALS_MD,
36
37
  STANDUP_SKILL,
@@ -43,6 +44,8 @@ export interface InitOptions {
43
44
  membotScope?: "global" | "project";
44
45
  /** Override the default `mcpx_scope` written into config/config.json. */
45
46
  mcpxScope?: "global" | "project";
47
+ /** LLM provider to preconfigure the new project against. Default `anthropic`. */
48
+ provider?: LlmProvider;
46
49
  }
47
50
 
48
51
  export async function initProject(
@@ -91,7 +94,7 @@ export async function initProject(
91
94
  // the seeded defaults so tests and `botholomew init --membot-scope=project`
92
95
  // can pick a per-project layout up front.
93
96
  const initialConfig = {
94
- ...DEFAULT_CONFIG,
97
+ ...buildDefaultConfig(opts.provider ?? "anthropic"),
95
98
  ...(opts.membotScope ? { membot_scope: opts.membotScope } : {}),
96
99
  ...(opts.mcpxScope ? { mcpx_scope: opts.mcpxScope } : {}),
97
100
  };
@@ -151,10 +154,14 @@ export async function initProject(
151
154
  logger.dim(` workers/ one JSON pidfile per worker (heartbeats)`);
152
155
  logger.dim(` skills/, mcpx/, logs/`);
153
156
  logger.dim("");
157
+ const providerLine =
158
+ config.llm.provider === "anthropic"
159
+ ? ` 1. Set ANTHROPIC_API_KEY or add \`llm.api_key\` to ${CONFIG_DIR}/${CONFIG_FILENAME}`
160
+ : config.llm.provider === "ollama"
161
+ ? ` 1. Make sure \`ollama serve\` is running and you've pulled \`${config.llm.model}\``
162
+ : ` 1. Set \`llm.base_url\` (and \`llm.api_key\` if needed) in ${CONFIG_DIR}/${CONFIG_FILENAME}`;
154
163
  logger.dim("Next steps:");
155
- logger.dim(
156
- ` 1. Set ANTHROPIC_API_KEY or add it to ${CONFIG_DIR}/${CONFIG_FILENAME}`,
157
- );
164
+ logger.dim(providerLine);
158
165
  logger.dim(" 2. Run 'botholomew task add' to create your first task");
159
166
  logger.dim(
160
167
  " 3. Run 'botholomew worker start --persist' to start a background worker",
@@ -1,4 +1,7 @@
1
- import { DEFAULT_CONFIG as SCHEMA_DEFAULT_CONFIG } from "../config/schemas.ts";
1
+ import {
2
+ type LlmProvider,
3
+ DEFAULT_CONFIG as SCHEMA_DEFAULT_CONFIG,
4
+ } from "../config/schemas.ts";
2
5
 
3
6
  export const GOALS_MD = `---
4
7
  title: Goals
@@ -80,11 +83,49 @@ and currently in progress) and format a brief standup-style update with:
80
83
  - Any blockers or waiting items
81
84
  `;
82
85
 
83
- export const DEFAULT_CONFIG = {
84
- ...SCHEMA_DEFAULT_CONFIG,
85
- anthropic_api_key: "your-api-key-here",
86
/** Model ids for the chat (`llm`) and chunker (`chunker_llm`) blocks. */
type ProviderPreset = {
  llm: { model: string };
  chunker_llm: { model: string };
};

/** Per-provider model defaults, keyed by provider id. */
const PROVIDER_PRESETS: Record<LlmProvider, ProviderPreset> = {
  anthropic: {
    llm: { model: "claude-opus-4-6" },
    chunker_llm: { model: "claude-haiku-4-5-20251001" },
  },
  ollama: {
    llm: { model: "llama3.1:8b" },
    chunker_llm: { model: "qwen2.5:3b" },
  },
  "openai-compatible": {
    llm: { model: "gpt-4o" },
    chunker_llm: { model: "gpt-4o-mini" },
  },
};
87
103
 
104
/**
 * Build a config object for the given provider.
 *
 * Starts from the schema defaults and, in both the `llm` and `chunker_llm`
 * blocks, overrides `provider`, `model` (from the provider preset),
 * `base_url` and `api_key`:
 * - `base_url` is `http://localhost:11434` for `ollama`, empty otherwise.
 * - `api_key` is the `your-api-key-here` placeholder for `anthropic`, empty
 *   otherwise.
 *
 * @param provider Provider to target; defaults to `"anthropic"`.
 */
export function buildDefaultConfig(provider: LlmProvider = "anthropic") {
  const { llm: chatPreset, chunker_llm: chunkerPreset } =
    PROVIDER_PRESETS[provider];

  // Fields that are identical in both blocks for a given provider.
  const shared = {
    provider,
    base_url: provider === "ollama" ? "http://localhost:11434" : "",
    api_key: provider === "anthropic" ? "your-api-key-here" : "",
  };

  return {
    ...SCHEMA_DEFAULT_CONFIG,
    llm: {
      ...SCHEMA_DEFAULT_CONFIG.llm,
      ...shared,
      model: chatPreset.model,
    },
    chunker_llm: {
      ...SCHEMA_DEFAULT_CONFIG.chunker_llm,
      ...shared,
      model: chunkerPreset.model,
    },
  };
}
126
+
127
+ export const DEFAULT_CONFIG = buildDefaultConfig("anthropic");
128
+
88
129
  export const DEFAULT_MCPX_SERVERS = {
89
130
  mcpServers: {},
90
131
  };
@@ -0,0 +1,9 @@
1
/** An `AbortController` bundled with its own `signal`. */
export interface AbortHandle {
  /** Controller whose `abort()` fires `signal`. */
  controller: AbortController;
  /** The signal belonging to `controller`. */
  signal: AbortSignal;
}

/**
 * Create a fresh, not-yet-aborted handle.
 *
 * @returns A new controller together with that controller's signal.
 */
export function createAbortHandle(): AbortHandle {
  const fresh = new AbortController();
  const { signal } = fresh;
  return { controller: fresh, signal };
}
@@ -0,0 +1,65 @@
1
+ import type { ModelMessage, SystemModelMessage, ToolSet } from "ai";
2
+ import type { LlmBlock } from "../config/schemas.ts";
3
+
4
const EPHEMERAL = { type: "ephemeral" as const };

/**
 * On Anthropic, mark stable parts of the request with `cacheControl: ephemeral`
 * so the server can cache the prompt prefix between turns. No-op for other
 * providers, which receive their inputs unchanged (same object references).
 *
 * - System prompt: returned as a single SystemModelMessage with cacheControl.
 * - Messages: the last assistant message is marked as a cache breakpoint so the
 *   conversation prefix up to (and including) it is cached on the next turn.
 *   Provider options already on that message are kept, including other
 *   `anthropic` options; only `cacheControl` is set or replaced.
 * - Tools: passed through untouched.
 *
 * The input `messages` array and its elements are never mutated; the marked
 * message is a shallow copy.
 *
 * @param args.provider Provider of the block the request is sent to.
 * @param args.system   System prompt text.
 * @param args.messages Conversation history.
 * @param args.tools    Tool set for the request.
 * @returns The (possibly annotated) `system`, `messages` and `tools`.
 */
export function withAnthropicCacheBreakpoints<T extends ToolSet>(args: {
  provider: LlmBlock["provider"];
  system: string;
  messages: ModelMessage[];
  tools: T;
}): {
  system: string | SystemModelMessage;
  messages: ModelMessage[];
  tools: T;
} {
  if (args.provider !== "anthropic") {
    return {
      system: args.system,
      messages: args.messages,
      tools: args.tools,
    };
  }

  const systemMessage: SystemModelMessage = {
    role: "system",
    content: args.system,
    providerOptions: { anthropic: { cacheControl: EPHEMERAL } },
  };

  // Index of the last assistant message, or -1 when there is none (in which
  // case no message is marked).
  const lastAssistantIdx = args.messages
    .map((m) => m.role)
    .lastIndexOf("assistant");

  const messages = args.messages.map((m, i) => {
    if (i !== lastAssistantIdx) return m;
    return {
      ...m,
      providerOptions: {
        ...(m.providerOptions ?? {}),
        // Merge rather than replace, so Anthropic options already on this
        // message survive being marked as the breakpoint.
        anthropic: {
          ...(m.providerOptions?.anthropic ?? {}),
          cacheControl: EPHEMERAL,
        },
      },
    };
  });

  return {
    system: systemMessage,
    messages,
    tools: args.tools,
  };
}