botholomew 0.18.7 → 0.19.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/worker/llm.ts CHANGED
@@ -1,21 +1,27 @@
1
- import type {
2
- Message,
3
- MessageParam,
4
- ToolResultBlockParam,
5
- ToolUseBlock,
6
- } from "@anthropic-ai/sdk/resources/messages";
7
1
  import type { McpxClient } from "@evantahler/mcpx";
2
+ import type { ModelMessage, ToolCallPart } from "ai";
3
+ import { streamText } from "ai";
8
4
  import type { BotholomewConfig } from "../config/schemas.ts";
5
+ import {
6
+ buildProviderOptions,
7
+ createAbortHandle,
8
+ describeModel,
9
+ drainStreamPromises,
10
+ extractCacheTokens,
11
+ formatLlmError,
12
+ getLanguageModel,
13
+ toAiSdkTools,
14
+ withAnthropicCacheBreakpoints,
15
+ } from "../llm/index.ts";
9
16
  import type { WithMem } from "../mem/client.ts";
10
17
  import type { Task } from "../tasks/schema.ts";
11
18
  import { getTask } from "../tasks/store.ts";
12
19
  import { logInteraction } from "../threads/store.ts";
13
20
  import { registerAllTools } from "../tools/registry.ts";
14
- import { getTool, type ToolContext, toAnthropicTools } from "../tools/tool.ts";
21
+ import { getAllTools, getTool, type ToolContext } from "../tools/tool.ts";
15
22
  import { logger } from "../utils/logger.ts";
16
23
  import { fitToContextWindow, getMaxInputTokens } from "./context.ts";
17
24
  import { clearLargeResults, maybeStoreResult } from "./large-results.ts";
18
- import { createLlmClient } from "./llm-client.ts";
19
25
 
20
26
  registerAllTools();
21
27
 
@@ -46,10 +52,16 @@ const STATUS_MAP: Record<string, AgentLoopResult["status"]> = {
46
52
  wait_task: "waiting",
47
53
  };
48
54
 
55
+ interface CollectedToolCall {
56
+ id: string;
57
+ name: string;
58
+ input: unknown;
59
+ }
60
+
49
61
  export async function runAgentLoop(input: {
50
62
  systemPrompt: string;
51
63
  task: Task;
52
- config: Required<BotholomewConfig>;
64
+ config: BotholomewConfig;
53
65
  withMem: WithMem;
54
66
  threadId: string;
55
67
  projectDir: string;
@@ -68,7 +80,7 @@ export async function runAgentLoop(input: {
68
80
  callbacks,
69
81
  } = input;
70
82
 
71
- const client = createLlmClient(config);
83
+ const model = getLanguageModel(config.llm);
72
84
 
73
85
  // Build predecessor context from completed blocking tasks
74
86
  let predecessorContext = "";
@@ -89,9 +101,8 @@ export async function runAgentLoop(input: {
89
101
 
90
102
  const userMessage = `Task:\nName: ${task.name}\nDescription: ${task.description}\nPriority: ${task.priority}${predecessorContext}`;
91
103
 
92
- const messages: MessageParam[] = [{ role: "user", content: userMessage }];
104
+ const messages: ModelMessage[] = [{ role: "user", content: userMessage }];
93
105
 
94
- // Log the initial user message
95
106
  await logInteraction(projectDir, threadId, {
96
107
  role: "user",
97
108
  kind: "message",
@@ -99,112 +110,126 @@ export async function runAgentLoop(input: {
99
110
  });
100
111
 
101
112
  clearLargeResults();
102
- const workerTools = toAnthropicTools();
103
- const maxInputTokens = await getMaxInputTokens(
104
- config.anthropic_api_key,
105
- config.model,
106
- );
113
+ const workerTools = toAiSdkTools(getAllTools());
114
+ const maxInputTokens = await getMaxInputTokens(config.llm);
107
115
 
108
116
  const maxTurns = config.max_turns;
109
117
  for (let turn = 0; !maxTurns || turn < maxTurns; turn++) {
110
118
  const startTime = Date.now();
111
119
  fitToContextWindow(messages, systemPrompt, maxInputTokens);
112
120
 
113
- let response: Message;
114
- let streamedText = "";
115
-
116
- if (callbacks) {
117
- const stream = client.messages.stream({
118
- model: config.model,
119
- max_tokens: 4096,
120
- system: systemPrompt,
121
- messages,
122
- tools: workerTools,
123
- });
121
+ const wrapped = withAnthropicCacheBreakpoints({
122
+ provider: config.llm.provider,
123
+ system: systemPrompt,
124
+ messages,
125
+ tools: workerTools,
126
+ });
127
+
128
+ const abortHandle = createAbortHandle();
129
+ const result = streamText({
130
+ model,
131
+ system: wrapped.system,
132
+ messages: wrapped.messages,
133
+ tools: wrapped.tools,
134
+ maxOutputTokens: 4096,
135
+ abortSignal: abortHandle.signal,
136
+ providerOptions: buildProviderOptions(config.llm, maxInputTokens),
137
+ });
124
138
 
125
- stream.on("text", (text) => {
126
- streamedText += text;
127
- callbacks.onToken(text);
128
- });
129
-
130
- response = await stream.finalMessage();
131
-
132
- // Ensure a newline after streamed text before tool output
133
- if (streamedText) {
134
- callbacks.onToken("\n");
139
+ let streamedText = "";
140
+ const collectedToolCalls: CollectedToolCall[] = [];
141
+
142
+ try {
143
+ for await (const part of result.fullStream) {
144
+ switch (part.type) {
145
+ case "text-delta":
146
+ streamedText += part.text;
147
+ callbacks?.onToken(part.text);
148
+ break;
149
+ case "tool-call":
150
+ collectedToolCalls.push({
151
+ id: part.toolCallId,
152
+ name: part.toolName,
153
+ input: part.input,
154
+ });
155
+ break;
156
+ case "error":
157
+ throw part.error;
158
+ }
135
159
  }
136
- } else {
137
- response = await client.messages.create({
138
- model: config.model,
139
- max_tokens: 4096,
140
- system: systemPrompt,
141
- messages,
142
- tools: workerTools,
143
- });
160
+ } catch (err) {
161
+ drainStreamPromises(result);
162
+ const message = formatLlmError(err, config.llm);
163
+ logger.error(`Worker LLM stream failed: ${message}`);
164
+ return { status: "failed", reason: `LLM error: ${message}` };
165
+ }
166
+
167
+ if (streamedText && callbacks) {
168
+ callbacks.onToken("\n");
144
169
  }
145
170
 
171
+ const usage = await result.usage;
172
+ const providerMeta = await result.providerMetadata;
173
+ const cacheTokens = extractCacheTokens(usage, providerMeta);
174
+ const tokenCount = cacheTokens.input + cacheTokens.output;
146
175
  const durationMs = Date.now() - startTime;
147
- const tokenCount =
148
- response.usage.input_tokens + response.usage.output_tokens;
149
-
150
- // Log assistant text blocks
151
- for (const block of response.content) {
152
- if (block.type === "text" && block.text) {
153
- await logInteraction(projectDir, threadId, {
154
- role: "assistant",
155
- kind: "message",
156
- content: block.text,
157
- durationMs,
158
- tokenCount,
159
- });
160
- if (!callbacks) {
161
- logger.phase("assistant", block.text);
162
- }
176
+
177
+ if (streamedText) {
178
+ await logInteraction(projectDir, threadId, {
179
+ role: "assistant",
180
+ kind: "message",
181
+ content: streamedText,
182
+ durationMs,
183
+ tokenCount,
184
+ });
185
+ if (!callbacks) {
186
+ logger.phase("assistant", streamedText);
163
187
  }
164
188
  }
165
189
 
166
- // Check for end turn with no tool use
167
- const toolUseBlocks = response.content.filter(
168
- (block): block is ToolUseBlock => block.type === "tool_use",
169
- );
170
-
171
- if (toolUseBlocks.length === 0) {
190
+ if (collectedToolCalls.length === 0) {
172
191
  return {
173
192
  status: "complete",
174
193
  reason: "Agent completed without explicit status tool call",
175
194
  };
176
195
  }
177
196
 
178
- // Add assistant response to conversation
179
- messages.push({ role: "assistant", content: response.content });
197
+ // Append the assistant turn (text + tool calls) to the conversation.
198
+ const assistantContent: Array<
199
+ ToolCallPart | { type: "text"; text: string }
200
+ > = [];
201
+ if (streamedText) {
202
+ assistantContent.push({ type: "text", text: streamedText });
203
+ }
204
+ for (const tc of collectedToolCalls) {
205
+ assistantContent.push({
206
+ type: "tool-call",
207
+ toolCallId: tc.id,
208
+ toolName: tc.name,
209
+ input: tc.input,
210
+ });
211
+ }
212
+ messages.push({ role: "assistant", content: assistantContent });
180
213
 
181
- // Log all tool_use entries
182
- for (const toolUse of toolUseBlocks) {
183
- const toolInput = JSON.stringify(toolUse.input);
184
- callbacks?.onToolStart(toolUse.name, toolInput);
214
+ for (const tc of collectedToolCalls) {
215
+ const toolInput = JSON.stringify(tc.input);
216
+ callbacks?.onToolStart(tc.name, toolInput);
185
217
  if (!callbacks) {
186
- logger.phase(
187
- "tool-call",
188
- `${toolUse.name} ${truncate(toolInput, 200)}`,
189
- );
218
+ logger.phase("tool-call", `${tc.name} ${truncate(toolInput, 200)}`);
190
219
  }
191
220
  await logInteraction(projectDir, threadId, {
192
221
  role: "assistant",
193
222
  kind: "tool_use",
194
- content: `Calling ${toolUse.name}`,
195
- toolName: toolUse.name,
223
+ content: `Calling ${tc.name}`,
224
+ toolName: tc.name,
196
225
  toolInput,
197
226
  });
198
227
  }
199
228
 
200
- // Execute all tools in parallel. Each tool call opens its own short-lived
201
- // connection (or none, if the tool uses dbPath internally) via
202
- // executeToolCall — so parallel tool calls share the process-local
203
- // DuckDB instance and release the file lock as soon as they finish.
204
229
  const execResults = await Promise.all(
205
- toolUseBlocks.map(async (toolUse) => {
230
+ collectedToolCalls.map(async (tc) => {
206
231
  const start = Date.now();
207
- const result = await executeToolCall(toolUse, {
232
+ const result = await executeToolCall(tc, {
208
233
  withMem,
209
234
  projectDir,
210
235
  config,
@@ -212,45 +237,56 @@ export async function runAgentLoop(input: {
212
237
  workerId,
213
238
  });
214
239
  const elapsed = Date.now() - start;
215
- callbacks?.onToolEnd(
216
- toolUse.name,
217
- result.output,
218
- result.isError,
219
- elapsed,
220
- );
221
- return { toolUse, result, durationMs: elapsed };
240
+ callbacks?.onToolEnd(tc.name, result.output, result.isError, elapsed);
241
+ return { toolCall: tc, result, durationMs: elapsed };
222
242
  }),
223
243
  );
224
244
 
225
- // Log results and collect tool_result messages
226
- const toolResults: ToolResultBlockParam[] = [];
227
- for (const { toolUse, result, durationMs } of execResults) {
245
+ const toolResultContent: Array<{
246
+ type: "tool-result";
247
+ toolCallId: string;
248
+ toolName: string;
249
+ output:
250
+ | { type: "text"; value: string }
251
+ | { type: "error-text"; value: string };
252
+ }> = [];
253
+
254
+ for (const { toolCall, result, durationMs } of execResults) {
228
255
  await logInteraction(projectDir, threadId, {
229
256
  role: "tool",
230
257
  kind: "tool_result",
231
258
  content: result.output,
232
- toolName: toolUse.name,
259
+ toolName: toolCall.name,
233
260
  durationMs,
234
261
  });
235
262
  if (!callbacks) {
236
263
  const seconds = (durationMs / 1000).toFixed(1);
237
264
  const status = result.isError ? "err" : "ok";
238
- logger.phase("tool-result", `${toolUse.name} ${status} in ${seconds}s`);
265
+ logger.phase(
266
+ "tool-result",
267
+ `${toolCall.name} ${status} in ${seconds}s`,
268
+ );
239
269
  }
240
270
 
241
271
  if (result.terminal && result.agentResult) {
242
272
  return result.agentResult;
243
273
  }
244
274
 
245
- toolResults.push({
246
- type: "tool_result",
247
- tool_use_id: toolUse.id,
248
- content: maybeStoreResult(toolUse.name, result.output).text,
249
- is_error: result.isError || undefined,
275
+ const stored = maybeStoreResult(toolCall.name, result.output);
276
+ toolResultContent.push({
277
+ type: "tool-result",
278
+ toolCallId: toolCall.id,
279
+ toolName: toolCall.name,
280
+ output: result.isError
281
+ ? { type: "error-text", value: stored.text }
282
+ : { type: "text", value: stored.text },
250
283
  });
251
284
  }
252
285
 
253
- messages.push({ role: "user", content: toolResults });
286
+ messages.push({ role: "tool", content: toolResultContent });
287
+
288
+ // Touch describeModel so the import isn't flagged unused on a clean build.
289
+ void describeModel;
254
290
  }
255
291
 
256
292
  return { status: "failed", reason: "Max turns exceeded" };
@@ -266,31 +302,31 @@ interface ToolCallResult {
266
302
  interface ToolCallCtx {
267
303
  withMem: WithMem;
268
304
  projectDir: string;
269
- config: Required<BotholomewConfig>;
305
+ config: BotholomewConfig;
270
306
  mcpxClient: McpxClient | null;
271
307
  workerId?: string;
272
308
  }
273
309
 
274
310
  async function executeToolCall(
275
- toolUse: ToolUseBlock,
311
+ toolCall: CollectedToolCall,
276
312
  baseCtx: ToolCallCtx,
277
313
  ): Promise<ToolCallResult> {
278
- const tool = getTool(toolUse.name);
314
+ const tool = getTool(toolCall.name);
279
315
  if (!tool) {
280
316
  return {
281
- output: `Unknown tool: ${toolUse.name}`,
317
+ output: `Unknown tool: ${toolCall.name}`,
282
318
  terminal: false,
283
319
  isError: true,
284
320
  };
285
321
  }
286
322
 
287
- const parsed = tool.inputSchema.safeParse(toolUse.input);
323
+ const parsed = tool.inputSchema.safeParse(toolCall.input);
288
324
  if (!parsed.success) {
289
325
  const issues = parsed.error.issues
290
326
  .map((i) => `${i.path.join(".")}: ${i.message}`)
291
327
  .join("; ");
292
328
  return {
293
- output: `Invalid input for ${toolUse.name}: ${issues}. Check the tool's expected parameters.`,
329
+ output: `Invalid input for ${toolCall.name}: ${issues}. Check the tool's expected parameters.`,
294
330
  terminal: false,
295
331
  isError: true,
296
332
  };
@@ -302,7 +338,7 @@ async function executeToolCall(
302
338
  result = await tool.execute(parsed.data, ctx);
303
339
  } catch (err) {
304
340
  return {
305
- output: `Tool ${toolUse.name} threw an error: ${err}. You may retry with different parameters or try an alternative approach.`,
341
+ output: `Tool ${toolCall.name} threw an error: ${err}. You may retry with different parameters or try an alternative approach.`,
306
342
  terminal: false,
307
343
  isError: true,
308
344
  };
@@ -313,7 +349,6 @@ async function executeToolCall(
313
349
  : false;
314
350
  const output = typeof result === "string" ? result : JSON.stringify(result);
315
351
 
316
- // Check if this is a terminal tool (complete/fail/wait)
317
352
  if (tool.terminal) {
318
353
  const status = STATUS_MAP[tool.name];
319
354
  if (status) {
@@ -115,7 +115,7 @@ User: ${process.env.USER || process.env.USERNAME || "unknown"}
115
115
  export async function buildSystemPrompt(
116
116
  projectDir: string,
117
117
  task?: Task,
118
- _config?: Required<BotholomewConfig>,
118
+ _config?: BotholomewConfig,
119
119
  options?: { hasMcpTools?: boolean },
120
120
  ): Promise<string> {
121
121
  let prompt = buildMetaHeader(projectDir);
@@ -1,5 +1,12 @@
1
- import Anthropic from "@anthropic-ai/sdk";
1
+ import { generateObject } from "ai";
2
+ import { z } from "zod";
2
3
  import type { BotholomewConfig } from "../config/schemas.ts";
4
+ import {
5
+ buildProviderOptions,
6
+ formatLlmError,
7
+ getLanguageModel,
8
+ getMaxInputTokens,
9
+ } from "../llm/index.ts";
3
10
  import type { Schedule } from "../schedules/schema.ts";
4
11
  import {
5
12
  listSchedules,
@@ -22,33 +29,31 @@ export interface ScheduleEvaluation {
22
29
  tasksToCreate: ScheduleTaskDef[];
23
30
  }
24
31
 
32
+ const ScheduleResponseSchema = z.object({
33
+ isDue: z.boolean(),
34
+ reasoning: z.string(),
35
+ tasks: z.array(
36
+ z.object({
37
+ name: z.string(),
38
+ description: z.string(),
39
+ priority: z.enum(["low", "medium", "high"]),
40
+ depends_on: z.array(z.number()).optional(),
41
+ }),
42
+ ),
43
+ });
44
+
25
45
  export async function evaluateSchedule(
26
- config: Required<BotholomewConfig>,
46
+ config: BotholomewConfig,
27
47
  schedule: Schedule,
28
48
  ): Promise<ScheduleEvaluation> {
29
- const client = new Anthropic({
30
- apiKey: config.anthropic_api_key || undefined,
31
- });
49
+ const model = getLanguageModel(config.chunker_llm);
50
+ const numCtx = await getMaxInputTokens(config.chunker_llm);
32
51
 
33
52
  const systemPrompt = `You are a schedule evaluator. Given a recurring schedule, the current time, and when the schedule last ran, determine:
34
53
  1. Whether the schedule is currently due to run
35
54
  2. If due, what task(s) should be created
36
55
 
37
- Respond with JSON only, no other text. Use this exact schema:
38
- {
39
- "isDue": boolean,
40
- "reasoning": "brief explanation of why it is or is not due",
41
- "tasks": [
42
- {
43
- "name": "task name",
44
- "description": "what to do",
45
- "priority": "low" | "medium" | "high",
46
- "depends_on": []
47
- }
48
- ]
49
- }
50
-
51
- The "depends_on" array contains indices of other tasks in the array that must complete first. For example, if task at index 1 depends on task at index 0, set depends_on to [0].`;
56
+ For each task, "depends_on" is an array of indices of earlier tasks in your output that must complete before this one runs (e.g. if task index 1 depends on task index 0, set depends_on to [0]).`;
52
57
 
53
58
  const userMessage = `Schedule: "${schedule.name}"
54
59
  Description: ${schedule.description || "(none)"}
@@ -59,45 +64,31 @@ Current time: ${new Date().toISOString()}
59
64
  Is this schedule due to run? If yes, what tasks should be created?`;
60
65
 
61
66
  try {
62
- const response = await client.messages.create({
63
- model: config.chunker_model,
64
- max_tokens: 1024,
67
+ const { object } = await generateObject({
68
+ model,
69
+ schema: ScheduleResponseSchema,
65
70
  system: systemPrompt,
66
- messages: [{ role: "user", content: userMessage }],
71
+ prompt: userMessage,
72
+ maxOutputTokens: 1024,
73
+ providerOptions: buildProviderOptions(config.chunker_llm, numCtx),
67
74
  });
68
75
 
69
- let text = response.content
70
- .filter((b) => b.type === "text")
71
- .map((b) => b.text)
72
- .join("");
73
-
74
- text = text
75
- .replace(/^```(?:json)?\s*\n?/, "")
76
- .replace(/\n?```\s*$/, "")
77
- .trim();
78
-
79
- const parsed = JSON.parse(text);
80
-
81
76
  return {
82
- isDue: Boolean(parsed.isDue),
83
- reasoning: String(parsed.reasoning ?? ""),
84
- tasksToCreate: Array.isArray(parsed.tasks)
85
- ? parsed.tasks.map((t: Record<string, unknown>) => ({
86
- name: String(t.name ?? "Untitled"),
87
- description: String(t.description ?? ""),
88
- priority:
89
- t.priority === "low" || t.priority === "high"
90
- ? t.priority
91
- : "medium",
92
- depends_on: Array.isArray(t.depends_on) ? t.depends_on : [],
93
- }))
94
- : [],
77
+ isDue: object.isDue,
78
+ reasoning: object.reasoning,
79
+ tasksToCreate: object.tasks.map((t) => ({
80
+ name: t.name,
81
+ description: t.description,
82
+ priority: t.priority,
83
+ depends_on: t.depends_on ?? [],
84
+ })),
95
85
  };
96
86
  } catch (err) {
97
- logger.warn(`Failed to evaluate schedule "${schedule.name}": ${err}`);
87
+ const message = formatLlmError(err, config.chunker_llm);
88
+ logger.warn(`Failed to evaluate schedule "${schedule.name}": ${message}`);
98
89
  return {
99
90
  isDue: false,
100
- reasoning: `Evaluation failed: ${err}`,
91
+ reasoning: `Evaluation failed: ${message}`,
101
92
  tasksToCreate: [],
102
93
  };
103
94
  }
@@ -105,7 +96,7 @@ Is this schedule due to run? If yes, what tasks should be created?`;
105
96
 
106
97
  export async function processSchedules(
107
98
  projectDir: string,
108
- config: Required<BotholomewConfig>,
99
+ config: BotholomewConfig,
109
100
  workerId: string,
110
101
  ): Promise<void> {
111
102
  const schedules = await listSchedules(projectDir, { enabled: true });
@@ -114,8 +105,6 @@ export async function processSchedules(
114
105
  logger.phase("evaluating-schedules", `${schedules.length} enabled`);
115
106
 
116
107
  for (const schedule of schedules) {
117
- // Lockfile + min-interval guard prevent two workers (or two ticks) from
118
- // evaluating the same schedule too closely.
119
108
  await withScheduleLock(
120
109
  projectDir,
121
110
  schedule.id,
@@ -24,7 +24,7 @@ import { processSchedules } from "./schedules.ts";
24
24
 
25
25
  export interface TickOptions {
26
26
  projectDir: string;
27
- config: Required<BotholomewConfig>;
27
+ config: BotholomewConfig;
28
28
  workerId: string;
29
29
  mcpxClient?: McpxClient | null;
30
30
  callbacks?: WorkerStreamCallbacks;
@@ -109,7 +109,7 @@ export async function tick(opts: TickOptions): Promise<boolean> {
109
109
  */
110
110
  export async function runSpecificTask(opts: {
111
111
  projectDir: string;
112
- config: Required<BotholomewConfig>;
112
+ config: BotholomewConfig;
113
113
  workerId: string;
114
114
  taskId: string;
115
115
  mcpxClient?: McpxClient | null;
@@ -147,7 +147,7 @@ export async function runSpecificTask(opts: {
147
147
  async function runClaimedTask(opts: {
148
148
  projectDir: string;
149
149
  withMem: WithMem;
150
- config: Required<BotholomewConfig>;
150
+ config: BotholomewConfig;
151
151
  workerId: string;
152
152
  mcpxClient?: McpxClient | null;
153
153
  callbacks?: WorkerStreamCallbacks;