npm - botholomew - Versions diffs - 0.18.7 → 0.19.4 - Mend

botholomew 0.18.7 → 0.19.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/README.md +56 -2
package/package.json +12 -9
package/src/chat/agent.ts +175 -181
package/src/chat/session.ts +30 -31
package/src/chat/usage.ts +19 -20
package/src/commands/init.ts +20 -0
package/src/config/loader.ts +79 -10
package/src/config/schemas.ts +48 -22
package/src/init/index.ts +12 -5
package/src/init/templates.ts +45 -4
package/src/llm/abort.ts +9 -0
package/src/llm/cache-control.ts +65 -0
package/src/llm/capabilities.ts +155 -0
package/src/llm/error-format.ts +95 -0
package/src/llm/fake.ts +226 -0
package/src/llm/index.ts +19 -0
package/src/llm/provider-options.ts +29 -0
package/src/llm/provider.ts +65 -0
package/src/llm/tools.ts +24 -0
package/src/llm/types.ts +20 -0
package/src/llm/usage.ts +33 -0
package/src/prompts/capabilities.ts +72 -108
package/src/tools/tool.ts +2 -22
package/src/tui/hooks/useMessageQueue.ts +2 -1
package/src/utils/title.ts +21 -22
package/src/worker/context.ts +45 -77
package/src/worker/llm.ts +147 -112
package/src/worker/prompt.ts +1 -1
package/src/worker/schedules.ts +43 -54
package/src/worker/tick.ts +3 -3
package/src/worker/fake-llm.ts +0 -277
package/src/worker/llm-client.ts +0 -12

package/src/worker/llm.ts CHANGED Viewed

@@ -1,21 +1,27 @@
-import type {
-  Message,
-  MessageParam,
-  ToolResultBlockParam,
-  ToolUseBlock,
-} from "@anthropic-ai/sdk/resources/messages";
 import type { McpxClient } from "@evantahler/mcpx";
+import type { ModelMessage, ToolCallPart } from "ai";
+import { streamText } from "ai";
 import type { BotholomewConfig } from "../config/schemas.ts";
+import {
+  buildProviderOptions,
+  createAbortHandle,
+  describeModel,
+  drainStreamPromises,
+  extractCacheTokens,
+  formatLlmError,
+  getLanguageModel,
+  toAiSdkTools,
+  withAnthropicCacheBreakpoints,
+} from "../llm/index.ts";
 import type { WithMem } from "../mem/client.ts";
 import type { Task } from "../tasks/schema.ts";
 import { getTask } from "../tasks/store.ts";
 import { logInteraction } from "../threads/store.ts";
 import { registerAllTools } from "../tools/registry.ts";
-import { getTool, type ToolContext, toAnthropicTools } from "../tools/tool.ts";
+import { getAllTools, getTool, type ToolContext } from "../tools/tool.ts";
 import { logger } from "../utils/logger.ts";
 import { fitToContextWindow, getMaxInputTokens } from "./context.ts";
 import { clearLargeResults, maybeStoreResult } from "./large-results.ts";
-import { createLlmClient } from "./llm-client.ts";
 registerAllTools();
@@ -46,10 +52,16 @@ const STATUS_MAP: Record<string, AgentLoopResult["status"]> = {
   wait_task: "waiting",
 };
+interface CollectedToolCall {
+  id: string;
+  name: string;
+  input: unknown;
+}
 export async function runAgentLoop(input: {
   systemPrompt: string;
   task: Task;
-  config: Required<BotholomewConfig>;
+  config: BotholomewConfig;
   withMem: WithMem;
   threadId: string;
   projectDir: string;
@@ -68,7 +80,7 @@ export async function runAgentLoop(input: {
     callbacks,
   } = input;
-  const client = createLlmClient(config);
+  const model = getLanguageModel(config.llm);
   // Build predecessor context from completed blocking tasks
   let predecessorContext = "";
@@ -89,9 +101,8 @@ export async function runAgentLoop(input: {
   const userMessage = `Task:\nName: ${task.name}\nDescription: ${task.description}\nPriority: ${task.priority}${predecessorContext}`;
-  const messages: MessageParam[] = [{ role: "user", content: userMessage }];
+  const messages: ModelMessage[] = [{ role: "user", content: userMessage }];
-  // Log the initial user message
   await logInteraction(projectDir, threadId, {
     role: "user",
     kind: "message",
@@ -99,112 +110,126 @@ export async function runAgentLoop(input: {
   });
   clearLargeResults();
-  const workerTools = toAnthropicTools();
-  const maxInputTokens = await getMaxInputTokens(
-    config.anthropic_api_key,
-    config.model,
-  );
+  const workerTools = toAiSdkTools(getAllTools());
+  const maxInputTokens = await getMaxInputTokens(config.llm);
   const maxTurns = config.max_turns;
   for (let turn = 0; !maxTurns || turn < maxTurns; turn++) {
     const startTime = Date.now();
     fitToContextWindow(messages, systemPrompt, maxInputTokens);
-    let response: Message;
-    let streamedText = "";
-    if (callbacks) {
-      const stream = client.messages.stream({
-        model: config.model,
-        max_tokens: 4096,
-        system: systemPrompt,
-        messages,
-        tools: workerTools,
-      });
+    const wrapped = withAnthropicCacheBreakpoints({
+      provider: config.llm.provider,
+      system: systemPrompt,
+      messages,
+      tools: workerTools,
+    });
+    const abortHandle = createAbortHandle();
+    const result = streamText({
+      model,
+      system: wrapped.system,
+      messages: wrapped.messages,
+      tools: wrapped.tools,
+      maxOutputTokens: 4096,
+      abortSignal: abortHandle.signal,
+      providerOptions: buildProviderOptions(config.llm, maxInputTokens),
+    });
-      stream.on("text", (text) => {
-        streamedText += text;
-        callbacks.onToken(text);
-      });
-      response = await stream.finalMessage();
-      // Ensure a newline after streamed text before tool output
-      if (streamedText) {
-        callbacks.onToken("\n");
+    let streamedText = "";
+    const collectedToolCalls: CollectedToolCall[] = [];
+    try {
+      for await (const part of result.fullStream) {
+        switch (part.type) {
+          case "text-delta":
+            streamedText += part.text;
+            callbacks?.onToken(part.text);
+            break;
+          case "tool-call":
+            collectedToolCalls.push({
+              id: part.toolCallId,
+              name: part.toolName,
+              input: part.input,
+            });
+            break;
+          case "error":
+            throw part.error;
+        }
       }
-    } else {
-      response = await client.messages.create({
-        model: config.model,
-        max_tokens: 4096,
-        system: systemPrompt,
-        messages,
-        tools: workerTools,
-      });
+    } catch (err) {
+      drainStreamPromises(result);
+      const message = formatLlmError(err, config.llm);
+      logger.error(`Worker LLM stream failed: ${message}`);
+      return { status: "failed", reason: `LLM error: ${message}` };
+    }
+    if (streamedText && callbacks) {
+      callbacks.onToken("\n");
     }
+    const usage = await result.usage;
+    const providerMeta = await result.providerMetadata;
+    const cacheTokens = extractCacheTokens(usage, providerMeta);
+    const tokenCount = cacheTokens.input + cacheTokens.output;
     const durationMs = Date.now() - startTime;
-    const tokenCount =
-      response.usage.input_tokens + response.usage.output_tokens;
-    // Log assistant text blocks
-    for (const block of response.content) {
-      if (block.type === "text" && block.text) {
-        await logInteraction(projectDir, threadId, {
-          role: "assistant",
-          kind: "message",
-          content: block.text,
-          durationMs,
-          tokenCount,
-        });
-        if (!callbacks) {
-          logger.phase("assistant", block.text);
-        }
+    if (streamedText) {
+      await logInteraction(projectDir, threadId, {
+        role: "assistant",
+        kind: "message",
+        content: streamedText,
+        durationMs,
+        tokenCount,
+      });
+      if (!callbacks) {
+        logger.phase("assistant", streamedText);
       }
     }
-    // Check for end turn with no tool use
-    const toolUseBlocks = response.content.filter(
-      (block): block is ToolUseBlock => block.type === "tool_use",
-    );
-    if (toolUseBlocks.length === 0) {
+    if (collectedToolCalls.length === 0) {
       return {
         status: "complete",
         reason: "Agent completed without explicit status tool call",
       };
     }
-    // Add assistant response to conversation
-    messages.push({ role: "assistant", content: response.content });
+    // Append the assistant turn (text + tool calls) to the conversation.
+    const assistantContent: Array<
+      ToolCallPart | { type: "text"; text: string }
+    > = [];
+    if (streamedText) {
+      assistantContent.push({ type: "text", text: streamedText });
+    }
+    for (const tc of collectedToolCalls) {
+      assistantContent.push({
+        type: "tool-call",
+        toolCallId: tc.id,
+        toolName: tc.name,
+        input: tc.input,
+      });
+    }
+    messages.push({ role: "assistant", content: assistantContent });
-    // Log all tool_use entries
-    for (const toolUse of toolUseBlocks) {
-      const toolInput = JSON.stringify(toolUse.input);
-      callbacks?.onToolStart(toolUse.name, toolInput);
+    for (const tc of collectedToolCalls) {
+      const toolInput = JSON.stringify(tc.input);
+      callbacks?.onToolStart(tc.name, toolInput);
       if (!callbacks) {
-        logger.phase(
-          "tool-call",
-          `${toolUse.name} ${truncate(toolInput, 200)}`,
-        );
+        logger.phase("tool-call", `${tc.name} ${truncate(toolInput, 200)}`);
       }
       await logInteraction(projectDir, threadId, {
         role: "assistant",
         kind: "tool_use",
-        content: `Calling ${toolUse.name}`,
-        toolName: toolUse.name,
+        content: `Calling ${tc.name}`,
+        toolName: tc.name,
         toolInput,
       });
     }
-    // Execute all tools in parallel. Each tool call opens its own short-lived
-    // connection (or none, if the tool uses dbPath internally) via
-    // executeToolCall — so parallel tool calls share the process-local
-    // DuckDB instance and release the file lock as soon as they finish.
     const execResults = await Promise.all(
-      toolUseBlocks.map(async (toolUse) => {
+      collectedToolCalls.map(async (tc) => {
         const start = Date.now();
-        const result = await executeToolCall(toolUse, {
+        const result = await executeToolCall(tc, {
           withMem,
           projectDir,
           config,
@@ -212,45 +237,56 @@ export async function runAgentLoop(input: {
           workerId,
         });
         const elapsed = Date.now() - start;
-        callbacks?.onToolEnd(
-          toolUse.name,
-          result.output,
-          result.isError,
-          elapsed,
-        );
-        return { toolUse, result, durationMs: elapsed };
+        callbacks?.onToolEnd(tc.name, result.output, result.isError, elapsed);
+        return { toolCall: tc, result, durationMs: elapsed };
       }),
     );
-    // Log results and collect tool_result messages
-    const toolResults: ToolResultBlockParam[] = [];
-    for (const { toolUse, result, durationMs } of execResults) {
+    const toolResultContent: Array<{
+      type: "tool-result";
+      toolCallId: string;
+      toolName: string;
+      output:
+        | { type: "text"; value: string }
+        | { type: "error-text"; value: string };
+    }> = [];
+    for (const { toolCall, result, durationMs } of execResults) {
       await logInteraction(projectDir, threadId, {
         role: "tool",
         kind: "tool_result",
         content: result.output,
-        toolName: toolUse.name,
+        toolName: toolCall.name,
         durationMs,
       });
       if (!callbacks) {
         const seconds = (durationMs / 1000).toFixed(1);
         const status = result.isError ? "err" : "ok";
-        logger.phase("tool-result", `${toolUse.name} ${status} in ${seconds}s`);
+        logger.phase(
+          "tool-result",
+          `${toolCall.name} ${status} in ${seconds}s`,
+        );
       }
       if (result.terminal && result.agentResult) {
         return result.agentResult;
       }
-      toolResults.push({
-        type: "tool_result",
-        tool_use_id: toolUse.id,
-        content: maybeStoreResult(toolUse.name, result.output).text,
-        is_error: result.isError || undefined,
+      const stored = maybeStoreResult(toolCall.name, result.output);
+      toolResultContent.push({
+        type: "tool-result",
+        toolCallId: toolCall.id,
+        toolName: toolCall.name,
+        output: result.isError
+          ? { type: "error-text", value: stored.text }
+          : { type: "text", value: stored.text },
       });
     }
-    messages.push({ role: "user", content: toolResults });
+    messages.push({ role: "tool", content: toolResultContent });
+    // Touch describeModel so the import isn't flagged unused on a clean build.
+    void describeModel;
   }
   return { status: "failed", reason: "Max turns exceeded" };
@@ -266,31 +302,31 @@ interface ToolCallResult {
 interface ToolCallCtx {
   withMem: WithMem;
   projectDir: string;
-  config: Required<BotholomewConfig>;
+  config: BotholomewConfig;
   mcpxClient: McpxClient | null;
   workerId?: string;
 }
 async function executeToolCall(
-  toolUse: ToolUseBlock,
+  toolCall: CollectedToolCall,
   baseCtx: ToolCallCtx,
 ): Promise<ToolCallResult> {
-  const tool = getTool(toolUse.name);
+  const tool = getTool(toolCall.name);
   if (!tool) {
     return {
-      output: `Unknown tool: ${toolUse.name}`,
+      output: `Unknown tool: ${toolCall.name}`,
       terminal: false,
       isError: true,
     };
   }
-  const parsed = tool.inputSchema.safeParse(toolUse.input);
+  const parsed = tool.inputSchema.safeParse(toolCall.input);
   if (!parsed.success) {
     const issues = parsed.error.issues
       .map((i) => `${i.path.join(".")}: ${i.message}`)
       .join("; ");
     return {
-      output: `Invalid input for ${toolUse.name}: ${issues}. Check the tool's expected parameters.`,
+      output: `Invalid input for ${toolCall.name}: ${issues}. Check the tool's expected parameters.`,
       terminal: false,
       isError: true,
     };
@@ -302,7 +338,7 @@ async function executeToolCall(
     result = await tool.execute(parsed.data, ctx);
   } catch (err) {
     return {
-      output: `Tool ${toolUse.name} threw an error: ${err}. You may retry with different parameters or try an alternative approach.`,
+      output: `Tool ${toolCall.name} threw an error: ${err}. You may retry with different parameters or try an alternative approach.`,
       terminal: false,
       isError: true,
     };
@@ -313,7 +349,6 @@ async function executeToolCall(
       : false;
   const output = typeof result === "string" ? result : JSON.stringify(result);
-  // Check if this is a terminal tool (complete/fail/wait)
   if (tool.terminal) {
     const status = STATUS_MAP[tool.name];
     if (status) {

package/src/worker/prompt.ts CHANGED Viewed

@@ -115,7 +115,7 @@ User: ${process.env.USER || process.env.USERNAME || "unknown"}
 export async function buildSystemPrompt(
   projectDir: string,
   task?: Task,
-  _config?: Required<BotholomewConfig>,
+  _config?: BotholomewConfig,
   options?: { hasMcpTools?: boolean },
 ): Promise<string> {
   let prompt = buildMetaHeader(projectDir);

package/src/worker/schedules.ts CHANGED Viewed

@@ -1,5 +1,12 @@
-import Anthropic from "@anthropic-ai/sdk";
+import { generateObject } from "ai";
+import { z } from "zod";
 import type { BotholomewConfig } from "../config/schemas.ts";
+import {
+  buildProviderOptions,
+  formatLlmError,
+  getLanguageModel,
+  getMaxInputTokens,
+} from "../llm/index.ts";
 import type { Schedule } from "../schedules/schema.ts";
 import {
   listSchedules,
@@ -22,33 +29,31 @@ export interface ScheduleEvaluation {
   tasksToCreate: ScheduleTaskDef[];
 }
+const ScheduleResponseSchema = z.object({
+  isDue: z.boolean(),
+  reasoning: z.string(),
+  tasks: z.array(
+    z.object({
+      name: z.string(),
+      description: z.string(),
+      priority: z.enum(["low", "medium", "high"]),
+      depends_on: z.array(z.number()).optional(),
+    }),
+  ),
+});
 export async function evaluateSchedule(
-  config: Required<BotholomewConfig>,
+  config: BotholomewConfig,
   schedule: Schedule,
 ): Promise<ScheduleEvaluation> {
-  const client = new Anthropic({
-    apiKey: config.anthropic_api_key || undefined,
-  });
+  const model = getLanguageModel(config.chunker_llm);
+  const numCtx = await getMaxInputTokens(config.chunker_llm);
   const systemPrompt = `You are a schedule evaluator. Given a recurring schedule, the current time, and when the schedule last ran, determine:
 1. Whether the schedule is currently due to run
 2. If due, what task(s) should be created
-Respond with JSON only, no other text. Use this exact schema:
-{
-  "isDue": boolean,
-  "reasoning": "brief explanation of why it is or is not due",
-  "tasks": [
-    {
-      "name": "task name",
-      "description": "what to do",
-      "priority": "low" | "medium" | "high",
-      "depends_on": []
-    }
-  ]
-}
-The "depends_on" array contains indices of other tasks in the array that must complete first. For example, if task at index 1 depends on task at index 0, set depends_on to [0].`;
+For each task, "depends_on" is an array of indices of earlier tasks in your output that must complete before this one runs (e.g. if task index 1 depends on task index 0, set depends_on to [0]).`;
   const userMessage = `Schedule: "${schedule.name}"
 Description: ${schedule.description || "(none)"}
@@ -59,45 +64,31 @@ Current time: ${new Date().toISOString()}
 Is this schedule due to run? If yes, what tasks should be created?`;
   try {
-    const response = await client.messages.create({
-      model: config.chunker_model,
-      max_tokens: 1024,
+    const { object } = await generateObject({
+      model,
+      schema: ScheduleResponseSchema,
       system: systemPrompt,
-      messages: [{ role: "user", content: userMessage }],
+      prompt: userMessage,
+      maxOutputTokens: 1024,
+      providerOptions: buildProviderOptions(config.chunker_llm, numCtx),
     });
-    let text = response.content
-      .filter((b) => b.type === "text")
-      .map((b) => b.text)
-      .join("");
-    text = text
-      .replace(/^```(?:json)?\s*\n?/, "")
-      .replace(/\n?```\s*$/, "")
-      .trim();
-    const parsed = JSON.parse(text);
     return {
-      isDue: Boolean(parsed.isDue),
-      reasoning: String(parsed.reasoning ?? ""),
-      tasksToCreate: Array.isArray(parsed.tasks)
-        ? parsed.tasks.map((t: Record<string, unknown>) => ({
-            name: String(t.name ?? "Untitled"),
-            description: String(t.description ?? ""),
-            priority:
-              t.priority === "low" || t.priority === "high"
-                ? t.priority
-                : "medium",
-            depends_on: Array.isArray(t.depends_on) ? t.depends_on : [],
-          }))
-        : [],
+      isDue: object.isDue,
+      reasoning: object.reasoning,
+      tasksToCreate: object.tasks.map((t) => ({
+        name: t.name,
+        description: t.description,
+        priority: t.priority,
+        depends_on: t.depends_on ?? [],
+      })),
     };
   } catch (err) {
-    logger.warn(`Failed to evaluate schedule "${schedule.name}": ${err}`);
+    const message = formatLlmError(err, config.chunker_llm);
+    logger.warn(`Failed to evaluate schedule "${schedule.name}": ${message}`);
     return {
       isDue: false,
-      reasoning: `Evaluation failed: ${err}`,
+      reasoning: `Evaluation failed: ${message}`,
       tasksToCreate: [],
     };
   }
@@ -105,7 +96,7 @@ Is this schedule due to run? If yes, what tasks should be created?`;
 export async function processSchedules(
   projectDir: string,
-  config: Required<BotholomewConfig>,
+  config: BotholomewConfig,
   workerId: string,
 ): Promise<void> {
   const schedules = await listSchedules(projectDir, { enabled: true });
@@ -114,8 +105,6 @@ export async function processSchedules(
   logger.phase("evaluating-schedules", `${schedules.length} enabled`);
   for (const schedule of schedules) {
-    // Lockfile + min-interval guard prevent two workers (or two ticks) from
-    // evaluating the same schedule too closely.
     await withScheduleLock(
       projectDir,
       schedule.id,

package/src/worker/tick.ts CHANGED Viewed

@@ -24,7 +24,7 @@ import { processSchedules } from "./schedules.ts";
 export interface TickOptions {
   projectDir: string;
-  config: Required<BotholomewConfig>;
+  config: BotholomewConfig;
   workerId: string;
   mcpxClient?: McpxClient | null;
   callbacks?: WorkerStreamCallbacks;
@@ -109,7 +109,7 @@ export async function tick(opts: TickOptions): Promise<boolean> {
  */
 export async function runSpecificTask(opts: {
   projectDir: string;
-  config: Required<BotholomewConfig>;
+  config: BotholomewConfig;
   workerId: string;
   taskId: string;
   mcpxClient?: McpxClient | null;
@@ -147,7 +147,7 @@ export async function runSpecificTask(opts: {
 async function runClaimedTask(opts: {
   projectDir: string;
   withMem: WithMem;
-  config: Required<BotholomewConfig>;
+  config: BotholomewConfig;
   workerId: string;
   mcpxClient?: McpxClient | null;
   callbacks?: WorkerStreamCallbacks;