npm - bloby-bot - Versions diffs - 0.25.5 → 0.26.0 - Mend

bloby-bot 0.25.5 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +2 -2
package/supervisor/bloby-agent.ts +96 -52
package/supervisor/channels/manager.ts +10 -7
package/supervisor/chat/OnboardWizard.tsx +4 -3
package/supervisor/cli-warmup.ts +114 -0
package/supervisor/index.ts +9 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bloby-bot",
-  "version": "0.25.5",
+  "version": "0.26.0",
   "releaseNotes": [
     "1. new stuff",
     "2. ",
@@ -51,7 +51,7 @@
     "dev:docs": "cd ./docs && npx fumapress"
   },
   "dependencies": {
-    "@anthropic-ai/claude-agent-sdk": "^0.2.97",
+    "@anthropic-ai/claude-agent-sdk": "^0.2.112",
     "@clack/prompts": "^1.1.0",
     "@streamdown/code": "^1.1.1",
     "@tailwindcss/vite": "^4.2.0",

package/supervisor/bloby-agent.ts CHANGED Viewed

@@ -10,7 +10,7 @@
  *    Classic request-response: one query() per message. Backward compat.
  */
-import { query, type SDKMessage, type SDKUserMessage } from '@anthropic-ai/claude-agent-sdk';
+import { query, type SDKMessage, type SDKUserMessage, type Options } from '@anthropic-ai/claude-agent-sdk';
 import fs from 'fs';
 import path from 'path';
 import { log } from '../shared/logger.js';
@@ -19,6 +19,7 @@ import type { SavedFile } from './file-saver.js';
 import { getClaudeAccessToken } from '../worker/claude-auth.js';
 import { assembleSystemPrompt } from '../worker/prompts/prompt-assembler.js';
 import { buildAgents } from './agents/index.js';
+import { preWarm, claimWarmup, discardWarmup } from './cli-warmup.js';
 // ── Types ──────────────────────────────────────────────────────────────────
@@ -100,6 +101,8 @@ export function endAllConversations(): void {
     log.info(`[conversation] Ending conversation ${convId} (auth changed)`);
     endConversation(convId);
   }
+  // The pre-warmed subprocess was initialized with the old OAuth token — drop it.
+  discardWarmup();
 }
 // ── Helpers ─────────────────────────────────────────────────────────────────
@@ -182,6 +185,75 @@ function buildUserMessage(text: string, attachments?: AgentAttachment[], savedFi
 // ── Live Conversation API ──────────────────────────────────────────────────
+/**
+ * Build the options for a live conversation's query(). Shared by
+ * `startConversation` and the pre-warmer (`warmUpForLiveConversation`) so
+ * both derive their options from the same code path.
+ *
+ * The system prompt is the assembled base prompt followed by the memory
+ * files, the config files (PULSE.json and CRONS.json), the channel config
+ * when the loaded config has one, and, only when `recentMessages` is
+ * non-empty, a "Recent Conversation" section.
+ *
+ * NOTE(review): the pre-warmer calls this without `recentMessages`, and the
+ * warm-up cache key hashes `systemPrompt`. A warmed subprocess can therefore
+ * only be claimed by a conversation that starts with no recent messages and
+ * with memory/config files unchanged since the warm-up. Confirm this is the
+ * intended hit rate.
+ *
+ * @param model - Model id passed through to the SDK options.
+ * @param oauthToken - Token exported to the subprocess as `CLAUDE_CODE_OAUTH_TOKEN`.
+ * @param names - Optional bot/human names forwarded to `assembleSystemPrompt`.
+ * @param recentMessages - Optional history appended to the system prompt.
+ * @returns Query options without `abortController` and `stderr`; callers add
+ *   those themselves.
+ */
+async function buildConversationOptions(
+  model: string,
+  oauthToken: string,
+  names?: { botName: string; humanName: string },
+  recentMessages?: RecentMessage[],
+): Promise<Omit<Options, 'abortController' | 'stderr'>> {
+  const memoryFiles = readMemoryFiles();
+  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
+  let systemPrompt = basePrompt;
+  systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
+  try {
+    const { loadConfig: loadCfg } = await import('../shared/config.js');
+    const cfg = loadCfg();
+    const channels = (cfg as any).channels;
+    if (channels) {
+      systemPrompt += `\n\n---\n# Channel Config\n\`\`\`json\n${JSON.stringify(channels, null, 2)}\n\`\`\``;
+    }
+  } catch {} // best-effort: if the config cannot be loaded, the channel section is simply omitted
+  if (recentMessages?.length) {
+    systemPrompt += `\n\n---\n# Recent Conversation\n${formatConversationHistory(recentMessages)}`;
+  }
+  const agents = buildAgents();
+  const mcpServers = loadMcpServers();
+  return {
+    model,
+    cwd: WORKSPACE_DIR,
+    permissionMode: 'bypassPermissions',
+    allowDangerouslySkipPermissions: true,
+    systemPrompt,
+    mcpServers,
+    agents,
+    agentProgressSummaries: true,
+    env: {
+      ...process.env as Record<string, string>, // inherit the supervisor's environment; the two keys below override it
+      CLAUDE_CODE_OAUTH_TOKEN: oauthToken,
+      CLAUDE_CODE_BUBBLEWRAP: '1',
+    },
+  };
+}
+/**
+ * Pre-warm the Claude CLI subprocess for the next live conversation. Call
+ * fire-and-forget at supervisor boot (and after a conversation ends) so the
+ * first user message doesn't pay CLI startup latency.
+ *
+ * Returns without doing anything when `model` is empty or no OAuth token is
+ * available. Errors from token lookup, option building or `preWarm` are
+ * caught and logged as a warning instead of being propagated.
+ *
+ * NOTE(review): the boot-time call passes no `names`. If
+ * `assembleSystemPrompt` produces different text with and without names, that
+ * warm-up cannot match a conversation started with names. TODO confirm.
+ *
+ * @param model - Model id to warm up; an empty value skips the warm-up.
+ * @param names - Optional bot/human names used when assembling the system prompt.
+ */
+export async function warmUpForLiveConversation(
+  model: string,
+  names?: { botName: string; humanName: string },
+): Promise<void> {
+  if (!model) return;
+  try {
+    const oauthToken = await getClaudeAccessToken();
+    if (!oauthToken) return;
+    const options = await buildConversationOptions(model, oauthToken, names); // no recentMessages are passed here
+    await preWarm(options);
+  } catch (err: any) {
+    log.warn(`[conversation] Warm-up skipped: ${err?.message || err}`);
+  }
+}
 /**
  * Start a long-lived conversation.
  * Creates a single query() with an async input queue.
@@ -211,40 +283,20 @@ export async function startConversation(
     return false;
   }
-  // Assemble system prompt (once for the conversation lifetime)
-  const memoryFiles = readMemoryFiles();
-  const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
-  let systemPrompt = basePrompt;
-  systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
-  // Inject channel config
-  try {
-    const { loadConfig: loadCfg } = await import('../shared/config.js');
-    const cfg = loadCfg();
-    const channels = (cfg as any).channels;
-    if (channels) {
-      systemPrompt += `\n\n---\n# Channel Config\n\`\`\`json\n${JSON.stringify(channels, null, 2)}\n\`\`\``;
-    }
-  } catch {}
-  // Inject recent conversation history for context continuity
-  if (recentMessages?.length) {
-    systemPrompt += `\n\n---\n# Recent Conversation\n${formatConversationHistory(recentMessages)}`;
+  const baseOptions = await buildConversationOptions(model, oauthToken, names, recentMessages);
+  const systemPromptLen = typeof baseOptions.systemPrompt === 'string' ? baseOptions.systemPrompt.length : 0;
+  log.info(`[conversation] Loaded ${Object.keys(baseOptions.agents || {}).length} sub-agent(s): ${Object.keys(baseOptions.agents || {}).join(', ')}`);
+  if (baseOptions.mcpServers) {
+    log.info(`[conversation] MCP servers: ${Object.keys(baseOptions.mcpServers).join(', ')}`);
   }
-  // Build sub-agent definitions
-  const agents = buildAgents();
-  log.info(`[conversation] Loaded ${Object.keys(agents).length} sub-agent(s): ${Object.keys(agents).join(', ')}`);
-  // Load MCP servers
-  const mcpServers = loadMcpServers();
-  if (mcpServers) {
-    log.info(`[conversation] MCP servers: ${Object.keys(mcpServers).join(', ')}`);
-  }
+  // Try to claim a pre-warmed subprocess — its abortController is the one
+  // baked into the warm query and must be reused for end/abort to reach it.
+  const claimed = claimWarmup(baseOptions);
+  const abortController = claimed?.abortController ?? new AbortController();
   // Create the async input queue
   const inputQueue = createAsyncQueue<SDKUserMessage>();
-  const abortController = new AbortController();
   // Store the conversation
   const conv: LiveConversation = {
@@ -257,8 +309,8 @@ export async function startConversation(
   };
   liveConversations.set(conversationId, conv);
-  log.info(`[conversation] System prompt: ${systemPrompt.length} chars`);
-  log.info(`[conversation] Starting long-lived query...`);
+  log.info(`[conversation] System prompt: ${systemPromptLen} chars`);
+  log.info(`[conversation] Starting long-lived query... (${claimed ? 'warm' : 'cold'})`);
   // Run the for-await loop in the background (fire and forget)
   (async () => {
@@ -267,26 +319,16 @@ export async function startConversation(
     let stderrBuf = '';
     try {
-      const claudeQuery = query({
-        prompt: inputQueue,
-        options: {
-          model,
-          cwd: WORKSPACE_DIR,
-          permissionMode: 'bypassPermissions',
-          allowDangerouslySkipPermissions: true,
-          abortController,
-          systemPrompt,
-          mcpServers,
-          agents,
-          agentProgressSummaries: true,
-          stderr: (chunk: string) => { stderrBuf += chunk; },
-          env: {
-            ...process.env as Record<string, string>,
-            CLAUDE_CODE_OAUTH_TOKEN: oauthToken,
-            CLAUDE_CODE_BUBBLEWRAP: '1',
-          },
-        },
-      });
+      const claudeQuery = claimed
+        ? claimed.warmQuery.query(inputQueue)
+        : query({
+            prompt: inputQueue,
+            options: {
+              ...baseOptions,
+              abortController,
+              stderr: (chunk: string) => { stderrBuf += chunk; },
+            },
+          });
       conv.queryHandle = claudeQuery;
       log.info(`[conversation] ──── QUERY LOOP STARTED ────`);
@@ -416,6 +458,8 @@ export async function startConversation(
       log.info(`[conversation] Cleaning up conversation ${conversationId}`);
       liveConversations.delete(conversationId);
       onMessage('bot:conversation-ended', { conversationId });
+      // Pre-warm a fresh subprocess for the next live conversation (fire-and-forget).
+      warmUpForLiveConversation(model, names);
     }
   })();

package/supervisor/channels/manager.ts CHANGED Viewed

@@ -70,8 +70,8 @@ export class ChannelManager {
   private customerBuffers = new Map<string, BufferedMessage[]>();
   /** Debounce buffers per sender (keyed by "channel:sender") */
   private debounceBuffers = new Map<string, DebounceEntry>();
-  /** Dynamic reply target for the admin live conversation (updated before each pushMessage) */
-  private waReplyTarget: { channel: ChannelType; rawSender: string; assistantBufferKey?: string } | null = null;
+  /** FIFO queue of reply targets — one per pushMessage, consumed on each bot:response */
+  private waReplyQueue: { channel: ChannelType; rawSender: string; assistantBufferKey?: string }[] = [];
   constructor(opts: ChannelManagerOpts) {
     this.opts = opts;
@@ -547,8 +547,8 @@ export class ChannelManager {
           waChunkBuf += eventData.token;
         }
-        // Use dynamic reply target (self-chat or contact's chat depending on latest push)
-        const target = this.waReplyTarget;
+        // Peek at the front of the reply queue (the target for the current response)
+        const target = this.waReplyQueue[0];
         if (!target) return;
         // Agent paused to use a tool — send accumulated text as an intermediate WhatsApp message
@@ -560,6 +560,9 @@ export class ChannelManager {
         }
         if (type === 'bot:response' && eventData.content) {
+          // Consume this target from the queue — this response is for it
+          this.waReplyQueue.shift();
           // Send remaining text to the correct chat
           const remaining = waChunkBuf.trim();
           if (remaining) {
@@ -599,12 +602,12 @@ export class ChannelManager {
       }, { botName, humanName }, recentMessages);
     }
-    // Set reply target BEFORE pushing — callback reads this to know where to send
-    this.waReplyTarget = {
+    // Enqueue reply target BEFORE pushing — callback consumes in FIFO order
+    this.waReplyQueue.push({
       channel: msg.channel,
       rawSender: msg.rawSender,
       assistantBufferKey: msg.role === 'assistant' ? `${msg.channel}:${msg.sender}` : undefined,
-    };
+    });
     // Push the message into the live conversation
     const channelContent = channelContext + msg.text;

package/supervisor/chat/OnboardWizard.tsx CHANGED Viewed

@@ -51,9 +51,10 @@ const PROVIDERS = [
 const MODELS: Record<string, { id: string; label: string }[]> = {
   anthropic: [
-    { id: 'claude-opus-4-6', label: 'Opus 4.6' },
-    { id: 'claude-sonnet-4-6', label: 'Sonnet 4.6' },
-    { id: 'claude-haiku-4-5-20251001', label: 'Haiku 4.5' },
+    { id: 'claude-opus-4-7[1m]', label: 'Opus 4.7 (1M context)' },
+    { id: 'claude-opus-4-7', label: 'Opus 4.7' },
+    { id: 'claude-sonnet-4-6', label: 'Sonnet 4.6 (1M context)' },
+    { id: 'claude-haiku-4-5', label: 'Haiku 4.5' },
   ],
   openai: [
     { id: 'gpt-5.2-codex:medium', label: 'GPT-5.2 Codex Medium' },

package/supervisor/cli-warmup.ts ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * CLI subprocess pre-warming.
+ *
+ * The Agent SDK's `startup()` spawns the Claude Code subprocess and completes
+ * its initialize handshake ahead of time, so the first `query()` writes the
+ * prompt directly to a ready process (~20× faster first response).
+ *
+ * Trade-off: all options (model, systemPrompt, mcpServers, agents, env, cwd)
+ * are baked into the warm query at startup time. If the caller's options
+ * don't match, the warm query can't be used and we fall back to a cold start.
+ */
+import { startup, type WarmQuery, type Options } from '@anthropic-ai/claude-agent-sdk';
+import crypto from 'crypto';
+import { log } from '../shared/logger.js';
+interface CachedWarmup { // shape of the pre-warmed subprocess entry held in `cached`
+  key: string; // keyFor() hash of the options the subprocess was started with
+  warmQuery: WarmQuery; // handle returned by the SDK's startup()
+  /** The abortController baked into the warm subprocess — caller must reuse this
+   *  if it wants to abort the query. */
+  abortController: AbortController;
+}
+let cached: CachedWarmup | null = null; // at most one warmed subprocess is kept; null when none is ready
+let inflight: Promise<void> | null = null; // non-null while a preWarm() startup is still running
+/**
+ * Build a cache key from the options that must match between preWarm() and
+ * claimWarmup(). Live options (like the abortController or stderr
+ * callbacks) are excluded — the SDK wires those at startup time but they're
+ * not relevant to compatibility.
+ *
+ * The key is the SHA-256 hex digest of a JSON serialization of: cwd, model,
+ * permissionMode, systemPrompt, mcpServers, agents, env, resume and betas.
+ * Any other option is not part of the key. The top-level entries of
+ * mcpServers, agents and env are sorted by name so insertion order does not
+ * change the key; nested values are serialized in their own property order.
+ * Because env is hashed by value, a changed entry (for example a new OAuth
+ * token passed through env) produces a different key.
+ *
+ * @param options - Query options to fingerprint.
+ * @returns Hex-encoded SHA-256 digest identifying the option set.
+ */
+function keyFor(options: Options): string {
+  const keyable = {
+    cwd: options.cwd,
+    model: options.model,
+    permissionMode: options.permissionMode,
+    systemPrompt: options.systemPrompt,
+    mcpServers: options.mcpServers
+      ? Object.keys(options.mcpServers).sort().map((k) => [k, options.mcpServers![k]])
+      : null,
+    agents: options.agents
+      ? Object.keys(options.agents).sort().map((k) => [k, options.agents![k]])
+      : null,
+    env: options.env
+      ? Object.keys(options.env).sort().filter((k) => !k.startsWith('npm_')).map((k) => [k, options.env![k]]) // npm_* variables are left out of the key
+      : null,
+    resume: options.resume,
+    betas: options.betas,
+  };
+  return crypto.createHash('sha256').update(JSON.stringify(keyable)).digest('hex');
+}
+export interface ClaimedWarmup { // what claimWarmup() hands to the caller
+  warmQuery: WarmQuery; // the warmed query handle
+  abortController: AbortController; // the controller that was passed to startup() for this subprocess
+}
+/**
+ * Spawn a pre-warmed subprocess with the given options. Fire-and-forget: if
+ * a preWarm is already in flight (for any options) or the cached warmup
+ * already matches, this is a no-op. A cached warmup whose key differs is
+ * closed and replaced.
+ *
+ * We own the abortController so the caller can reuse it after claiming —
+ * otherwise `.abort()` wouldn't reach the warm subprocess.
+ *
+ * Startup failures are logged as a warning rather than thrown, and leave the
+ * cache empty.
+ *
+ * @param options - Query options to bake into the warm subprocess.
+ * @returns A promise that settles once the in-flight warm-up has finished.
+ */
+export async function preWarm(options: Omit<Options, 'abortController'>): Promise<void> {
+  if (inflight) return inflight;
+  const key = keyFor(options as Options);
+  if (cached?.key === key) return;
+  const attempt = (async () => {
+    try {
+      if (cached && cached.key !== key) {
+        try { cached.warmQuery.close(); } catch {}
+        cached = null;
+      }
+      const abortController = new AbortController();
+      log.info('[cli-warmup] Pre-warming Claude subprocess...');
+      const warmQuery = await startup({ options: { ...options, abortController } });
+      cached = { key, warmQuery, abortController };
+      log.ok('[cli-warmup] Subprocess pre-warmed');
+    } catch (err: any) {
+      log.warn(`[cli-warmup] Pre-warm failed: ${err?.message || err}`);
+    }
+  })();
+  // Clear the marker from a promise callback, never from inside the async
+  // body: if the body throws before its first await, an inner `finally` would
+  // run before the assignment below and leave `inflight` stuck on a settled
+  // promise, turning every later preWarm() into a no-op.
+  inflight = attempt.finally(() => { inflight = null; });
+  return inflight;
+}
+/**
+ * Atomically claim the warm query if its options match. Returns null if
+ * there's no warmup or the options differ — caller should cold-start.
+ *
+ * The caller must use the returned `abortController` to abort — the one baked
+ * into the subprocess is the only one that works.
+ *
+ * The check and the hand-over happen in one synchronous call, so a warmup can
+ * be claimed at most once. On a key mismatch the cached warmup is left in
+ * place; it is neither closed nor returned.
+ *
+ * @param options - The options the caller would otherwise cold-start with.
+ * @returns The warm query and its abortController, or null.
+ */
+export function claimWarmup(options: Omit<Options, 'abortController'>): ClaimedWarmup | null {
+  if (!cached) return null;
+  if (cached.key !== keyFor(options as Options)) return null;
+  const claimed: ClaimedWarmup = { warmQuery: cached.warmQuery, abortController: cached.abortController };
+  cached = null; // ownership moves to the caller; this module no longer closes the subprocess
+  log.info('[cli-warmup] Claimed pre-warmed subprocess');
+  return claimed;
+}
+/**
+ * Close and discard any pending warmup (e.g. on shutdown or auth change).
+ *
+ * Covers both the cached subprocess and one that is still starting. A
+ * preWarm() that is in flight when this is called stores its subprocess in
+ * `cached` only after startup resolves, so that late arrival is closed as
+ * soon as the in-flight promise settles. Without this, a subprocess started
+ * with outdated options would stay alive until a later preWarm() replaced it.
+ */
+export function discardWarmup(): void {
+  if (cached) {
+    try { cached.warmQuery.close(); } catch {}
+    cached = null;
+  }
+  const pending = inflight;
+  if (!pending) return;
+  const dropLateArrival = (): void => {
+    if (cached) {
+      try { cached.warmQuery.close(); } catch {}
+      cached = null;
+    }
+  };
+  // Registered for both outcomes so the chained promise can never surface as
+  // an unhandled rejection.
+  void pending.then(dropLateArrival, dropLateArrival);
+}

package/supervisor/index.ts CHANGED Viewed

@@ -18,6 +18,7 @@ import {
   startConversation, pushMessage, hasConversation, endConversation, endAllConversations,
   isConversationBusy, stopSubAgentTask,
   startBlobyAgentQuery, stopBlobyAgentQuery,
+  warmUpForLiveConversation,
   type RecentMessage,
 } from './bloby-agent.js';
 import { ensureFileDirs, saveAttachment, type SavedFile } from './file-saver.js';
@@ -1682,6 +1683,14 @@ ${!connected ? `<script>
     log.warn(`[channels] Init failed: ${err.message}`);
   });
+  // Pre-warm the Claude CLI subprocess for the next live conversation so
+  // the first user message doesn't wait on subprocess spawn + init.
+  // Fire-and-forget: failures are logged but don't block boot.
+  const prewarmCfg = loadConfig();
+  if (prewarmCfg.ai.model) {
+    warmUpForLiveConversation(prewarmCfg.ai.model);
+  }
   // Watch workspace files for changes — auto-restart backend
   // Catches edits from VS Code, CLI, or any external tool.
   // During agent turns, defers to bot:done (avoids mid-turn restarts).