bloby-bot 0.25.5 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.25.5",
3
+ "version": "0.26.0",
4
4
  "releaseNotes": [
5
5
  "1. new stuff",
6
6
  "2. ",
@@ -51,7 +51,7 @@
51
51
  "dev:docs": "cd ./docs && npx fumapress"
52
52
  },
53
53
  "dependencies": {
54
- "@anthropic-ai/claude-agent-sdk": "^0.2.97",
54
+ "@anthropic-ai/claude-agent-sdk": "^0.2.112",
55
55
  "@clack/prompts": "^1.1.0",
56
56
  "@streamdown/code": "^1.1.1",
57
57
  "@tailwindcss/vite": "^4.2.0",
@@ -10,7 +10,7 @@
10
10
  * Classic request-response: one query() per message. Backward compat.
11
11
  */
12
12
 
13
- import { query, type SDKMessage, type SDKUserMessage } from '@anthropic-ai/claude-agent-sdk';
13
+ import { query, type SDKMessage, type SDKUserMessage, type Options } from '@anthropic-ai/claude-agent-sdk';
14
14
  import fs from 'fs';
15
15
  import path from 'path';
16
16
  import { log } from '../shared/logger.js';
@@ -19,6 +19,7 @@ import type { SavedFile } from './file-saver.js';
19
19
  import { getClaudeAccessToken } from '../worker/claude-auth.js';
20
20
  import { assembleSystemPrompt } from '../worker/prompts/prompt-assembler.js';
21
21
  import { buildAgents } from './agents/index.js';
22
+ import { preWarm, claimWarmup, discardWarmup } from './cli-warmup.js';
22
23
 
23
24
  // ── Types ──────────────────────────────────────────────────────────────────
24
25
 
@@ -100,6 +101,8 @@ export function endAllConversations(): void {
100
101
  log.info(`[conversation] Ending conversation ${convId} (auth changed)`);
101
102
  endConversation(convId);
102
103
  }
104
+ // The pre-warmed subprocess was initialized with the old OAuth token — drop it.
105
+ discardWarmup();
103
106
  }
104
107
 
105
108
  // ── Helpers ─────────────────────────────────────────────────────────────────
@@ -182,6 +185,75 @@ function buildUserMessage(text: string, attachments?: AgentAttachment[], savedFi
182
185
 
183
186
  // ── Live Conversation API ──────────────────────────────────────────────────
184
187
 
188
/**
 * Assemble the `query()` options for a live conversation.
 *
 * Both `startConversation` and the pre-warmer build their options through
 * this function, so a warmed subprocess and the conversation that later
 * claims it are configured by the same code path.
 *
 * The system prompt is the assembled base prompt followed by the memory and
 * config file contents, the channel config (when one is present) and, when
 * `recentMessages` is non-empty, the formatted recent conversation.
 *
 * @param model - Model id passed through to the SDK.
 * @param oauthToken - Exposed to the subprocess as `CLAUDE_CODE_OAUTH_TOKEN`.
 * @param names - Optional bot/human names forwarded to the prompt assembler.
 * @param recentMessages - Optional history appended to the system prompt.
 * @returns Options without `abortController` and `stderr`; the caller adds those.
 */
async function buildConversationOptions(
  model: string,
  oauthToken: string,
  names?: { botName: string; humanName: string },
  recentMessages?: RecentMessage[],
): Promise<Omit<Options, 'abortController' | 'stderr'>> {
  const memoryFiles = readMemoryFiles();
  let systemPrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
  systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;

  // Channel config is best-effort: a failure must not prevent the conversation
  // from starting, but it is logged because it silently changes the prompt.
  try {
    const { loadConfig: loadCfg } = await import('../shared/config.js');
    const cfg = loadCfg();
    const channels = (cfg as any).channels;
    if (channels) {
      systemPrompt += `\n\n---\n# Channel Config\n\`\`\`json\n${JSON.stringify(channels, null, 2)}\n\`\`\``;
    }
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    log.warn(`[conversation] Channel config not injected into system prompt: ${reason}`);
  }

  if (recentMessages?.length) {
    systemPrompt += `\n\n---\n# Recent Conversation\n${formatConversationHistory(recentMessages)}`;
  }

  const agents = buildAgents();
  const mcpServers = loadMcpServers();

  return {
    model,
    cwd: WORKSPACE_DIR,
    permissionMode: 'bypassPermissions',
    allowDangerouslySkipPermissions: true,
    systemPrompt,
    mcpServers,
    agents,
    agentProgressSummaries: true,
    env: {
      ...process.env as Record<string, string>,
      CLAUDE_CODE_OAUTH_TOKEN: oauthToken,
      CLAUDE_CODE_BUBBLEWRAP: '1',
    },
  };
}
236
+
237
/**
 * Start a Claude CLI subprocess ahead of the next live conversation.
 *
 * Intended to be called without awaiting: every failure is caught and logged
 * as a warning, so the returned promise does not reject. Does nothing when
 * `model` is empty or no OAuth token is available.
 *
 * @param model - Model id the warmed subprocess is started with.
 * @param names - Optional bot/human names used when building the options.
 */
export async function warmUpForLiveConversation(
  model: string,
  names?: { botName: string; humanName: string },
): Promise<void> {
  if (model) {
    try {
      const token = await getClaudeAccessToken();
      if (token) {
        const warmOptions = await buildConversationOptions(model, token, names);
        await preWarm(warmOptions);
      }
    } catch (err: any) {
      log.warn(`[conversation] Warm-up skipped: ${err?.message || err}`);
    }
  }
}
256
+
185
257
  /**
186
258
  * Start a long-lived conversation.
187
259
  * Creates a single query() with an async input queue.
@@ -211,40 +283,20 @@ export async function startConversation(
211
283
  return false;
212
284
  }
213
285
 
214
- // Assemble system prompt (once for the conversation lifetime)
215
- const memoryFiles = readMemoryFiles();
216
- const basePrompt = await assembleSystemPrompt(names?.botName, names?.humanName);
217
- let systemPrompt = basePrompt;
218
- systemPrompt += `\n\n---\n# Your Memory Files\n\n## MYSELF.md\n${memoryFiles.myself}\n\n## MYHUMAN.md\n${memoryFiles.myhuman}\n\n## MEMORY.md\n${memoryFiles.memory}\n\n---\n# Your Config Files\n\n## PULSE.json\n${memoryFiles.pulse}\n\n## CRONS.json\n${memoryFiles.crons}`;
219
-
220
- // Inject channel config
221
- try {
222
- const { loadConfig: loadCfg } = await import('../shared/config.js');
223
- const cfg = loadCfg();
224
- const channels = (cfg as any).channels;
225
- if (channels) {
226
- systemPrompt += `\n\n---\n# Channel Config\n\`\`\`json\n${JSON.stringify(channels, null, 2)}\n\`\`\``;
227
- }
228
- } catch {}
229
-
230
- // Inject recent conversation history for context continuity
231
- if (recentMessages?.length) {
232
- systemPrompt += `\n\n---\n# Recent Conversation\n${formatConversationHistory(recentMessages)}`;
286
+ const baseOptions = await buildConversationOptions(model, oauthToken, names, recentMessages);
287
+ const systemPromptLen = typeof baseOptions.systemPrompt === 'string' ? baseOptions.systemPrompt.length : 0;
288
+ log.info(`[conversation] Loaded ${Object.keys(baseOptions.agents || {}).length} sub-agent(s): ${Object.keys(baseOptions.agents || {}).join(', ')}`);
289
+ if (baseOptions.mcpServers) {
290
+ log.info(`[conversation] MCP servers: ${Object.keys(baseOptions.mcpServers).join(', ')}`);
233
291
  }
234
292
 
235
- // Build sub-agent definitions
236
- const agents = buildAgents();
237
- log.info(`[conversation] Loaded ${Object.keys(agents).length} sub-agent(s): ${Object.keys(agents).join(', ')}`);
238
-
239
- // Load MCP servers
240
- const mcpServers = loadMcpServers();
241
- if (mcpServers) {
242
- log.info(`[conversation] MCP servers: ${Object.keys(mcpServers).join(', ')}`);
243
- }
293
+ // Try to claim a pre-warmed subprocess — its abortController is the one
294
+ // baked into the warm query and must be reused for end/abort to reach it.
295
+ const claimed = claimWarmup(baseOptions);
296
+ const abortController = claimed?.abortController ?? new AbortController();
244
297
 
245
298
  // Create the async input queue
246
299
  const inputQueue = createAsyncQueue<SDKUserMessage>();
247
- const abortController = new AbortController();
248
300
 
249
301
  // Store the conversation
250
302
  const conv: LiveConversation = {
@@ -257,8 +309,8 @@ export async function startConversation(
257
309
  };
258
310
  liveConversations.set(conversationId, conv);
259
311
 
260
- log.info(`[conversation] System prompt: ${systemPrompt.length} chars`);
261
- log.info(`[conversation] Starting long-lived query...`);
312
+ log.info(`[conversation] System prompt: ${systemPromptLen} chars`);
313
+ log.info(`[conversation] Starting long-lived query... (${claimed ? 'warm' : 'cold'})`);
262
314
 
263
315
  // Run the for-await loop in the background (fire and forget)
264
316
  (async () => {
@@ -267,26 +319,16 @@ export async function startConversation(
267
319
  let stderrBuf = '';
268
320
 
269
321
  try {
270
- const claudeQuery = query({
271
- prompt: inputQueue,
272
- options: {
273
- model,
274
- cwd: WORKSPACE_DIR,
275
- permissionMode: 'bypassPermissions',
276
- allowDangerouslySkipPermissions: true,
277
- abortController,
278
- systemPrompt,
279
- mcpServers,
280
- agents,
281
- agentProgressSummaries: true,
282
- stderr: (chunk: string) => { stderrBuf += chunk; },
283
- env: {
284
- ...process.env as Record<string, string>,
285
- CLAUDE_CODE_OAUTH_TOKEN: oauthToken,
286
- CLAUDE_CODE_BUBBLEWRAP: '1',
287
- },
288
- },
289
- });
322
+ const claudeQuery = claimed
323
+ ? claimed.warmQuery.query(inputQueue)
324
+ : query({
325
+ prompt: inputQueue,
326
+ options: {
327
+ ...baseOptions,
328
+ abortController,
329
+ stderr: (chunk: string) => { stderrBuf += chunk; },
330
+ },
331
+ });
290
332
 
291
333
  conv.queryHandle = claudeQuery;
292
334
  log.info(`[conversation] ──── QUERY LOOP STARTED ────`);
@@ -416,6 +458,8 @@ export async function startConversation(
416
458
  log.info(`[conversation] Cleaning up conversation ${conversationId}`);
417
459
  liveConversations.delete(conversationId);
418
460
  onMessage('bot:conversation-ended', { conversationId });
461
+ // Pre-warm a fresh subprocess for the next live conversation (fire-and-forget).
462
+ warmUpForLiveConversation(model, names);
419
463
  }
420
464
  })();
421
465
 
@@ -70,8 +70,8 @@ export class ChannelManager {
70
70
  private customerBuffers = new Map<string, BufferedMessage[]>();
71
71
  /** Debounce buffers per sender (keyed by "channel:sender") */
72
72
  private debounceBuffers = new Map<string, DebounceEntry>();
73
- /** Dynamic reply target for the admin live conversation (updated before each pushMessage) */
74
- private waReplyTarget: { channel: ChannelType; rawSender: string; assistantBufferKey?: string } | null = null;
73
+ /** FIFO queue of reply targets — one per pushMessage, consumed on each bot:response */
74
+ private waReplyQueue: { channel: ChannelType; rawSender: string; assistantBufferKey?: string }[] = [];
75
75
 
76
76
  constructor(opts: ChannelManagerOpts) {
77
77
  this.opts = opts;
@@ -547,8 +547,8 @@ export class ChannelManager {
547
547
  waChunkBuf += eventData.token;
548
548
  }
549
549
 
550
- // Use dynamic reply target (self-chat or contact's chat depending on latest push)
551
- const target = this.waReplyTarget;
550
+ // Peek at the front of the reply queue (the target for the current response)
551
+ const target = this.waReplyQueue[0];
552
552
  if (!target) return;
553
553
 
554
554
  // Agent paused to use a tool — send accumulated text as an intermediate WhatsApp message
@@ -560,6 +560,9 @@ export class ChannelManager {
560
560
  }
561
561
 
562
562
  if (type === 'bot:response' && eventData.content) {
563
+ // Consume this target from the queue — this response is for it
564
+ this.waReplyQueue.shift();
565
+
563
566
  // Send remaining text to the correct chat
564
567
  const remaining = waChunkBuf.trim();
565
568
  if (remaining) {
@@ -599,12 +602,12 @@ export class ChannelManager {
599
602
  }, { botName, humanName }, recentMessages);
600
603
  }
601
604
 
602
- // Set reply target BEFORE pushing — callback reads this to know where to send
603
- this.waReplyTarget = {
605
+ // Enqueue reply target BEFORE pushing — callback consumes in FIFO order
606
+ this.waReplyQueue.push({
604
607
  channel: msg.channel,
605
608
  rawSender: msg.rawSender,
606
609
  assistantBufferKey: msg.role === 'assistant' ? `${msg.channel}:${msg.sender}` : undefined,
607
- };
610
+ });
608
611
 
609
612
  // Push the message into the live conversation
610
613
  const channelContent = channelContext + msg.text;
@@ -51,9 +51,10 @@ const PROVIDERS = [
51
51
 
52
52
  const MODELS: Record<string, { id: string; label: string }[]> = {
53
53
  anthropic: [
54
- { id: 'claude-opus-4-6', label: 'Opus 4.6' },
55
- { id: 'claude-sonnet-4-6', label: 'Sonnet 4.6' },
56
- { id: 'claude-haiku-4-5-20251001', label: 'Haiku 4.5' },
54
+ { id: 'claude-opus-4-7[1m]', label: 'Opus 4.7 (1M context)' },
55
+ { id: 'claude-opus-4-7', label: 'Opus 4.7' },
56
+ { id: 'claude-sonnet-4-6', label: 'Sonnet 4.6 (1M context)' },
57
+ { id: 'claude-haiku-4-5', label: 'Haiku 4.5' },
57
58
  ],
58
59
  openai: [
59
60
  { id: 'gpt-5.2-codex:medium', label: 'GPT-5.2 Codex Medium' },
@@ -0,0 +1,114 @@
1
+ /**
2
+ * CLI subprocess pre-warming.
3
+ *
4
+ * The Agent SDK's `startup()` spawns the Claude Code subprocess and completes
5
+ * its initialize handshake ahead of time, so the first `query()` writes the
6
+ * prompt directly to a ready process (~20× faster first response).
7
+ *
8
+ * Trade-off: all options (model, systemPrompt, mcpServers, agents, env, cwd)
9
+ * are baked into the warm query at startup time. If the caller's options
10
+ * don't match, the warm query can't be used and we fall back to a cold start.
11
+ */
12
+
13
+ import { startup, type WarmQuery, type Options } from '@anthropic-ai/claude-agent-sdk';
14
+ import crypto from 'crypto';
15
+ import { log } from '../shared/logger.js';
16
+
17
/** A warmed subprocess that has not been claimed yet. */
interface CachedWarmup {
  /**
   * Controller handed to `startup()` for this subprocess. A caller that wants
   * to abort the query has to reuse this instance.
   */
  abortController: AbortController;
  /** Hash of the options the subprocess was started with (see `keyFor`). */
  key: string;
  /** Handle returned by the SDK's `startup()`. */
  warmQuery: WarmQuery;
}

/** The one warm subprocess currently available for claiming, or `null`. */
let cached: CachedWarmup | null = null;

/** Promise of a warm-up that is still starting, or `null` when none is running. */
let inflight: Promise<void> | null = null;
27
+
28
/**
 * Options that are wired per call and therefore say nothing about whether a
 * warm subprocess is compatible with a later request.
 */
const UNKEYED_OPTIONS = new Set<string>(['abortController', 'stderr']);

/**
 * Convert `value` into a JSON-serialisable form with object keys in sorted
 * order, so structurally equal values stringify to the same text regardless
 * of property insertion order.
 *
 * Functions and `undefined` are dropped (JSON cannot represent them) and a
 * reference back to an ancestor is replaced by a marker instead of throwing.
 */
function canonicalize(value: unknown, ancestors: object[] = []): unknown {
  if (value === undefined || typeof value === 'function') return undefined;
  if (value === null || typeof value !== 'object') return value;
  if (ancestors.includes(value)) return '[Circular]';

  const chain = [...ancestors, value];
  if (Array.isArray(value)) {
    // Mirror JSON: holes / unserialisable items become null so indexes stay stable.
    return value.map((item) => canonicalize(item, chain) ?? null);
  }

  const pairs: [string, unknown][] = [];
  for (const name of Object.keys(value).sort()) {
    const inner = canonicalize((value as Record<string, unknown>)[name], chain);
    if (inner !== undefined) pairs.push([name, inner]);
  }
  return Object.fromEntries(pairs);
}

/**
 * Build the cache key that must be equal between `preWarm()` and
 * `claimWarmup()` for a warm subprocess to be reused.
 *
 * Every option takes part in the key except the per-call wiring listed in
 * `UNKEYED_OPTIONS` and function-valued options. An exclusion list is used on
 * purpose: an option this function does not know about then causes a key
 * mismatch (one cold start) rather than a warm subprocess being claimed with
 * settings that differ from what the caller asked for.
 *
 * `env` entries whose name starts with `npm_` are ignored.
 *
 * @param options - Options as passed to `preWarm()` / `claimWarmup()`.
 * @returns Hex-encoded SHA-256 of the canonical JSON form of the keyed options.
 */
function keyFor(options: Options): string {
  const keyable: Record<string, unknown> = Object.fromEntries(
    Object.entries(options).filter(([name]) => !UNKEYED_OPTIONS.has(name)),
  );
  if (options.env) {
    keyable.env = Object.fromEntries(
      Object.entries(options.env).filter(([name]) => !name.startsWith('npm_')),
    );
  }
  return crypto.createHash('sha256').update(JSON.stringify(canonicalize(keyable))).digest('hex');
}
54
+
55
/** What `claimWarmup()` hands over once a warm subprocess has been claimed. */
export interface ClaimedWarmup {
  /** Controller the warm subprocess was started with; abort through this one. */
  abortController: AbortController;
  /** Handle of the warm subprocess, as returned by the SDK's `startup()`. */
  warmQuery: WarmQuery;
}
59
+
60
/**
 * Spawn a pre-warmed subprocess for the given options and cache it for
 * `claimWarmup()`.
 *
 * Behaviour:
 * - If another warm-up is still starting, this call waits for it to settle and
 *   then re-evaluates, so a request with *different* options is not dropped.
 * - If the cached warm-up already matches `options`, nothing is spawned.
 * - A cached warm-up for other options is closed and replaced.
 *
 * The AbortController is created here and stored with the cache entry so the
 * caller that claims the subprocess can reuse it; a controller created later
 * would not be the one passed to `startup()`.
 *
 * Failures are logged as warnings; the returned promise does not reject
 * because of a failed `startup()`.
 *
 * @param options - Options the subprocess is started with (`abortController` is supplied here).
 */
export async function preWarm(options: Omit<Options, 'abortController'>): Promise<void> {
  const key = keyFor(options as Options);

  // Only one warm-up runs at a time. Wait for the current one instead of
  // returning it, then decide again based on what ended up in the cache.
  while (inflight) {
    await inflight;
  }
  if (cached?.key === key) return;

  const warm = async (): Promise<void> => {
    try {
      if (cached) {
        // Different options: the old subprocess can never be claimed for them.
        try { cached.warmQuery.close(); } catch { /* already closed */ }
        cached = null;
      }
      const abortController = new AbortController();
      log.info('[cli-warmup] Pre-warming Claude subprocess...');
      const warmQuery = await startup({ options: { ...options, abortController } });
      cached = { key, warmQuery, abortController };
      log.ok('[cli-warmup] Subprocess pre-warmed');
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      log.warn(`[cli-warmup] Pre-warm failed: ${reason}`);
    }
  };

  // Clear `inflight` in a continuation rather than in a `finally` inside the
  // body: a continuation always runs after the assignment below, so a failure
  // before the first `await` cannot leave a settled promise stuck in `inflight`.
  const attempt: Promise<void> = warm().then(() => {
    if (inflight === attempt) inflight = null;
  });
  inflight = attempt;
  return attempt;
}
91
+
92
/**
 * Take ownership of the cached warm subprocess when it was started with
 * options that hash to the same key as `options`.
 *
 * The check and the removal from the cache happen synchronously, with no
 * `await` in between, so the same warm-up is not handed out twice.
 *
 * @param options - Options the caller would otherwise cold-start with.
 * @returns The warm query and the AbortController it was started with, or
 *          `null` when nothing is cached or the keys differ (cold-start then).
 *          Aborting must go through the returned controller.
 */
export function claimWarmup(options: Omit<Options, 'abortController'>): ClaimedWarmup | null {
  const candidate = cached;
  if (candidate === null || candidate.key !== keyFor(options as Options)) {
    return null;
  }

  cached = null;
  log.info('[cli-warmup] Claimed pre-warmed subprocess');
  const { warmQuery, abortController } = candidate;
  return { warmQuery, abortController };
}
107
+
108
/**
 * Close and discard any pending warm-up (e.g. on shutdown or auth change).
 *
 * This covers both the warm-up already sitting in the cache and one that is
 * still starting: the latter was spawned with the options being invalidated
 * and would otherwise land in the cache after this call returns.
 */
export function discardWarmup(): void {
  const dropCached = (): void => {
    if (!cached) return;
    // close() is best-effort: the subprocess may already be gone.
    try { cached.warmQuery.close(); } catch { /* nothing left to close */ }
    cached = null;
  };

  dropCached();

  // The in-flight promise settles once the warm-up has (or has not) been
  // cached; drop whatever it stored at that point.
  const pending = inflight;
  if (pending) {
    void pending.then(dropCached);
  }
}
@@ -18,6 +18,7 @@ import {
18
18
  startConversation, pushMessage, hasConversation, endConversation, endAllConversations,
19
19
  isConversationBusy, stopSubAgentTask,
20
20
  startBlobyAgentQuery, stopBlobyAgentQuery,
21
+ warmUpForLiveConversation,
21
22
  type RecentMessage,
22
23
  } from './bloby-agent.js';
23
24
  import { ensureFileDirs, saveAttachment, type SavedFile } from './file-saver.js';
@@ -1682,6 +1683,14 @@ ${!connected ? `<script>
1682
1683
  log.warn(`[channels] Init failed: ${err.message}`);
1683
1684
  });
1684
1685
 
1686
+ // Pre-warm the Claude CLI subprocess for the next live conversation so
1687
+ // the first user message doesn't wait on subprocess spawn + init.
1688
+ // Fire-and-forget: failures are logged but don't block boot.
1689
+ const prewarmCfg = loadConfig();
1690
+ if (prewarmCfg.ai.model) {
1691
+ warmUpForLiveConversation(prewarmCfg.ai.model);
1692
+ }
1693
+
1685
1694
  // Watch workspace files for changes — auto-restart backend
1686
1695
  // Catches edits from VS Code, CLI, or any external tool.
1687
1696
  // During agent turns, defers to bot:done (avoids mid-turn restarts).