bloby-bot 0.70.9 → 0.70.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.70.9",
3
+ "version": "0.70.10",
4
4
  "releaseNotes": [
5
5
  "1. Fix: agent self-update ",
6
6
  "1",
@@ -16,6 +16,8 @@ import { log } from '../../../shared/logger.js';
16
16
  import { WORKSPACE_DIR } from '../../../shared/paths.js';
17
17
  import type { SavedFile } from '../../file-saver.js';
18
18
  import { assembleSystemPrompt } from '../../../worker/prompts/prompt-assembler.js';
19
+ import { buildAgents } from '../../agents/index.js';
20
+ import crypto from 'crypto';
19
21
  import fs from 'fs';
20
22
  import path from 'path';
21
23
  import type {
@@ -34,6 +36,7 @@ import { readPiAuth } from './auth-storage.js';
34
36
  import { streamProvider } from './providers/stream.js';
35
37
  import type { PiMessage } from './providers/types.js';
36
38
  import { toolDefsForProvider } from './tools/registry.js';
39
+ import type { PiTaskHost } from './tools/types.js';
37
40
 
38
41
  // ── Live conversation state ────────────────────────────────────────────────
39
42
 
@@ -49,9 +52,29 @@ interface LiveConversation {
49
52
  pendingCount: number;
50
53
  /** 60ms micro-batcher for bot:token — collapses per-delta WS frame floods. */
51
54
  batcher: TokenBatcher;
55
+ /** Running background sub-agent tasks (Phase B). While non-empty, the
56
+ * conversation reports idle:false (recycling deferred) and counts as busy
57
+ * (backend restarts / self-updates deferred) so a task is never killed
58
+ * mid-flight by housekeeping. */
59
+ tasks: Map<string, RunningTask>;
60
+ /** Set when a completed background task used file tools — OR'd into the next
61
+ * bot:turn-complete (the continuation turn) so the backend restarts right
62
+ * after the user hears "Done!", mirroring claude's usedTools capture of
63
+ * sub-agent tool_use blocks. */
64
+ taskUsedFileTools: boolean;
52
65
  loopDone: Promise<void> | null;
53
66
  }
54
67
 
68
/**
 * Bookkeeping record for one background sub-agent task owned by a live
 * conversation. Everything except `stopped` is fixed when the task is spawned,
 * so those fields are `readonly`.
 */
interface RunningTask {
  /** Task identifier; also the key under which the task sits in the conversation's `tasks` map. */
  readonly id: string;
  /** Short description supplied with the Task call (echoed in task events and logs). */
  readonly description: string;
  /** Name of the agent configuration the task was spawned with. */
  readonly subagentType: string;
  /** Aborting this controller interrupts the child session. */
  readonly abortController: AbortController;
  /** True when stopped via user:stop-task or conversation teardown. */
  stopped: boolean;
  /** `Date.now()` at spawn time; the basis for reported durations. */
  readonly startedAt: number;
}
77
+
55
78
  const liveConversations = new Map<string, LiveConversation>();
56
79
 
57
80
  /**
@@ -227,6 +250,182 @@ function resolveAuth(): { ok: true; auth: PiSessionAuth } | { ok: false; error:
227
250
  };
228
251
  }
229
252
 
253
+ // ── Background sub-agents (Phase B — audit D4-1) ───────────────────────────
254
+
255
/**
 * Queue a system-originated message (e.g. a task completion report) on the
 * parent conversation as if it were a user turn.
 *
 * The conversation is marked busy and its pending counter is incremented
 * before the message is queued, so idle reporting stays accurate and the
 * recycler cannot fire while the continuation turn is outstanding. No routing
 * target is registered and no bot:typing is emitted here — the continuation's
 * response is meant to fall through to the dashboard broadcast, mirroring the
 * Claude SDK's self-prompted continuation turn.
 *
 * @param conv Conversation that receives the synthetic turn.
 * @param text Text content of the synthetic message.
 */
function pushSyntheticMessage(conv: LiveConversation, text: string): void {
  conv.busy = true;
  conv.pendingCount = conv.pendingCount + 1;

  const syntheticTurn: PiMessage = {
    role: 'user',
    content: [{ type: 'text', text }],
  };
  conv.inputQueue.push(syntheticTurn);
}
267
+
268
/**
 * coder.txt advertises the claude toolset ("Read, Write, Edit, Bash, Glob,
 * Grep") — swap in the child's REAL pi toolset so the sub-agent never chases
 * tools it doesn't have (audit D4-4). claude keeps its richer line.
 *
 * Only the first line starting with "You have full tool access:" (matched
 * case-insensitively) is rewritten; a prompt without that line is returned
 * unchanged.
 *
 * @param prompt    Agent system prompt to patch.
 * @param toolNames Names of the tools the child session actually receives.
 * @returns The prompt with the tool-access line replaced.
 */
function rewriteToolAccessLine(prompt: string, toolNames: string[]): string {
  const accessLine = `You have full tool access: ${toolNames.join(', ')}.`;
  // A replacer FUNCTION is used on purpose: a replacement string would have
  // `$&`, `$1`, `$$` … expanded by String.prototype.replace, corrupting any
  // tool name that happens to contain a dollar sign.
  return prompt.replace(/You have full tool access:[^\n]*/i, () => accessLine);
}
274
+
275
/**
 * Compact human-readable descriptor of a child tool call for bot:task-progress.
 *
 * - `bash`: "Bash: " plus the first non-empty string among `description` and
 *   `command`, truncated to 80 characters.
 * - `read` / `write` / `edit`: a verb plus the last two segments of `file_path`.
 * - anything else: the tool name as given.
 *
 * Tool names are matched case-insensitively. The input is model-produced and
 * therefore untrusted in shape: non-object inputs and non-string fields are
 * treated as absent rather than stringified.
 *
 * @param name  Tool name as reported by the child session.
 * @param input Raw tool-call arguments.
 * @returns One-line summary suitable for a progress card.
 */
function toolCallSummary(name: string, input: unknown): string {
  const args: Record<string, unknown> =
    typeof input === 'object' && input !== null ? (input as Record<string, unknown>) : {};

  // First argument that is a non-empty string, or '' when there is none.
  const firstText = (...candidates: unknown[]): string => {
    for (const candidate of candidates) {
      if (typeof candidate === 'string' && candidate !== '') return candidate;
    }
    return '';
  };

  // Last two path segments. Both '/' and '\' count as separators so Windows
  // paths are shortened too; empty segments (leading/trailing/double
  // separators) are ignored.
  const tail = (p: unknown): string =>
    typeof p === 'string' ? p.split(/[\\/]+/).filter(Boolean).slice(-2).join('/') : '';

  switch (name.toLowerCase()) {
    case 'bash':
      return `Bash: ${firstText(args.description, args.command).slice(0, 80)}`;
    case 'read':
      return `Reading ${tail(args.file_path)}`;
    case 'write':
      return `Writing ${tail(args.file_path)}`;
    case 'edit':
      return `Editing ${tail(args.file_path)}`;
    default:
      return name;
  }
}
286
+
287
/**
 * Per-conversation task host: spawns an in-process child `createPiSession`
 * per Task call, translates child events into the `bot:task-*` vocabulary
 * (payload fields exactly as claude.ts:443-484 emits them), and injects the
 * completion back into the parent's queue for the "Done!" continuation turn.
 *
 * Lifecycle of one `spawn`:
 *  1. resolve the agent config (unknown type → `{ ok: false }`, nothing registered);
 *  2. register a `RunningTask` in `conv.tasks`;
 *  3. build the child session, emit `bot:task-created`, start the child detached;
 *  4. when the child settles: unregister, emit `bot:task-done`, and — if the
 *     parent conversation is still the live one — queue the continuation note.
 *
 * If step 3 throws, the registration from step 2 is rolled back and the
 * failure is returned as `{ ok: false }`. Without that rollback the
 * conversation would keep a phantom task forever and never report idle again.
 *
 * @param conv    Parent conversation that owns the spawned tasks.
 * @param getAuth Auth resolver handed unchanged to every child session.
 * @returns Host whose `spawn` returns synchronously while the child runs detached.
 */
function createTaskHost(conv: LiveConversation, getAuth: () => PiSessionAuth): PiTaskHost {
  // Best-effort text for anything that can be thrown: a non-empty string
  // `message` when present, otherwise the value's string form.
  const describeError = (err: unknown): string => {
    const message = (err as { message?: unknown } | null | undefined)?.message;
    return typeof message === 'string' && message !== '' ? message : String(err);
  };

  return {
    spawn(req) {
      const agents = buildAgents();
      const cfg = agents[req.subagentType];
      if (!cfg) {
        return {
          ok: false,
          error: `Unknown subagent_type "${req.subagentType}". Available: ${Object.keys(agents).join(', ') || 'none'}.`,
        };
      }

      const taskId = crypto.randomUUID().slice(0, 8);
      const abortController = new AbortController();
      const task: RunningTask = {
        id: taskId,
        description: req.description,
        subagentType: req.subagentType,
        abortController,
        stopped: false,
        startedAt: Date.now(),
      };
      // Registered before the child starts so the conversation counts as
      // busy / not idle for the task's whole lifetime.
      conv.tasks.set(taskId, task);

      // State accumulated from child events and read once the child settles.
      let summaryText = '';
      let errorText = '';
      let usedFileTools = false;
      let toolUses = 0;
      let lastUsage: { inputTokens?: number; outputTokens?: number; cacheReadTokens?: number; cacheCreationTokens?: number } | undefined;

      try {
        // Honor the agent config's tool restrictions (claude applies these via
        // the SDK's tools/disallowedTools options — e.g. a future researcher
        // agent with disallowedTools: ['Write','Edit']).
        let childTools = toolDefsForProvider({ forSubagent: true });
        if (Array.isArray(cfg.tools) && cfg.tools.length > 0) {
          childTools = childTools.filter((t) => cfg.tools.includes(t.name));
        }
        if (Array.isArray(cfg.disallowedTools) && cfg.disallowedTools.length > 0) {
          childTools = childTools.filter((t) => !cfg.disallowedTools.includes(t.name));
        }
        const systemPrompt = rewriteToolAccessLine(String(cfg.prompt || ''), childTools.map((t) => t.name));

        const session = createPiSession({
          getAuth,
          systemPrompt,
          tools: childTools,
          cwd: WORKSPACE_DIR,
          abortController,
          maxToolRounds: typeof cfg.maxTurns === 'number' ? cfg.maxTurns : 50,
          onEvent: (evt: PiSessionEvent) => {
            switch (evt.type) {
              case 'tool_use':
                toolUses += 1;
                conv.batcher.flush();
                conv.onMessage('bot:task-progress', {
                  conversationId: conv.id,
                  taskId,
                  summary: toolCallSummary(evt.name, evt.input),
                  lastTool: evt.name,
                  usage: { tool_uses: toolUses, duration_ms: Date.now() - task.startedAt },
                });
                break;
              case 'text_end':
                // Last text block wins: it becomes the task's result summary.
                summaryText = evt.text;
                break;
              case 'error':
                errorText = evt.error;
                break;
              case 'turn_complete':
                usedFileTools = usedFileTools || evt.usedFileTools;
                if (evt.usage) lastUsage = evt.usage;
                break;
            }
          },
        });

        // Single-message input: the child gets the prompt, then end-of-input.
        const queue = createAsyncQueue<PiMessage>();
        queue.push({ role: 'user', content: [{ type: 'text', text: req.prompt }] });
        queue.end();

        log.info(`[pi/task] ──── SUB-AGENT STARTED ──── id=${taskId} type=${req.subagentType} "${req.description}"`);
        // Task events bypass translateAndEmit, so flush the token batcher first —
        // bot:task-created COMMITS the dashboard stream buffer (useBlobyChat),
        // and a batch flushed after it would mis-slice committedTextLength.
        conv.batcher.flush();
        conv.onMessage('bot:task-created', {
          conversationId: conv.id,
          taskId,
          description: req.description,
          type: req.subagentType,
        });

        void (async () => {
          try {
            await session.run(queue);
          } catch (err: unknown) {
            errorText = errorText || describeError(err);
          } finally {
            conv.tasks.delete(taskId);
            const status = task.stopped ? 'stopped' : (errorText && !summaryText ? 'failed' : 'completed');
            const summary = summaryText || errorText || '(the agent produced no output)';
            const u = lastUsage;
            const totalTokens = u
              ? (u.inputTokens || 0) + (u.outputTokens || 0) + (u.cacheReadTokens || 0) + (u.cacheCreationTokens || 0)
              : 0;
            log.info(
              `[pi/task] ──── SUB-AGENT ${status.toUpperCase()} ──── id=${taskId} ` +
                `tools=${toolUses} ${Math.round((Date.now() - task.startedAt) / 1000)}s summary=${summary.slice(0, 160)}`,
            );
            conv.batcher.flush();
            conv.onMessage('bot:task-done', {
              conversationId: conv.id,
              taskId,
              status,
              summary,
              usage: { tool_uses: toolUses, duration_ms: Date.now() - task.startedAt, total_tokens: totalTokens },
            });
            if (usedFileTools) conv.taskUsedFileTools = true;

            // Drive the user-facing continuation turn — unless the conversation
            // itself is gone (ended/recycled), in which case the report dies with
            // it (claude parity: the SDK subprocess dies too).
            if (liveConversations.get(conv.id) === conv && !conv.abortController.signal.aborted) {
              const note = task.stopped
                ? `[System: the background task "${req.description}" was stopped by the user. Acknowledge that briefly in your own voice — never mention agents, tasks, or system messages.]`
                : `[System: background task "${req.description}" ${status}.]\n\nResult summary:\n${summary}\n\nRelay the outcome to the user concisely in your own voice (never mention agents, tasks, ids, or system messages). If it failed, say what went wrong and offer a next step.`;
              pushSyntheticMessage(conv, note);
            }
          }
        })().catch((err: unknown) => {
          // Reached only if the completion handling itself threw (e.g. the
          // event sink). Nobody awaits the detached runner, so log here rather
          // than leave an unhandled rejection.
          log.warn(`[pi/task] Completion handling failed for task ${taskId}: ${describeError(err)}`);
        });

        return { ok: true, taskId };
      } catch (err: unknown) {
        // Setup failed before the child was started: roll back the
        // registration so the conversation is not pinned as busy by a task
        // that will never finish.
        conv.tasks.delete(taskId);
        abortController.abort();
        const reason = describeError(err);
        log.warn(`[pi/task] Failed to start sub-agent task ${taskId}: ${reason}`);
        return { ok: false, error: `Could not start the background task: ${reason}` };
      }
    },
  };
}
428
+
230
429
  /** Convert a saved RecentMessage[] into the provider-neutral PiMessage[]. */
231
430
  function recentToPiMessages(messages: RecentMessage[] | undefined): PiMessage[] {
232
431
  if (!messages?.length) return [];
@@ -299,6 +498,8 @@ export async function startConversation(
299
498
  busy: false,
300
499
  pendingCount: 0,
301
500
  batcher: createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text })),
501
+ tasks: new Map(),
502
+ taskUsedFileTools: false,
302
503
  loopDone: null,
303
504
  };
304
505
  liveConversations.set(conversationId, conv);
@@ -319,6 +520,7 @@ export async function startConversation(
319
520
  tools: toolDefsForProvider(),
320
521
  cwd: WORKSPACE_DIR,
321
522
  abortController,
523
+ taskHost: createTaskHost(conv, getAuth),
322
524
  onEvent: (evt: PiSessionEvent) => {
323
525
  translateAndEmit(conv, evt);
324
526
  },
@@ -365,9 +567,14 @@ function translateAndEmit(conv: LiveConversation, evt: PiSessionEvent) {
365
567
  case 'text_end':
366
568
  conv.onMessage('bot:response', { conversationId: conv.id, content: evt.text });
367
569
  break;
368
- case 'tool_use':
369
- conv.onMessage('bot:tool', { conversationId: conv.id, name: evt.name, input: evt.input });
570
+ case 'tool_use': {
571
+ // House vocabulary: claude's delegation tool is named Task; the pi
572
+ // prompt's 'Agent' alias resolves to the same tool — normalize the
573
+ // event so consumers see one name.
574
+ const toolName = evt.name === 'Agent' || evt.name === 'agent' ? 'Task' : evt.name;
575
+ conv.onMessage('bot:tool', { conversationId: conv.id, name: toolName, input: evt.input });
370
576
  break;
577
+ }
371
578
  case 'tool_result':
372
579
  // Not surfaced yet (Phase D: translate to a bot:tool progress pulse).
373
580
  break;
@@ -375,9 +582,16 @@ function translateAndEmit(conv: LiveConversation, evt: PiSessionEvent) {
375
582
  conv.busy = false;
376
583
  // One turn-complete per pushed message (D1-1 restored that invariant);
377
584
  // idle gates the supervisor's proactive recycling so it never fires with
378
- // a message still queued claude.ts pendingCount semantics exactly.
585
+ // a message still queued OR a background task still running — recycling
586
+ // mid-task would kill the task (claude has the same teardown semantics,
587
+ // but its idle flag doesn't guard tasks; this is strictly safer).
379
588
  conv.pendingCount = Math.max(0, conv.pendingCount - 1);
380
- const idle = conv.pendingCount === 0;
589
+ const idle = conv.pendingCount === 0 && conv.tasks.size === 0;
590
+ // A finished background task's file edits restart the backend on the
591
+ // very next turn boundary (the continuation turn) — claude captures
592
+ // sub-agent tool_use blocks into the parent's usedTools the same way.
593
+ const usedFileTools = evt.usedFileTools || conv.taskUsedFileTools;
594
+ conv.taskUsedFileTools = false;
381
595
  // Prompt occupancy of the last provider round — input + cache reads +
382
596
  // cache writes, exactly claude.ts's contextTokens math. Output tokens
383
597
  // are NOT added (claude doesn't either; the recycler's 70% threshold
@@ -387,12 +601,12 @@ function translateAndEmit(conv: LiveConversation, evt: PiSessionEvent) {
387
601
  : 0;
388
602
  conv.onMessage('bot:turn-complete', {
389
603
  conversationId: conv.id,
390
- usedFileTools: evt.usedFileTools,
604
+ usedFileTools,
391
605
  contextTokens,
392
606
  contextWindow: evt.contextWindow || 0,
393
607
  idle,
394
608
  });
395
- log.info(`[pi/conversation] ──── TURN COMPLETE ──── busy=false ctx=${contextTokens}/${evt.contextWindow || 'n/a'} idle=${idle}`);
609
+ log.info(`[pi/conversation] ──── TURN COMPLETE ──── busy=false ctx=${contextTokens}/${evt.contextWindow || 'n/a'} idle=${idle} tasks=${conv.tasks.size}`);
396
610
  break;
397
611
  }
398
612
  case 'error': {
@@ -445,6 +659,14 @@ export function endConversation(conversationId: string): void {
445
659
  if (!conv) return;
446
660
 
447
661
  log.info(`[pi/conversation] ──── ENDING CONVERSATION ${conversationId} ────`);
662
+ // Background tasks die with the conversation (claude parity — the SDK
663
+ // subprocess takes its tasks down too). Their finallys still emit
664
+ // bot:task-done {status:'stopped'} so dashboard task cards don't spin
665
+ // forever; the completion injection is skipped (conv gone).
666
+ for (const task of conv.tasks.values()) {
667
+ task.stopped = true;
668
+ task.abortController.abort();
669
+ }
448
670
  conv.batcher.discard();
449
671
  conv.inputQueue.end();
450
672
  conv.abortController.abort();
@@ -455,16 +677,29 @@ export function isConversationBusy(conversationId: string): boolean {
455
677
  return liveConversations.get(conversationId)?.busy || false;
456
678
  }
457
679
 
458
/**
 * Reports whether any live conversation in this harness is either mid-turn or
 * still has a background sub-agent running. The supervisor uses this to defer
 * backend restarts and self-updates — a restart mid-task would kill the task's
 * work in flight.
 *
 * @returns `true` as soon as one conversation is busy or owns a running task.
 */
export function anyConversationBusy(): boolean {
  return Array.from(liveConversations.values()).some(
    (conversation) => conversation.busy || conversation.tasks.size > 0,
  );
}
464
689
 
465
/**
 * Stops one running background sub-agent task (dashboard user:stop-task).
 *
 * The task is flagged as stopped and its abort controller is fired; the
 * task's own teardown then reports bot:task-done {status:'stopped'} and
 * queues a brief acknowledgement turn on the parent. When the conversation or
 * the task cannot be found, a warning is logged and nothing else happens.
 *
 * @param conversationId Conversation that owns the task.
 * @param taskId         Identifier of the task to stop.
 */
export async function stopSubAgentTask(conversationId: string, taskId: string): Promise<void> {
  const runningTask = liveConversations.get(conversationId)?.tasks.get(taskId);

  if (runningTask) {
    log.info(`[pi/task] Stopping sub-agent task ${taskId}`);
    // Flag first, then abort, so the teardown sees `stopped` already set.
    runningTask.stopped = true;
    runningTask.abortController.abort();
    return;
  }

  log.warn(`[pi/task] Cannot stop task ${taskId} — not running in conversation ${conversationId}`);
}
469
704
 
470
705
  /** Pi has no pre-warm step (no subprocess), but the interface requires this. */
@@ -38,7 +38,7 @@ import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock, PiUsage, PiEr
38
38
  import { sleep } from './providers/retry.js';
39
39
  import type { AsyncQueue } from './async-queue.js';
40
40
  import { findTool } from './tools/registry.js';
41
- import type { PiTool } from './tools/types.js';
41
+ import type { PiTool, PiTaskHost } from './tools/types.js';
42
42
 
43
43
  export type PiSessionEvent =
44
44
  | { type: 'turn_started' }
@@ -78,6 +78,17 @@ export interface PiSessionInit {
78
78
  tools?: PiToolDef[];
79
79
  /** Resolved every time a tool fires (registry → run). */
80
80
  cwd: string;
81
+ /**
82
+ * Background sub-agent host (Phase B). Set only on PARENT live sessions —
83
+ * threaded into PiToolContext so the Task tool can spawn; child sessions
84
+ * leave it unset (no grandchildren, Claude SDK parity).
85
+ */
86
+ taskHost?: PiTaskHost;
87
+ /**
88
+ * Per-turn tool-round budget. Parents keep the default; sub-agent children
89
+ * get their agent config's maxTurns (e.g. coder: 50).
90
+ */
91
+ maxToolRounds?: number;
81
92
  /** Used to interrupt in-flight provider calls when the session ends. */
82
93
  abortController: AbortController;
83
94
  /** Caller's event sink — translated to bloby's `bot:*` events one layer up. */
@@ -199,7 +210,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
199
210
  };
200
211
  }
201
212
  try {
202
- return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal });
213
+ return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal, tasks: init.taskHost });
203
214
  } catch (err: any) {
204
215
  return { output: `Tool ${call.name} threw: ${err?.message || err}`, isError: true };
205
216
  }
@@ -218,7 +229,8 @@ export function createPiSession(init: PiSessionInit): PiSession {
218
229
  let turnErrorMsg: string | undefined;
219
230
  let turnErrorKind: PiErrorKind | undefined;
220
231
 
221
- for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
232
+ const maxRounds = Math.max(1, init.maxToolRounds ?? MAX_TOOL_ROUNDS);
233
+ for (let round = 0; round < maxRounds; round++) {
222
234
  if (init.abortController.signal.aborted) break;
223
235
  // The separator condition is decided BEFORE the round so the round can
224
236
  // emit it ahead of its first token (claude.ts ordering — see runOneRound).
@@ -1,8 +1,10 @@
1
1
  /**
2
2
  * Tool registry — the bag of tools the pi session passes to the model.
3
3
  *
4
- * Phase 2 ships the four core coding tools. Phase 3 or later will add Grep,
5
- * Glob, LS, NotebookEdit, etc. so the surface fully matches Claude SDK's.
4
+ * Read/Write/Edit/Bash mirror the Claude SDK tools; Task is the background
5
+ * sub-agent delegator (Phase B of the parity plan). Grep, Glob, LS,
6
+ * NotebookEdit etc. are still pending (Phase D) to fully match Claude SDK's
7
+ * surface.
6
8
  */
7
9
  import type { PiTool } from './types.js';
8
10
  import type { PiToolDef } from '../providers/types.js';
@@ -10,8 +12,9 @@ import { readTool } from './read.js';
10
12
  import { writeTool } from './write.js';
11
13
  import { editTool } from './edit.js';
12
14
  import { bashTool } from './bash.js';
15
+ import { taskTool, taskToolDef } from './task.js';
13
16
 
14
/** Tool implementations registered with the pi session, in registration order. */
export const PI_TOOLS: PiTool[] = [
  readTool,
  writeTool,
  editTool,
  bashTool,
  taskTool,
];
15
18
 
16
19
  const TOOL_BY_NAME = new Map<string, PiTool>();
17
20
  for (const t of PI_TOOLS) {
@@ -20,15 +23,28 @@ for (const t of PI_TOOLS) {
20
23
  // common aliases so we don't 404 a legitimate call over a casing nit.
21
24
  TOOL_BY_NAME.set(t.name.toLowerCase(), t);
22
25
  }
26
// The pi system prompt (claude heritage) refers to background delegation as
// "the Agent tool". Register both casings as aliases of the Task
// implementation so a model that follows the prompt verbatim still resolves.
for (const alias of ['Agent', 'agent']) {
  TOOL_BY_NAME.set(alias, taskTool);
}
23
31
 
24
32
  export function findTool(name: string): PiTool | undefined {
25
33
  return TOOL_BY_NAME.get(name) || TOOL_BY_NAME.get(name.toLowerCase());
26
34
  }
27
35
 
28
/**
 * Produces the tool definitions handed to the provider for a session.
 *
 * Every registered tool contributes its static name/description/schema,
 * except Task:
 *  - for sub-agent sessions it is left out, so children cannot spawn
 *    grandchildren (Claude SDK parity);
 *  - otherwise its definition is rebuilt through `taskToolDef()` on each call,
 *    so agent-roster/prompt edits apply per session start.
 *
 * @param opts.forSubagent When truthy, omit the Task tool.
 * @returns Definitions in `PI_TOOLS` order.
 */
export function toolDefsForProvider(opts?: { forSubagent?: boolean }): PiToolDef[] {
  const omitTask = Boolean(opts?.forSubagent);

  return PI_TOOLS.flatMap((tool): PiToolDef[] => {
    if (tool.name !== 'Task') {
      const { name, description, inputSchema } = tool;
      return [{ name, description, inputSchema }];
    }
    return omitTask ? [] : [taskToolDef()];
  });
}
@@ -0,0 +1,108 @@
1
+ /**
2
+ * Task tool — background sub-agent delegation (audit D4-1, Phase B).
3
+ *
4
+ * Mirrors the Claude Agent SDK's Task tool contract and feel:
5
+ * - the model calls Task({description, prompt, subagent_type})
6
+ * - the tool returns IMMEDIATELY with an acknowledgement, so the parent turn
7
+ * ends in seconds and the chat stays fully conversational
8
+ * - the child runs as an in-process `createPiSession` (Bloby owns the agent
9
+ * loop, so no subprocess is needed — unlike upstream pi's subagent
10
+ * extension, which must spawn the pi CLI)
11
+ * - completion is injected back into the parent's input queue as a synthetic
12
+ * message that drives the user-facing "Done!" continuation turn
13
+ *
14
+ * Also registered under the alias 'Agent' (registry.ts): the pi system prompt
15
+ * sells "the Agent tool" — with this tool live, those sections are true as
16
+ * written, closing the audit's D4-2 finding without a prompt edit.
17
+ *
18
+ * Agent definitions come from `supervisor/agents/index.ts:buildAgents()` —
19
+ * the same roster the Claude harness uses, so both harnesses stay in sync.
20
+ */
21
+ import type { PiTool } from './types.js';
22
+ import type { PiToolDef } from '../providers/types.js';
23
+ import { buildAgents } from '../../../agents/index.js';
24
+
25
/**
 * Builds the provider-facing definition of the Task tool from the current
 * agent roster.
 *
 * The roster is read on every call (not at module load), so prompt-file and
 * roster edits are picked up at the next session start without a supervisor
 * restart, and the workspace exists by the time prompts are read.
 *
 * @returns Definition whose description lists the available agents and whose
 *          `subagent_type` carries an `enum` of agent names when at least one
 *          agent is configured.
 */
export function taskToolDef(): PiToolDef {
  const roster = buildAgents();
  const agentNames = Object.keys(roster);

  const rosterBlurb = agentNames
    .map((agentName) => `${agentName} (${roster[agentName].description})`)
    .join('; ');

  const toolDescription = [
    'Delegate heavy work to a background sub-agent and keep chatting while it runs. ',
    'Returns immediately — you will automatically receive the result when the agent finishes, ',
    'so acknowledge the user briefly and end your turn. ',
    'Available agents: ',
    rosterBlurb || 'none configured',
  ].join('');

  // The enum constraint is attached only when the roster is non-empty.
  const rosterEnum = agentNames.length > 0 ? { enum: agentNames } : {};

  return {
    name: 'Task',
    description: toolDescription,
    inputSchema: {
      type: 'object',
      properties: {
        description: {
          type: 'string',
          description: 'Short (3-5 word) description of the task, shown to the user as a progress card.',
        },
        prompt: {
          type: 'string',
          description: 'Complete, self-contained instructions for the sub-agent. It cannot ask follow-up questions — include every detail it needs.',
        },
        subagent_type: {
          type: 'string',
          ...rosterEnum,
          description: 'Which sub-agent to delegate to.',
        },
      },
      required: ['description', 'prompt', 'subagent_type'],
    },
  };
}
62
+
63
/**
 * Runtime implementation of the Task tool.
 *
 * `run` validates the model-supplied arguments, asks the conversation's task
 * host to spawn the sub-agent and returns an acknowledgement right away; the
 * sub-agent's result reaches the parent later through the host.
 *
 * The schema below is a static placeholder (no file I/O at module load) —
 * providers receive the dynamic enum schema from taskToolDef() via
 * registry.toolDefsForProvider.
 */
export const taskTool: PiTool = {
  name: 'Task',
  description: 'Delegate heavy work to a background sub-agent.',
  inputSchema: {
    type: 'object',
    properties: {
      description: { type: 'string' },
      prompt: { type: 'string' },
      subagent_type: { type: 'string' },
    },
    required: ['description', 'prompt', 'subagent_type'],
  },

  async run(input, ctx) {
    // No host means this session is not allowed to delegate (e.g. a child).
    const host = ctx.tasks;
    if (!host) {
      return {
        output:
          'The Task tool is not available in this context — do the work yourself with your other tools.',
        isError: true,
      };
    }

    // Arguments are model-produced: anything that is not a string counts as missing.
    const asTrimmedText = (value: unknown): string => (typeof value === 'string' ? value.trim() : '');
    const description = asTrimmedText(input?.description);
    const prompt = asTrimmedText(input?.prompt);
    const subagentType = asTrimmedText(input?.subagent_type);

    if (prompt === '') {
      return { output: 'Task requires `prompt` — complete instructions for the sub-agent.', isError: true };
    }

    const spawned = host.spawn({
      // Fall back to the head of the prompt / the 'coder' agent when omitted.
      description: description || prompt.slice(0, 60),
      prompt,
      subagentType: subagentType || 'coder',
    });
    if (!spawned.ok) {
      return { output: spawned.error, isError: true };
    }

    const acknowledgement = [
      `Background task started (id: ${spawned.taskId}).`,
      'It is now running while you keep chatting.',
      "Tell the user in ONE short sentence that you're on it (your usual voice — never mention agents, tasks, or ids), then end your turn.",
      'You will automatically receive the result when it finishes.',
    ].join(' ');
    return { output: acknowledgement };
  },
};
@@ -14,11 +14,26 @@ export interface PiToolResult {
14
14
  isError?: boolean;
15
15
  }
16
16
 
17
/**
 * What a live conversation offers the Task tool so it can start background
 * sub-agents (audit D4-1). Only PARENT sessions supply a host; in child
 * sessions `ctx.tasks` is undefined, which is what keeps a sub-agent from
 * spawning grandchildren (Claude SDK parity).
 */
export interface PiTaskHost {
  /**
   * Start a background sub-agent. The call returns synchronously while the
   * child keeps running detached.
   *
   * @param req Task description, full instructions and the agent type to use.
   * @returns `{ ok: true, taskId }` once started, otherwise `{ ok: false, error }`.
   */
  spawn(req: {
    description: string;
    prompt: string;
    subagentType: string;
  }): { ok: true; taskId: string } | { ok: false; error: string };
}
29
+
17
30
  export interface PiToolContext {
18
31
  /** Workspace root — every tool resolves paths against this. */
19
32
  cwd: string;
20
33
  /** Aborted when the session ends so long-running tools stop fast. */
21
34
  signal?: AbortSignal;
35
+ /** Present only in live parent sessions — lets the Task tool spawn sub-agents. */
36
+ tasks?: PiTaskHost;
22
37
  }
23
38
 
24
39
  export interface PiTool {
@@ -170,7 +170,7 @@ export const SHELL_HTML = `<!DOCTYPE html>
170
170
  // preload=auto so the clip is fetched (and SW-cached) while the supervisor is
171
171
  // still up — by the time we show this, the origin is unreachable.
172
172
  '<video autoplay loop muted playsinline preload="auto" style="position:relative;width:100%;height:100%;object-fit:contain;border-radius:50%">' +
173
- '<source src="/morphy_sad.mov" type=\'video/mp4; codecs="hvc1"\'><source src="/morphy_sad.webm" type="video/webm">' +
173
+ '<source src="/morphy_sad.webm" type="video/webm"><source src="/morphy_sad.mov" type="video/mp4">' +
174
174
  '</video>' +
175
175
  '</div>' +
176
176
  '<h1 style="font-size:1.5rem;font-weight:700;margin:0 0 .6rem;background:linear-gradient(135deg,#0166FF,#009AFE,#4AEEFF);-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text">Workspace is restarting&hellip;</h1>' +
@@ -562,7 +562,7 @@
562
562
  '<div style="position:relative;width:160px;height:160px;margin:0 auto 1.2rem">' +
563
563
  '<div style="position:absolute;inset:-18px;background:radial-gradient(circle,rgba(1,102,255,.18) 0%,transparent 60%);filter:blur(18px)"></div>' +
564
564
  '<video autoplay loop muted playsinline style="position:relative;width:100%;height:100%;object-fit:contain;border-radius:50%">' +
565
- '<source src="/morphy_sad.mov" type=\'video/mp4; codecs="hvc1"\'><source src="/morphy_sad.webm" type="video/webm">' +
565
+ '<source src="/morphy_sad.webm" type="video/webm"><source src="/morphy_sad.mov" type="video/mp4">' +
566
566
  '</video>' +
567
567
  '</div>' +
568
568
  '<h1 style="font-size:1.5rem;font-weight:700;margin:0 0 .6rem;background:linear-gradient(135deg,#0166FF,#009AFE,#4AEEFF);-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text">Workspace error</h1>' +