npm - bloby-bot - Versions diffs - 0.47.4 → 0.47.6 - Mend

bloby-bot 0.47.4 → 0.47.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/package.json +1 -1
package/supervisor/harnesses/pi/index.ts +3 -0
package/supervisor/harnesses/pi/providers/stream-google.ts +124 -27
package/supervisor/harnesses/pi/session.ts +108 -32
package/supervisor/harnesses/pi/tools/bash.ts +109 -0
package/supervisor/harnesses/pi/tools/edit.ts +66 -0
package/supervisor/harnesses/pi/tools/path-safety.ts +29 -0
package/supervisor/harnesses/pi/tools/read.ts +71 -0
package/supervisor/harnesses/pi/tools/registry.ts +34 -0
package/supervisor/harnesses/pi/tools/types.ts +29 -0
package/supervisor/harnesses/pi/tools/write.ts +42 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bloby-bot",
-  "version": "0.47.4",
+  "version": "0.47.6",
   "releaseNotes": [
     "1. # voice note (PTT bubble)",
     "2. # audio file + caption",

package/supervisor/harnesses/pi/index.ts CHANGED Viewed

@@ -30,6 +30,7 @@ import { getPiSubProvider } from './sub-providers.js';
 import { readPiAuth } from './auth-storage.js';
 import { streamProvider } from './providers/stream.js';
 import type { PiMessage } from './providers/types.js';
+import { toolDefsForProvider } from './tools/registry.js';
 // ── Live conversation state ────────────────────────────────────────────────
@@ -211,6 +212,8 @@ export async function startConversation(
     baseUrl: auth.baseUrl,
     apiKey: auth.apiKey,
     systemPrompt,
+    tools: toolDefsForProvider(),
+    cwd: WORKSPACE_DIR,
     abortController,
     onEvent: (evt: PiSessionEvent) => {
       translateAndEmit(conv, evt);

package/supervisor/harnesses/pi/providers/stream-google.ts CHANGED Viewed

@@ -8,6 +8,7 @@
  * Endpoint: POST {baseUrl}/models/{modelId}:streamGenerateContent?alt=sse&key={apiKey}
  * Stream:   SSE — each `data: {...}` is one candidate update.
  */
+import crypto from 'crypto';
 import { log } from '../../../../shared/logger.js';
 import type {
   PiStreamRequest,
@@ -18,51 +19,121 @@ import type {
 } from './types.js';
 /** Walk an SSE byte stream and yield each parsed JSON event. */
-async function* parseSse(res: Response): AsyncIterable<any> {
+async function* parseSse(res: Response, dbg: { firstBytes: string }): AsyncIterable<any> {
   if (!res.body) return;
   const reader = res.body.getReader();
   const decoder = new TextDecoder();
   let buffer = '';
+  let totalBytes = 0;
   try {
     while (true) {
       const { value, done } = await reader.read();
       if (done) break;
+      if (value) totalBytes += value.byteLength;
       buffer += decoder.decode(value, { stream: true });
-      // SSE event boundary is a blank line. Process every complete event in buffer.
+      if (!dbg.firstBytes && buffer.length > 0) {
+        dbg.firstBytes = buffer.slice(0, 800);
+      }
+      // SSE event boundary is a blank line. Accept both LF and CRLF separators.
       let idx;
-      while ((idx = buffer.indexOf('\n\n')) !== -1) {
+      while (
+        (idx = (() => {
+          const a = buffer.indexOf('\n\n');
+          const b = buffer.indexOf('\r\n\r\n');
+          if (a < 0) return b;
+          if (b < 0) return a;
+          return Math.min(a, b);
+        })()) !== -1
+      ) {
+        const isCrlf = buffer.slice(idx, idx + 4) === '\r\n\r\n';
         const raw = buffer.slice(0, idx);
-        buffer = buffer.slice(idx + 2);
-        const dataLines = raw.split('\n').filter((l) => l.startsWith('data:'));
-        if (!dataLines.length) continue;
-        const data = dataLines.map((l) => l.slice(5).trimStart()).join('\n');
-        if (!data || data === '[DONE]') continue;
-        try {
-          yield JSON.parse(data);
-        } catch {
-          // Skip malformed chunks rather than killing the whole turn.
-        }
+        buffer = buffer.slice(idx + (isCrlf ? 4 : 2));
+        const parsed = parseSseEvent(raw);
+        if (parsed !== undefined) yield parsed;
       }
     }
+    // Flush whatever remains — Gemini's final event may not have a trailing blank line.
+    buffer += decoder.decode();
+    if (buffer.trim()) {
+      const parsed = parseSseEvent(buffer);
+      if (parsed !== undefined) yield parsed;
+    }
   } finally {
     try { reader.releaseLock(); } catch {}
+    dbg.firstBytes = dbg.firstBytes || `(zero bytes — total=${totalBytes})`;
+  }
+}
+function parseSseEvent(raw: string): any | undefined {
+  // Standard SSE: one or more `data:` lines per event. Concatenate their payloads.
+  const lines = raw.split(/\r?\n/);
+  const dataLines = lines
+    .filter((l) => l.startsWith('data:'))
+    .map((l) => l.slice(5).trimStart());
+  if (!dataLines.length) {
+    // Fallback: some servers omit the `data:` prefix and send pure JSON per event.
+    const trimmed = raw.trim();
+    if (!trimmed || trimmed === '[DONE]') return undefined;
+    // Strip a leading `[`/`,` and a trailing `]`/`,` in case Gemini is returning
+    // a JSON array-stream instead of SSE (alt=sse not honored).
+    const candidate = trimmed.replace(/^[\[,]/, '').replace(/[\],]$/, '').trim();
+    if (!candidate) return undefined;
+    try { return JSON.parse(candidate); } catch { return undefined; }
   }
+  const data = dataLines.join('\n');
+  if (!data || data === '[DONE]') return undefined;
+  try { return JSON.parse(data); } catch { return undefined; }
 }
 function toGeminiRole(role: PiMessage['role']): 'user' | 'model' {
-  return role === 'assistant' ? 'model' : 'user';
+  // Tool results piggyback on the user role with a `functionResponse` part —
+  // see Gemini function-calling docs.
+  if (role === 'assistant') return 'model';
+  return 'user';
 }
 function toGeminiParts(content: PiContentBlock[]): any[] {
   const parts: any[] = [];
   for (const b of content) {
-    if (b.type === 'text') parts.push({ text: b.text });
-    else if (b.type === 'image') parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
-    // tool_use / tool_result are Phase 2.
+    if (b.type === 'text') {
+      parts.push({ text: b.text });
+    } else if (b.type === 'image') {
+      parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
+    } else if (b.type === 'tool_use') {
+      // Assistant turn: the model asked to invoke a tool.
+      parts.push({ functionCall: { name: b.name, args: b.input || {} } });
+    } else if (b.type === 'tool_result') {
+      // Gemini's docs use a flexible `response` JSON map, so wrap the tool
+      // output as `{ output: ... }` on success and `{ error: ... }` when
+      // `isError` is set, so the model can tell a failure apart and react.
+      const response = b.isError ? { error: b.content } : { output: b.content };
+      parts.push({ functionResponse: { name: extractToolName(b.toolUseId), response } });
+    }
   }
   return parts;
 }
+/**
+ * Gemini doesn't carry a tool-call id forward to the response; we encode the
+ * tool name into the id we generate at tool-use time (`{name}::{uuid}`) so
+ * we can recover it here. Falls back to the raw id if the prefix is missing.
+ */
+function extractToolName(toolUseId: string): string {
+  const idx = toolUseId.indexOf('::');
+  return idx > 0 ? toolUseId.slice(0, idx) : toolUseId;
+}
+function toGeminiTools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
+  return [{
+    functionDeclarations: tools.map((t) => ({
+      name: t.name,
+      description: t.description,
+      // Gemini accepts plain JSON Schema for `parameters`.
+      parameters: t.inputSchema,
+    })),
+  }];
+}
 function mapStopReason(reason?: string): PiStopReason {
   switch (reason) {
     case 'STOP':
@@ -77,6 +148,7 @@ function mapStopReason(reason?: string): PiStopReason {
     case 'PROHIBITED_CONTENT':
     case 'SPII':
     case 'OTHER':
+    case 'MALFORMED_FUNCTION_CALL':
       return 'error';
     default:
       return 'end_turn';
@@ -95,6 +167,8 @@ function finishReasonMessage(reason?: string): string {
     case 'PROHIBITED_CONTENT':
     case 'SPII':
       return `Response blocked by Gemini policy (${reason}).`;
+    case 'MALFORMED_FUNCTION_CALL':
+      return 'Gemini emitted a malformed function call. Often means the model tried to invoke a tool that wasn\'t declared, or with arguments that failed schema validation.';
     case 'OTHER':
     default:
       return `Gemini stopped without producing output (finishReason=${reason || 'unknown'}).`;
@@ -125,6 +199,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
   if (req.systemPrompt?.trim()) {
     body.systemInstruction = { parts: [{ text: req.systemPrompt }] };
   }
+  if (req.tools && req.tools.length > 0) {
+    body.tools = toGeminiTools(req.tools);
+  }
   let res: Response;
   try {
@@ -147,6 +224,7 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
   }
   let accumulated = '';
+  let toolCallCount = 0;
   let lastFinish: string | undefined;
   let promptBlockReason: string | undefined;
   let usage: { inputTokens?: number; outputTokens?: number } | undefined;
@@ -155,9 +233,10 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
   let thoughtPartCount = 0;
   let emptyTextPartCount = 0;
   let firstChunkSummary = '';
+  const dbg = { firstBytes: '' };
   try {
-    for await (const chunk of parseSse(res)) {
+    for await (const chunk of parseSse(res, dbg)) {
       chunkCount++;
       if (chunkCount === 1) {
         try { firstChunkSummary = JSON.stringify(chunk).slice(0, 600); } catch {}
@@ -172,6 +251,20 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
         // Thinking models emit reasoning parts with `thought: true`. They
         // shouldn't be shown to the user as part of the visible answer.
         if (part?.thought) { thoughtPartCount++; continue; }
+        if (part?.functionCall && typeof part.functionCall.name === 'string') {
+          // Gemini doesn't surface a tool-call id of its own; bake the tool
+          // name into the id so the session can echo it back as a
+          // `functionResponse` referencing the same name.
+          const id = `${part.functionCall.name}::${crypto.randomUUID()}`;
+          toolCallCount++;
+          yield {
+            type: 'tool_use',
+            id,
+            name: part.functionCall.name,
+            input: part.functionCall.args || {},
+          };
+          continue;
+        }
         if (typeof part?.text === 'string' && part.text.length > 0) {
           accumulated += part.text;
           yield { type: 'text_delta', delta: part.text };
@@ -198,15 +291,16 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
   }
   log.info(
-    `[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} ` +
+    `[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} toolCalls=${toolCallCount} ` +
     `thoughtParts=${thoughtPartCount} emptyTextParts=${emptyTextPartCount} ` +
     `finishReason=${lastFinish || 'none'} ` +
     `promptTok=${usage?.inputTokens ?? '?'} outTok=${usage?.outputTokens ?? '?'}`,
   );
-  if (chunkCount > 0 && !accumulated) {
+  if (chunkCount > 0 && !accumulated && toolCallCount === 0) {
     log.info(`[pi/google] first chunk (truncated): ${firstChunkSummary}`);
   } else if (chunkCount === 0) {
-    log.warn(`[pi/google] SSE stream parsed zero chunks — check response shape (status=${res.status} content-type=${res.headers.get('content-type') || ''})`);
+    log.warn(`[pi/google] SSE stream parsed zero chunks — content-type=${res.headers.get('content-type') || '?'}`);
+    log.warn(`[pi/google] first raw bytes: ${JSON.stringify(dbg.firstBytes)}`);
   }
   // Prompt-level block: nothing came back at all.
@@ -216,10 +310,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
     return;
   }
-  // We finished cleanly but the model produced no visible text. That's almost
-  // always a finish-reason problem (MAX_TOKENS, SAFETY, ...) we'd otherwise
-  // silently swallow. Surface it.
-  if (!accumulated) {
+  // Tool-only round (Gemini fires functionCall parts with no text) is valid output —
+  // the session will execute the tool, push the result, and re-stream.
+  if (!accumulated && toolCallCount === 0) {
     const reason = lastFinish && lastFinish !== 'STOP' && lastFinish !== 'FINISH_REASON_STOP'
       ? lastFinish
       : undefined;
@@ -231,6 +324,10 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
     return;
   }
-  yield { type: 'text_end', text: accumulated };
-  yield { type: 'done', stopReason: mapStopReason(lastFinish), usage };
+  if (accumulated) yield { type: 'text_end', text: accumulated };
+  yield {
+    type: 'done',
+    stopReason: toolCallCount > 0 ? 'tool_use' : mapStopReason(lastFinish),
+    usage,
+  };
 }

package/supervisor/harnesses/pi/session.ts CHANGED Viewed

@@ -8,23 +8,28 @@
  *   - each turn streams provider events back through a single `onEvent`
  *     callback the caller hooked up
  *
- * Phase 1 scope: text-only, no tools. Each user turn = one provider call.
- * Phase 2 will plug tools into the inner loop (model emits `tool_use` →
- * execute → append `tool_result` → re-stream → repeat until `end_turn`).
+ * Phase 2: each user turn is an inner loop — provider call → if the model
+ * asked for tool calls, execute them and feed results back → call provider
+ * again — until the model finishes without requesting more tools. Tokens
+ * stream live; `text_end` only fires once at the very end of the turn so the
+ * UI doesn't display half-answers between tool rounds.
  *
- * Phase 1 explicitly does NOT spawn sub-agents — Bruno will add those later.
+ * Sub-agents are NOT spawned here — Bruno will add those later.
  */
 import { log } from '../../../shared/logger.js';
 import type { PiApiFlavor } from './sub-providers.js';
 import { streamProvider } from './providers/stream.js';
-import type { PiMessage, PiStreamEvent, PiToolDef } from './providers/types.js';
+import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock } from './providers/types.js';
 import type { AsyncQueue } from './async-queue.js';
+import { findTool } from './tools/registry.js';
+import type { PiTool } from './tools/types.js';
 export type PiSessionEvent =
   | { type: 'turn_started' }
   | { type: 'text_delta'; delta: string }
   | { type: 'text_end'; text: string }
-  | { type: 'tool_use'; id: string; name: string; input: any }   // Phase 2
+  | { type: 'tool_use'; id: string; name: string; input: any }
+  | { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
   | { type: 'turn_complete'; usedFileTools: boolean }
   | { type: 'error'; error: string };
@@ -36,8 +41,10 @@ export interface PiSessionInit {
   systemPrompt: string;
   /** Pre-loaded history before the first new user turn. */
   initialMessages?: PiMessage[];
-  /** Phase 2 wires this through. Empty for Phase 1. */
+  /** Tools the model can call this session. Empty array ⇒ chat-only. */
   tools?: PiToolDef[];
+  /** Workspace directory passed to each tool as its `cwd` whenever one runs. */
+  cwd: string;
   maxOutputTokens?: number;
   /** Used to interrupt in-flight provider calls when the session ends. */
   abortController: AbortController;
@@ -53,19 +60,20 @@ export interface PiSession {
 }
 const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'write', 'edit']);
+const MAX_TOOL_ROUNDS = 25;
 export function createPiSession(init: PiSessionInit): PiSession {
   const messages: PiMessage[] = init.initialMessages ? [...init.initialMessages] : [];
-  async function runOneTurn(userMsg: PiMessage): Promise<void> {
-    if (init.abortController.signal.aborted) return;
-    messages.push(userMsg);
-    init.onEvent({ type: 'turn_started' });
-    let accumulated = '';
-    const usedTools = new Set<string>();
-    let errored = false;
+  /** One stream round — collect the assistant blocks the model emits this pass. */
+  interface RoundResult {
+    text: string;
+    toolUses: { id: string; name: string; input: any }[];
+    errored: boolean;
+  }
+  async function runOneRound(): Promise<RoundResult> {
+    const result: RoundResult = { text: '', toolUses: [], errored: false };
     try {
       const stream = streamProvider(init.flavor, {
         modelId: init.modelId,
@@ -79,43 +87,111 @@ export function createPiSession(init: PiSessionInit): PiSession {
       });
       for await (const evt of stream as AsyncIterable<PiStreamEvent>) {
-        if (init.abortController.signal.aborted) return;
+        if (init.abortController.signal.aborted) break;
         switch (evt.type) {
           case 'text_delta':
-            accumulated += evt.delta;
+            result.text += evt.delta;
             init.onEvent({ type: 'text_delta', delta: evt.delta });
             break;
           case 'text_end':
-            // Provider gives us the final accumulated text; trust the deltas
-            // we already forwarded and reconcile state from here.
-            accumulated = evt.text;
-            init.onEvent({ type: 'text_end', text: evt.text });
+            // Sync up with the provider's authoritative concatenation in case
+            // we missed a delta. Don't forward — we only emit text_end once
+            // at the end of the whole turn so the UI doesn't show half-answers.
+            result.text = evt.text;
             break;
           case 'tool_use':
-            // Phase 2: execute the tool, append a tool_result message, re-stream.
-            usedTools.add(evt.name);
+            result.toolUses.push({ id: evt.id, name: evt.name, input: evt.input });
             init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
             break;
           case 'error':
-            errored = true;
+            result.errored = true;
             init.onEvent({ type: 'error', error: evt.error });
             break;
           case 'done':
-            // Loop back if the model is waiting on a tool result (Phase 2);
-            // for now `tool_use` is impossible since we don't pass tools.
+            // Loop control is by tool_use presence, not stop reason.
             break;
         }
       }
     } catch (err: any) {
-      if (init.abortController.signal.aborted) return;
-      errored = true;
-      init.onEvent({ type: 'error', error: err?.message || String(err) });
+      if (!init.abortController.signal.aborted) {
+        result.errored = true;
+        init.onEvent({ type: 'error', error: err?.message || String(err) });
+      }
     }
+    return result;
+  }
+  async function executeTool(call: { id: string; name: string; input: any }): Promise<{ output: string; isError?: boolean }> {
+    const tool: PiTool | undefined = findTool(call.name);
+    if (!tool) {
+      return {
+        output: `Tool not found: ${call.name}. Available tools: ${(init.tools || []).map((t) => t.name).join(', ') || 'none'}.`,
+        isError: true,
+      };
+    }
+    try {
+      return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal });
+    } catch (err: any) {
+      return { output: `Tool ${call.name} threw: ${err?.message || err}`, isError: true };
+    }
+  }
+  async function runOneTurn(userMsg: PiMessage): Promise<void> {
+    if (init.abortController.signal.aborted) return;
+    messages.push(userMsg);
+    init.onEvent({ type: 'turn_started' });
-    if (accumulated) {
-      messages.push({ role: 'assistant', content: [{ type: 'text', text: accumulated }] });
+    let accumulatedText = '';
+    const usedTools = new Set<string>();
+    let turnErrored = false;
+    for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
+      if (init.abortController.signal.aborted) break;
+      const { text, toolUses, errored } = await runOneRound();
+      // Append whatever the model produced this round to history so subsequent
+      // rounds (and the next user turn) see it.
+      const assistantContent: PiContentBlock[] = [];
+      if (text) {
+        accumulatedText += (accumulatedText && !accumulatedText.endsWith('\n') ? '\n\n' : '') + text;
+        assistantContent.push({ type: 'text', text });
+      }
+      for (const tu of toolUses) {
+        assistantContent.push({ type: 'tool_use', id: tu.id, name: tu.name, input: tu.input });
+      }
+      if (assistantContent.length > 0) {
+        messages.push({ role: 'assistant', content: assistantContent });
+      }
+      if (errored) { turnErrored = true; break; }
+      if (toolUses.length === 0) break;        // model finished — exit loop
+      // Run every tool the model asked for this round, then feed the results
+      // back as a single user message Gemini accepts as a batch.
+      const toolResultBlocks: PiContentBlock[] = [];
+      for (const tu of toolUses) {
+        usedTools.add(tu.name);
+        if (init.abortController.signal.aborted) break;
+        log.info(`[pi/session] tool call ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`);
+        const res = await executeTool(tu);
+        init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res.isError });
+        toolResultBlocks.push({
+          type: 'tool_result',
+          toolUseId: tu.id,
+          content: res.output,
+          isError: res.isError,
+        });
+      }
+      if (toolResultBlocks.length > 0) {
+        messages.push({ role: 'user', content: toolResultBlocks });
+      }
+      // Loop continues — re-stream with the new tool results in context.
     }
-    if (!errored) {
+    if (!turnErrored) {
+      if (accumulatedText) {
+        init.onEvent({ type: 'text_end', text: accumulatedText });
+      }
       const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
       init.onEvent({ type: 'turn_complete', usedFileTools });
     }

package/supervisor/harnesses/pi/tools/bash.ts ADDED Viewed

@@ -0,0 +1,109 @@
+/**
+ * Bash tool — runs a shell command in the workspace.
+ *
+ * Stays small on purpose: combined stdout+stderr, hard timeout, kills the
+ * process on session abort. No interactive subshells, no background jobs.
+ */
+import { spawn } from 'child_process';
+import type { PiTool } from './types.js';
+const DEFAULT_TIMEOUT_MS = 60_000;
+const HARD_TIMEOUT_MS = 5 * 60_000;
+const OUTPUT_CAP_BYTES = 200 * 1024;  // 200 KB; matches Claude SDK's behavior
+export const bashTool: PiTool = {
+  name: 'Bash',
+  description:
+    'Run a shell command in the workspace and return its combined stdout+stderr. Use this for non-interactive commands only — no editors, no long-running servers.',
+  inputSchema: {
+    type: 'object',
+    properties: {
+      command: { type: 'string', description: 'The shell command to execute.' },
+      description: { type: 'string', description: 'A short description (5–10 words) of what the command does.' },
+      timeout: { type: 'integer', description: 'Timeout in milliseconds (default 60 000, max 300 000).' },
+    },
+    required: ['command'],
+  },
+  async run(input, ctx) {
+    const command = typeof input?.command === 'string' ? input.command : '';
+    if (!command.trim()) return { output: 'command is required.', isError: true };
+    const requestedTimeout = Number(input?.timeout) || DEFAULT_TIMEOUT_MS;
+    const timeout = Math.min(HARD_TIMEOUT_MS, Math.max(1000, requestedTimeout));
+    return await new Promise((resolve) => {
+      let out = '';
+      let truncated = false;
+      let timedOut = false;
+      let settled = false;
+      const child = spawn('bash', ['-lc', command], {
+        cwd: ctx.cwd,
+        env: process.env,
+        stdio: ['ignore', 'pipe', 'pipe'],
+      });
+      const append = (chunk: Buffer) => {
+        if (truncated) return;
+        const remaining = OUTPUT_CAP_BYTES - Buffer.byteLength(out, 'utf-8');
+        if (remaining <= 0) {
+          truncated = true;
+          return;
+        }
+        const text = chunk.toString('utf-8');
+        if (Buffer.byteLength(text, 'utf-8') > remaining) {
+          out += text.slice(0, remaining);
+          truncated = true;
+        } else {
+          out += text;
+        }
+      };
+      child.stdout?.on('data', append);
+      child.stderr?.on('data', append);
+      const timer = setTimeout(() => {
+        timedOut = true;
+        try { child.kill('SIGKILL'); } catch {}
+      }, timeout);
+      const onAbort = () => {
+        try { child.kill('SIGKILL'); } catch {}
+      };
+      ctx.signal?.addEventListener('abort', onAbort);
+      child.on('error', (err) => {
+        if (settled) return;
+        settled = true;
+        clearTimeout(timer);
+        ctx.signal?.removeEventListener('abort', onAbort);
+        resolve({ output: `Failed to spawn command: ${err.message}`, isError: true });
+      });
+      child.on('close', (code, signal) => {
+        if (settled) return;
+        settled = true;
+        clearTimeout(timer);
+        ctx.signal?.removeEventListener('abort', onAbort);
+        const tail = truncated ? `\n\n[Output truncated at ${OUTPUT_CAP_BYTES} bytes]` : '';
+        if (timedOut) {
+          resolve({ output: `Command timed out after ${timeout}ms.\n\n${out}${tail}`, isError: true });
+          return;
+        }
+        if (ctx.signal?.aborted) {
+          resolve({ output: 'Command aborted (session ended).', isError: true });
+          return;
+        }
+        if (code === 0) {
+          resolve({ output: (out || '(no output)') + tail });
+        } else {
+          resolve({
+            output: `Command exited with code ${code}${signal ? ` (signal ${signal})` : ''}.\n\n${out}${tail}`,
+            isError: true,
+          });
+        }
+      });
+    });
+  },
+};

package/supervisor/harnesses/pi/tools/edit.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * Edit tool — surgical string replacement in an existing file.
+ *
+ * Behavior matches Claude SDK's Edit semantics: refuses if `old_string` isn't
+ * unique (and `replace_all` is false), so the model can't accidentally edit
+ * the wrong occurrence.
+ */
+import fs from 'fs';
+import type { PiTool } from './types.js';
+import { safeResolve, displayPath } from './path-safety.js';
+export const editTool: PiTool = {
+  name: 'Edit',
+  description:
+    'Replace a unique substring in a file. Fails if `old_string` is not found, or if it appears more than once unless `replace_all` is true.',
+  inputSchema: {
+    type: 'object',
+    properties: {
+      file_path: { type: 'string', description: 'File to edit (relative to workspace).' },
+      old_string: { type: 'string', description: 'The exact text to find. Include enough surrounding context to make it unique.' },
+      new_string: { type: 'string', description: 'Replacement text.' },
+      replace_all: { type: 'boolean', description: 'If true, replace every occurrence instead of requiring uniqueness.' },
+    },
+    required: ['file_path', 'old_string', 'new_string'],
+  },
+  async run(input, ctx) {
+    let abs: string;
+    try {
+      abs = safeResolve(ctx.cwd, input?.file_path);
+    } catch (err: any) {
+      return { output: err.message, isError: true };
+    }
+    if (!fs.existsSync(abs)) {
+      return { output: `File not found: ${displayPath(ctx.cwd, abs)}`, isError: true };
+    }
+    const oldStr = typeof input?.old_string === 'string' ? input.old_string : '';
+    const newStr = typeof input?.new_string === 'string' ? input.new_string : '';
+    if (!oldStr) return { output: 'old_string is required and cannot be empty.', isError: true };
+    if (oldStr === newStr) return { output: 'old_string and new_string are identical — nothing to change.', isError: true };
+    const original = fs.readFileSync(abs, 'utf-8');
+    const occurrences = original.split(oldStr).length - 1;
+    if (occurrences === 0) {
+      return {
+        output: `Did not find old_string in ${displayPath(ctx.cwd, abs)}. Check whitespace/quoting and re-read the file.`,
+        isError: true,
+      };
+    }
+    if (occurrences > 1 && !input?.replace_all) {
+      return {
+        output: `Found ${occurrences} matches for old_string in ${displayPath(ctx.cwd, abs)}. Add more surrounding context to make it unique, or set replace_all: true.`,
+        isError: true,
+      };
+    }
+    const updated = input?.replace_all
+      ? original.split(oldStr).join(newStr)
+      : original.replace(oldStr, newStr);
+    try {
+      fs.writeFileSync(abs, updated, 'utf-8');
+    } catch (err: any) {
+      return { output: `Write failed: ${err.message}`, isError: true };
+    }
+    return { output: `Edited ${displayPath(ctx.cwd, abs)} (${occurrences} ${occurrences === 1 ? 'match' : 'matches'} replaced).` };
+  },
+};

package/supervisor/harnesses/pi/tools/path-safety.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * Workspace path safety.
+ *
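+ * The pi harness acts on untrusted model output. Every file-touching tool must
+ * resolve its path through `safeResolve()` so the agent can't read or write
+ * outside the workspace via traversal (`../../etc/passwd`) or absolute paths.
+ * NOTE(review): the check is lexical — only the workspace root is realpath'd,
+ * so a symlink inside the workspace that points elsewhere is not caught.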
+ */
+import path from 'path';
+import fs from 'fs';
+export function safeResolve(cwd: string, requested: string): string {
+  if (!requested || typeof requested !== 'string') {
+    throw new Error('Missing file path');
+  }
+  const root = fs.realpathSync.native ? fs.realpathSync(cwd) : path.resolve(cwd);
+  const abs = path.isAbsolute(requested)
+    ? path.normalize(requested)
+    : path.normalize(path.join(root, requested));
+  const rel = path.relative(root, abs);
+  if (rel.startsWith('..') || path.isAbsolute(rel)) {
+    throw new Error(`Path escapes workspace: ${requested}`);
+  }
+  return abs;
+}
+export function displayPath(cwd: string, abs: string): string {
+  const rel = path.relative(cwd, abs);
+  return rel || path.basename(abs);
+}

package/supervisor/harnesses/pi/tools/read.ts ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * Read tool — fetches a file's contents, optionally with line-range slicing.
+ *
+ * Output mirrors what Claude SDK's Read tool produces so the model — which
+ * was trained against that format — uses it correctly. Each line is
+ * prefixed with its 1-based line number, left-padded to six characters.
+ */
+import fs from 'fs';
+import path from 'path';
+import type { PiTool } from './types.js';
+import { safeResolve, displayPath } from './path-safety.js';
+const MAX_BYTES = 256 * 1024;        // 256 KB cap per read
+const DEFAULT_LIMIT = 2000;          // default line cap
+function formatWithLineNumbers(text: string, startLine: number): string {
+  const lines = text.split('\n');
+  return lines.map((line, i) => {
+    const n = String(startLine + i).padStart(6, ' ');
+    return `${n}\t${line}`;
+  }).join('\n');
+}
+export const readTool: PiTool = {
+  name: 'Read',
+  description: 'Read a file from the workspace. Use this to inspect existing code, configuration, or data files.',
+  inputSchema: {
+    type: 'object',
+    properties: {
+      file_path: { type: 'string', description: 'Path to the file. Relative paths resolve against the workspace root.' },
+      offset: { type: 'integer', description: '1-based line number to start at (default 1).', minimum: 1 },
+      limit: { type: 'integer', description: 'How many lines to return (default 2000, max 2000).', minimum: 1 },
+    },
+    required: ['file_path'],
+  },
+  async run(input, ctx) {
+    const filePath = input?.file_path;
+    let abs: string;
+    try {
+      abs = safeResolve(ctx.cwd, filePath);
+    } catch (err: any) {
+      return { output: err.message, isError: true };
+    }
+    if (!fs.existsSync(abs)) {
+      return { output: `File not found: ${displayPath(ctx.cwd, abs)}`, isError: true };
+    }
+    const stat = fs.statSync(abs);
+    if (stat.isDirectory()) {
+      return { output: `Path is a directory, not a file: ${displayPath(ctx.cwd, abs)}`, isError: true };
+    }
+    if (stat.size > MAX_BYTES) {
+      return {
+        output: `File too large (${stat.size} bytes; max ${MAX_BYTES}). Use a smaller range with offset/limit.`,
+        isError: true,
+      };
+    }
+    const raw = fs.readFileSync(abs, 'utf-8');
+    const allLines = raw.split('\n');
+    const offset = Math.max(1, Number(input?.offset) || 1);
+    const limit = Math.min(DEFAULT_LIMIT, Math.max(1, Number(input?.limit) || DEFAULT_LIMIT));
+    const slice = allLines.slice(offset - 1, offset - 1 + limit).join('\n');
+    const truncatedNote = (offset - 1 + limit) < allLines.length
+      ? `\n\n[Truncated — file has ${allLines.length} lines; showed ${offset}–${offset + limit - 1}.]`
+      : '';
+    if (!slice.trim()) {
+      return { output: `(file ${displayPath(ctx.cwd, abs)} is empty${truncatedNote ? ` past line ${offset}` : ''})` };
+    }
+    return { output: formatWithLineNumbers(slice, offset) + truncatedNote };
+  },
+};

package/supervisor/harnesses/pi/tools/registry.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * Tool registry — the bag of tools the pi session passes to the model.
+ *
+ * Phase 2 ships the four core coding tools. Phase 3 or later will add Grep,
+ * Glob, LS, NotebookEdit, etc. so the surface fully matches Claude SDK's.
+ */
+import type { PiTool } from './types.js';
+import type { PiToolDef } from '../providers/types.js';
+import { readTool } from './read.js';
+import { writeTool } from './write.js';
+import { editTool } from './edit.js';
+import { bashTool } from './bash.js';
+/** The tools the pi session passes to the model, in registration order. */
+export const PI_TOOLS: PiTool[] = [readTool, writeTool, editTool, bashTool];
+/** Lookup index holding each tool under its exact name and its lowercased name. */
+const TOOL_BY_NAME = new Map<string, PiTool>();
+PI_TOOLS.forEach((tool) => {
+  // Some models lowercase or otherwise normalise tool names, so the
+  // lowercased alias is registered right after the canonical name to avoid
+  // rejecting a legitimate call over a casing nit.
+  for (const key of [tool.name, tool.name.toLowerCase()]) {
+    TOOL_BY_NAME.set(key, tool);
+  }
+});
+/**
+ * Look up a registered tool by the name supplied in a tool call.
+ *
+ * The exact name is tried first, then its lowercased form.
+ *
+ * @param name - Tool name as received from the caller.
+ * @returns The matching tool, or `undefined` when nothing is registered under
+ *   that name or when `name` is not a string.
+ */
+export function findTool(name: string): PiTool | undefined {
+  // The `string` annotation is erased at runtime; a malformed tool call with a
+  // missing name must yield "not found" rather than a TypeError from
+  // `toLowerCase()`.
+  if (typeof name !== 'string') return undefined;
+  return TOOL_BY_NAME.get(name) ?? TOOL_BY_NAME.get(name.toLowerCase());
+}
+/**
+ * Build the provider-facing declarations for every registered tool.
+ *
+ * Only `name`, `description` and `inputSchema` are copied; the `run`
+ * implementation is left out.
+ *
+ * @returns One `PiToolDef` per entry of `PI_TOOLS`, in the same order.
+ */
+export function toolDefsForProvider(): PiToolDef[] {
+  const defs: PiToolDef[] = [];
+  for (const { name, description, inputSchema } of PI_TOOLS) {
+    defs.push({ name, description, inputSchema });
+  }
+  return defs;
+}

package/supervisor/harnesses/pi/tools/types.ts ADDED Viewed

@@ -0,0 +1,29 @@
+/**
+ * Tool runtime contract for the pi harness.
+ *
+ * Tool names match the Claude Agent SDK's so the existing system prompt
+ * (`worker/prompts/...`) — which advertises Read / Write / Edit / Bash —
+ * keeps working without rewriting prompts per harness. Schemas are JSON
+ * Schema in the shape Google's `functionDeclarations.parameters` accepts.
+ */
+export interface PiToolResult { // Value that `PiTool.run` resolves to.
+  /** Text shown back to the model as the tool's output (on failure, tools in this harness put the error message here). */
+  output: string;
+  /** Optional. Mark `true` when the tool failed; the loop tells the model so it can recover. */
+  isError?: boolean;
+}
+export interface PiToolContext { // Context passed to `PiTool.run` as its second argument.
+  /** Workspace root — every tool resolves paths against this. */
+  cwd: string;
+  /** Optional. Aborted when the session ends so long-running tools stop fast. */
+  signal?: AbortSignal;
+}
+export interface PiTool { // Runtime contract for one tool in the pi harness.
+  name: string; // Tool name; per the file header these match the Claude Agent SDK's names.
+  description: string; // Free-text summary of what the tool does.
+  inputSchema: Record<string, any>; // JSON Schema for `input`; per the file header, in the shape Google's `functionDeclarations.parameters` accepts.
+  run(input: any, ctx: PiToolContext): Promise<PiToolResult>; // Executes the tool. `input` is untyped (`any`), so its shape is not checked by the compiler.
+}

package/supervisor/harnesses/pi/tools/write.ts ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * Write tool — overwrites or creates a file inside the workspace.
+ */
+import fs from 'fs';
+import path from 'path';
+import type { PiTool } from './types.js';
+import { safeResolve, displayPath } from './path-safety.js';
+/** Upper bound on the UTF-8 encoded size of one write (1 MB), to avoid runaway writes. */
+const MAX_BYTES = 1024 * 1024;
+/**
+ * Write tool: creates or overwrites a file with the supplied content.
+ *
+ * `run` resolves `input.file_path` through `safeResolve` against `ctx.cwd`,
+ * rejects content whose UTF-8 encoding exceeds `MAX_BYTES`, creates missing
+ * parent directories, and writes the file as UTF-8. Every failure is reported
+ * as `{ output, isError: true }` rather than thrown. A non-string `content`
+ * is treated as the empty string.
+ */
+export const writeTool: PiTool = {
+  name: 'Write',
+  description: 'Create or overwrite a file in the workspace with the given content. Creates parent directories as needed.',
+  inputSchema: {
+    type: 'object',
+    properties: {
+      file_path: { type: 'string', description: 'Destination path. Relative paths resolve against the workspace root.' },
+      content: { type: 'string', description: 'Full file contents.' },
+    },
+    required: ['file_path', 'content'],
+  },
+  async run(input, ctx) {
+    let abs: string;
+    try {
+      abs = safeResolve(ctx.cwd, input?.file_path);
+    } catch (err: any) {
+      // Path resolution failed; surface the reason to the model.
+      return { output: err.message, isError: true };
+    }
+    const content = typeof input?.content === 'string' ? input.content : '';
+    // `String.prototype.length` counts UTF-16 code units, not bytes, so
+    // non-ASCII content could exceed the cap on disk and be misreported.
+    // Measure the encoded size that will actually be written.
+    const byteLength = Buffer.byteLength(content, 'utf-8');
+    if (byteLength > MAX_BYTES) {
+      return { output: `Content too large (${byteLength} bytes; max ${MAX_BYTES}).`, isError: true };
+    }
+    try {
+      fs.mkdirSync(path.dirname(abs), { recursive: true });
+      fs.writeFileSync(abs, content, 'utf-8');
+      return { output: `Wrote ${byteLength} bytes to ${displayPath(ctx.cwd, abs)}` };
+    } catch (err: any) {
+      return { output: `Write failed: ${err.message}`, isError: true };
+    }
+  },
+};