bloby-bot 0.47.4 → 0.47.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.47.4",
3
+ "version": "0.47.6",
4
4
  "releaseNotes": [
5
5
  "1. # voice note (PTT bubble)",
6
6
  "2. # audio file + caption",
@@ -30,6 +30,7 @@ import { getPiSubProvider } from './sub-providers.js';
30
30
  import { readPiAuth } from './auth-storage.js';
31
31
  import { streamProvider } from './providers/stream.js';
32
32
  import type { PiMessage } from './providers/types.js';
33
+ import { toolDefsForProvider } from './tools/registry.js';
33
34
 
34
35
  // ── Live conversation state ────────────────────────────────────────────────
35
36
 
@@ -211,6 +212,8 @@ export async function startConversation(
211
212
  baseUrl: auth.baseUrl,
212
213
  apiKey: auth.apiKey,
213
214
  systemPrompt,
215
+ tools: toolDefsForProvider(),
216
+ cwd: WORKSPACE_DIR,
214
217
  abortController,
215
218
  onEvent: (evt: PiSessionEvent) => {
216
219
  translateAndEmit(conv, evt);
@@ -8,6 +8,7 @@
8
8
  * Endpoint: POST {baseUrl}/models/{modelId}:streamGenerateContent?alt=sse&key={apiKey}
9
9
  * Stream: SSE — each `data: {...}` is one candidate update.
10
10
  */
11
+ import crypto from 'crypto';
11
12
  import { log } from '../../../../shared/logger.js';
12
13
  import type {
13
14
  PiStreamRequest,
@@ -18,51 +19,121 @@ import type {
18
19
  } from './types.js';
19
20
 
20
21
  /** Walk an SSE byte stream and yield each parsed JSON event. */
21
- async function* parseSse(res: Response): AsyncIterable<any> {
22
+ async function* parseSse(res: Response, dbg: { firstBytes: string }): AsyncIterable<any> {
22
23
  if (!res.body) return;
23
24
  const reader = res.body.getReader();
24
25
  const decoder = new TextDecoder();
25
26
  let buffer = '';
27
+ let totalBytes = 0;
26
28
  try {
27
29
  while (true) {
28
30
  const { value, done } = await reader.read();
29
31
  if (done) break;
32
+ if (value) totalBytes += value.byteLength;
30
33
  buffer += decoder.decode(value, { stream: true });
31
- // SSE event boundary is a blank line. Process every complete event in buffer.
34
+ if (!dbg.firstBytes && buffer.length > 0) {
35
+ dbg.firstBytes = buffer.slice(0, 800);
36
+ }
37
+ // SSE event boundary is a blank line. Accept both LF and CRLF separators.
32
38
  let idx;
33
- while ((idx = buffer.indexOf('\n\n')) !== -1) {
39
+ while (
40
+ (idx = (() => {
41
+ const a = buffer.indexOf('\n\n');
42
+ const b = buffer.indexOf('\r\n\r\n');
43
+ if (a < 0) return b;
44
+ if (b < 0) return a;
45
+ return Math.min(a, b);
46
+ })()) !== -1
47
+ ) {
48
+ const isCrlf = buffer.slice(idx, idx + 4) === '\r\n\r\n';
34
49
  const raw = buffer.slice(0, idx);
35
- buffer = buffer.slice(idx + 2);
36
- const dataLines = raw.split('\n').filter((l) => l.startsWith('data:'));
37
- if (!dataLines.length) continue;
38
- const data = dataLines.map((l) => l.slice(5).trimStart()).join('\n');
39
- if (!data || data === '[DONE]') continue;
40
- try {
41
- yield JSON.parse(data);
42
- } catch {
43
- // Skip malformed chunks rather than killing the whole turn.
44
- }
50
+ buffer = buffer.slice(idx + (isCrlf ? 4 : 2));
51
+ const parsed = parseSseEvent(raw);
52
+ if (parsed !== undefined) yield parsed;
45
53
  }
46
54
  }
55
+ // Flush whatever remains — Gemini's final event may not have a trailing blank line.
56
+ buffer += decoder.decode();
57
+ if (buffer.trim()) {
58
+ const parsed = parseSseEvent(buffer);
59
+ if (parsed !== undefined) yield parsed;
60
+ }
47
61
  } finally {
48
62
  try { reader.releaseLock(); } catch {}
63
+ dbg.firstBytes = dbg.firstBytes || `(zero bytes — total=${totalBytes})`;
64
+ }
65
+ }
66
+
67
+ function parseSseEvent(raw: string): any | undefined {
68
+ // Standard SSE: one or more `data:` lines per event. Concatenate their payloads.
69
+ const lines = raw.split(/\r?\n/);
70
+ const dataLines = lines
71
+ .filter((l) => l.startsWith('data:'))
72
+ .map((l) => l.slice(5).trimStart());
73
+ if (!dataLines.length) {
74
+ // Fallback: some servers omit the `data:` prefix and send pure JSON per event.
75
+ const trimmed = raw.trim();
76
+ if (!trimmed || trimmed === '[DONE]') return undefined;
77
+ // Strip a leading JSON-array delimiter if Gemini is returning array-stream
78
+ // instead of SSE (alt=sse not honored).
79
+ const candidate = trimmed.replace(/^[\[,]/, '').replace(/[\],]$/, '').trim();
80
+ if (!candidate) return undefined;
81
+ try { return JSON.parse(candidate); } catch { return undefined; }
49
82
  }
83
+ const data = dataLines.join('\n');
84
+ if (!data || data === '[DONE]') return undefined;
85
+ try { return JSON.parse(data); } catch { return undefined; }
50
86
  }
51
87
 
52
88
  function toGeminiRole(role: PiMessage['role']): 'user' | 'model' {
53
- return role === 'assistant' ? 'model' : 'user';
89
+ // Tool results piggyback on the user role with a `functionResponse` part —
90
+ // see Gemini function-calling docs.
91
+ if (role === 'assistant') return 'model';
92
+ return 'user';
54
93
  }
55
94
 
56
95
  function toGeminiParts(content: PiContentBlock[]): any[] {
57
96
  const parts: any[] = [];
58
97
  for (const b of content) {
59
- if (b.type === 'text') parts.push({ text: b.text });
60
- else if (b.type === 'image') parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
61
- // tool_use / tool_result are Phase 2.
98
+ if (b.type === 'text') {
99
+ parts.push({ text: b.text });
100
+ } else if (b.type === 'image') {
101
+ parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
102
+ } else if (b.type === 'tool_use') {
103
+ // Assistant turn: the model asked to invoke a tool.
104
+ parts.push({ functionCall: { name: b.name, args: b.input || {} } });
105
+ } else if (b.type === 'tool_result') {
106
+ // Function responses can be strings, objects, or even error markers.
107
+ // Wrap text in `{ output: ... }` (Gemini's docs use a flexible
108
+ // `response` JSON map), with `isError` keying so the model can react.
109
+ const response = b.isError ? { error: b.content } : { output: b.content };
110
+ parts.push({ functionResponse: { name: extractToolName(b.toolUseId), response } });
111
+ }
62
112
  }
63
113
  return parts;
64
114
  }
65
115
 
116
+ /**
117
+ * Gemini doesn't carry a tool-call id forward to the response; we encode the
118
+ * tool name into the id we generate at tool-use time (`{name}::{uuid}`) so
119
+ * we can recover it here. Falls back to the raw id if the prefix is missing.
120
+ */
121
+ function extractToolName(toolUseId: string): string {
122
+ const idx = toolUseId.indexOf('::');
123
+ return idx > 0 ? toolUseId.slice(0, idx) : toolUseId;
124
+ }
125
+
126
+ function toGeminiTools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
127
+ return [{
128
+ functionDeclarations: tools.map((t) => ({
129
+ name: t.name,
130
+ description: t.description,
131
+ // Gemini accepts plain JSON Schema for `parameters`.
132
+ parameters: t.inputSchema,
133
+ })),
134
+ }];
135
+ }
136
+
66
137
  function mapStopReason(reason?: string): PiStopReason {
67
138
  switch (reason) {
68
139
  case 'STOP':
@@ -77,6 +148,7 @@ function mapStopReason(reason?: string): PiStopReason {
77
148
  case 'PROHIBITED_CONTENT':
78
149
  case 'SPII':
79
150
  case 'OTHER':
151
+ case 'MALFORMED_FUNCTION_CALL':
80
152
  return 'error';
81
153
  default:
82
154
  return 'end_turn';
@@ -95,6 +167,8 @@ function finishReasonMessage(reason?: string): string {
95
167
  case 'PROHIBITED_CONTENT':
96
168
  case 'SPII':
97
169
  return `Response blocked by Gemini policy (${reason}).`;
170
+ case 'MALFORMED_FUNCTION_CALL':
171
+ return 'Gemini emitted a malformed function call. Often means the model tried to invoke a tool that wasn\'t declared, or with arguments that failed schema validation.';
98
172
  case 'OTHER':
99
173
  default:
100
174
  return `Gemini stopped without producing output (finishReason=${reason || 'unknown'}).`;
@@ -125,6 +199,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
125
199
  if (req.systemPrompt?.trim()) {
126
200
  body.systemInstruction = { parts: [{ text: req.systemPrompt }] };
127
201
  }
202
+ if (req.tools && req.tools.length > 0) {
203
+ body.tools = toGeminiTools(req.tools);
204
+ }
128
205
 
129
206
  let res: Response;
130
207
  try {
@@ -147,6 +224,7 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
147
224
  }
148
225
 
149
226
  let accumulated = '';
227
+ let toolCallCount = 0;
150
228
  let lastFinish: string | undefined;
151
229
  let promptBlockReason: string | undefined;
152
230
  let usage: { inputTokens?: number; outputTokens?: number } | undefined;
@@ -155,9 +233,10 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
155
233
  let thoughtPartCount = 0;
156
234
  let emptyTextPartCount = 0;
157
235
  let firstChunkSummary = '';
236
+ const dbg = { firstBytes: '' };
158
237
 
159
238
  try {
160
- for await (const chunk of parseSse(res)) {
239
+ for await (const chunk of parseSse(res, dbg)) {
161
240
  chunkCount++;
162
241
  if (chunkCount === 1) {
163
242
  try { firstChunkSummary = JSON.stringify(chunk).slice(0, 600); } catch {}
@@ -172,6 +251,20 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
172
251
  // Thinking models emit reasoning parts with `thought: true`. They
173
252
  // shouldn't be shown to the user as part of the visible answer.
174
253
  if (part?.thought) { thoughtPartCount++; continue; }
254
+ if (part?.functionCall && typeof part.functionCall.name === 'string') {
255
+ // Gemini doesn't surface a tool-call id of its own; bake the tool
256
+ // name into the id so the session can echo it back as a
257
+ // `functionResponse` referencing the same name.
258
+ const id = `${part.functionCall.name}::${crypto.randomUUID()}`;
259
+ toolCallCount++;
260
+ yield {
261
+ type: 'tool_use',
262
+ id,
263
+ name: part.functionCall.name,
264
+ input: part.functionCall.args || {},
265
+ };
266
+ continue;
267
+ }
175
268
  if (typeof part?.text === 'string' && part.text.length > 0) {
176
269
  accumulated += part.text;
177
270
  yield { type: 'text_delta', delta: part.text };
@@ -198,15 +291,16 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
198
291
  }
199
292
 
200
293
  log.info(
201
- `[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} ` +
294
+ `[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} toolCalls=${toolCallCount} ` +
202
295
  `thoughtParts=${thoughtPartCount} emptyTextParts=${emptyTextPartCount} ` +
203
296
  `finishReason=${lastFinish || 'none'} ` +
204
297
  `promptTok=${usage?.inputTokens ?? '?'} outTok=${usage?.outputTokens ?? '?'}`,
205
298
  );
206
- if (chunkCount > 0 && !accumulated) {
299
+ if (chunkCount > 0 && !accumulated && toolCallCount === 0) {
207
300
  log.info(`[pi/google] first chunk (truncated): ${firstChunkSummary}`);
208
301
  } else if (chunkCount === 0) {
209
- log.warn(`[pi/google] SSE stream parsed zero chunks — check response shape (status=${res.status} content-type=${res.headers.get('content-type') || ''})`);
302
+ log.warn(`[pi/google] SSE stream parsed zero chunks — content-type=${res.headers.get('content-type') || '?'}`);
303
+ log.warn(`[pi/google] first raw bytes: ${JSON.stringify(dbg.firstBytes)}`);
210
304
  }
211
305
 
212
306
  // Prompt-level block: nothing came back at all.
@@ -216,10 +310,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
216
310
  return;
217
311
  }
218
312
 
219
- // We finished cleanly but the model produced no visible text. That's almost
220
- // always a finish-reason problem (MAX_TOKENS, SAFETY, ...) we'd otherwise
221
- // silently swallow. Surface it.
222
- if (!accumulated) {
313
+ // Tool-only round (Gemini fires functionCall parts with no text) is valid output —
314
+ // the session will execute the tool, push the result, and re-stream.
315
+ if (!accumulated && toolCallCount === 0) {
223
316
  const reason = lastFinish && lastFinish !== 'STOP' && lastFinish !== 'FINISH_REASON_STOP'
224
317
  ? lastFinish
225
318
  : undefined;
@@ -231,6 +324,10 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
231
324
  return;
232
325
  }
233
326
 
234
- yield { type: 'text_end', text: accumulated };
235
- yield { type: 'done', stopReason: mapStopReason(lastFinish), usage };
327
+ if (accumulated) yield { type: 'text_end', text: accumulated };
328
+ yield {
329
+ type: 'done',
330
+ stopReason: toolCallCount > 0 ? 'tool_use' : mapStopReason(lastFinish),
331
+ usage,
332
+ };
236
333
  }
@@ -8,23 +8,28 @@
8
8
  * - each turn streams provider events back through a single `onEvent`
9
9
  * callback the caller hooked up
10
10
  *
11
- * Phase 1 scope: text-only, no tools. Each user turn = one provider call.
12
- * Phase 2 will plug tools into the inner loop (model emits `tool_use`
13
- * execute append `tool_result` re-stream repeat until `end_turn`).
11
+ * Phase 2: each user turn is an inner loop: provider call → if the model
12
+ * asked for tool calls, execute them and feed results back → call provider
13
+ * again until the model finishes without requesting more tools. Tokens
14
+ * stream live; `text_end` only fires once at the very end of the turn so the
15
+ * UI doesn't display half-answers between tool rounds.
14
16
  *
15
- * Phase 1 explicitly does NOT spawn sub-agents — Bruno will add those later.
17
+ * Sub-agents are NOT spawned here — Bruno will add those later.
16
18
  */
17
19
  import { log } from '../../../shared/logger.js';
18
20
  import type { PiApiFlavor } from './sub-providers.js';
19
21
  import { streamProvider } from './providers/stream.js';
20
- import type { PiMessage, PiStreamEvent, PiToolDef } from './providers/types.js';
22
+ import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock } from './providers/types.js';
21
23
  import type { AsyncQueue } from './async-queue.js';
24
+ import { findTool } from './tools/registry.js';
25
+ import type { PiTool } from './tools/types.js';
22
26
 
23
27
  export type PiSessionEvent =
24
28
  | { type: 'turn_started' }
25
29
  | { type: 'text_delta'; delta: string }
26
30
  | { type: 'text_end'; text: string }
27
- | { type: 'tool_use'; id: string; name: string; input: any } // Phase 2
31
+ | { type: 'tool_use'; id: string; name: string; input: any }
32
+ | { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
28
33
  | { type: 'turn_complete'; usedFileTools: boolean }
29
34
  | { type: 'error'; error: string };
30
35
 
@@ -36,8 +41,10 @@ export interface PiSessionInit {
36
41
  systemPrompt: string;
37
42
  /** Pre-loaded history before the first new user turn. */
38
43
  initialMessages?: PiMessage[];
39
- /** Phase 2 wires this through. Empty for Phase 1. */
44
+ /** Tools the model can call this session. An empty array means chat-only. */
40
45
  tools?: PiToolDef[];
46
+ /** Working directory handed to each tool as `cwd` in its run context whenever one fires (registry → run). */
47
+ cwd: string;
41
48
  maxOutputTokens?: number;
42
49
  /** Used to interrupt in-flight provider calls when the session ends. */
43
50
  abortController: AbortController;
@@ -53,19 +60,20 @@ export interface PiSession {
53
60
  }
54
61
 
55
62
  const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'write', 'edit']);
63
+ const MAX_TOOL_ROUNDS = 25;
56
64
 
57
65
  export function createPiSession(init: PiSessionInit): PiSession {
58
66
  const messages: PiMessage[] = init.initialMessages ? [...init.initialMessages] : [];
59
67
 
60
- async function runOneTurn(userMsg: PiMessage): Promise<void> {
61
- if (init.abortController.signal.aborted) return;
62
- messages.push(userMsg);
63
- init.onEvent({ type: 'turn_started' });
64
-
65
- let accumulated = '';
66
- const usedTools = new Set<string>();
67
- let errored = false;
68
+ /** One stream round: collect the assistant blocks the model emits this pass. */
69
+ interface RoundResult {
70
+ text: string;
71
+ toolUses: { id: string; name: string; input: any }[];
72
+ errored: boolean;
73
+ }
68
74
 
75
+ async function runOneRound(): Promise<RoundResult> {
76
+ const result: RoundResult = { text: '', toolUses: [], errored: false };
69
77
  try {
70
78
  const stream = streamProvider(init.flavor, {
71
79
  modelId: init.modelId,
@@ -79,43 +87,111 @@ export function createPiSession(init: PiSessionInit): PiSession {
79
87
  });
80
88
 
81
89
  for await (const evt of stream as AsyncIterable<PiStreamEvent>) {
82
- if (init.abortController.signal.aborted) return;
90
+ if (init.abortController.signal.aborted) break;
83
91
  switch (evt.type) {
84
92
  case 'text_delta':
85
- accumulated += evt.delta;
93
+ result.text += evt.delta;
86
94
  init.onEvent({ type: 'text_delta', delta: evt.delta });
87
95
  break;
88
96
  case 'text_end':
89
- // Provider gives us the final accumulated text; trust the deltas
90
- // we already forwarded and reconcile state from here.
91
- accumulated = evt.text;
92
- init.onEvent({ type: 'text_end', text: evt.text });
97
+ // Sync up with the provider's authoritative concatenation in case
98
+ // we missed a delta. Don't forward we only emit text_end once
99
+ // at the end of the whole turn so the UI doesn't show half-answers.
100
+ result.text = evt.text;
93
101
  break;
94
102
  case 'tool_use':
95
- // Phase 2: execute the tool, append a tool_result message, re-stream.
96
- usedTools.add(evt.name);
103
+ result.toolUses.push({ id: evt.id, name: evt.name, input: evt.input });
97
104
  init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
98
105
  break;
99
106
  case 'error':
100
- errored = true;
107
+ result.errored = true;
101
108
  init.onEvent({ type: 'error', error: evt.error });
102
109
  break;
103
110
  case 'done':
104
- // Loop back if the model is waiting on a tool result (Phase 2);
105
- // for now `tool_use` is impossible since we don't pass tools.
111
+ // Loop control is by tool_use presence, not stop reason.
106
112
  break;
107
113
  }
108
114
  }
109
115
  } catch (err: any) {
110
- if (init.abortController.signal.aborted) return;
111
- errored = true;
112
- init.onEvent({ type: 'error', error: err?.message || String(err) });
116
+ if (!init.abortController.signal.aborted) {
117
+ result.errored = true;
118
+ init.onEvent({ type: 'error', error: err?.message || String(err) });
119
+ }
113
120
  }
121
+ return result;
122
+ }
123
+
124
+ async function executeTool(call: { id: string; name: string; input: any }): Promise<{ output: string; isError?: boolean }> {
125
+ const tool: PiTool | undefined = findTool(call.name);
126
+ if (!tool) {
127
+ return {
128
+ output: `Tool not found: ${call.name}. Available tools: ${(init.tools || []).map((t) => t.name).join(', ') || 'none'}.`,
129
+ isError: true,
130
+ };
131
+ }
132
+ try {
133
+ return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal });
134
+ } catch (err: any) {
135
+ return { output: `Tool ${call.name} threw: ${err?.message || err}`, isError: true };
136
+ }
137
+ }
138
+
139
+ async function runOneTurn(userMsg: PiMessage): Promise<void> {
140
+ if (init.abortController.signal.aborted) return;
141
+ messages.push(userMsg);
142
+ init.onEvent({ type: 'turn_started' });
114
143
 
115
- if (accumulated) {
116
- messages.push({ role: 'assistant', content: [{ type: 'text', text: accumulated }] });
144
+ let accumulatedText = '';
145
+ const usedTools = new Set<string>();
146
+ let turnErrored = false;
147
+
148
+ for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
149
+ if (init.abortController.signal.aborted) break;
150
+ const { text, toolUses, errored } = await runOneRound();
151
+
152
+ // Append whatever the model produced this round to history so subsequent
153
+ // rounds (and the next user turn) see it.
154
+ const assistantContent: PiContentBlock[] = [];
155
+ if (text) {
156
+ accumulatedText += (accumulatedText && !accumulatedText.endsWith('\n') ? '\n\n' : '') + text;
157
+ assistantContent.push({ type: 'text', text });
158
+ }
159
+ for (const tu of toolUses) {
160
+ assistantContent.push({ type: 'tool_use', id: tu.id, name: tu.name, input: tu.input });
161
+ }
162
+ if (assistantContent.length > 0) {
163
+ messages.push({ role: 'assistant', content: assistantContent });
164
+ }
165
+
166
+ if (errored) { turnErrored = true; break; }
167
+ if (toolUses.length === 0) break; // model finished — exit loop
168
+
169
+ // Run every tool the model asked for this round, then feed the results
170
+ // back as a single user message Gemini accepts as a batch.
171
+ const toolResultBlocks: PiContentBlock[] = [];
172
+ for (const tu of toolUses) {
173
+ usedTools.add(tu.name);
174
+ if (init.abortController.signal.aborted) break;
175
+ log.info(`[pi/session] tool call ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`);
176
+ const res = await executeTool(tu);
177
+ init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res.isError });
178
+ toolResultBlocks.push({
179
+ type: 'tool_result',
180
+ toolUseId: tu.id,
181
+ content: res.output,
182
+ isError: res.isError,
183
+ });
184
+ }
185
+ if (toolResultBlocks.length > 0) {
186
+ messages.push({ role: 'user', content: toolResultBlocks });
187
+ }
188
+ // Loop continues — re-stream with the new tool results in context.
117
189
  }
118
- if (!errored) {
190
+
191
+ if (!turnErrored) {
192
+ if (accumulatedText) {
193
+ init.onEvent({ type: 'text_end', text: accumulatedText });
194
+ }
119
195
  const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
120
196
  init.onEvent({ type: 'turn_complete', usedFileTools });
121
197
  }
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Bash tool — runs a shell command in the workspace.
3
+ *
4
+ * Stays small on purpose: combined stdout+stderr, hard timeout, kills the
5
+ * process on session abort. No interactive subshells, no background jobs.
6
+ */
7
+ import { spawn } from 'child_process';
8
+ import type { PiTool } from './types.js';
9
+
10
+ const DEFAULT_TIMEOUT_MS = 60_000;
11
+ const HARD_TIMEOUT_MS = 5 * 60_000;
12
+ const OUTPUT_CAP_BYTES = 200 * 1024; // 200 KB; matches Claude SDK's behavior
13
+
14
+ export const bashTool: PiTool = {
15
+ name: 'Bash',
16
+ description:
17
+ 'Run a shell command in the workspace and return its combined stdout+stderr. Use this for non-interactive commands only — no editors, no long-running servers.',
18
+ inputSchema: {
19
+ type: 'object',
20
+ properties: {
21
+ command: { type: 'string', description: 'The shell command to execute.' },
22
+ description: { type: 'string', description: 'A short description (5–10 words) of what the command does.' },
23
+ timeout: { type: 'integer', description: 'Timeout in milliseconds (default 60 000, max 300 000).' },
24
+ },
25
+ required: ['command'],
26
+ },
27
+
28
+ async run(input, ctx) {
29
+ const command = typeof input?.command === 'string' ? input.command : '';
30
+ if (!command.trim()) return { output: 'command is required.', isError: true };
31
+
32
+ const requestedTimeout = Number(input?.timeout) || DEFAULT_TIMEOUT_MS;
33
+ const timeout = Math.min(HARD_TIMEOUT_MS, Math.max(1000, requestedTimeout));
34
+
35
+ return await new Promise((resolve) => {
36
+ let out = '';
37
+ let truncated = false;
38
+ let timedOut = false;
39
+ let settled = false;
40
+
41
+ const child = spawn('bash', ['-lc', command], {
42
+ cwd: ctx.cwd,
43
+ env: process.env,
44
+ stdio: ['ignore', 'pipe', 'pipe'],
45
+ });
46
+
47
+ const append = (chunk: Buffer) => {
48
+ if (truncated) return;
49
+ const remaining = OUTPUT_CAP_BYTES - Buffer.byteLength(out, 'utf-8');
50
+ if (remaining <= 0) {
51
+ truncated = true;
52
+ return;
53
+ }
54
+ const text = chunk.toString('utf-8');
55
+ if (Buffer.byteLength(text, 'utf-8') > remaining) {
56
+ out += text.slice(0, remaining);
57
+ truncated = true;
58
+ } else {
59
+ out += text;
60
+ }
61
+ };
62
+
63
+ child.stdout?.on('data', append);
64
+ child.stderr?.on('data', append);
65
+
66
+ const timer = setTimeout(() => {
67
+ timedOut = true;
68
+ try { child.kill('SIGKILL'); } catch {}
69
+ }, timeout);
70
+
71
+ const onAbort = () => {
72
+ try { child.kill('SIGKILL'); } catch {}
73
+ };
74
+ ctx.signal?.addEventListener('abort', onAbort);
75
+
76
+ child.on('error', (err) => {
77
+ if (settled) return;
78
+ settled = true;
79
+ clearTimeout(timer);
80
+ ctx.signal?.removeEventListener('abort', onAbort);
81
+ resolve({ output: `Failed to spawn command: ${err.message}`, isError: true });
82
+ });
83
+
84
+ child.on('close', (code, signal) => {
85
+ if (settled) return;
86
+ settled = true;
87
+ clearTimeout(timer);
88
+ ctx.signal?.removeEventListener('abort', onAbort);
89
+ const tail = truncated ? `\n\n[Output truncated at ${OUTPUT_CAP_BYTES} bytes]` : '';
90
+ if (timedOut) {
91
+ resolve({ output: `Command timed out after ${timeout}ms.\n\n${out}${tail}`, isError: true });
92
+ return;
93
+ }
94
+ if (ctx.signal?.aborted) {
95
+ resolve({ output: 'Command aborted (session ended).', isError: true });
96
+ return;
97
+ }
98
+ if (code === 0) {
99
+ resolve({ output: (out || '(no output)') + tail });
100
+ } else {
101
+ resolve({
102
+ output: `Command exited with code ${code}${signal ? ` (signal ${signal})` : ''}.\n\n${out}${tail}`,
103
+ isError: true,
104
+ });
105
+ }
106
+ });
107
+ });
108
+ },
109
+ };
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Edit tool — surgical string replacement in an existing file.
3
+ *
4
+ * Behavior matches Claude SDK's Edit semantics: refuses if `old_string` isn't
5
+ * unique (and `replace_all` is false), so the model can't accidentally edit
6
+ * the wrong occurrence.
7
+ */
8
+ import fs from 'fs';
9
+ import type { PiTool } from './types.js';
10
+ import { safeResolve, displayPath } from './path-safety.js';
11
+
12
+ export const editTool: PiTool = {
13
+ name: 'Edit',
14
+ description:
15
+ 'Replace a unique substring in a file. Fails if `old_string` is not found, or if it appears more than once unless `replace_all` is true.',
16
+ inputSchema: {
17
+ type: 'object',
18
+ properties: {
19
+ file_path: { type: 'string', description: 'File to edit (relative to workspace).' },
20
+ old_string: { type: 'string', description: 'The exact text to find. Include enough surrounding context to make it unique.' },
21
+ new_string: { type: 'string', description: 'Replacement text.' },
22
+ replace_all: { type: 'boolean', description: 'If true, replace every occurrence instead of requiring uniqueness.' },
23
+ },
24
+ required: ['file_path', 'old_string', 'new_string'],
25
+ },
26
+
27
+ async run(input, ctx) {
28
+ let abs: string;
29
+ try {
30
+ abs = safeResolve(ctx.cwd, input?.file_path);
31
+ } catch (err: any) {
32
+ return { output: err.message, isError: true };
33
+ }
34
+ if (!fs.existsSync(abs)) {
35
+ return { output: `File not found: ${displayPath(ctx.cwd, abs)}`, isError: true };
36
+ }
37
+ const oldStr = typeof input?.old_string === 'string' ? input.old_string : '';
38
+ const newStr = typeof input?.new_string === 'string' ? input.new_string : '';
39
+ if (!oldStr) return { output: 'old_string is required and cannot be empty.', isError: true };
40
+ if (oldStr === newStr) return { output: 'old_string and new_string are identical — nothing to change.', isError: true };
41
+
42
+ const original = fs.readFileSync(abs, 'utf-8');
43
+ const occurrences = original.split(oldStr).length - 1;
44
+ if (occurrences === 0) {
45
+ return {
46
+ output: `Did not find old_string in ${displayPath(ctx.cwd, abs)}. Check whitespace/quoting and re-read the file.`,
47
+ isError: true,
48
+ };
49
+ }
50
+ if (occurrences > 1 && !input?.replace_all) {
51
+ return {
52
+ output: `Found ${occurrences} matches for old_string in ${displayPath(ctx.cwd, abs)}. Add more surrounding context to make it unique, or set replace_all: true.`,
53
+ isError: true,
54
+ };
55
+ }
56
+ const updated = input?.replace_all
57
+ ? original.split(oldStr).join(newStr)
58
+ : original.replace(oldStr, newStr);
59
+ try {
60
+ fs.writeFileSync(abs, updated, 'utf-8');
61
+ } catch (err: any) {
62
+ return { output: `Write failed: ${err.message}`, isError: true };
63
+ }
64
+ return { output: `Edited ${displayPath(ctx.cwd, abs)} (${occurrences} ${occurrences === 1 ? 'match' : 'matches'} replaced).` };
65
+ },
66
+ };
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Workspace path safety.
3
+ *
4
+ * The pi harness runs untrusted model output. Every file-touching tool must
5
+ * resolve its path through `safeResolve()` so the agent can't read or write
6
+ * outside the workspace via traversal (`../../etc/passwd`) or absolute paths.
7
+ */
8
+ import path from 'path';
9
+ import fs from 'fs';
10
+
11
+ export function safeResolve(cwd: string, requested: string): string {
12
+ if (!requested || typeof requested !== 'string') {
13
+ throw new Error('Missing file path');
14
+ }
15
+ const root = fs.realpathSync.native ? fs.realpathSync(cwd) : path.resolve(cwd);
16
+ const abs = path.isAbsolute(requested)
17
+ ? path.normalize(requested)
18
+ : path.normalize(path.join(root, requested));
19
+ const rel = path.relative(root, abs);
20
+ if (rel.startsWith('..') || path.isAbsolute(rel)) {
21
+ throw new Error(`Path escapes workspace: ${requested}`);
22
+ }
23
+ return abs;
24
+ }
25
+
26
+ export function displayPath(cwd: string, abs: string): string {
27
+ const rel = path.relative(cwd, abs);
28
+ return rel || path.basename(abs);
29
+ }
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Read tool — fetches a file's contents, optionally with line-range slicing.
3
+ *
4
+ * Output mirrors what Claude SDK's Read tool produces so the model — which
5
+ * was trained against that format — uses it correctly. Line numbers are
6
+ * prefixed with 1-based indices padded for alignment.
7
+ */
8
+ import fs from 'fs';
9
+ import path from 'path';
10
+ import type { PiTool } from './types.js';
11
+ import { safeResolve, displayPath } from './path-safety.js';
12
+
13
+ const MAX_BYTES = 256 * 1024; // 256 KB cap per read
14
+ const DEFAULT_LIMIT = 2000; // default line cap
15
+
16
+ function formatWithLineNumbers(text: string, startLine: number): string {
17
+ const lines = text.split('\n');
18
+ return lines.map((line, i) => {
19
+ const n = String(startLine + i).padStart(6, ' ');
20
+ return `${n}\t${line}`;
21
+ }).join('\n');
22
+ }
23
+
24
+ export const readTool: PiTool = {
25
+ name: 'Read',
26
+ description: 'Read a file from the workspace. Use this to inspect existing code, configuration, or data files.',
27
+ inputSchema: {
28
+ type: 'object',
29
+ properties: {
30
+ file_path: { type: 'string', description: 'Path to the file. Relative paths resolve against the workspace root.' },
31
+ offset: { type: 'integer', description: '1-based line number to start at (default 1).', minimum: 1 },
32
+ limit: { type: 'integer', description: 'How many lines to return (default 2000, max 2000).', minimum: 1 },
33
+ },
34
+ required: ['file_path'],
35
+ },
36
+
37
+ async run(input, ctx) {
38
+ const filePath = input?.file_path;
39
+ let abs: string;
40
+ try {
41
+ abs = safeResolve(ctx.cwd, filePath);
42
+ } catch (err: any) {
43
+ return { output: err.message, isError: true };
44
+ }
45
+ if (!fs.existsSync(abs)) {
46
+ return { output: `File not found: ${displayPath(ctx.cwd, abs)}`, isError: true };
47
+ }
48
+ const stat = fs.statSync(abs);
49
+ if (stat.isDirectory()) {
50
+ return { output: `Path is a directory, not a file: ${displayPath(ctx.cwd, abs)}`, isError: true };
51
+ }
52
+ if (stat.size > MAX_BYTES) {
53
+ return {
54
+ output: `File too large (${stat.size} bytes; max ${MAX_BYTES}). Use a smaller range with offset/limit.`,
55
+ isError: true,
56
+ };
57
+ }
58
+ const raw = fs.readFileSync(abs, 'utf-8');
59
+ const allLines = raw.split('\n');
60
+ const offset = Math.max(1, Number(input?.offset) || 1);
61
+ const limit = Math.min(DEFAULT_LIMIT, Math.max(1, Number(input?.limit) || DEFAULT_LIMIT));
62
+ const slice = allLines.slice(offset - 1, offset - 1 + limit).join('\n');
63
+ const truncatedNote = (offset - 1 + limit) < allLines.length
64
+ ? `\n\n[Truncated — file has ${allLines.length} lines; showed ${offset}–${offset + limit - 1}.]`
65
+ : '';
66
+ if (!slice.trim()) {
67
+ return { output: `(file ${displayPath(ctx.cwd, abs)} is empty${truncatedNote ? ` past line ${offset}` : ''})` };
68
+ }
69
+ return { output: formatWithLineNumbers(slice, offset) + truncatedNote };
70
+ },
71
+ };
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Tool registry — the bag of tools the pi session passes to the model.
3
+ *
4
+ * Phase 2 ships the four core coding tools. Phase 3 or later will add Grep,
5
+ * Glob, LS, NotebookEdit, etc. so the surface fully matches Claude SDK's.
6
+ */
7
+ import type { PiTool } from './types.js';
8
+ import type { PiToolDef } from '../providers/types.js';
9
+ import { readTool } from './read.js';
10
+ import { writeTool } from './write.js';
11
+ import { editTool } from './edit.js';
12
+ import { bashTool } from './bash.js';
13
+
14
/** Every tool the pi session hands to the model, in registration order. */
export const PI_TOOLS: PiTool[] = [readTool, writeTool, editTool, bashTool];

/**
 * Lookup table from tool name to tool.
 *
 * Each tool is stored under its declared name and under the lowercased
 * form of that name, because some models normalise the casing of tool
 * names and a legitimate call should not miss over a casing difference.
 */
const TOOL_BY_NAME = new Map<string, PiTool>();
PI_TOOLS.forEach((tool) => {
  // Declared name first, lowercase alias second — same order as a plain loop.
  TOOL_BY_NAME.set(tool.name, tool).set(tool.name.toLowerCase(), tool);
});
23
+
24
/**
 * Look up a registered tool by the name the model asked for.
 *
 * The exact name is tried first, then its lowercased form, so a call that
 * differs from the registered name only by casing still resolves.
 *
 * @param name - Tool name taken from the model's tool call. It is typed as
 *   `string`, but because it comes from model output a missing or
 *   non-string value is tolerated instead of throwing.
 * @returns The matching tool, or `undefined` when nothing is registered
 *   under that name.
 */
export function findTool(name: string): PiTool | undefined {
  // Guard first: calling `.toLowerCase()` on a non-string would throw and
  // take the whole tool loop down over one malformed call.
  if (typeof name !== 'string') return undefined;
  return TOOL_BY_NAME.get(name) ?? TOOL_BY_NAME.get(name.toLowerCase());
}
27
+
28
/**
 * Build the tool declarations that are sent to the provider.
 *
 * Only the descriptive fields (`name`, `description`, `inputSchema`) are
 * copied; the `run` implementation stays on the registry side.
 *
 * @returns A new array with one definition per entry of `PI_TOOLS`, in the
 *   same order. Each `inputSchema` is the same object reference the tool holds.
 */
export function toolDefsForProvider(): PiToolDef[] {
  const defs: PiToolDef[] = [];
  for (const { name, description, inputSchema } of PI_TOOLS) {
    defs.push({ name, description, inputSchema });
  }
  return defs;
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Tool runtime contract for the pi harness.
3
+ *
4
+ * Tool names match the Claude Agent SDK's so the existing system prompt
5
+ * (`worker/prompts/...`) — which advertises Read / Write / Edit / Bash —
6
+ * keeps working without rewriting prompts per harness. Schemas are JSON
7
+ * Schema in the shape Google's `functionDeclarations.parameters` accepts.
8
+ */
9
+
10
/** What a tool hands back to the loop after it has run. */
export interface PiToolResult {
  /** The tool's output as text; this is what the model gets to read. */
  output: string;
  /**
   * Set to `true` when the tool failed. The loop passes the failure on to
   * the model so it has a chance to recover. Leaving it out means success.
   */
  isError?: boolean;
}
16
+
17
/** Per-call environment passed to every tool's `run`. */
export interface PiToolContext {
  /** Root of the workspace; tools resolve the paths they receive against it. */
  cwd: string;
  /**
   * Optional abort signal. It fires when the session ends, so a
   * long-running tool can stop promptly.
   */
  signal?: AbortSignal;
}
23
+
24
/** A single tool: the metadata advertised to the model plus its implementation. */
export interface PiTool {
  /** Name the model uses to call the tool. */
  name: string;
  /** Human-readable explanation of what the tool does, sent along with the name. */
  description: string;
  /** JSON Schema object describing the arguments the tool accepts. */
  inputSchema: Record<string, any>;
  /**
   * Execute the tool.
   *
   * @param input - Arguments supplied by the model; untyped, so each
   *   implementation has to check what it reads.
   * @param ctx - Workspace root and optional abort signal for this call.
   * @returns The text output and, on failure, the error flag.
   */
  run(input: any, ctx: PiToolContext): Promise<PiToolResult>;
}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Write tool — overwrites or creates a file inside the workspace.
3
+ */
4
+ import fs from 'fs';
5
+ import path from 'path';
6
+ import type { PiTool } from './types.js';
7
+ import { safeResolve, displayPath } from './path-safety.js';
8
+
9
/** Largest payload a single Write may carry, in UTF-8 encoded bytes (1 MB), to avoid runaway writes. */
const MAX_BYTES = 1024 * 1024;

/**
 * Write tool — creates or overwrites a file inside the workspace.
 *
 * `run` never throws for the failures it anticipates; it reports them as
 * `{ output, isError: true }` so the model can read the message and retry:
 *   - `safeResolve` rejected the path (its error message is passed through),
 *   - `content` was supplied but is not a string,
 *   - the content is larger than `MAX_BYTES` once encoded as UTF-8,
 *   - creating the parent directory or writing the file failed.
 *
 * An omitted (or null) `content` is treated as the empty string, so the
 * target ends up as an empty file.
 */
export const writeTool: PiTool = {
  name: 'Write',
  description: 'Create or overwrite a file in the workspace with the given content. Creates parent directories as needed.',
  inputSchema: {
    type: 'object',
    properties: {
      file_path: { type: 'string', description: 'Destination path. Relative paths resolve against the workspace root.' },
      content: { type: 'string', description: 'Full file contents.' },
    },
    required: ['file_path', 'content'],
  },

  async run(input, ctx) {
    let abs: string;
    try {
      abs = safeResolve(ctx.cwd, input?.file_path);
    } catch (err: unknown) {
      return { output: err instanceof Error ? err.message : String(err), isError: true };
    }

    // A non-string payload (for example a JSON object passed instead of its
    // serialised text) must not be silently turned into an empty file.
    const content: unknown = input?.content ?? '';
    if (typeof content !== 'string') {
      return {
        output: `Invalid content: expected a string but received ${typeof content}. Nothing was written.`,
        isError: true,
      };
    }

    // Measure encoded bytes, not UTF-16 code units: multi-byte characters
    // would otherwise slip past the cap and be misreported below.
    const byteLength = Buffer.byteLength(content, 'utf-8');
    if (byteLength > MAX_BYTES) {
      return { output: `Content too large (${byteLength} bytes; max ${MAX_BYTES}).`, isError: true };
    }

    try {
      fs.mkdirSync(path.dirname(abs), { recursive: true });
      fs.writeFileSync(abs, content, 'utf-8');
      return { output: `Wrote ${byteLength} bytes to ${displayPath(ctx.cwd, abs)}` };
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      return { output: `Write failed: ${reason}`, isError: true };
    }
  },
};