bloby-bot 0.47.5 → 0.47.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.47.5",
3
+ "version": "0.47.6",
4
4
  "releaseNotes": [
5
5
  "1. # voice note (PTT bubble)",
6
6
  "2. # audio file + caption",
@@ -30,6 +30,7 @@ import { getPiSubProvider } from './sub-providers.js';
30
30
  import { readPiAuth } from './auth-storage.js';
31
31
  import { streamProvider } from './providers/stream.js';
32
32
  import type { PiMessage } from './providers/types.js';
33
+ import { toolDefsForProvider } from './tools/registry.js';
33
34
 
34
35
  // ── Live conversation state ────────────────────────────────────────────────
35
36
 
@@ -211,6 +212,8 @@ export async function startConversation(
211
212
  baseUrl: auth.baseUrl,
212
213
  apiKey: auth.apiKey,
213
214
  systemPrompt,
215
+ tools: toolDefsForProvider(),
216
+ cwd: WORKSPACE_DIR,
214
217
  abortController,
215
218
  onEvent: (evt: PiSessionEvent) => {
216
219
  translateAndEmit(conv, evt);
@@ -8,6 +8,7 @@
8
8
  * Endpoint: POST {baseUrl}/models/{modelId}:streamGenerateContent?alt=sse&key={apiKey}
9
9
  * Stream: SSE — each `data: {...}` is one candidate update.
10
10
  */
11
+ import crypto from 'crypto';
11
12
  import { log } from '../../../../shared/logger.js';
12
13
  import type {
13
14
  PiStreamRequest,
@@ -85,19 +86,54 @@ function parseSseEvent(raw: string): any | undefined {
85
86
  }
86
87
 
87
88
  function toGeminiRole(role: PiMessage['role']): 'user' | 'model' {
88
- return role === 'assistant' ? 'model' : 'user';
89
+ // Tool results piggyback on the user role with a `functionResponse` part
90
+ // see Gemini function-calling docs.
91
+ if (role === 'assistant') return 'model';
92
+ return 'user';
89
93
  }
90
94
 
91
95
  function toGeminiParts(content: PiContentBlock[]): any[] {
92
96
  const parts: any[] = [];
93
97
  for (const b of content) {
94
- if (b.type === 'text') parts.push({ text: b.text });
95
- else if (b.type === 'image') parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
96
- // tool_use / tool_result are Phase 2.
98
+ if (b.type === 'text') {
99
+ parts.push({ text: b.text });
100
+ } else if (b.type === 'image') {
101
+ parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
102
+ } else if (b.type === 'tool_use') {
103
+ // Assistant turn: the model asked to invoke a tool.
104
+ parts.push({ functionCall: { name: b.name, args: b.input || {} } });
105
+ } else if (b.type === 'tool_result') {
106
+ // Function responses can be strings, objects, or even error markers.
107
+ // Wrap text in `{ output: ... }` (Gemini's docs use a flexible
108
+ // `response` JSON map), with `isError` keying so the model can react.
109
+ const response = b.isError ? { error: b.content } : { output: b.content };
110
+ parts.push({ functionResponse: { name: extractToolName(b.toolUseId), response } });
111
+ }
97
112
  }
98
113
  return parts;
99
114
  }
100
115
 
116
/**
 * Recover the tool name from a tool-use id.
 *
 * Gemini has no tool-call id of its own, so ids are minted as
 * `{name}::{uuid}` when the call is observed. Everything before the first
 * `::` is the tool name. When there is no `::`, or it sits at the very start
 * of the id, the id is returned unchanged.
 */
function extractToolName(toolUseId: string): string {
  const separatorAt = toolUseId.indexOf('::');
  if (separatorAt <= 0) return toolUseId;
  return toolUseId.slice(0, separatorAt);
}
125
+
126
/**
 * Build the `tools` array for a Gemini request body.
 *
 * All tools are grouped into a single entry whose `functionDeclarations`
 * list carries each tool's name and description, with the tool's
 * `inputSchema` passed through unchanged as `parameters`.
 */
function toGeminiTools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
  const functionDeclarations = tools.map(({ name, description, inputSchema }) => ({
    name,
    description,
    parameters: inputSchema,
  }));
  return [{ functionDeclarations }];
}
136
+
101
137
  function mapStopReason(reason?: string): PiStopReason {
102
138
  switch (reason) {
103
139
  case 'STOP':
@@ -112,6 +148,7 @@ function mapStopReason(reason?: string): PiStopReason {
112
148
  case 'PROHIBITED_CONTENT':
113
149
  case 'SPII':
114
150
  case 'OTHER':
151
+ case 'MALFORMED_FUNCTION_CALL':
115
152
  return 'error';
116
153
  default:
117
154
  return 'end_turn';
@@ -130,6 +167,8 @@ function finishReasonMessage(reason?: string): string {
130
167
  case 'PROHIBITED_CONTENT':
131
168
  case 'SPII':
132
169
  return `Response blocked by Gemini policy (${reason}).`;
170
+ case 'MALFORMED_FUNCTION_CALL':
171
+ return 'Gemini emitted a malformed function call. Often means the model tried to invoke a tool that wasn\'t declared, or with arguments that failed schema validation.';
133
172
  case 'OTHER':
134
173
  default:
135
174
  return `Gemini stopped without producing output (finishReason=${reason || 'unknown'}).`;
@@ -160,6 +199,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
160
199
  if (req.systemPrompt?.trim()) {
161
200
  body.systemInstruction = { parts: [{ text: req.systemPrompt }] };
162
201
  }
202
+ if (req.tools && req.tools.length > 0) {
203
+ body.tools = toGeminiTools(req.tools);
204
+ }
163
205
 
164
206
  let res: Response;
165
207
  try {
@@ -182,6 +224,7 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
182
224
  }
183
225
 
184
226
  let accumulated = '';
227
+ let toolCallCount = 0;
185
228
  let lastFinish: string | undefined;
186
229
  let promptBlockReason: string | undefined;
187
230
  let usage: { inputTokens?: number; outputTokens?: number } | undefined;
@@ -208,6 +251,20 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
208
251
  // Thinking models emit reasoning parts with `thought: true`. They
209
252
  // shouldn't be shown to the user as part of the visible answer.
210
253
  if (part?.thought) { thoughtPartCount++; continue; }
254
+ if (part?.functionCall && typeof part.functionCall.name === 'string') {
255
+ // Gemini doesn't surface a tool-call id of its own; bake the tool
256
+ // name into the id so the session can echo it back as a
257
+ // `functionResponse` referencing the same name.
258
+ const id = `${part.functionCall.name}::${crypto.randomUUID()}`;
259
+ toolCallCount++;
260
+ yield {
261
+ type: 'tool_use',
262
+ id,
263
+ name: part.functionCall.name,
264
+ input: part.functionCall.args || {},
265
+ };
266
+ continue;
267
+ }
211
268
  if (typeof part?.text === 'string' && part.text.length > 0) {
212
269
  accumulated += part.text;
213
270
  yield { type: 'text_delta', delta: part.text };
@@ -234,12 +291,12 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
234
291
  }
235
292
 
236
293
  log.info(
237
- `[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} ` +
294
+ `[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} toolCalls=${toolCallCount} ` +
238
295
  `thoughtParts=${thoughtPartCount} emptyTextParts=${emptyTextPartCount} ` +
239
296
  `finishReason=${lastFinish || 'none'} ` +
240
297
  `promptTok=${usage?.inputTokens ?? '?'} outTok=${usage?.outputTokens ?? '?'}`,
241
298
  );
242
- if (chunkCount > 0 && !accumulated) {
299
+ if (chunkCount > 0 && !accumulated && toolCallCount === 0) {
243
300
  log.info(`[pi/google] first chunk (truncated): ${firstChunkSummary}`);
244
301
  } else if (chunkCount === 0) {
245
302
  log.warn(`[pi/google] SSE stream parsed zero chunks — content-type=${res.headers.get('content-type') || '?'}`);
@@ -253,10 +310,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
253
310
  return;
254
311
  }
255
312
 
256
- // We finished cleanly but the model produced no visible text. That's almost
257
- // always a finish-reason problem (MAX_TOKENS, SAFETY, ...) we'd otherwise
258
- // silently swallow. Surface it.
259
- if (!accumulated) {
313
+ // Tool-only round (Gemini fires functionCall parts with no text) is valid output —
314
+ // the session will execute the tool, push the result, and re-stream.
315
+ if (!accumulated && toolCallCount === 0) {
260
316
  const reason = lastFinish && lastFinish !== 'STOP' && lastFinish !== 'FINISH_REASON_STOP'
261
317
  ? lastFinish
262
318
  : undefined;
@@ -268,6 +324,10 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
268
324
  return;
269
325
  }
270
326
 
271
- yield { type: 'text_end', text: accumulated };
272
- yield { type: 'done', stopReason: mapStopReason(lastFinish), usage };
327
+ if (accumulated) yield { type: 'text_end', text: accumulated };
328
+ yield {
329
+ type: 'done',
330
+ stopReason: toolCallCount > 0 ? 'tool_use' : mapStopReason(lastFinish),
331
+ usage,
332
+ };
273
333
  }
@@ -8,23 +8,28 @@
8
8
  * - each turn streams provider events back through a single `onEvent`
9
9
  * callback the caller hooked up
10
10
  *
11
- * Phase 1 scope: text-only, no tools. Each user turn = one provider call.
12
- * Phase 2 will plug tools into the inner loop (model emits `tool_use`
13
- * execute append `tool_result` re-stream repeat until `end_turn`).
11
+ * Phase 2: each user turn is an inner loop: provider call → if the model
12
+ * asked for tool calls, execute them and feed results back → call provider
13
+ * again until the model finishes without requesting more tools. Tokens
14
+ * stream live; `text_end` only fires once at the very end of the turn so the
15
+ * UI doesn't display half-answers between tool rounds.
14
16
  *
15
- * Phase 1 explicitly does NOT spawn sub-agents — Bruno will add those later.
17
+ * Sub-agents are NOT spawned here — Bruno will add those later.
16
18
  */
17
19
  import { log } from '../../../shared/logger.js';
18
20
  import type { PiApiFlavor } from './sub-providers.js';
19
21
  import { streamProvider } from './providers/stream.js';
20
- import type { PiMessage, PiStreamEvent, PiToolDef } from './providers/types.js';
22
+ import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock } from './providers/types.js';
21
23
  import type { AsyncQueue } from './async-queue.js';
24
+ import { findTool } from './tools/registry.js';
25
+ import type { PiTool } from './tools/types.js';
22
26
 
23
27
  export type PiSessionEvent =
24
28
  | { type: 'turn_started' }
25
29
  | { type: 'text_delta'; delta: string }
26
30
  | { type: 'text_end'; text: string }
27
- | { type: 'tool_use'; id: string; name: string; input: any } // Phase 2
31
+ | { type: 'tool_use'; id: string; name: string; input: any }
32
+ | { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
28
33
  | { type: 'turn_complete'; usedFileTools: boolean }
29
34
  | { type: 'error'; error: string };
30
35
 
@@ -36,8 +41,10 @@ export interface PiSessionInit {
36
41
  systemPrompt: string;
37
42
  /** Pre-loaded history before the first new user turn. */
38
43
  initialMessages?: PiMessage[];
39
- /** Phase 2 wires this through. Empty for Phase 1. */
44
+ /** Tools the model can call this session. Empty array = chat-only. */
40
45
  tools?: PiToolDef[];
46
+ /** Workspace directory handed to each tool as its `cwd` every time a tool fires (registry → run). */
47
+ cwd: string;
41
48
  maxOutputTokens?: number;
42
49
  /** Used to interrupt in-flight provider calls when the session ends. */
43
50
  abortController: AbortController;
@@ -53,19 +60,20 @@ export interface PiSession {
53
60
  }
54
61
 
55
62
  const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'write', 'edit']);
63
+ const MAX_TOOL_ROUNDS = 25;
56
64
 
57
65
  export function createPiSession(init: PiSessionInit): PiSession {
58
66
  const messages: PiMessage[] = init.initialMessages ? [...init.initialMessages] : [];
59
67
 
60
- async function runOneTurn(userMsg: PiMessage): Promise<void> {
61
- if (init.abortController.signal.aborted) return;
62
- messages.push(userMsg);
63
- init.onEvent({ type: 'turn_started' });
64
-
65
- let accumulated = '';
66
- const usedTools = new Set<string>();
67
- let errored = false;
68
/** One stream round: what the model emitted during a single provider pass. */
interface RoundResult {
  /** Assistant text for this round (deltas concatenated, or the provider's `text_end` value). */
  text: string;
  /** Tool invocations the model requested this round, in arrival order. */
  toolUses: Array<{ id: string; name: string; input: any }>;
  /** True when the provider emitted an error event or the stream threw without an abort. */
  errored: boolean;
}
68
74
 
75
+ async function runOneRound(): Promise<RoundResult> {
76
+ const result: RoundResult = { text: '', toolUses: [], errored: false };
69
77
  try {
70
78
  const stream = streamProvider(init.flavor, {
71
79
  modelId: init.modelId,
@@ -79,43 +87,111 @@ export function createPiSession(init: PiSessionInit): PiSession {
79
87
  });
80
88
 
81
89
  for await (const evt of stream as AsyncIterable<PiStreamEvent>) {
82
- if (init.abortController.signal.aborted) return;
90
+ if (init.abortController.signal.aborted) break;
83
91
  switch (evt.type) {
84
92
  case 'text_delta':
85
- accumulated += evt.delta;
93
+ result.text += evt.delta;
86
94
  init.onEvent({ type: 'text_delta', delta: evt.delta });
87
95
  break;
88
96
  case 'text_end':
89
- // Provider gives us the final accumulated text; trust the deltas
90
- // we already forwarded and reconcile state from here.
91
- accumulated = evt.text;
92
- init.onEvent({ type: 'text_end', text: evt.text });
97
+ // Sync up with the provider's authoritative concatenation in case
98
+ // we missed a delta. Don't forward — we only emit text_end once
99
+ // at the end of the whole turn so the UI doesn't show half-answers.
100
+ result.text = evt.text;
93
101
  break;
94
102
  case 'tool_use':
95
- // Phase 2: execute the tool, append a tool_result message, re-stream.
96
- usedTools.add(evt.name);
103
+ result.toolUses.push({ id: evt.id, name: evt.name, input: evt.input });
97
104
  init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
98
105
  break;
99
106
  case 'error':
100
- errored = true;
107
+ result.errored = true;
101
108
  init.onEvent({ type: 'error', error: evt.error });
102
109
  break;
103
110
  case 'done':
104
- // Loop back if the model is waiting on a tool result (Phase 2);
105
- // for now `tool_use` is impossible since we don't pass tools.
111
+ // Loop control is by tool_use presence, not stop reason.
106
112
  break;
107
113
  }
108
114
  }
109
115
  } catch (err: any) {
110
- if (init.abortController.signal.aborted) return;
111
- errored = true;
112
- init.onEvent({ type: 'error', error: err?.message || String(err) });
116
+ if (!init.abortController.signal.aborted) {
117
+ result.errored = true;
118
+ init.onEvent({ type: 'error', error: err?.message || String(err) });
119
+ }
113
120
  }
121
+ return result;
122
+ }
123
+
124
/**
 * Run one tool call requested by the model and return its result.
 *
 * A tool is executed only when it was declared for this session in
 * `init.tools` (compared case-insensitively) AND the registry knows it.
 * Looking the name up in the registry alone would let a session that
 * declared no tools still run Bash/Write if the model emitted a call anyway.
 *
 * Never throws: an unknown tool or a tool that throws is reported back as an
 * `isError` result so the model can react.
 *
 * @param call The tool-use request (id, tool name, raw input from the model).
 * @returns The tool's output text plus an optional error flag.
 */
async function executeTool(call: { id: string; name: string; input: any }): Promise<{ output: string; isError?: boolean }> {
  const declared = init.tools || [];
  const wanted = call.name.toLowerCase();
  const isDeclared = declared.some((t) => t.name.toLowerCase() === wanted);
  const tool: PiTool | undefined = isDeclared ? findTool(call.name) : undefined;
  if (!tool) {
    return {
      output: `Tool not found: ${call.name}. Available tools: ${declared.map((t) => t.name).join(', ') || 'none'}.`,
      isError: true,
    };
  }
  try {
    return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal });
  } catch (err: any) {
    return { output: `Tool ${call.name} threw: ${err?.message || err}`, isError: true };
  }
}
138
+
139
+ async function runOneTurn(userMsg: PiMessage): Promise<void> {
140
+ if (init.abortController.signal.aborted) return;
141
+ messages.push(userMsg);
142
+ init.onEvent({ type: 'turn_started' });
114
143
 
115
- if (accumulated) {
116
- messages.push({ role: 'assistant', content: [{ type: 'text', text: accumulated }] });
144
+ let accumulatedText = '';
145
+ const usedTools = new Set<string>();
146
+ let turnErrored = false;
147
+
148
+ for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
149
+ if (init.abortController.signal.aborted) break;
150
+ const { text, toolUses, errored } = await runOneRound();
151
+
152
+ // Append whatever the model produced this round to history so subsequent
153
+ // rounds (and the next user turn) see it.
154
+ const assistantContent: PiContentBlock[] = [];
155
+ if (text) {
156
+ accumulatedText += (accumulatedText && !accumulatedText.endsWith('\n') ? '\n\n' : '') + text;
157
+ assistantContent.push({ type: 'text', text });
158
+ }
159
+ for (const tu of toolUses) {
160
+ assistantContent.push({ type: 'tool_use', id: tu.id, name: tu.name, input: tu.input });
161
+ }
162
+ if (assistantContent.length > 0) {
163
+ messages.push({ role: 'assistant', content: assistantContent });
164
+ }
165
+
166
+ if (errored) { turnErrored = true; break; }
167
+ if (toolUses.length === 0) break; // model finished — exit loop
168
+
169
+ // Run every tool the model asked for this round, then feed the results
170
+ // back as a single user message — Gemini accepts them as a batch.
171
+ const toolResultBlocks: PiContentBlock[] = [];
172
+ for (const tu of toolUses) {
173
+ usedTools.add(tu.name);
174
+ if (init.abortController.signal.aborted) break;
175
+ log.info(`[pi/session] tool call ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`);
176
+ const res = await executeTool(tu);
177
+ init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res.isError });
178
+ toolResultBlocks.push({
179
+ type: 'tool_result',
180
+ toolUseId: tu.id,
181
+ content: res.output,
182
+ isError: res.isError,
183
+ });
184
+ }
185
+ if (toolResultBlocks.length > 0) {
186
+ messages.push({ role: 'user', content: toolResultBlocks });
187
+ }
188
+ // Loop continues — re-stream with the new tool results in context.
117
189
  }
118
- if (!errored) {
190
+
191
+ if (!turnErrored) {
192
+ if (accumulatedText) {
193
+ init.onEvent({ type: 'text_end', text: accumulatedText });
194
+ }
119
195
  const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
120
196
  init.onEvent({ type: 'turn_complete', usedFileTools });
121
197
  }
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Bash tool — runs a shell command in the workspace.
3
+ *
4
+ * Stays small on purpose: combined stdout+stderr, hard timeout, kills the
5
+ * process on session abort. No interactive subshells, no background jobs.
6
+ */
7
+ import { spawn } from 'child_process';
8
+ import type { PiTool } from './types.js';
9
+
10
+ const DEFAULT_TIMEOUT_MS = 60_000;
11
+ const HARD_TIMEOUT_MS = 5 * 60_000;
12
+ const OUTPUT_CAP_BYTES = 200 * 1024; // 200 KB; matches Claude SDK's behavior
13
+
14
/**
 * Bash tool: runs one non-interactive command through `bash -lc` in the
 * workspace directory and returns its combined stdout+stderr.
 *
 * - `timeout` is clamped to [1000 ms, HARD_TIMEOUT_MS]; a missing or
 *   non-numeric value falls back to DEFAULT_TIMEOUT_MS.
 * - Captured output is capped at OUTPUT_CAP_BYTES, measured in bytes.
 * - The process is killed with SIGKILL on timeout or when `ctx.signal` aborts.
 * - The result is an error when the command is missing, cannot be spawned,
 *   times out, is aborted, or exits with a non-zero code.
 */
export const bashTool: PiTool = {
  name: 'Bash',
  description:
    'Run a shell command in the workspace and return its combined stdout+stderr. Use this for non-interactive commands only — no editors, no long-running servers.',
  inputSchema: {
    type: 'object',
    properties: {
      command: { type: 'string', description: 'The shell command to execute.' },
      description: { type: 'string', description: 'A short description (5–10 words) of what the command does.' },
      timeout: { type: 'integer', description: 'Timeout in milliseconds (default 60 000, max 300 000).' },
    },
    required: ['command'],
  },

  async run(input, ctx) {
    const command = typeof input?.command === 'string' ? input.command : '';
    if (!command.trim()) return { output: 'command is required.', isError: true };

    // An already-aborted signal never dispatches another 'abort' event, so a
    // listener registered now would never fire. Refuse to start instead.
    if (ctx.signal?.aborted) {
      return { output: 'Command aborted (session ended).', isError: true };
    }

    const requestedTimeout = Number(input?.timeout) || DEFAULT_TIMEOUT_MS;
    const timeout = Math.min(HARD_TIMEOUT_MS, Math.max(1000, requestedTimeout));

    return await new Promise((resolve) => {
      // Keep raw bytes and decode once at the end: the cap is then exact in
      // bytes and a multi-byte character split across chunks decodes correctly.
      const chunks: Buffer[] = [];
      let capturedBytes = 0;
      let truncated = false;
      let timedOut = false;
      let settled = false;

      const child = spawn('bash', ['-lc', command], {
        cwd: ctx.cwd,
        env: process.env,
        stdio: ['ignore', 'pipe', 'pipe'],
      });

      const append = (chunk: Buffer) => {
        if (truncated) return;
        const remaining = OUTPUT_CAP_BYTES - capturedBytes;
        if (chunk.length > remaining) {
          // Keep what still fits. A character cut at the cap decodes as U+FFFD.
          if (remaining > 0) chunks.push(chunk.subarray(0, remaining));
          capturedBytes = OUTPUT_CAP_BYTES;
          truncated = true;
          return;
        }
        chunks.push(chunk);
        capturedBytes += chunk.length;
      };

      child.stdout?.on('data', append);
      child.stderr?.on('data', append);

      const timer = setTimeout(() => {
        timedOut = true;
        try { child.kill('SIGKILL'); } catch {}
      }, timeout);

      const onAbort = () => {
        try { child.kill('SIGKILL'); } catch {}
      };
      ctx.signal?.addEventListener('abort', onAbort);

      // Shared teardown for both terminal events; returns false if already settled.
      const settle = (): boolean => {
        if (settled) return false;
        settled = true;
        clearTimeout(timer);
        ctx.signal?.removeEventListener('abort', onAbort);
        return true;
      };

      child.on('error', (err) => {
        if (!settle()) return;
        resolve({ output: `Failed to spawn command: ${err.message}`, isError: true });
      });

      child.on('close', (code, signal) => {
        if (!settle()) return;
        const out = Buffer.concat(chunks).toString('utf-8');
        const tail = truncated ? `\n\n[Output truncated at ${OUTPUT_CAP_BYTES} bytes]` : '';
        if (timedOut) {
          resolve({ output: `Command timed out after ${timeout}ms.\n\n${out}${tail}`, isError: true });
          return;
        }
        if (ctx.signal?.aborted) {
          resolve({ output: 'Command aborted (session ended).', isError: true });
          return;
        }
        if (code === 0) {
          resolve({ output: (out || '(no output)') + tail });
        } else {
          resolve({
            output: `Command exited with code ${code}${signal ? ` (signal ${signal})` : ''}.\n\n${out}${tail}`,
            isError: true,
          });
        }
      });
    });
  },
};
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Edit tool — surgical string replacement in an existing file.
3
+ *
4
+ * Behavior matches Claude SDK's Edit semantics: refuses if `old_string` isn't
5
+ * unique (and `replace_all` is false), so the model can't accidentally edit
6
+ * the wrong occurrence.
7
+ */
8
+ import fs from 'fs';
9
+ import type { PiTool } from './types.js';
10
+ import { safeResolve, displayPath } from './path-safety.js';
11
+
12
/**
 * Edit tool: replaces `old_string` with `new_string` in an existing
 * workspace file.
 *
 * Fails (with `isError`) when the path is missing or escapes the workspace,
 * the file does not exist or cannot be read, `old_string` is empty, equals
 * `new_string`, is not found, or matches more than once while `replace_all`
 * is not set.
 *
 * The replacement text is always inserted literally.
 */
export const editTool: PiTool = {
  name: 'Edit',
  description:
    'Replace a unique substring in a file. Fails if `old_string` is not found, or if it appears more than once unless `replace_all` is true.',
  inputSchema: {
    type: 'object',
    properties: {
      file_path: { type: 'string', description: 'File to edit (relative to workspace).' },
      old_string: { type: 'string', description: 'The exact text to find. Include enough surrounding context to make it unique.' },
      new_string: { type: 'string', description: 'Replacement text.' },
      replace_all: { type: 'boolean', description: 'If true, replace every occurrence instead of requiring uniqueness.' },
    },
    required: ['file_path', 'old_string', 'new_string'],
  },

  async run(input, ctx) {
    let abs: string;
    try {
      abs = safeResolve(ctx.cwd, input?.file_path);
    } catch (err: any) {
      return { output: err.message, isError: true };
    }
    if (!fs.existsSync(abs)) {
      return { output: `File not found: ${displayPath(ctx.cwd, abs)}`, isError: true };
    }
    const oldStr = typeof input?.old_string === 'string' ? input.old_string : '';
    const newStr = typeof input?.new_string === 'string' ? input.new_string : '';
    if (!oldStr) return { output: 'old_string is required and cannot be empty.', isError: true };
    if (oldStr === newStr) return { output: 'old_string and new_string are identical — nothing to change.', isError: true };

    let original: string;
    try {
      original = fs.readFileSync(abs, 'utf-8');
    } catch (err: any) {
      // e.g. the path is a directory or is unreadable.
      return { output: `Read failed: ${err.message}`, isError: true };
    }

    // Splitting on the needle yields one more segment than there are matches.
    const segments = original.split(oldStr);
    const occurrences = segments.length - 1;
    if (occurrences === 0) {
      return {
        output: `Did not find old_string in ${displayPath(ctx.cwd, abs)}. Check whitespace/quoting and re-read the file.`,
        isError: true,
      };
    }
    if (occurrences > 1 && !input?.replace_all) {
      return {
        output: `Found ${occurrences} matches for old_string in ${displayPath(ctx.cwd, abs)}. Add more surrounding context to make it unique, or set replace_all: true.`,
        isError: true,
      };
    }

    // At this point either replace_all is set or there is exactly one match,
    // so re-joining the segments performs the whole edit. Unlike
    // `String.prototype.replace` with a string replacement, join() never
    // expands `$&`, `$1`, `$$` etc. inside new_string.
    const updated = segments.join(newStr);
    try {
      fs.writeFileSync(abs, updated, 'utf-8');
    } catch (err: any) {
      return { output: `Write failed: ${err.message}`, isError: true };
    }
    return { output: `Edited ${displayPath(ctx.cwd, abs)} (${occurrences} ${occurrences === 1 ? 'match' : 'matches'} replaced).` };
  },
};
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Workspace path safety.
3
+ *
4
+ * The pi harness runs untrusted model output. Every file-touching tool must
5
+ * resolve its path through `safeResolve()` so the agent can't read or write
6
+ * outside the workspace via traversal (`../../etc/passwd`) or absolute paths.
7
+ */
8
+ import path from 'path';
9
+ import fs from 'fs';
10
+
11
/**
 * Resolve a model-supplied path and guarantee it stays inside the workspace.
 *
 * Relative paths are joined onto the real (symlink-free) workspace root;
 * absolute paths are normalised as given. The candidate is then resolved
 * through any symlinks on its existing portion before the containment check,
 * so a link inside the workspace that points outside is rejected.
 *
 * @param cwd Workspace root directory (must exist).
 * @param requested Path supplied by the caller, relative or absolute.
 * @returns The resolved absolute path inside the workspace.
 * @throws Error `Missing file path` when `requested` is empty or not a string.
 * @throws Error `Path escapes workspace: …` when the path resolves outside the root.
 */
export function safeResolve(cwd: string, requested: string): string {
  if (!requested || typeof requested !== 'string') {
    throw new Error('Missing file path');
  }
  const root = fs.realpathSync(cwd);
  const lexical = path.isAbsolute(requested)
    ? path.normalize(requested)
    : path.join(root, requested);
  const abs = resolveExistingPrefix(lexical, requested);
  const rel = path.relative(root, abs);
  // Only a real parent-directory hop escapes; names such as `..hidden` do not.
  const escapes = rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel);
  if (escapes) {
    throw new Error(`Path escapes workspace: ${requested}`);
  }
  return abs;
}

/**
 * Resolve symlinks on the deepest existing ancestor of `target`, then
 * re-append the components that do not exist yet (files about to be created).
 */
function resolveExistingPrefix(target: string, requested: string): string {
  const missing: string[] = [];
  let probe = target;
  for (;;) {
    try {
      return path.join(fs.realpathSync(probe), ...missing);
    } catch {
      // The entry exists but cannot be resolved (dangling or looping link):
      // writing through it could land outside the workspace, so refuse.
      if (entryExists(probe)) {
        throw new Error(`Path escapes workspace: ${requested}`);
      }
      const parent = path.dirname(probe);
      if (parent === probe) return target; // reached the filesystem root
      missing.unshift(path.basename(probe));
      probe = parent;
    }
  }
}

/** True when a directory entry exists at `p`, without following a final symlink. */
function entryExists(p: string): boolean {
  try {
    fs.lstatSync(p);
    return true;
  } catch {
    return false;
  }
}
25
+
26
/**
 * Produce a short path for messages: `abs` expressed relative to `cwd`.
 * When the two are the same location the relative form is empty, so the
 * final path component of `abs` is returned instead.
 */
export function displayPath(cwd: string, abs: string): string {
  const relative = path.relative(cwd, abs);
  if (relative === '') return path.basename(abs);
  return relative;
}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * Read tool — fetches a file's contents, optionally with line-range slicing.
3
+ *
4
+ * Output mirrors what Claude SDK's Read tool produces so the model — which
5
+ * was trained against that format — uses it correctly. Line numbers are
6
+ * prefixed with 1-based indices padded for alignment.
7
+ */
8
+ import fs from 'fs';
9
+ import path from 'path';
10
+ import type { PiTool } from './types.js';
11
+ import { safeResolve, displayPath } from './path-safety.js';
12
+
13
+ const MAX_BYTES = 256 * 1024; // 256 KB cap per read
14
+ const DEFAULT_LIMIT = 2000; // default line cap
15
+
16
/**
 * Prefix every line of `text` with its line number, counting from
 * `startLine`. Numbers are left-padded with spaces to six characters and
 * separated from the line content by a tab.
 */
function formatWithLineNumbers(text: string, startLine: number): string {
  const numbered: string[] = [];
  let lineNo = startLine;
  for (const line of text.split('\n')) {
    numbered.push(`${String(lineNo).padStart(6, ' ')}\t${line}`);
    lineNo += 1;
  }
  return numbered.join('\n');
}
23
+
24
/**
 * Read tool: returns a workspace file's contents with line-number prefixes,
 * optionally restricted to a line range.
 *
 * - `offset` is the 1-based first line (default 1); `limit` is the number of
 *   lines (default and maximum DEFAULT_LIMIT). Both are truncated to integers.
 * - Files larger than MAX_BYTES are refused outright; the whole file is
 *   loaded before slicing, so a range cannot bypass the cap.
 * - A file with no non-whitespace content is reported as empty; an `offset`
 *   past the last line is reported as such rather than as an empty file.
 * - Path, stat and read failures are returned as `isError` results.
 */
export const readTool: PiTool = {
  name: 'Read',
  description: 'Read a file from the workspace. Use this to inspect existing code, configuration, or data files.',
  inputSchema: {
    type: 'object',
    properties: {
      file_path: { type: 'string', description: 'Path to the file. Relative paths resolve against the workspace root.' },
      offset: { type: 'integer', description: '1-based line number to start at (default 1).', minimum: 1 },
      limit: { type: 'integer', description: 'How many lines to return (default 2000, max 2000).', minimum: 1 },
    },
    required: ['file_path'],
  },

  async run(input, ctx) {
    let abs: string;
    try {
      abs = safeResolve(ctx.cwd, input?.file_path);
    } catch (err: any) {
      return { output: err.message, isError: true };
    }
    const shown = displayPath(ctx.cwd, abs);

    let raw: string;
    try {
      if (!fs.existsSync(abs)) {
        return { output: `File not found: ${shown}`, isError: true };
      }
      const stat = fs.statSync(abs);
      if (stat.isDirectory()) {
        return { output: `Path is a directory, not a file: ${shown}`, isError: true };
      }
      if (stat.size > MAX_BYTES) {
        // No offset/limit hint here: the cap applies to the whole file.
        return { output: `File too large (${stat.size} bytes; max ${MAX_BYTES}).`, isError: true };
      }
      raw = fs.readFileSync(abs, 'utf-8');
    } catch (err: any) {
      // The file vanished or became unreadable between the checks and the read.
      return { output: `Read failed: ${err.message}`, isError: true };
    }

    if (!raw.trim()) {
      return { output: `(file ${shown} is empty)` };
    }

    const allLines = raw.split('\n');
    const offset = Math.max(1, Math.trunc(Number(input?.offset)) || 1);
    const limit = Math.min(DEFAULT_LIMIT, Math.max(1, Math.trunc(Number(input?.limit)) || DEFAULT_LIMIT));
    if (offset > allLines.length) {
      return { output: `(file ${shown} has only ${allLines.length} lines; nothing to show from line ${offset})` };
    }

    const end = offset - 1 + limit; // exclusive 0-based end index
    const slice = allLines.slice(offset - 1, end).join('\n');
    const truncatedNote = end < allLines.length
      ? `\n\n[Truncated — file has ${allLines.length} lines; showed ${offset}–${offset + limit - 1}.]`
      : '';
    return { output: formatWithLineNumbers(slice, offset) + truncatedNote };
  },
};
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Tool registry — the bag of tools the pi session passes to the model.
3
+ *
4
+ * Phase 2 ships the four core coding tools. Phase 3 or later will add Grep,
5
+ * Glob, LS, NotebookEdit, etc. so the surface fully matches Claude SDK's.
6
+ */
7
+ import type { PiTool } from './types.js';
8
+ import type { PiToolDef } from '../providers/types.js';
9
+ import { readTool } from './read.js';
10
+ import { writeTool } from './write.js';
11
+ import { editTool } from './edit.js';
12
+ import { bashTool } from './bash.js';
13
+
14
/** Every tool the pi harness ships, in the order it is advertised. */
export const PI_TOOLS: PiTool[] = [readTool, writeTool, editTool, bashTool];

/**
 * Lookup table from tool name to tool. Each tool is registered under its
 * declared name and under its lowercased name, so a call that differs only
 * by being lowercased still resolves.
 */
const TOOL_BY_NAME = new Map<string, PiTool>();
PI_TOOLS.forEach((tool) => {
  TOOL_BY_NAME.set(tool.name, tool);
  TOOL_BY_NAME.set(tool.name.toLowerCase(), tool);
});
23
+
24
/**
 * Look a tool up by name: an exact match is tried first, then the
 * lowercased name. Returns `undefined` when neither is registered.
 */
export function findTool(name: string): PiTool | undefined {
  const exact = TOOL_BY_NAME.get(name);
  if (exact) return exact;
  return TOOL_BY_NAME.get(name.toLowerCase());
}
27
+
28
/**
 * Build the provider-facing declarations for every tool in `PI_TOOLS`:
 * name, description and input schema only, without the `run` implementation.
 */
export function toolDefsForProvider(): PiToolDef[] {
  const defs: PiToolDef[] = [];
  for (const { name, description, inputSchema } of PI_TOOLS) {
    defs.push({ name, description, inputSchema });
  }
  return defs;
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * Tool runtime contract for the pi harness.
3
+ *
4
+ * Tool names match the Claude Agent SDK's so the existing system prompt
5
+ * (`worker/prompts/...`) — which advertises Read / Write / Edit / Bash —
6
+ * keeps working without rewriting prompts per harness. Schemas are JSON
7
+ * Schema in the shape Google's `functionDeclarations.parameters` accepts.
8
+ */
9
+
10
/** Outcome of one tool invocation. */
export interface PiToolResult {
  /** Text handed back to the model as the tool's output. */
  output: string;
  /** Set to `true` when the tool failed, so the model is told and can recover. */
  isError?: boolean;
}
16
+
17
/** Per-invocation environment passed to a tool's `run`. */
export interface PiToolContext {
  /** Workspace root; tools resolve their paths against it. */
  cwd: string;
  /** Aborted when the session ends, so long-running tools can stop quickly. */
  signal?: AbortSignal;
}
23
+
24
/** A tool the model can call: its declaration plus its implementation. */
export interface PiTool {
  /** Name the model uses to invoke the tool. */
  name: string;
  /** Description advertised to the model. */
  description: string;
  /** JSON Schema describing the tool's input object. */
  inputSchema: Record<string, any>;
  /** Execute the tool with the model-supplied `input` in the given context. */
  run(input: any, ctx: PiToolContext): Promise<PiToolResult>;
}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Write tool — overwrites or creates a file inside the workspace.
3
+ */
4
+ import fs from 'fs';
5
+ import path from 'path';
6
+ import type { PiTool } from './types.js';
7
+ import { safeResolve, displayPath } from './path-safety.js';
8
+
9
+ const MAX_BYTES = 1024 * 1024; // 1 MB cap to avoid runaway writes
10
+
11
/**
 * Write tool: creates or overwrites a workspace file with the given content,
 * creating parent directories as needed.
 *
 * - `content` must be a string. A missing or non-string value is rejected
 *   rather than silently truncating the target file to empty.
 * - The MAX_BYTES cap and the reported size are measured in UTF-8 bytes.
 * - Path and filesystem failures are returned as `isError` results.
 */
export const writeTool: PiTool = {
  name: 'Write',
  description: 'Create or overwrite a file in the workspace with the given content. Creates parent directories as needed.',
  inputSchema: {
    type: 'object',
    properties: {
      file_path: { type: 'string', description: 'Destination path. Relative paths resolve against the workspace root.' },
      content: { type: 'string', description: 'Full file contents.' },
    },
    required: ['file_path', 'content'],
  },

  async run(input, ctx) {
    let abs: string;
    try {
      abs = safeResolve(ctx.cwd, input?.file_path);
    } catch (err: any) {
      return { output: err.message, isError: true };
    }
    if (typeof input?.content !== 'string') {
      return { output: 'content is required and must be a string.', isError: true };
    }
    const content: string = input.content;
    // `content.length` counts UTF-16 code units; the cap is defined in bytes.
    const byteLength = Buffer.byteLength(content, 'utf-8');
    if (byteLength > MAX_BYTES) {
      return { output: `Content too large (${byteLength} bytes; max ${MAX_BYTES}).`, isError: true };
    }
    try {
      fs.mkdirSync(path.dirname(abs), { recursive: true });
      fs.writeFileSync(abs, content, 'utf-8');
      return { output: `Wrote ${byteLength} bytes to ${displayPath(ctx.cwd, abs)}` };
    } catch (err: any) {
      return { output: `Write failed: ${err.message}`, isError: true };
    }
  },
};