bloby-bot 0.47.5 → 0.47.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/supervisor/harnesses/pi/index.ts +3 -0
- package/supervisor/harnesses/pi/providers/stream-google.ts +72 -12
- package/supervisor/harnesses/pi/session.ts +108 -32
- package/supervisor/harnesses/pi/tools/bash.ts +109 -0
- package/supervisor/harnesses/pi/tools/edit.ts +66 -0
- package/supervisor/harnesses/pi/tools/path-safety.ts +29 -0
- package/supervisor/harnesses/pi/tools/read.ts +71 -0
- package/supervisor/harnesses/pi/tools/registry.ts +34 -0
- package/supervisor/harnesses/pi/tools/types.ts +29 -0
- package/supervisor/harnesses/pi/tools/write.ts +42 -0
package/package.json
CHANGED
|
@@ -30,6 +30,7 @@ import { getPiSubProvider } from './sub-providers.js';
|
|
|
30
30
|
import { readPiAuth } from './auth-storage.js';
|
|
31
31
|
import { streamProvider } from './providers/stream.js';
|
|
32
32
|
import type { PiMessage } from './providers/types.js';
|
|
33
|
+
import { toolDefsForProvider } from './tools/registry.js';
|
|
33
34
|
|
|
34
35
|
// ── Live conversation state ────────────────────────────────────────────────
|
|
35
36
|
|
|
@@ -211,6 +212,8 @@ export async function startConversation(
|
|
|
211
212
|
baseUrl: auth.baseUrl,
|
|
212
213
|
apiKey: auth.apiKey,
|
|
213
214
|
systemPrompt,
|
|
215
|
+
tools: toolDefsForProvider(),
|
|
216
|
+
cwd: WORKSPACE_DIR,
|
|
214
217
|
abortController,
|
|
215
218
|
onEvent: (evt: PiSessionEvent) => {
|
|
216
219
|
translateAndEmit(conv, evt);
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
* Endpoint: POST {baseUrl}/models/{modelId}:streamGenerateContent?alt=sse&key={apiKey}
|
|
9
9
|
* Stream: SSE — each `data: {...}` is one candidate update.
|
|
10
10
|
*/
|
|
11
|
+
import crypto from 'crypto';
|
|
11
12
|
import { log } from '../../../../shared/logger.js';
|
|
12
13
|
import type {
|
|
13
14
|
PiStreamRequest,
|
|
@@ -85,19 +86,54 @@ function parseSseEvent(raw: string): any | undefined {
|
|
|
85
86
|
}
|
|
86
87
|
|
|
87
88
|
/**
 * Translate a pi message role into one of the two roles used for Gemini
 * `contents` entries.
 *
 * Only `assistant` maps to `model`. Every other role — including the turns
 * that carry tool results as `functionResponse` parts — is sent as `user`.
 */
function toGeminiRole(role: PiMessage['role']): 'user' | 'model' {
  return role === 'assistant' ? 'model' : 'user';
}
|
|
90
94
|
|
|
91
95
|
/**
 * Convert pi content blocks into Gemini `parts`.
 *
 * - `text`        → `{ text }`
 * - `image`       → `{ inlineData: { mimeType, data } }`
 * - `tool_use`    → `{ functionCall: { name, args } }` (missing input becomes `{}`)
 * - `tool_result` → `{ functionResponse: { name, response } }`, where the
 *   response is keyed `error` or `output` depending on `isError`, and the name
 *   is recovered from the tool-use id.
 *
 * Blocks of any other type are dropped.
 */
function toGeminiParts(content: PiContentBlock[]): any[] {
  const parts: any[] = [];
  for (const block of content) {
    switch (block.type) {
      case 'text':
        parts.push({ text: block.text });
        break;
      case 'image':
        parts.push({ inlineData: { mimeType: block.mediaType, data: block.data } });
        break;
      case 'tool_use':
        // Assistant turn: the model asked to invoke a tool.
        parts.push({ functionCall: { name: block.name, args: block.input || {} } });
        break;
      case 'tool_result': {
        // The payload is wrapped in a small object so the model can tell a
        // failed call (`error`) from a successful one (`output`).
        const response = block.isError ? { error: block.content } : { output: block.content };
        const name = extractToolName(block.toolUseId);
        parts.push({ functionResponse: { name, response } });
        break;
      }
      default:
        break;
    }
  }
  return parts;
}
|
|
100
115
|
|
|
116
|
+
/**
 * Recover the tool name from a tool-use id.
 *
 * Ids generated for Gemini tool calls have the form `{name}::{uuid}`, because
 * Gemini itself supplies no call id that could be echoed back. Everything
 * before the first `::` is returned. An id with no separator, or one that
 * starts with the separator, is returned unchanged.
 */
function extractToolName(toolUseId: string): string {
  const separatorAt = toolUseId.indexOf('::');
  if (separatorAt <= 0) {
    return toolUseId;
  }
  return toolUseId.substring(0, separatorAt);
}
|
|
125
|
+
|
|
126
|
+
/**
 * Build the `tools` array for a Gemini request body.
 *
 * All tools are grouped under a single `functionDeclarations` entry. Each
 * tool's `inputSchema` is forwarded untouched as the declaration's
 * `parameters`.
 */
function toGeminiTools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
  const functionDeclarations: { name: string; description: string; parameters: Record<string, any> }[] = [];
  for (const { name, description, inputSchema } of tools) {
    functionDeclarations.push({ name, description, parameters: inputSchema });
  }
  return [{ functionDeclarations }];
}
|
|
136
|
+
|
|
101
137
|
function mapStopReason(reason?: string): PiStopReason {
|
|
102
138
|
switch (reason) {
|
|
103
139
|
case 'STOP':
|
|
@@ -112,6 +148,7 @@ function mapStopReason(reason?: string): PiStopReason {
|
|
|
112
148
|
case 'PROHIBITED_CONTENT':
|
|
113
149
|
case 'SPII':
|
|
114
150
|
case 'OTHER':
|
|
151
|
+
case 'MALFORMED_FUNCTION_CALL':
|
|
115
152
|
return 'error';
|
|
116
153
|
default:
|
|
117
154
|
return 'end_turn';
|
|
@@ -130,6 +167,8 @@ function finishReasonMessage(reason?: string): string {
|
|
|
130
167
|
case 'PROHIBITED_CONTENT':
|
|
131
168
|
case 'SPII':
|
|
132
169
|
return `Response blocked by Gemini policy (${reason}).`;
|
|
170
|
+
case 'MALFORMED_FUNCTION_CALL':
|
|
171
|
+
return 'Gemini emitted a malformed function call. Often means the model tried to invoke a tool that wasn\'t declared, or with arguments that failed schema validation.';
|
|
133
172
|
case 'OTHER':
|
|
134
173
|
default:
|
|
135
174
|
return `Gemini stopped without producing output (finishReason=${reason || 'unknown'}).`;
|
|
@@ -160,6 +199,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
160
199
|
if (req.systemPrompt?.trim()) {
|
|
161
200
|
body.systemInstruction = { parts: [{ text: req.systemPrompt }] };
|
|
162
201
|
}
|
|
202
|
+
if (req.tools && req.tools.length > 0) {
|
|
203
|
+
body.tools = toGeminiTools(req.tools);
|
|
204
|
+
}
|
|
163
205
|
|
|
164
206
|
let res: Response;
|
|
165
207
|
try {
|
|
@@ -182,6 +224,7 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
182
224
|
}
|
|
183
225
|
|
|
184
226
|
let accumulated = '';
|
|
227
|
+
let toolCallCount = 0;
|
|
185
228
|
let lastFinish: string | undefined;
|
|
186
229
|
let promptBlockReason: string | undefined;
|
|
187
230
|
let usage: { inputTokens?: number; outputTokens?: number } | undefined;
|
|
@@ -208,6 +251,20 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
208
251
|
// Thinking models emit reasoning parts with `thought: true`. They
|
|
209
252
|
// shouldn't be shown to the user as part of the visible answer.
|
|
210
253
|
if (part?.thought) { thoughtPartCount++; continue; }
|
|
254
|
+
if (part?.functionCall && typeof part.functionCall.name === 'string') {
|
|
255
|
+
// Gemini doesn't surface a tool-call id of its own; bake the tool
|
|
256
|
+
// name into the id so the session can echo it back as a
|
|
257
|
+
// `functionResponse` referencing the same name.
|
|
258
|
+
const id = `${part.functionCall.name}::${crypto.randomUUID()}`;
|
|
259
|
+
toolCallCount++;
|
|
260
|
+
yield {
|
|
261
|
+
type: 'tool_use',
|
|
262
|
+
id,
|
|
263
|
+
name: part.functionCall.name,
|
|
264
|
+
input: part.functionCall.args || {},
|
|
265
|
+
};
|
|
266
|
+
continue;
|
|
267
|
+
}
|
|
211
268
|
if (typeof part?.text === 'string' && part.text.length > 0) {
|
|
212
269
|
accumulated += part.text;
|
|
213
270
|
yield { type: 'text_delta', delta: part.text };
|
|
@@ -234,12 +291,12 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
234
291
|
}
|
|
235
292
|
|
|
236
293
|
log.info(
|
|
237
|
-
`[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} ` +
|
|
294
|
+
`[pi/google] stream done — chunks=${chunkCount} text=${accumulated.length} toolCalls=${toolCallCount} ` +
|
|
238
295
|
`thoughtParts=${thoughtPartCount} emptyTextParts=${emptyTextPartCount} ` +
|
|
239
296
|
`finishReason=${lastFinish || 'none'} ` +
|
|
240
297
|
`promptTok=${usage?.inputTokens ?? '?'} outTok=${usage?.outputTokens ?? '?'}`,
|
|
241
298
|
);
|
|
242
|
-
if (chunkCount > 0 && !accumulated) {
|
|
299
|
+
if (chunkCount > 0 && !accumulated && toolCallCount === 0) {
|
|
243
300
|
log.info(`[pi/google] first chunk (truncated): ${firstChunkSummary}`);
|
|
244
301
|
} else if (chunkCount === 0) {
|
|
245
302
|
log.warn(`[pi/google] SSE stream parsed zero chunks — content-type=${res.headers.get('content-type') || '?'}`);
|
|
@@ -253,10 +310,9 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
253
310
|
return;
|
|
254
311
|
}
|
|
255
312
|
|
|
256
|
-
//
|
|
257
|
-
//
|
|
258
|
-
|
|
259
|
-
if (!accumulated) {
|
|
313
|
+
// Tool-only round (Gemini fires functionCall parts with no text) is valid output —
|
|
314
|
+
// the session will execute the tool, push the result, and re-stream.
|
|
315
|
+
if (!accumulated && toolCallCount === 0) {
|
|
260
316
|
const reason = lastFinish && lastFinish !== 'STOP' && lastFinish !== 'FINISH_REASON_STOP'
|
|
261
317
|
? lastFinish
|
|
262
318
|
: undefined;
|
|
@@ -268,6 +324,10 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
268
324
|
return;
|
|
269
325
|
}
|
|
270
326
|
|
|
271
|
-
yield { type: 'text_end', text: accumulated };
|
|
272
|
-
yield {
|
|
327
|
+
if (accumulated) yield { type: 'text_end', text: accumulated };
|
|
328
|
+
yield {
|
|
329
|
+
type: 'done',
|
|
330
|
+
stopReason: toolCallCount > 0 ? 'tool_use' : mapStopReason(lastFinish),
|
|
331
|
+
usage,
|
|
332
|
+
};
|
|
273
333
|
}
|
|
@@ -8,23 +8,28 @@
|
|
|
8
8
|
* - each turn streams provider events back through a single `onEvent`
|
|
9
9
|
* callback the caller hooked up
|
|
10
10
|
*
|
|
11
|
-
* Phase
|
|
12
|
-
*
|
|
13
|
-
*
|
|
11
|
+
* Phase 2: each user turn is an inner loop — provider call → if the model
|
|
12
|
+
* asked for tool calls, execute them and feed results back → call provider
|
|
13
|
+
* again — until the model finishes without requesting more tools. Tokens
|
|
14
|
+
* stream live; `text_end` only fires once at the very end of the turn so the
|
|
15
|
+
* UI doesn't display half-answers between tool rounds.
|
|
14
16
|
*
|
|
15
|
-
*
|
|
17
|
+
* Sub-agents are NOT spawned here — Bruno will add those later.
|
|
16
18
|
*/
|
|
17
19
|
import { log } from '../../../shared/logger.js';
|
|
18
20
|
import type { PiApiFlavor } from './sub-providers.js';
|
|
19
21
|
import { streamProvider } from './providers/stream.js';
|
|
20
|
-
import type { PiMessage, PiStreamEvent, PiToolDef } from './providers/types.js';
|
|
22
|
+
import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock } from './providers/types.js';
|
|
21
23
|
import type { AsyncQueue } from './async-queue.js';
|
|
24
|
+
import { findTool } from './tools/registry.js';
|
|
25
|
+
import type { PiTool } from './tools/types.js';
|
|
22
26
|
|
|
23
27
|
/** Emitted once a user message has been appended and the turn begins. */
type PiTurnStartedEvent = { type: 'turn_started' };
/** A streamed fragment of assistant text, forwarded as it arrives. */
type PiTextDeltaEvent = { type: 'text_delta'; delta: string };
/** The full assistant text for the turn; emitted once, after the last round. */
type PiTextEndEvent = { type: 'text_end'; text: string };
/** The model requested a tool invocation. */
type PiToolUseEvent = { type: 'tool_use'; id: string; name: string; input: any };
/** A requested tool finished running; `isError` marks a failed call. */
type PiToolResultEvent = { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean };
/** The turn finished without error; `usedFileTools` is true if a Write/Edit tool ran. */
type PiTurnCompleteEvent = { type: 'turn_complete'; usedFileTools: boolean };
/** The provider stream failed or reported an error. */
type PiErrorEvent = { type: 'error'; error: string };

/** Every event a pi session reports through its `onEvent` callback. */
export type PiSessionEvent =
  | PiTurnStartedEvent
  | PiTextDeltaEvent
  | PiTextEndEvent
  | PiToolUseEvent
  | PiToolResultEvent
  | PiTurnCompleteEvent
  | PiErrorEvent;
|
|
30
35
|
|
|
@@ -36,8 +41,10 @@ export interface PiSessionInit {
|
|
|
36
41
|
systemPrompt: string;
|
|
37
42
|
/** Pre-loaded history before the first new user turn. */
|
|
38
43
|
initialMessages?: PiMessage[];
|
|
39
|
-
/**
|
|
44
|
+
/** Tools the model can call this session. Empty array ⇒ chat-only. */
|
|
40
45
|
tools?: PiToolDef[];
|
|
46
|
+
/** Resolved every time a tool fires (registry → run). */
|
|
47
|
+
cwd: string;
|
|
41
48
|
maxOutputTokens?: number;
|
|
42
49
|
/** Used to interrupt in-flight provider calls when the session ends. */
|
|
43
50
|
abortController: AbortController;
|
|
@@ -53,19 +60,20 @@ export interface PiSession {
|
|
|
53
60
|
}
|
|
54
61
|
|
|
55
62
|
const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'write', 'edit']);
|
|
63
|
+
const MAX_TOOL_ROUNDS = 25;
|
|
56
64
|
|
|
57
65
|
export function createPiSession(init: PiSessionInit): PiSession {
|
|
58
66
|
const messages: PiMessage[] = init.initialMessages ? [...init.initialMessages] : [];
|
|
59
67
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
const usedTools = new Set<string>();
|
|
67
|
-
let errored = false;
|
|
68
|
+
/** One stream round — collect the assistant blocks the model emits this pass. */
|
|
69
|
+
interface RoundResult {
|
|
70
|
+
text: string;
|
|
71
|
+
toolUses: { id: string; name: string; input: any }[];
|
|
72
|
+
errored: boolean;
|
|
73
|
+
}
|
|
68
74
|
|
|
75
|
+
async function runOneRound(): Promise<RoundResult> {
|
|
76
|
+
const result: RoundResult = { text: '', toolUses: [], errored: false };
|
|
69
77
|
try {
|
|
70
78
|
const stream = streamProvider(init.flavor, {
|
|
71
79
|
modelId: init.modelId,
|
|
@@ -79,43 +87,111 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
79
87
|
});
|
|
80
88
|
|
|
81
89
|
for await (const evt of stream as AsyncIterable<PiStreamEvent>) {
|
|
82
|
-
if (init.abortController.signal.aborted)
|
|
90
|
+
if (init.abortController.signal.aborted) break;
|
|
83
91
|
switch (evt.type) {
|
|
84
92
|
case 'text_delta':
|
|
85
|
-
|
|
93
|
+
result.text += evt.delta;
|
|
86
94
|
init.onEvent({ type: 'text_delta', delta: evt.delta });
|
|
87
95
|
break;
|
|
88
96
|
case 'text_end':
|
|
89
|
-
//
|
|
90
|
-
// we
|
|
91
|
-
|
|
92
|
-
|
|
97
|
+
// Sync up with the provider's authoritative concatenation in case
|
|
98
|
+
// we missed a delta. Don't forward — we only emit text_end once
|
|
99
|
+
// at the end of the whole turn so the UI doesn't show half-answers.
|
|
100
|
+
result.text = evt.text;
|
|
93
101
|
break;
|
|
94
102
|
case 'tool_use':
|
|
95
|
-
|
|
96
|
-
usedTools.add(evt.name);
|
|
103
|
+
result.toolUses.push({ id: evt.id, name: evt.name, input: evt.input });
|
|
97
104
|
init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
|
|
98
105
|
break;
|
|
99
106
|
case 'error':
|
|
100
|
-
errored = true;
|
|
107
|
+
result.errored = true;
|
|
101
108
|
init.onEvent({ type: 'error', error: evt.error });
|
|
102
109
|
break;
|
|
103
110
|
case 'done':
|
|
104
|
-
// Loop
|
|
105
|
-
// for now `tool_use` is impossible since we don't pass tools.
|
|
111
|
+
// Loop control is by tool_use presence, not stop reason.
|
|
106
112
|
break;
|
|
107
113
|
}
|
|
108
114
|
}
|
|
109
115
|
} catch (err: any) {
|
|
110
|
-
if (init.abortController.signal.aborted)
|
|
111
|
-
|
|
112
|
-
|
|
116
|
+
if (!init.abortController.signal.aborted) {
|
|
117
|
+
result.errored = true;
|
|
118
|
+
init.onEvent({ type: 'error', error: err?.message || String(err) });
|
|
119
|
+
}
|
|
113
120
|
}
|
|
121
|
+
return result;
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/**
 * Run a single tool call requested by the model.
 *
 * Never rejects: an unknown tool name or an exception thrown by the tool is
 * converted into an `isError` result, so the failure can be fed back to the
 * model instead of aborting the turn.
 *
 * @param call The tool-use request (id, tool name, raw input).
 * @returns The tool's output text and an optional error flag.
 */
async function executeTool(call: { id: string; name: string; input: any }): Promise<{ output: string; isError?: boolean }> {
  const tool: PiTool | undefined = findTool(call.name);
  if (tool) {
    try {
      const toolCtx = { cwd: init.cwd, signal: init.abortController.signal };
      return await tool.run(call.input, toolCtx);
    } catch (err: any) {
      return { output: `Tool ${call.name} threw: ${err?.message || err}`, isError: true };
    }
  }
  // Unknown tool: list what this session advertised so the model can retry.
  const advertised = (init.tools || []).map((t) => t.name).join(', ');
  return {
    output: `Tool not found: ${call.name}. Available tools: ${advertised || 'none'}.`,
    isError: true,
  };
}
|
|
138
|
+
|
|
139
|
+
async function runOneTurn(userMsg: PiMessage): Promise<void> {
|
|
140
|
+
if (init.abortController.signal.aborted) return;
|
|
141
|
+
messages.push(userMsg);
|
|
142
|
+
init.onEvent({ type: 'turn_started' });
|
|
114
143
|
|
|
115
|
-
|
|
116
|
-
|
|
144
|
+
let accumulatedText = '';
|
|
145
|
+
const usedTools = new Set<string>();
|
|
146
|
+
let turnErrored = false;
|
|
147
|
+
|
|
148
|
+
for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
|
|
149
|
+
if (init.abortController.signal.aborted) break;
|
|
150
|
+
const { text, toolUses, errored } = await runOneRound();
|
|
151
|
+
|
|
152
|
+
// Append whatever the model produced this round to history so subsequent
|
|
153
|
+
// rounds (and the next user turn) see it.
|
|
154
|
+
const assistantContent: PiContentBlock[] = [];
|
|
155
|
+
if (text) {
|
|
156
|
+
accumulatedText += (accumulatedText && !accumulatedText.endsWith('\n') ? '\n\n' : '') + text;
|
|
157
|
+
assistantContent.push({ type: 'text', text });
|
|
158
|
+
}
|
|
159
|
+
for (const tu of toolUses) {
|
|
160
|
+
assistantContent.push({ type: 'tool_use', id: tu.id, name: tu.name, input: tu.input });
|
|
161
|
+
}
|
|
162
|
+
if (assistantContent.length > 0) {
|
|
163
|
+
messages.push({ role: 'assistant', content: assistantContent });
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
if (errored) { turnErrored = true; break; }
|
|
167
|
+
if (toolUses.length === 0) break; // model finished — exit loop
|
|
168
|
+
|
|
169
|
+
// Run every tool the model asked for this round, then feed the results
|
|
170
|
+
// back as a single user message Gemini accepts as a batch.
|
|
171
|
+
const toolResultBlocks: PiContentBlock[] = [];
|
|
172
|
+
for (const tu of toolUses) {
|
|
173
|
+
usedTools.add(tu.name);
|
|
174
|
+
if (init.abortController.signal.aborted) break;
|
|
175
|
+
log.info(`[pi/session] tool call ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`);
|
|
176
|
+
const res = await executeTool(tu);
|
|
177
|
+
init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res.isError });
|
|
178
|
+
toolResultBlocks.push({
|
|
179
|
+
type: 'tool_result',
|
|
180
|
+
toolUseId: tu.id,
|
|
181
|
+
content: res.output,
|
|
182
|
+
isError: res.isError,
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
if (toolResultBlocks.length > 0) {
|
|
186
|
+
messages.push({ role: 'user', content: toolResultBlocks });
|
|
187
|
+
}
|
|
188
|
+
// Loop continues — re-stream with the new tool results in context.
|
|
117
189
|
}
|
|
118
|
-
|
|
190
|
+
|
|
191
|
+
if (!turnErrored) {
|
|
192
|
+
if (accumulatedText) {
|
|
193
|
+
init.onEvent({ type: 'text_end', text: accumulatedText });
|
|
194
|
+
}
|
|
119
195
|
const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
|
|
120
196
|
init.onEvent({ type: 'turn_complete', usedFileTools });
|
|
121
197
|
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bash tool — runs a shell command in the workspace.
|
|
3
|
+
*
|
|
4
|
+
* Stays small on purpose: combined stdout+stderr, hard timeout, kills the
|
|
5
|
+
* process on session abort. No interactive subshells, no background jobs.
|
|
6
|
+
*/
|
|
7
|
+
import { spawn } from 'child_process';
|
|
8
|
+
import type { PiTool } from './types.js';
|
|
9
|
+
|
|
10
|
+
const DEFAULT_TIMEOUT_MS = 60_000;
const HARD_TIMEOUT_MS = 5 * 60_000;
const OUTPUT_CAP_BYTES = 200 * 1024; // 200 KB; matches Claude SDK's behavior

/**
 * Cut `text` so its UTF-8 encoding is at most `maxBytes` long, without
 * splitting a multi-byte character.
 */
function clipUtf8(text: string, maxBytes: number): string {
  const bytes = Buffer.from(text, 'utf-8');
  if (bytes.length <= maxBytes) return text;
  let end = Math.max(0, maxBytes);
  // `bytes[end]` is the first excluded byte. While it is a continuation byte
  // (10xxxxxx) the cut is mid-character, so back up to that character's start.
  while (end > 0 && ((bytes[end] ?? 0) & 0xc0) === 0x80) end--;
  return bytes.toString('utf-8', 0, end);
}

/**
 * Bash tool: runs `bash -lc <command>` in the workspace and resolves with the
 * combined stdout+stderr.
 *
 * - Output is capped at `OUTPUT_CAP_BYTES` UTF-8 bytes; a note is appended
 *   when it was truncated.
 * - The command is killed after `timeout` ms (default 60 s, clamped to
 *   1 s – 5 min) or when `ctx.signal` aborts.
 * - A non-zero exit code, a timeout, an abort or a spawn failure all resolve
 *   with `isError: true`. The returned promise never rejects.
 */
export const bashTool: PiTool = {
  name: 'Bash',
  description:
    'Run a shell command in the workspace and return its combined stdout+stderr. Use this for non-interactive commands only — no editors, no long-running servers.',
  inputSchema: {
    type: 'object',
    properties: {
      command: { type: 'string', description: 'The shell command to execute.' },
      description: { type: 'string', description: 'A short description (5–10 words) of what the command does.' },
      timeout: { type: 'integer', description: 'Timeout in milliseconds (default 60 000, max 300 000).' },
    },
    required: ['command'],
  },

  async run(input, ctx) {
    const command = typeof input?.command === 'string' ? input.command : '';
    if (!command.trim()) return { output: 'command is required.', isError: true };

    // An already-aborted signal never fires 'abort' again, so the listener
    // below would not run. Refuse to start the command at all.
    if (ctx.signal?.aborted) {
      return { output: 'Command aborted (session ended).', isError: true };
    }

    const requestedTimeout = Number(input?.timeout) || DEFAULT_TIMEOUT_MS;
    const timeout = Math.min(HARD_TIMEOUT_MS, Math.max(1000, requestedTimeout));

    return await new Promise((resolve) => {
      let out = '';
      let capturedBytes = 0;
      let truncated = false;
      let timedOut = false;
      let settled = false;

      // On POSIX the shell gets its own process group, so a kill reaches
      // everything it started. Killing only the shell would leave pipelines
      // and background children alive; they hold the output pipes open and
      // delay 'close' past the timeout.
      const ownGroup = process.platform !== 'win32';

      // NOTE(review): the full supervisor environment, including any secrets
      // it holds, is visible to the model-chosen command. Confirm this is
      // intended.
      const child = spawn('bash', ['-lc', command], {
        cwd: ctx.cwd,
        env: process.env,
        stdio: ['ignore', 'pipe', 'pipe'],
        detached: ownGroup,
      });

      // Let the streams decode UTF-8, so a character split across two chunks
      // is reassembled instead of turning into replacement characters.
      child.stdout?.setEncoding('utf-8');
      child.stderr?.setEncoding('utf-8');

      const append = (chunk: string | Buffer) => {
        if (truncated) return;
        const text = typeof chunk === 'string' ? chunk : chunk.toString('utf-8');
        const remaining = OUTPUT_CAP_BYTES - capturedBytes;
        const size = Buffer.byteLength(text, 'utf-8');
        if (size > remaining) {
          const kept = clipUtf8(text, remaining);
          out += kept;
          capturedBytes += Buffer.byteLength(kept, 'utf-8');
          truncated = true;
          return;
        }
        out += text;
        capturedBytes += size;
      };

      child.stdout?.on('data', append);
      child.stderr?.on('data', append);

      const killTree = () => {
        try {
          if (ownGroup && child.pid !== undefined) {
            // A negative pid signals the whole process group.
            process.kill(-child.pid, 'SIGKILL');
            return;
          }
        } catch {
          // Group already gone or not signalable; fall through to the child.
        }
        try { child.kill('SIGKILL'); } catch {}
      };

      const timer = setTimeout(() => {
        timedOut = true;
        killTree();
      }, timeout);

      const onAbort = () => killTree();
      ctx.signal?.addEventListener('abort', onAbort, { once: true });

      const cleanup = () => {
        clearTimeout(timer);
        ctx.signal?.removeEventListener('abort', onAbort);
      };

      child.on('error', (err) => {
        if (settled) return;
        settled = true;
        cleanup();
        resolve({ output: `Failed to spawn command: ${err.message}`, isError: true });
      });

      child.on('close', (code, signal) => {
        if (settled) return;
        settled = true;
        cleanup();
        const tail = truncated ? `\n\n[Output truncated at ${OUTPUT_CAP_BYTES} bytes]` : '';
        if (timedOut) {
          resolve({ output: `Command timed out after ${timeout}ms.\n\n${out}${tail}`, isError: true });
          return;
        }
        if (ctx.signal?.aborted) {
          resolve({ output: 'Command aborted (session ended).', isError: true });
          return;
        }
        if (code === 0) {
          resolve({ output: (out || '(no output)') + tail });
        } else {
          resolve({
            output: `Command exited with code ${code}${signal ? ` (signal ${signal})` : ''}.\n\n${out}${tail}`,
            isError: true,
          });
        }
      });
    });
  },
};
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Edit tool — surgical string replacement in an existing file.
|
|
3
|
+
*
|
|
4
|
+
* Behavior matches Claude SDK's Edit semantics: refuses if `old_string` isn't
|
|
5
|
+
* unique (and `replace_all` is false), so the model can't accidentally edit
|
|
6
|
+
* the wrong occurrence.
|
|
7
|
+
*/
|
|
8
|
+
import fs from 'fs';
|
|
9
|
+
import type { PiTool } from './types.js';
|
|
10
|
+
import { safeResolve, displayPath } from './path-safety.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Edit tool: replaces `old_string` with `new_string` in an existing workspace
 * file.
 *
 * The call fails (`isError: true`) when the path is missing or escapes the
 * workspace, the file does not exist or cannot be read, `old_string` is empty,
 * equal to `new_string` or absent from the file, or `old_string` occurs more
 * than once and `replace_all` is not set.
 */
export const editTool: PiTool = {
  name: 'Edit',
  description:
    'Replace a unique substring in a file. Fails if `old_string` is not found, or if it appears more than once unless `replace_all` is true.',
  inputSchema: {
    type: 'object',
    properties: {
      file_path: { type: 'string', description: 'File to edit (relative to workspace).' },
      old_string: { type: 'string', description: 'The exact text to find. Include enough surrounding context to make it unique.' },
      new_string: { type: 'string', description: 'Replacement text.' },
      replace_all: { type: 'boolean', description: 'If true, replace every occurrence instead of requiring uniqueness.' },
    },
    required: ['file_path', 'old_string', 'new_string'],
  },

  async run(input, ctx) {
    let abs: string;
    try {
      abs = safeResolve(ctx.cwd, input?.file_path);
    } catch (err: any) {
      return { output: err.message, isError: true };
    }
    if (!fs.existsSync(abs)) {
      return { output: `File not found: ${displayPath(ctx.cwd, abs)}`, isError: true };
    }
    const oldStr = typeof input?.old_string === 'string' ? input.old_string : '';
    const newStr = typeof input?.new_string === 'string' ? input.new_string : '';
    if (!oldStr) return { output: 'old_string is required and cannot be empty.', isError: true };
    if (oldStr === newStr) return { output: 'old_string and new_string are identical — nothing to change.', isError: true };

    let original: string;
    try {
      original = fs.readFileSync(abs, 'utf-8');
    } catch (err: any) {
      // e.g. the path is a directory, or the file is unreadable.
      return { output: `Read failed: ${err.message}`, isError: true };
    }

    // Splitting on the literal needle gives the occurrence count and the
    // pieces needed to rebuild the file.
    const segments = original.split(oldStr);
    const occurrences = segments.length - 1;
    if (occurrences === 0) {
      return {
        output: `Did not find old_string in ${displayPath(ctx.cwd, abs)}. Check whitespace/quoting and re-read the file.`,
        isError: true,
      };
    }
    if (occurrences > 1 && !input?.replace_all) {
      return {
        output: `Found ${occurrences} matches for old_string in ${displayPath(ctx.cwd, abs)}. Add more surrounding context to make it unique, or set replace_all: true.`,
        isError: true,
      };
    }

    // Here there is either exactly one match or replace_all is set, so
    // re-joining the segments replaces every match. This deliberately avoids
    // `String.prototype.replace` with a string replacement, which would expand
    // `$&`, `$1`, `$$` etc. inside new_string and corrupt code containing `$`.
    const updated = segments.join(newStr);
    try {
      fs.writeFileSync(abs, updated, 'utf-8');
    } catch (err: any) {
      return { output: `Write failed: ${err.message}`, isError: true };
    }
    return { output: `Edited ${displayPath(ctx.cwd, abs)} (${occurrences} ${occurrences === 1 ? 'match' : 'matches'} replaced).` };
  },
};
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Workspace path safety.
|
|
3
|
+
*
|
|
4
|
+
* The pi harness runs untrusted model output. Every file-touching tool must
|
|
5
|
+
* resolve its path through `safeResolve()` so the agent can't read or write
|
|
6
|
+
* outside the workspace via traversal (`../../etc/passwd`) or absolute paths.
|
|
7
|
+
*/
|
|
8
|
+
import path from 'path';
|
|
9
|
+
import fs from 'fs';
|
|
10
|
+
|
|
11
|
+
/** True when `candidate` is `root` itself or lies underneath it. */
function isWithinRoot(root: string, candidate: string): boolean {
  const rel = path.relative(root, candidate);
  if (rel === '') return true;
  if (path.isAbsolute(rel)) return false; // e.g. a different drive on Windows
  // Only a real `..` path segment escapes; a name like `..hidden` does not.
  return rel !== '..' && !rel.startsWith(`..${path.sep}`);
}

/** True when `target` itself is a symbolic link (without following it). */
function isSymbolicLink(target: string): boolean {
  try {
    return fs.lstatSync(target).isSymbolicLink();
  } catch {
    return false;
  }
}

/**
 * Resolve symlinks in the longest existing prefix of `target` and re-attach
 * the not-yet-existing remainder. Returns `undefined` when a component is a
 * symlink that cannot be resolved (dangling or looping), since its real
 * destination cannot be verified.
 */
function resolveExistingPrefix(target: string): string | undefined {
  const pending: string[] = [];
  let cursor = target;
  for (;;) {
    try {
      return path.join(fs.realpathSync(cursor), ...pending);
    } catch {
      if (isSymbolicLink(cursor)) return undefined;
      const parent = path.dirname(cursor);
      if (parent === cursor) return target; // reached the filesystem root
      pending.unshift(path.basename(cursor));
      cursor = parent;
    }
  }
}

/**
 * Resolve a model-supplied path against the workspace and refuse anything
 * that would land outside it.
 *
 * Relative paths are joined onto the real (symlink-resolved) workspace root;
 * absolute paths are normalised as given. The containment check is made on
 * the symlink-resolved location, so neither `../` traversal nor a symlink
 * inside the workspace can reach outside it.
 *
 * @param cwd Workspace root; must exist.
 * @param requested Path supplied by the tool caller.
 * @returns The normalised absolute path (not symlink-resolved).
 * @throws Error `Missing file path` when `requested` is empty or not a string.
 * @throws Error `Path escapes workspace: …` when the target is outside `cwd`.
 * @throws The underlying filesystem error when `cwd` cannot be resolved.
 */
export function safeResolve(cwd: string, requested: string): string {
  if (!requested || typeof requested !== 'string') {
    throw new Error('Missing file path');
  }
  const root = fs.realpathSync(cwd);
  const abs = path.isAbsolute(requested)
    ? path.normalize(requested)
    : path.normalize(path.join(root, requested));
  const real = resolveExistingPrefix(abs);
  if (real === undefined || !isWithinRoot(root, real)) {
    throw new Error(`Path escapes workspace: ${requested}`);
  }
  return abs;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Produce a short path for messages: `abs` expressed relative to `cwd`.
 * When the two are the same location (empty relative path), the base name of
 * `abs` is returned instead.
 */
export function displayPath(cwd: string, abs: string): string {
  const relative = path.relative(cwd, abs);
  if (relative) {
    return relative;
  }
  return path.basename(abs);
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Read tool — fetches a file's contents, optionally with line-range slicing.
|
|
3
|
+
*
|
|
4
|
+
* Output mirrors what Claude SDK's Read tool produces so the model — which
|
|
5
|
+
* was trained against that format — uses it correctly. Line numbers are
|
|
6
|
+
* prefixed with 1-based indices padded for alignment.
|
|
7
|
+
*/
|
|
8
|
+
import fs from 'fs';
|
|
9
|
+
import path from 'path';
|
|
10
|
+
import type { PiTool } from './types.js';
|
|
11
|
+
import { safeResolve, displayPath } from './path-safety.js';
|
|
12
|
+
|
|
13
|
+
const MAX_BYTES = 256 * 1024; // 256 KB cap per read
|
|
14
|
+
const DEFAULT_LIMIT = 2000; // default line cap
|
|
15
|
+
|
|
16
|
+
/**
 * Prefix every line of `text` with its line number.
 *
 * Numbers start at `startLine`, are left-padded with spaces to six columns,
 * and are separated from the line content by a tab.
 */
function formatWithLineNumbers(text: string, startLine: number): string {
  const numbered: string[] = [];
  let lineNo = startLine;
  for (const line of text.split('\n')) {
    numbered.push(`${String(lineNo).padStart(6, ' ')}\t${line}`);
    lineNo += 1;
  }
  return numbered.join('\n');
}
|
|
23
|
+
|
|
24
|
+
/**
 * Upper bound on file size for reads that pass an explicit `offset`/`limit`.
 * Such files are loaded whole to slice the lines, so they still need a cap.
 */
const RANGED_READ_MAX_BYTES = 32 * MAX_BYTES;

/**
 * Read tool: returns a line-numbered slice of a workspace file.
 *
 * - `offset` is the 1-based first line (default 1); `limit` is the number of
 *   lines (default and maximum `DEFAULT_LIMIT`).
 * - Files larger than `MAX_BYTES` are refused unless an explicit
 *   `offset`/`limit` is given; the returned slice itself may never exceed
 *   `MAX_BYTES`.
 * - Missing files, directories, non-regular files, unreadable files and
 *   paths outside the workspace produce an `isError` result.
 */
export const readTool: PiTool = {
  name: 'Read',
  description: 'Read a file from the workspace. Use this to inspect existing code, configuration, or data files.',
  inputSchema: {
    type: 'object',
    properties: {
      file_path: { type: 'string', description: 'Path to the file. Relative paths resolve against the workspace root.' },
      offset: { type: 'integer', description: '1-based line number to start at (default 1).', minimum: 1 },
      limit: { type: 'integer', description: 'How many lines to return (default 2000, max 2000).', minimum: 1 },
    },
    required: ['file_path'],
  },

  async run(input, ctx) {
    const filePath = input?.file_path;
    let abs: string;
    try {
      abs = safeResolve(ctx.cwd, filePath);
    } catch (err: any) {
      return { output: err.message, isError: true };
    }
    if (!fs.existsSync(abs)) {
      return { output: `File not found: ${displayPath(ctx.cwd, abs)}`, isError: true };
    }
    const shown = displayPath(ctx.cwd, abs);
    const explicitRange = input?.offset != null || input?.limit != null;

    let raw: string;
    try {
      const stat = fs.statSync(abs);
      if (stat.isDirectory()) {
        return { output: `Path is a directory, not a file: ${shown}`, isError: true };
      }
      if (!stat.isFile()) {
        // Reading a FIFO, socket or device could block indefinitely.
        return { output: `Path is not a regular file: ${shown}`, isError: true };
      }
      if (stat.size > RANGED_READ_MAX_BYTES) {
        return { output: `File too large (${stat.size} bytes; max ${RANGED_READ_MAX_BYTES}).`, isError: true };
      }
      // Only refuse an oversized file when no range was requested; otherwise
      // the advice in this message could never be followed.
      if (stat.size > MAX_BYTES && !explicitRange) {
        return {
          output: `File too large (${stat.size} bytes; max ${MAX_BYTES}). Use a smaller range with offset/limit.`,
          isError: true,
        };
      }
      raw = fs.readFileSync(abs, 'utf-8');
    } catch (err: any) {
      return { output: `Read failed: ${err.message}`, isError: true };
    }

    const allLines = raw.split('\n');
    const offset = Math.max(1, Math.trunc(Number(input?.offset)) || 1);
    const limit = Math.min(DEFAULT_LIMIT, Math.max(1, Math.trunc(Number(input?.limit)) || DEFAULT_LIMIT));

    if (offset > allLines.length) {
      // Starting past the end is not the same as the file being empty.
      return { output: `(file ${shown} has ${allLines.length} lines; nothing to show from line ${offset})` };
    }

    const slice = allLines.slice(offset - 1, offset - 1 + limit).join('\n');
    const sliceBytes = Buffer.byteLength(slice, 'utf-8');
    if (sliceBytes > MAX_BYTES) {
      return {
        output: `Requested range too large (${sliceBytes} bytes; max ${MAX_BYTES}). Use a smaller limit.`,
        isError: true,
      };
    }
    const truncatedNote = (offset - 1 + limit) < allLines.length
      ? `\n\n[Truncated — file has ${allLines.length} lines; showed ${offset}–${offset + limit - 1}.]`
      : '';
    if (!slice.trim()) {
      return { output: `(file ${shown} is empty${truncatedNote ? ` past line ${offset}` : ''})` };
    }
    return { output: formatWithLineNumbers(slice, offset) + truncatedNote };
  },
};
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool registry — the bag of tools the pi session passes to the model.
|
|
3
|
+
*
|
|
4
|
+
* Phase 2 ships the four core coding tools. Phase 3 or later will add Grep,
|
|
5
|
+
* Glob, LS, NotebookEdit, etc. so the surface fully matches Claude SDK's.
|
|
6
|
+
*/
|
|
7
|
+
import type { PiTool } from './types.js';
|
|
8
|
+
import type { PiToolDef } from '../providers/types.js';
|
|
9
|
+
import { readTool } from './read.js';
|
|
10
|
+
import { writeTool } from './write.js';
|
|
11
|
+
import { editTool } from './edit.js';
|
|
12
|
+
import { bashTool } from './bash.js';
|
|
13
|
+
|
|
14
|
+
/** Every tool offered to the model, in the order they are declared to it. */
export const PI_TOOLS: PiTool[] = [readTool, writeTool, editTool, bashTool];

/**
 * Name → tool lookup. Each tool is registered under its exact name and under
 * the lower-cased name, so a call that differs only in casing still resolves.
 */
const TOOL_BY_NAME = new Map<string, PiTool>(
  PI_TOOLS.flatMap((tool): [string, PiTool][] => [
    [tool.name, tool],
    [tool.name.toLowerCase(), tool],
  ]),
);
|
|
23
|
+
|
|
24
|
+
/**
 * Look up a tool by name: exact match first, then the lower-cased name.
 * Returns `undefined` when no tool is registered under either spelling.
 */
export function findTool(name: string): PiTool | undefined {
  const exact = TOOL_BY_NAME.get(name);
  if (exact) {
    return exact;
  }
  return TOOL_BY_NAME.get(name.toLowerCase());
}
|
|
27
|
+
|
|
28
|
+
/**
 * Describe every registered tool for the provider request: only `name`,
 * `description` and `inputSchema` are included, never the `run` function.
 */
export function toolDefsForProvider(): PiToolDef[] {
  const defs: PiToolDef[] = [];
  for (const { name, description, inputSchema } of PI_TOOLS) {
    defs.push({ name, description, inputSchema });
  }
  return defs;
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool runtime contract for the pi harness.
|
|
3
|
+
*
|
|
4
|
+
* Tool names match the Claude Agent SDK's so the existing system prompt
|
|
5
|
+
* (`worker/prompts/...`) — which advertises Read / Write / Edit / Bash —
|
|
6
|
+
* keeps working without rewriting prompts per harness. Schemas are JSON
|
|
7
|
+
* Schema in the shape Google's `functionDeclarations.parameters` accepts.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/** What a tool hands back after running. */
export interface PiToolResult {
  /** Text that is fed back to the model as the tool's output. */
  output: string;
  /** Set to `true` when the tool failed, so the model is told and can recover. */
  isError?: boolean;
}
|
|
16
|
+
|
|
17
|
+
/** Per-call environment passed to a tool's `run`. */
export interface PiToolContext {
  /** Workspace root; tools resolve the paths they are given against it. */
  cwd: string;
  /** Aborted when the session ends, so long-running tools can stop quickly. */
  signal?: AbortSignal;
}
|
|
23
|
+
|
|
24
|
+
/** A tool the model can call: its declaration plus the code that runs it. */
export interface PiTool {
  /** Name the tool is declared and looked up under. */
  name: string;
  /** Human-readable description sent to the model with the declaration. */
  description: string;
  /** Schema describing the tool's input object. */
  inputSchema: Record<string, any>;
  /**
   * Execute the tool.
   *
   * @param input Raw arguments supplied by the model (unvalidated).
   * @param ctx Workspace root and optional abort signal for this call.
   */
  run(input: any, ctx: PiToolContext): Promise<PiToolResult>;
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Write tool — overwrites or creates a file inside the workspace.
|
|
3
|
+
*/
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
import type { PiTool } from './types.js';
|
|
7
|
+
import { safeResolve, displayPath } from './path-safety.js';
|
|
8
|
+
|
|
9
|
+
const MAX_BYTES = 1024 * 1024; // 1 MB cap to avoid runaway writes
|
|
10
|
+
|
|
11
|
+
/**
 * Write tool: creates or overwrites a workspace file, creating parent
 * directories as needed.
 *
 * The call fails (`isError: true`) when the path is missing or escapes the
 * workspace, `content` is not a string, the UTF-8 encoded content exceeds
 * `MAX_BYTES`, or the filesystem write fails.
 */
export const writeTool: PiTool = {
  name: 'Write',
  description: 'Create or overwrite a file in the workspace with the given content. Creates parent directories as needed.',
  inputSchema: {
    type: 'object',
    properties: {
      file_path: { type: 'string', description: 'Destination path. Relative paths resolve against the workspace root.' },
      content: { type: 'string', description: 'Full file contents.' },
    },
    required: ['file_path', 'content'],
  },

  async run(input, ctx) {
    let abs: string;
    try {
      abs = safeResolve(ctx.cwd, input?.file_path);
    } catch (err: any) {
      return { output: err.message, isError: true };
    }
    // A missing or malformed `content` must not be treated as "": that would
    // silently truncate an existing file. An explicit empty string is allowed.
    if (typeof input?.content !== 'string') {
      return { output: 'content is required and must be a string.', isError: true };
    }
    const content: string = input.content;
    // The cap and the reported size are in bytes, so measure the encoded
    // length rather than the UTF-16 code-unit count.
    const byteLength = Buffer.byteLength(content, 'utf-8');
    if (byteLength > MAX_BYTES) {
      return { output: `Content too large (${byteLength} bytes; max ${MAX_BYTES}).`, isError: true };
    }
    try {
      fs.mkdirSync(path.dirname(abs), { recursive: true });
      fs.writeFileSync(abs, content, 'utf-8');
      return { output: `Wrote ${byteLength} bytes to ${displayPath(ctx.cwd, abs)}` };
    } catch (err: any) {
      return { output: `Write failed: ${err.message}`, isError: true };
    }
  },
};
|