bloby-bot 0.70.8 → 0.70.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist-bloby/assets/{bloby-CXmOcb1r.js → bloby-DSNB0g4w.js} +4 -4
- package/dist-bloby/assets/{globals-DpO5tO92.js → globals-B3cTbITX.js} +1 -1
- package/dist-bloby/assets/{highlighted-body-OFNGDK62-D7cU1Y-Z.js → highlighted-body-OFNGDK62-BLforpkr.js} +1 -1
- package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +1 -0
- package/dist-bloby/assets/{onboard-B96ELhXn.js → onboard-Dn2Ws_G2.js} +1 -1
- package/dist-bloby/bloby.html +2 -2
- package/dist-bloby/onboard.html +2 -2
- package/package.json +1 -1
- package/scripts/sync-pi-models.ts +37 -6
- package/supervisor/chat/OnboardWizard.tsx +4 -4
- package/supervisor/harnesses/pi/async-queue.ts +7 -11
- package/supervisor/harnesses/pi/index.ts +475 -73
- package/supervisor/harnesses/pi/models-catalog.generated.ts +840 -210
- package/supervisor/harnesses/pi/providers/humanize-error.ts +125 -0
- package/supervisor/harnesses/pi/providers/retry.ts +87 -0
- package/supervisor/harnesses/pi/providers/stream-anthropic.ts +73 -11
- package/supervisor/harnesses/pi/providers/stream-google.ts +15 -5
- package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +55 -19
- package/supervisor/harnesses/pi/providers/types.ts +26 -1
- package/supervisor/harnesses/pi/session.ts +179 -73
- package/supervisor/harnesses/pi/sub-providers.ts +30 -1
- package/supervisor/harnesses/pi/test-completion.ts +8 -2
- package/supervisor/harnesses/pi/tools/registry.ts +25 -9
- package/supervisor/harnesses/pi/tools/task.ts +108 -0
- package/supervisor/harnesses/pi/tools/types.ts +15 -0
- package/supervisor/index.ts +11 -10
- package/supervisor/public/morphy_sad.mov +0 -0
- package/supervisor/public/morphy_sad.webm +0 -0
- package/supervisor/shell.ts +1 -1
- package/supervisor/workspace-guard.js +1 -1
- package/workspace/client/public/morphy_bounce.mov +0 -0
- package/workspace/client/public/morphy_bounce.webm +0 -0
- package/workspace/client/public/morphy_hi.mov +0 -0
- package/workspace/client/public/morphy_hi.webm +0 -0
- package/workspace/client/src/App.tsx +5 -3
- package/dist-bloby/assets/mermaid-GHXKKRXX-D5YxphBn.js +0 -1
- package/supervisor/public/what-happened.mp4 +0 -0
- package/supervisor/public/what-happened.webm +0 -0
|
@@ -4,25 +4,41 @@
|
|
|
4
4
|
* Mirrors the *shape* of the Claude harness loop in `harnesses/claude.ts`:
|
|
5
5
|
* - one long-lived session per conversation
|
|
6
6
|
* - user messages arrive via an `AsyncQueue<PiMessage>` input
|
|
7
|
-
* - the loop drains the queue
|
|
7
|
+
* - the loop drains the queue ONE MESSAGE PER TURN — exactly like the Claude
|
|
8
|
+
* SDK's input queue: each pushed message gets its own turn, its own
|
|
9
|
+
* text_end, and its own turn_complete. (An earlier design folded mid-turn
|
|
10
|
+
* messages into the in-flight turn; that broke the channel manager's
|
|
11
|
+
* one-response-per-push routing FIFO — see PI-PARITY-AUDIT-2026-06-11.md
|
|
12
|
+
* D1-1 — so queued messages now simply wait for their own turn.)
|
|
8
13
|
* - each turn streams provider events back through a single `onEvent`
|
|
9
14
|
* callback the caller hooked up
|
|
10
15
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
+
* Each user turn is an inner loop — provider call → if the model asked for
|
|
17
|
+
* tool calls, execute them and feed results back → call provider again — until
|
|
18
|
+
* the model finishes without requesting more tools. Tokens stream live;
|
|
19
|
+
* `text_end` only fires once at the very end of the turn so the UI doesn't
|
|
20
|
+
* display half-answers between tool rounds.
|
|
16
21
|
*
|
|
17
|
-
*
|
|
22
|
+
* Error precedence matches claude (audit D6-2): streamed partial text is
|
|
23
|
+
* always committed via `text_end` (the consumer persists it and the routing
|
|
24
|
+
* FIFO consumes normally); the `error` event fires only when a failed turn
|
|
25
|
+
* produced no text — except fatal kinds (auth / context-overflow), which are
|
|
26
|
+
* surfaced even after partial text so the harness can tear the session down.
|
|
27
|
+
*
|
|
28
|
+
* Auth (key/model/base URL/flavor) is resolved via `getAuth()` on every
|
|
29
|
+
* provider round (audit D6-8): fixing a revoked key or switching models in the
|
|
30
|
+
* wizard applies on the very next round, with full history intact.
|
|
31
|
+
*
|
|
32
|
+
* Sub-agents are NOT spawned here — Bruno will add those later (Phase B).
|
|
18
33
|
*/
|
|
19
34
|
import { log } from '../../../shared/logger.js';
|
|
20
35
|
import type { PiApiFlavor } from './sub-providers.js';
|
|
21
36
|
import { streamProvider } from './providers/stream.js';
|
|
22
|
-
import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock } from './providers/types.js';
|
|
37
|
+
import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock, PiUsage, PiErrorKind } from './providers/types.js';
|
|
38
|
+
import { sleep } from './providers/retry.js';
|
|
23
39
|
import type { AsyncQueue } from './async-queue.js';
|
|
24
40
|
import { findTool } from './tools/registry.js';
|
|
25
|
-
import type { PiTool } from './tools/types.js';
|
|
41
|
+
import type { PiTool, PiTaskHost } from './tools/types.js';
|
|
26
42
|
|
|
27
43
|
export type PiSessionEvent =
|
|
28
44
|
| { type: 'turn_started' }
|
|
@@ -30,14 +46,31 @@ export type PiSessionEvent =
|
|
|
30
46
|
| { type: 'text_end'; text: string }
|
|
31
47
|
| { type: 'tool_use'; id: string; name: string; input: any }
|
|
32
48
|
| { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
|
|
33
|
-
| { type: 'turn_complete'; usedFileTools: boolean }
|
|
34
|
-
| { type: 'error'; error: string };
|
|
49
|
+
| { type: 'turn_complete'; usedFileTools: boolean; usage?: PiUsage; contextWindow?: number }
|
|
50
|
+
| { type: 'error'; error: string; kind?: PiErrorKind };
|
|
35
51
|
|
|
36
|
-
|
|
52
|
+
/** Everything the providers need that can change while a session is alive. */
|
|
53
|
+
export interface PiSessionAuth {
|
|
37
54
|
flavor: PiApiFlavor;
|
|
38
55
|
modelId: string;
|
|
39
56
|
baseUrl: string;
|
|
40
57
|
apiKey: string;
|
|
58
|
+
/** Per-model output cap from the catalog; providers fall back to safe defaults. */
|
|
59
|
+
maxOutputTokens?: number;
|
|
60
|
+
/** openai-completions only: which field carries the output cap (C-2). */
|
|
61
|
+
maxTokensField?: 'max_tokens' | 'max_completion_tokens';
|
|
62
|
+
/** openai-completions only: false for strict-schema vendors that 422 on stream_options. */
|
|
63
|
+
includeStreamUsage?: boolean;
|
|
64
|
+
/** Model context window from the catalog — reported on turn_complete for the recycler. */
|
|
65
|
+
contextWindow?: number;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
export interface PiSessionInit {
|
|
69
|
+
/**
|
|
70
|
+
* Resolved on EVERY provider round (not captured once) so wizard-side
|
|
71
|
+
* key/model fixes heal a live conversation on the next round.
|
|
72
|
+
*/
|
|
73
|
+
getAuth: () => PiSessionAuth;
|
|
41
74
|
systemPrompt: string;
|
|
42
75
|
/** Pre-loaded history before the first new user turn. */
|
|
43
76
|
initialMessages?: PiMessage[];
|
|
@@ -45,7 +78,17 @@ export interface PiSessionInit {
|
|
|
45
78
|
tools?: PiToolDef[];
|
|
46
79
|
/** Resolved every time a tool fires (registry → run). */
|
|
47
80
|
cwd: string;
|
|
48
|
-
|
|
81
|
+
/**
|
|
82
|
+
* Background sub-agent host (Phase B). Set only on PARENT live sessions —
|
|
83
|
+
* threaded into PiToolContext so the Task tool can spawn; child sessions
|
|
84
|
+
* leave it unset (no grandchildren, Claude SDK parity).
|
|
85
|
+
*/
|
|
86
|
+
taskHost?: PiTaskHost;
|
|
87
|
+
/**
|
|
88
|
+
* Per-turn tool-round budget. Parents keep the default; sub-agent children
|
|
89
|
+
* get their agent config's maxTurns (e.g. coder: 50).
|
|
90
|
+
*/
|
|
91
|
+
maxToolRounds?: number;
|
|
49
92
|
/** Used to interrupt in-flight provider calls when the session ends. */
|
|
50
93
|
abortController: AbortController;
|
|
51
94
|
/** Caller's event sink — translated to bloby's `bot:*` events one layer up. */
|
|
@@ -61,28 +104,45 @@ export interface PiSession {
|
|
|
61
104
|
|
|
62
105
|
const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'MultiEdit', 'NotebookEdit', 'write', 'edit', 'multiEdit', 'notebookEdit']);
|
|
63
106
|
const MAX_TOOL_ROUNDS = 25;
|
|
107
|
+
/** Transparent re-runs of a failed round that produced nothing (audit D6-1). */
|
|
108
|
+
const MAX_ROUND_RETRIES = 2;
|
|
64
109
|
|
|
65
110
|
export function createPiSession(init: PiSessionInit): PiSession {
|
|
66
111
|
const messages: PiMessage[] = init.initialMessages ? [...init.initialMessages] : [];
|
|
67
112
|
|
|
113
|
+
// Last provider-reported usage + window, session-scoped so even an errored
|
|
114
|
+
// turn's turn_complete carries the most recent context occupancy (D2-1).
|
|
115
|
+
let lastUsage: PiUsage | undefined;
|
|
116
|
+
let lastContextWindow: number | undefined;
|
|
117
|
+
|
|
68
118
|
/** One stream round — collect the assistant blocks the model emits this pass. */
|
|
69
119
|
interface RoundResult {
|
|
70
120
|
text: string;
|
|
71
121
|
toolUses: { id: string; name: string; input: any; thoughtSignature?: string }[];
|
|
72
122
|
errored: boolean;
|
|
123
|
+
/** Stashed, NOT emitted inline — the turn decides response-vs-error precedence (D6-2). */
|
|
124
|
+
errorMsg?: string;
|
|
125
|
+
errorKind?: PiErrorKind;
|
|
126
|
+
/** True when re-sending the identical round can plausibly succeed (429/5xx/network). */
|
|
127
|
+
retryable?: boolean;
|
|
73
128
|
}
|
|
74
129
|
|
|
75
|
-
async function runOneRound(): Promise<RoundResult> {
|
|
130
|
+
async function runOneRound(emitSeparatorFirst: boolean): Promise<RoundResult> {
|
|
76
131
|
const result: RoundResult = { text: '', toolUses: [], errored: false };
|
|
132
|
+
let firstDelta = true;
|
|
77
133
|
try {
|
|
78
|
-
const
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
134
|
+
const auth = init.getAuth();
|
|
135
|
+
lastContextWindow = auth.contextWindow ?? lastContextWindow;
|
|
136
|
+
const stream = streamProvider(auth.flavor, {
|
|
137
|
+
modelId: auth.modelId,
|
|
138
|
+
baseUrl: auth.baseUrl,
|
|
139
|
+
apiKey: auth.apiKey,
|
|
82
140
|
systemPrompt: init.systemPrompt,
|
|
83
141
|
messages,
|
|
84
142
|
tools: init.tools,
|
|
85
|
-
maxOutputTokens:
|
|
143
|
+
maxOutputTokens: auth.maxOutputTokens,
|
|
144
|
+
maxTokensField: auth.maxTokensField,
|
|
145
|
+
includeStreamUsage: auth.includeStreamUsage,
|
|
86
146
|
signal: init.abortController.signal,
|
|
87
147
|
});
|
|
88
148
|
|
|
@@ -90,6 +150,14 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
90
150
|
if (init.abortController.signal.aborted) break;
|
|
91
151
|
switch (evt.type) {
|
|
92
152
|
case 'text_delta':
|
|
153
|
+
// Round separator rides BEFORE the new round's first token —
|
|
154
|
+
// claude.ts:374-379 ordering — so the streamed bytes stay a true
|
|
155
|
+
// prefix of the final bot:response even when the dashboard commits
|
|
156
|
+
// the buffer at a tool boundary mid-turn (audit D1-5/PI-SES-1).
|
|
157
|
+
if (firstDelta && emitSeparatorFirst) {
|
|
158
|
+
init.onEvent({ type: 'text_delta', delta: '\n\n' });
|
|
159
|
+
}
|
|
160
|
+
firstDelta = false;
|
|
93
161
|
result.text += evt.delta;
|
|
94
162
|
init.onEvent({ type: 'text_delta', delta: evt.delta });
|
|
95
163
|
break;
|
|
@@ -110,17 +178,24 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
110
178
|
break;
|
|
111
179
|
case 'error':
|
|
112
180
|
result.errored = true;
|
|
113
|
-
|
|
181
|
+
result.errorMsg = evt.error;
|
|
182
|
+
result.errorKind = evt.kind;
|
|
183
|
+
result.retryable = evt.retryable;
|
|
114
184
|
break;
|
|
115
185
|
case 'done':
|
|
116
|
-
// Loop control is by tool_use presence, not stop reason
|
|
186
|
+
// Loop control is by tool_use presence, not stop reason — but the
|
|
187
|
+
// usage rides here and feeds the supervisor's session recycling.
|
|
188
|
+
if (evt.usage) lastUsage = evt.usage;
|
|
117
189
|
break;
|
|
118
190
|
}
|
|
119
191
|
}
|
|
120
192
|
} catch (err: any) {
|
|
121
193
|
if (!init.abortController.signal.aborted) {
|
|
122
194
|
result.errored = true;
|
|
123
|
-
|
|
195
|
+
result.errorMsg = err?.message || String(err);
|
|
196
|
+
// A throw mid-iteration is a network/stream failure — transient.
|
|
197
|
+
result.errorKind = 'transient';
|
|
198
|
+
result.retryable = true;
|
|
124
199
|
}
|
|
125
200
|
}
|
|
126
201
|
return result;
|
|
@@ -135,51 +210,87 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
135
210
|
};
|
|
136
211
|
}
|
|
137
212
|
try {
|
|
138
|
-
return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal });
|
|
213
|
+
return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal, tasks: init.taskHost });
|
|
139
214
|
} catch (err: any) {
|
|
140
215
|
return { output: `Tool ${call.name} threw: ${err?.message || err}`, isError: true };
|
|
141
216
|
}
|
|
142
217
|
}
|
|
143
218
|
|
|
144
|
-
async function runOneTurn(
|
|
219
|
+
async function runOneTurn(userMsg: PiMessage): Promise<void> {
|
|
145
220
|
if (init.abortController.signal.aborted) return;
|
|
146
|
-
//
|
|
147
|
-
|
|
148
|
-
|
|
221
|
+
// ONE message per turn — queued messages wait for their own turn so each
|
|
222
|
+
// push gets its own bot:response (routing-FIFO invariant, audit D1-1).
|
|
223
|
+
messages.push(userMsg);
|
|
149
224
|
init.onEvent({ type: 'turn_started' });
|
|
150
225
|
|
|
151
226
|
let accumulatedText = '';
|
|
152
227
|
const usedTools = new Set<string>();
|
|
153
228
|
let turnErrored = false;
|
|
154
|
-
let
|
|
229
|
+
let turnErrorMsg: string | undefined;
|
|
230
|
+
let turnErrorKind: PiErrorKind | undefined;
|
|
155
231
|
|
|
156
|
-
|
|
232
|
+
const maxRounds = Math.max(1, init.maxToolRounds ?? MAX_TOOL_ROUNDS);
|
|
233
|
+
for (let round = 0; round < maxRounds; round++) {
|
|
157
234
|
if (init.abortController.signal.aborted) break;
|
|
158
|
-
|
|
235
|
+
// The separator condition is decided BEFORE the round so the round can
|
|
236
|
+
// emit it ahead of its first token (claude.ts ordering — see runOneRound).
|
|
237
|
+
const needsSeparator = accumulatedText.length > 0 && !accumulatedText.endsWith('\n');
|
|
238
|
+
let res = await runOneRound(needsSeparator);
|
|
239
|
+
|
|
240
|
+
// Transparent round retry (D6-1): a transient failure that produced
|
|
241
|
+
// NOTHING is safe to re-run — requests are stateless full-history
|
|
242
|
+
// resends. Never retry a round that already streamed text or tool calls.
|
|
243
|
+
for (
|
|
244
|
+
let attempt = 0;
|
|
245
|
+
attempt < MAX_ROUND_RETRIES &&
|
|
246
|
+
res.errored && res.retryable && !res.text && res.toolUses.length === 0 &&
|
|
247
|
+
!init.abortController.signal.aborted;
|
|
248
|
+
attempt++
|
|
249
|
+
) {
|
|
250
|
+
log.info(`[pi/session] transient round failure — retrying (${attempt + 1}/${MAX_ROUND_RETRIES}): ${res.errorMsg?.slice(0, 160)}`);
|
|
251
|
+
try { await sleep(1000 * 2 ** attempt, init.abortController.signal); } catch { break; }
|
|
252
|
+
res = await runOneRound(needsSeparator);
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
const { text, toolUses, errored } = res;
|
|
159
256
|
|
|
160
257
|
// Append whatever the model produced this round to history so subsequent
|
|
161
258
|
// rounds (and the next user turn) see it.
|
|
162
259
|
const assistantContent: PiContentBlock[] = [];
|
|
163
260
|
if (text) {
|
|
164
|
-
|
|
261
|
+
// Matches the separator runOneRound streamed before this round's
|
|
262
|
+
// first delta — accumulatedText and the token stream stay byte-equal.
|
|
263
|
+
if (needsSeparator) accumulatedText += '\n\n';
|
|
264
|
+
accumulatedText += text;
|
|
165
265
|
assistantContent.push({ type: 'text', text });
|
|
166
266
|
}
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
267
|
+
if (!errored) {
|
|
268
|
+
// On an errored round, keep the text but DROP the round's tool_use
|
|
269
|
+
// blocks: the turn ends before executing them, and a dangling
|
|
270
|
+
// tool_use with no tool_result poisons the history (Anthropic and
|
|
271
|
+
// Gemini reject the next request outright).
|
|
272
|
+
for (const tu of toolUses) {
|
|
273
|
+
assistantContent.push({
|
|
274
|
+
type: 'tool_use',
|
|
275
|
+
id: tu.id,
|
|
276
|
+
name: tu.name,
|
|
277
|
+
input: tu.input,
|
|
278
|
+
// Forward Gemini's thoughtSignature unchanged so the next turn's
|
|
279
|
+
// request echoes it back; without it the API rejects with 400.
|
|
280
|
+
thoughtSignature: tu.thoughtSignature,
|
|
281
|
+
});
|
|
282
|
+
}
|
|
177
283
|
}
|
|
178
284
|
if (assistantContent.length > 0) {
|
|
179
285
|
messages.push({ role: 'assistant', content: assistantContent });
|
|
180
286
|
}
|
|
181
287
|
|
|
182
|
-
if (errored) {
|
|
288
|
+
if (errored) {
|
|
289
|
+
turnErrored = true;
|
|
290
|
+
turnErrorMsg = res.errorMsg;
|
|
291
|
+
turnErrorKind = res.errorKind;
|
|
292
|
+
break;
|
|
293
|
+
}
|
|
183
294
|
|
|
184
295
|
// Run every tool the model asked for this round, then feed the results
|
|
185
296
|
// back as a single user message Gemini accepts as a batch.
|
|
@@ -188,50 +299,45 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
188
299
|
usedTools.add(tu.name);
|
|
189
300
|
if (init.abortController.signal.aborted) break;
|
|
190
301
|
log.info(`[pi/session] tool call ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`);
|
|
191
|
-
const
|
|
192
|
-
init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!
|
|
302
|
+
const res2 = await executeTool(tu);
|
|
303
|
+
init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res2.isError });
|
|
193
304
|
toolResultBlocks.push({
|
|
194
305
|
type: 'tool_result',
|
|
195
306
|
toolUseId: tu.id,
|
|
196
|
-
content:
|
|
197
|
-
isError:
|
|
307
|
+
content: res2.output,
|
|
308
|
+
isError: res2.isError,
|
|
198
309
|
});
|
|
199
310
|
}
|
|
200
311
|
if (toolResultBlocks.length > 0) {
|
|
201
312
|
messages.push({ role: 'user', content: toolResultBlocks });
|
|
202
313
|
}
|
|
203
314
|
|
|
204
|
-
//
|
|
205
|
-
|
|
206
|
-
// feel alive: while the agent is grinding on a long task, a question
|
|
207
|
-
// typed mid-stream lands in the very next request as a user-role part,
|
|
208
|
-
// and the model can answer it inline before continuing.
|
|
209
|
-
const interleaved = input.drainPending();
|
|
210
|
-
if (interleaved.length > 0) {
|
|
211
|
-
log.info(`[pi/session] interleaved ${interleaved.length} mid-turn user message(s) into history`);
|
|
212
|
-
for (const m of interleaved) messages.push(m);
|
|
213
|
-
pendingInterleave = true;
|
|
214
|
-
} else {
|
|
215
|
-
pendingInterleave = false;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
// Exit when the model has nothing more to do AND no new user messages
|
|
219
|
-
// arrived mid-round. Either condition by itself keeps the loop alive.
|
|
220
|
-
if (toolUses.length === 0 && !pendingInterleave) break;
|
|
315
|
+
// No tool calls ⇒ the model is done with this turn.
|
|
316
|
+
if (toolUses.length === 0) break;
|
|
221
317
|
}
|
|
222
318
|
|
|
223
|
-
//
|
|
224
|
-
//
|
|
225
|
-
//
|
|
226
|
-
//
|
|
227
|
-
//
|
|
228
|
-
//
|
|
319
|
+
// Turn-end emission order (audit D6-2, mirrors claude.ts:394-401):
|
|
320
|
+
// 1. text_end whenever ANY text streamed — even on errored turns, so the
|
|
321
|
+
// partial the user watched is committed, persisted, and consumes its
|
|
322
|
+
// routing-FIFO entry (the frontend's bot:error handler would
|
|
323
|
+
// otherwise erase it).
|
|
324
|
+
// 2. error only when the turn produced no text — EXCEPT fatal kinds
|
|
325
|
+
// (auth / context-overflow), which must surface regardless so the
|
|
326
|
+
// harness tears the poisoned session down.
|
|
327
|
+
// 3. turn_complete ALWAYS on a non-aborted turn — including errored
|
|
328
|
+
// paths — so the supervisor clears agentQueryActive. Skipping it
|
|
329
|
+
// wedged the flag true historically. Aborted turns are torn down via
|
|
330
|
+
// bot:conversation-ended.
|
|
229
331
|
if (!init.abortController.signal.aborted) {
|
|
230
|
-
if (
|
|
332
|
+
if (accumulatedText) {
|
|
231
333
|
init.onEvent({ type: 'text_end', text: accumulatedText });
|
|
232
334
|
}
|
|
335
|
+
const fatal = turnErrorKind === 'auth' || turnErrorKind === 'context-overflow';
|
|
336
|
+
if (turnErrored && (!accumulatedText || fatal)) {
|
|
337
|
+
init.onEvent({ type: 'error', error: turnErrorMsg || 'Provider turn failed', kind: turnErrorKind });
|
|
338
|
+
}
|
|
233
339
|
const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
|
|
234
|
-
init.onEvent({ type: 'turn_complete', usedFileTools });
|
|
340
|
+
init.onEvent({ type: 'turn_complete', usedFileTools, usage: lastUsage, contextWindow: lastContextWindow });
|
|
235
341
|
}
|
|
236
342
|
}
|
|
237
343
|
|
|
@@ -240,7 +346,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
240
346
|
for await (const userMsg of input) {
|
|
241
347
|
if (init.abortController.signal.aborted) break;
|
|
242
348
|
try {
|
|
243
|
-
await runOneTurn(
|
|
349
|
+
await runOneTurn(userMsg);
|
|
244
350
|
} catch (err: any) {
|
|
245
351
|
log.warn(`[pi/session] Turn failed: ${err?.message || err}`);
|
|
246
352
|
init.onEvent({ type: 'error', error: err?.message || String(err) });
|
|
@@ -248,7 +354,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
248
354
|
// and chat aren't wedged. Skip when aborting (teardown emits conversation-ended).
|
|
249
355
|
// usedFileTools=false is the safe default (it only governs whether to auto-restart now).
|
|
250
356
|
if (!init.abortController.signal.aborted) {
|
|
251
|
-
init.onEvent({ type: 'turn_complete', usedFileTools: false });
|
|
357
|
+
init.onEvent({ type: 'turn_complete', usedFileTools: false, usage: lastUsage, contextWindow: lastContextWindow });
|
|
252
358
|
}
|
|
253
359
|
}
|
|
254
360
|
}
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* synced from upstream pi via `npm run sync:pi-models`. Sub-providers without
|
|
16
16
|
* a pi mapping (Ollama, LM Studio, custom) stay `'dynamic'` — free-form ID.
|
|
17
17
|
*/
|
|
18
|
-
import { PI_MODELS_CATALOG } from './models-catalog.generated.js';
|
|
18
|
+
import { PI_MODELS_CATALOG, type PiCatalogModel } from './models-catalog.generated.js';
|
|
19
19
|
|
|
20
20
|
export type PiApiFlavor = 'openai-completions' | 'anthropic-messages' | 'google-gemini';
|
|
21
21
|
|
|
@@ -24,6 +24,16 @@ export interface PiSubProviderModel {
|
|
|
24
24
|
label: string;
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
/**
|
|
28
|
+
* Catalog metadata for a saved sub-provider + model pair. Drives the per-model
|
|
29
|
+
* output cap (C-5), the context-window figure the supervisor's recycler needs
|
|
30
|
+
* (D2-1), and — later — the vision gate. Returns undefined for dynamic
|
|
31
|
+
* sub-providers (OpenRouter/Ollama/LM Studio/custom) and unknown model ids.
|
|
32
|
+
*/
|
|
33
|
+
export function getCatalogModel(subProviderId: string, modelId: string): PiCatalogModel | undefined {
|
|
34
|
+
return PI_MODELS_CATALOG[subProviderId]?.find((m) => m.id === modelId);
|
|
35
|
+
}
|
|
36
|
+
|
|
27
37
|
export interface PiSubProvider {
|
|
28
38
|
id: string;
|
|
29
39
|
name: string;
|
|
@@ -41,6 +51,23 @@ export interface PiSubProvider {
|
|
|
41
51
|
models: PiSubProviderModel[] | 'dynamic';
|
|
42
52
|
/** Default model selection when the user hasn't picked one. */
|
|
43
53
|
defaultModel?: string;
|
|
54
|
+
/**
|
|
55
|
+
* openai-completions flavor only: which request field carries the output
|
|
56
|
+
* cap. OpenAI's reasoning models (gpt-5.x, o-series — 31 of 37 catalog
|
|
57
|
+
* entries) reject the legacy `max_tokens` with HTTP 400;
|
|
58
|
+
* `max_completion_tokens` is accepted by ALL OpenAI models, so the
|
|
59
|
+
* openai-api entry opts in. Other vendors stay on `max_tokens`, matching
|
|
60
|
+
* their current working behavior.
|
|
61
|
+
*/
|
|
62
|
+
maxTokensField?: 'max_tokens' | 'max_completion_tokens';
|
|
63
|
+
/**
|
|
64
|
+
* openai-completions flavor only: set true for vendors whose request schema
|
|
65
|
+
* rejects unknown fields — Mistral 422s ("Extra inputs are not permitted")
|
|
66
|
+
* on `stream_options`, so it must not receive the include_usage opt-in.
|
|
67
|
+
* (Mistral still sends usage in the final streamed chunk by default, so the
|
|
68
|
+
* provider's chunk.usage read keeps working without it.)
|
|
69
|
+
*/
|
|
70
|
+
noStreamUsage?: boolean;
|
|
44
71
|
}
|
|
45
72
|
|
|
46
73
|
function fromCatalog(key: string): PiSubProviderModel[] | 'dynamic' {
|
|
@@ -130,6 +157,7 @@ export const PI_SUB_PROVIDERS: PiSubProvider[] = [
|
|
|
130
157
|
apiKeyUrl: 'https://console.mistral.ai/api-keys/',
|
|
131
158
|
models: fromCatalog('mistral'),
|
|
132
159
|
defaultModel: defaultFor('mistral'),
|
|
160
|
+
noStreamUsage: true,
|
|
133
161
|
},
|
|
134
162
|
{
|
|
135
163
|
id: 'openai-api',
|
|
@@ -141,6 +169,7 @@ export const PI_SUB_PROVIDERS: PiSubProvider[] = [
|
|
|
141
169
|
apiKeyUrl: 'https://platform.openai.com/api-keys',
|
|
142
170
|
models: fromCatalog('openai-api'),
|
|
143
171
|
defaultModel: defaultFor('openai-api'),
|
|
172
|
+
maxTokensField: 'max_completion_tokens',
|
|
144
173
|
},
|
|
145
174
|
{
|
|
146
175
|
id: 'anthropic-api',
|
|
@@ -74,6 +74,7 @@ export async function runPiTestCompletion(input: PiTestCompletionInput): Promise
|
|
|
74
74
|
modelId,
|
|
75
75
|
apiKey: input.apiKey?.trim() || '',
|
|
76
76
|
prompt: input.prompt,
|
|
77
|
+
maxTokensField: provider.maxTokensField,
|
|
77
78
|
});
|
|
78
79
|
return { ok: true, text, modelId, subProvider: provider.id };
|
|
79
80
|
} catch (err: any) {
|
|
@@ -92,6 +93,8 @@ interface DispatchArgs {
|
|
|
92
93
|
modelId: string;
|
|
93
94
|
apiKey: string;
|
|
94
95
|
prompt: string;
|
|
96
|
+
/** openai-completions only — gpt-5.x/o-series reject the legacy max_tokens (C-2). */
|
|
97
|
+
maxTokensField?: 'max_tokens' | 'max_completion_tokens';
|
|
95
98
|
}
|
|
96
99
|
|
|
97
100
|
async function callByFlavor(flavor: PiApiFlavor, args: DispatchArgs): Promise<string> {
|
|
@@ -107,17 +110,20 @@ async function callByFlavor(flavor: PiApiFlavor, args: DispatchArgs): Promise<st
|
|
|
107
110
|
|
|
108
111
|
/* ── OpenAI / OpenAI-compatible ── */
|
|
109
112
|
|
|
110
|
-
async function callOpenAICompletions({ baseUrl, modelId, apiKey, prompt }: DispatchArgs): Promise<string> {
|
|
113
|
+
async function callOpenAICompletions({ baseUrl, modelId, apiKey, prompt, maxTokensField }: DispatchArgs): Promise<string> {
|
|
111
114
|
const headers: Record<string, string> = { 'content-type': 'application/json' };
|
|
112
115
|
if (apiKey) headers['authorization'] = `Bearer ${apiKey}`;
|
|
113
116
|
|
|
117
|
+
// Reasoning models (gpt-5.x/o-series) spend the budget on hidden reasoning
|
|
118
|
+
// first — 256 would come back as an empty message, failing a valid key.
|
|
119
|
+
const capField = maxTokensField ?? 'max_tokens';
|
|
114
120
|
const res = await timedFetch(`${baseUrl}/chat/completions`, {
|
|
115
121
|
method: 'POST',
|
|
116
122
|
headers,
|
|
117
123
|
body: JSON.stringify({
|
|
118
124
|
model: modelId,
|
|
119
125
|
messages: [{ role: 'user', content: prompt }],
|
|
120
|
-
|
|
126
|
+
[capField]: capField === 'max_completion_tokens' ? 2048 : 256,
|
|
121
127
|
stream: false,
|
|
122
128
|
}),
|
|
123
129
|
});
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tool registry — the bag of tools the pi session passes to the model.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Read/Write/Edit/Bash mirror the Claude SDK tools; Task is the background
|
|
5
|
+
* sub-agent delegator (Phase B of the parity plan). Grep, Glob, LS,
|
|
6
|
+
* NotebookEdit etc. are still pending (Phase D) to fully match Claude SDK's
|
|
7
|
+
* surface.
|
|
6
8
|
*/
|
|
7
9
|
import type { PiTool } from './types.js';
|
|
8
10
|
import type { PiToolDef } from '../providers/types.js';
|
|
@@ -10,8 +12,9 @@ import { readTool } from './read.js';
|
|
|
10
12
|
import { writeTool } from './write.js';
|
|
11
13
|
import { editTool } from './edit.js';
|
|
12
14
|
import { bashTool } from './bash.js';
|
|
15
|
+
import { taskTool, taskToolDef } from './task.js';
|
|
13
16
|
|
|
14
|
-
export const PI_TOOLS: PiTool[] = [readTool, writeTool, editTool, bashTool];
|
|
17
|
+
export const PI_TOOLS: PiTool[] = [readTool, writeTool, editTool, bashTool, taskTool];
|
|
15
18
|
|
|
16
19
|
const TOOL_BY_NAME = new Map<string, PiTool>();
|
|
17
20
|
for (const t of PI_TOOLS) {
|
|
@@ -20,15 +23,28 @@ for (const t of PI_TOOLS) {
|
|
|
20
23
|
// common aliases so we don't 404 a legitimate call over a casing nit.
|
|
21
24
|
TOOL_BY_NAME.set(t.name.toLowerCase(), t);
|
|
22
25
|
}
|
|
26
|
+
// The pi system prompt calls background delegation "the Agent tool" (claude
|
|
27
|
+
// heritage) — alias it so a model following the prompt verbatim still lands
|
|
28
|
+
// on the Task implementation.
|
|
29
|
+
TOOL_BY_NAME.set('Agent', taskTool);
|
|
30
|
+
TOOL_BY_NAME.set('agent', taskTool);
|
|
23
31
|
|
|
24
32
|
export function findTool(name: string): PiTool | undefined {
|
|
25
33
|
return TOOL_BY_NAME.get(name) || TOOL_BY_NAME.get(name.toLowerCase());
|
|
26
34
|
}
|
|
27
35
|
|
|
28
|
-
export function toolDefsForProvider(): PiToolDef[] {
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
36
|
+
export function toolDefsForProvider(opts?: { forSubagent?: boolean }): PiToolDef[] {
|
|
37
|
+
const defs: PiToolDef[] = [];
|
|
38
|
+
for (const t of PI_TOOLS) {
|
|
39
|
+
if (t.name === 'Task') {
|
|
40
|
+
// Children cannot spawn grandchildren (Claude SDK parity) — a child that
|
|
41
|
+
// hallucinates a Task call still fails gracefully (ctx.tasks is unset).
|
|
42
|
+
if (opts?.forSubagent) continue;
|
|
43
|
+
// Rebuilt fresh so agent-roster/prompt edits apply per session start.
|
|
44
|
+
defs.push(taskToolDef());
|
|
45
|
+
continue;
|
|
46
|
+
}
|
|
47
|
+
defs.push({ name: t.name, description: t.description, inputSchema: t.inputSchema });
|
|
48
|
+
}
|
|
49
|
+
return defs;
|
|
34
50
|
}
|