bloby-bot 0.70.8 → 0.70.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist-bloby/assets/{bloby-CXmOcb1r.js → bloby-DSNB0g4w.js} +4 -4
  2. package/dist-bloby/assets/{globals-DpO5tO92.js → globals-B3cTbITX.js} +1 -1
  3. package/dist-bloby/assets/{highlighted-body-OFNGDK62-D7cU1Y-Z.js → highlighted-body-OFNGDK62-BLforpkr.js} +1 -1
  4. package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +1 -0
  5. package/dist-bloby/assets/{onboard-B96ELhXn.js → onboard-Dn2Ws_G2.js} +1 -1
  6. package/dist-bloby/bloby.html +2 -2
  7. package/dist-bloby/onboard.html +2 -2
  8. package/package.json +1 -1
  9. package/scripts/sync-pi-models.ts +37 -6
  10. package/supervisor/chat/OnboardWizard.tsx +4 -4
  11. package/supervisor/harnesses/pi/async-queue.ts +7 -11
  12. package/supervisor/harnesses/pi/index.ts +475 -73
  13. package/supervisor/harnesses/pi/models-catalog.generated.ts +840 -210
  14. package/supervisor/harnesses/pi/providers/humanize-error.ts +125 -0
  15. package/supervisor/harnesses/pi/providers/retry.ts +87 -0
  16. package/supervisor/harnesses/pi/providers/stream-anthropic.ts +73 -11
  17. package/supervisor/harnesses/pi/providers/stream-google.ts +15 -5
  18. package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +55 -19
  19. package/supervisor/harnesses/pi/providers/types.ts +26 -1
  20. package/supervisor/harnesses/pi/session.ts +179 -73
  21. package/supervisor/harnesses/pi/sub-providers.ts +30 -1
  22. package/supervisor/harnesses/pi/test-completion.ts +8 -2
  23. package/supervisor/harnesses/pi/tools/registry.ts +25 -9
  24. package/supervisor/harnesses/pi/tools/task.ts +108 -0
  25. package/supervisor/harnesses/pi/tools/types.ts +15 -0
  26. package/supervisor/index.ts +11 -10
  27. package/supervisor/public/morphy_sad.mov +0 -0
  28. package/supervisor/public/morphy_sad.webm +0 -0
  29. package/supervisor/shell.ts +1 -1
  30. package/supervisor/workspace-guard.js +1 -1
  31. package/workspace/client/public/morphy_bounce.mov +0 -0
  32. package/workspace/client/public/morphy_bounce.webm +0 -0
  33. package/workspace/client/public/morphy_hi.mov +0 -0
  34. package/workspace/client/public/morphy_hi.webm +0 -0
  35. package/workspace/client/src/App.tsx +5 -3
  36. package/dist-bloby/assets/mermaid-GHXKKRXX-D5YxphBn.js +0 -1
  37. package/supervisor/public/what-happened.mp4 +0 -0
  38. package/supervisor/public/what-happened.webm +0 -0
@@ -4,25 +4,41 @@
4
4
  * Mirrors the *shape* of the Claude harness loop in `harnesses/claude.ts`:
5
5
  * - one long-lived session per conversation
6
6
  * - user messages arrive via an `AsyncQueue<PiMessage>` input
7
- * - the loop drains the queue one turn at a time
7
+ * - the loop drains the queue ONE MESSAGE PER TURN — exactly like the Claude
8
+ * SDK's input queue: each pushed message gets its own turn, its own
9
+ * text_end, and its own turn_complete. (An earlier design folded mid-turn
10
+ * messages into the in-flight turn; that broke the channel manager's
11
+ * one-response-per-push routing FIFO — see PI-PARITY-AUDIT-2026-06-11.md
12
+ * D1-1 — so queued messages now simply wait for their own turn.)
8
13
  * - each turn streams provider events back through a single `onEvent`
9
14
  * callback the caller hooked up
10
15
  *
11
- * Phase 2: each user turn is an inner loop — provider call → if the model
12
- * asked for tool calls, execute them and feed results back → call provider
13
- * again — until the model finishes without requesting more tools. Tokens
14
- * stream live; `text_end` only fires once at the very end of the turn so the
15
- * UI doesn't display half-answers between tool rounds.
16
+ * Each user turn is an inner loop — provider call → if the model asked for
17
+ * tool calls, execute them and feed results back → call provider again — until
18
+ * the model finishes without requesting more tools. Tokens stream live;
19
+ * `text_end` only fires once at the very end of the turn so the UI doesn't
20
+ * display half-answers between tool rounds.
16
21
  *
17
- * Sub-agents are NOT spawned here — Bruno will add those later.
22
+ * Error precedence matches claude (audit D6-2): streamed partial text is
23
+ * always committed via `text_end` (the consumer persists it and the routing
24
+ * FIFO consumes normally); the `error` event fires only when a failed turn
25
+ * produced no text — except fatal kinds (auth / context-overflow), which are
26
+ * surfaced even after partial text so the harness can tear the session down.
27
+ *
28
+ * Auth (key/model/base URL/flavor) is resolved via `getAuth()` on every
29
+ * provider round (audit D6-8): fixing a revoked key or switching models in the
30
+ * wizard applies on the very next round, with full history intact.
31
+ *
32
+ * Sub-agents are NOT spawned here — Bruno will add those later (Phase B).
18
33
  */
19
34
  import { log } from '../../../shared/logger.js';
20
35
  import type { PiApiFlavor } from './sub-providers.js';
21
36
  import { streamProvider } from './providers/stream.js';
22
- import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock } from './providers/types.js';
37
+ import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock, PiUsage, PiErrorKind } from './providers/types.js';
38
+ import { sleep } from './providers/retry.js';
23
39
  import type { AsyncQueue } from './async-queue.js';
24
40
  import { findTool } from './tools/registry.js';
25
- import type { PiTool } from './tools/types.js';
41
+ import type { PiTool, PiTaskHost } from './tools/types.js';
26
42
 
27
43
  export type PiSessionEvent =
28
44
  | { type: 'turn_started' }
@@ -30,14 +46,31 @@ export type PiSessionEvent =
30
46
  | { type: 'text_end'; text: string }
31
47
  | { type: 'tool_use'; id: string; name: string; input: any }
32
48
  | { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
33
- | { type: 'turn_complete'; usedFileTools: boolean }
34
- | { type: 'error'; error: string };
49
+ | { type: 'turn_complete'; usedFileTools: boolean; usage?: PiUsage; contextWindow?: number }
50
+ | { type: 'error'; error: string; kind?: PiErrorKind };
35
51
 
36
- export interface PiSessionInit {
52
+ /** Everything the providers need that can change while a session is alive. */
53
+ export interface PiSessionAuth {
37
54
  flavor: PiApiFlavor;
38
55
  modelId: string;
39
56
  baseUrl: string;
40
57
  apiKey: string;
58
+ /** Per-model output cap from the catalog; providers fall back to safe defaults. */
59
+ maxOutputTokens?: number;
60
+ /** openai-completions only: which field carries the output cap (C-2). */
61
+ maxTokensField?: 'max_tokens' | 'max_completion_tokens';
62
+ /** openai-completions only: false for strict-schema vendors that 422 on stream_options. */
63
+ includeStreamUsage?: boolean;
64
+ /** Model context window from the catalog — reported on turn_complete for the recycler. */
65
+ contextWindow?: number;
66
+ }
67
+
68
+ export interface PiSessionInit {
69
+ /**
70
+ * Resolved on EVERY provider round (not captured once) so wizard-side
71
+ * key/model fixes heal a live conversation on the next round.
72
+ */
73
+ getAuth: () => PiSessionAuth;
41
74
  systemPrompt: string;
42
75
  /** Pre-loaded history before the first new user turn. */
43
76
  initialMessages?: PiMessage[];
@@ -45,7 +78,17 @@ export interface PiSessionInit {
45
78
  tools?: PiToolDef[];
46
79
  /** Resolved every time a tool fires (registry → run). */
47
80
  cwd: string;
48
- maxOutputTokens?: number;
81
+ /**
82
+ * Background sub-agent host (Phase B). Set only on PARENT live sessions —
83
+ * threaded into PiToolContext so the Task tool can spawn; child sessions
84
+ * leave it unset (no grandchildren, Claude SDK parity).
85
+ */
86
+ taskHost?: PiTaskHost;
87
+ /**
88
+ * Per-turn tool-round budget. Parents keep the default; sub-agent children
89
+ * get their agent config's maxTurns (e.g. coder: 50).
90
+ */
91
+ maxToolRounds?: number;
49
92
  /** Used to interrupt in-flight provider calls when the session ends. */
50
93
  abortController: AbortController;
51
94
  /** Caller's event sink — translated to bloby's `bot:*` events one layer up. */
@@ -61,28 +104,45 @@ export interface PiSession {
61
104
 
62
105
  const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'MultiEdit', 'NotebookEdit', 'write', 'edit', 'multiEdit', 'notebookEdit']);
63
106
  const MAX_TOOL_ROUNDS = 25;
107
+ /** Transparent re-runs of a failed round that produced nothing (audit D6-1). */
108
+ const MAX_ROUND_RETRIES = 2;
64
109
 
65
110
  export function createPiSession(init: PiSessionInit): PiSession {
66
111
  const messages: PiMessage[] = init.initialMessages ? [...init.initialMessages] : [];
67
112
 
113
+ // Last provider-reported usage + window, session-scoped so even an errored
114
+ // turn's turn_complete carries the most recent context occupancy (D2-1).
115
+ let lastUsage: PiUsage | undefined;
116
+ let lastContextWindow: number | undefined;
117
+
68
118
  /** One stream round — collect the assistant blocks the model emits this pass. */
69
119
  interface RoundResult {
70
120
  text: string;
71
121
  toolUses: { id: string; name: string; input: any; thoughtSignature?: string }[];
72
122
  errored: boolean;
123
+ /** Stashed, NOT emitted inline — the turn decides response-vs-error precedence (D6-2). */
124
+ errorMsg?: string;
125
+ errorKind?: PiErrorKind;
126
+ /** True when re-sending the identical round can plausibly succeed (429/5xx/network). */
127
+ retryable?: boolean;
73
128
  }
74
129
 
75
- async function runOneRound(): Promise<RoundResult> {
130
+ async function runOneRound(emitSeparatorFirst: boolean): Promise<RoundResult> {
76
131
  const result: RoundResult = { text: '', toolUses: [], errored: false };
132
+ let firstDelta = true;
77
133
  try {
78
- const stream = streamProvider(init.flavor, {
79
- modelId: init.modelId,
80
- baseUrl: init.baseUrl,
81
- apiKey: init.apiKey,
134
+ const auth = init.getAuth();
135
+ lastContextWindow = auth.contextWindow ?? lastContextWindow;
136
+ const stream = streamProvider(auth.flavor, {
137
+ modelId: auth.modelId,
138
+ baseUrl: auth.baseUrl,
139
+ apiKey: auth.apiKey,
82
140
  systemPrompt: init.systemPrompt,
83
141
  messages,
84
142
  tools: init.tools,
85
- maxOutputTokens: init.maxOutputTokens,
143
+ maxOutputTokens: auth.maxOutputTokens,
144
+ maxTokensField: auth.maxTokensField,
145
+ includeStreamUsage: auth.includeStreamUsage,
86
146
  signal: init.abortController.signal,
87
147
  });
88
148
 
@@ -90,6 +150,14 @@ export function createPiSession(init: PiSessionInit): PiSession {
90
150
  if (init.abortController.signal.aborted) break;
91
151
  switch (evt.type) {
92
152
  case 'text_delta':
153
+ // Round separator rides BEFORE the new round's first token —
154
+ // claude.ts:374-379 ordering — so the streamed bytes stay a true
155
+ // prefix of the final bot:response even when the dashboard commits
156
+ // the buffer at a tool boundary mid-turn (audit D1-5/PI-SES-1).
157
+ if (firstDelta && emitSeparatorFirst) {
158
+ init.onEvent({ type: 'text_delta', delta: '\n\n' });
159
+ }
160
+ firstDelta = false;
93
161
  result.text += evt.delta;
94
162
  init.onEvent({ type: 'text_delta', delta: evt.delta });
95
163
  break;
@@ -110,17 +178,24 @@ export function createPiSession(init: PiSessionInit): PiSession {
110
178
  break;
111
179
  case 'error':
112
180
  result.errored = true;
113
- init.onEvent({ type: 'error', error: evt.error });
181
+ result.errorMsg = evt.error;
182
+ result.errorKind = evt.kind;
183
+ result.retryable = evt.retryable;
114
184
  break;
115
185
  case 'done':
116
- // Loop control is by tool_use presence, not stop reason.
186
+ // Loop control is by tool_use presence, not stop reason — but the
187
+ // usage rides here and feeds the supervisor's session recycling.
188
+ if (evt.usage) lastUsage = evt.usage;
117
189
  break;
118
190
  }
119
191
  }
120
192
  } catch (err: any) {
121
193
  if (!init.abortController.signal.aborted) {
122
194
  result.errored = true;
123
- init.onEvent({ type: 'error', error: err?.message || String(err) });
195
+ result.errorMsg = err?.message || String(err);
196
+ // A throw mid-iteration is a network/stream failure — transient.
197
+ result.errorKind = 'transient';
198
+ result.retryable = true;
124
199
  }
125
200
  }
126
201
  return result;
@@ -135,51 +210,87 @@ export function createPiSession(init: PiSessionInit): PiSession {
135
210
  };
136
211
  }
137
212
  try {
138
- return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal });
213
+ return await tool.run(call.input, { cwd: init.cwd, signal: init.abortController.signal, tasks: init.taskHost });
139
214
  } catch (err: any) {
140
215
  return { output: `Tool ${call.name} threw: ${err?.message || err}`, isError: true };
141
216
  }
142
217
  }
143
218
 
144
- async function runOneTurn(input: AsyncQueue<PiMessage>, firstUserMsg: PiMessage): Promise<void> {
219
+ async function runOneTurn(userMsg: PiMessage): Promise<void> {
145
220
  if (init.abortController.signal.aborted) return;
146
- // Stack any messages that arrived in the same millisecond into one turn.
147
- messages.push(firstUserMsg);
148
- for (const extra of input.drainPending()) messages.push(extra);
221
+ // ONE message per turn — queued messages wait for their own turn so each
222
+ // push gets its own bot:response (routing-FIFO invariant, audit D1-1).
223
+ messages.push(userMsg);
149
224
  init.onEvent({ type: 'turn_started' });
150
225
 
151
226
  let accumulatedText = '';
152
227
  const usedTools = new Set<string>();
153
228
  let turnErrored = false;
154
- let pendingInterleave = false;
229
+ let turnErrorMsg: string | undefined;
230
+ let turnErrorKind: PiErrorKind | undefined;
155
231
 
156
- for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
232
+ const maxRounds = Math.max(1, init.maxToolRounds ?? MAX_TOOL_ROUNDS);
233
+ for (let round = 0; round < maxRounds; round++) {
157
234
  if (init.abortController.signal.aborted) break;
158
- const { text, toolUses, errored } = await runOneRound();
235
+ // The separator condition is decided BEFORE the round so the round can
236
+ // emit it ahead of its first token (claude.ts ordering — see runOneRound).
237
+ const needsSeparator = accumulatedText.length > 0 && !accumulatedText.endsWith('\n');
238
+ let res = await runOneRound(needsSeparator);
239
+
240
+ // Transparent round retry (D6-1): a transient failure that produced
241
+ // NOTHING is safe to re-run — requests are stateless full-history
242
+ // resends. Never retry a round that already streamed text or tool calls.
243
+ for (
244
+ let attempt = 0;
245
+ attempt < MAX_ROUND_RETRIES &&
246
+ res.errored && res.retryable && !res.text && res.toolUses.length === 0 &&
247
+ !init.abortController.signal.aborted;
248
+ attempt++
249
+ ) {
250
+ log.info(`[pi/session] transient round failure — retrying (${attempt + 1}/${MAX_ROUND_RETRIES}): ${res.errorMsg?.slice(0, 160)}`);
251
+ try { await sleep(1000 * 2 ** attempt, init.abortController.signal); } catch { break; }
252
+ res = await runOneRound(needsSeparator);
253
+ }
254
+
255
+ const { text, toolUses, errored } = res;
159
256
 
160
257
  // Append whatever the model produced this round to history so subsequent
161
258
  // rounds (and the next user turn) see it.
162
259
  const assistantContent: PiContentBlock[] = [];
163
260
  if (text) {
164
- accumulatedText += (accumulatedText && !accumulatedText.endsWith('\n') ? '\n\n' : '') + text;
261
+ // Matches the separator runOneRound streamed before this round's
262
+ // first delta — accumulatedText and the token stream stay byte-equal.
263
+ if (needsSeparator) accumulatedText += '\n\n';
264
+ accumulatedText += text;
165
265
  assistantContent.push({ type: 'text', text });
166
266
  }
167
- for (const tu of toolUses) {
168
- assistantContent.push({
169
- type: 'tool_use',
170
- id: tu.id,
171
- name: tu.name,
172
- input: tu.input,
173
- // Forward Gemini's thoughtSignature unchanged so the next turn's
174
- // request echoes it back; without it the API rejects with 400.
175
- thoughtSignature: tu.thoughtSignature,
176
- });
267
+ if (!errored) {
268
+ // On an errored round, keep the text but DROP the round's tool_use
269
+ // blocks: the turn ends before executing them, and a dangling
270
+ // tool_use with no tool_result poisons the history (Anthropic and
271
+ // Gemini reject the next request outright).
272
+ for (const tu of toolUses) {
273
+ assistantContent.push({
274
+ type: 'tool_use',
275
+ id: tu.id,
276
+ name: tu.name,
277
+ input: tu.input,
278
+ // Forward Gemini's thoughtSignature unchanged so the next turn's
279
+ // request echoes it back; without it the API rejects with 400.
280
+ thoughtSignature: tu.thoughtSignature,
281
+ });
282
+ }
177
283
  }
178
284
  if (assistantContent.length > 0) {
179
285
  messages.push({ role: 'assistant', content: assistantContent });
180
286
  }
181
287
 
182
- if (errored) { turnErrored = true; break; }
288
+ if (errored) {
289
+ turnErrored = true;
290
+ turnErrorMsg = res.errorMsg;
291
+ turnErrorKind = res.errorKind;
292
+ break;
293
+ }
183
294
 
184
295
  // Run every tool the model asked for this round, then feed the results
185
296
  // back as a single user message Gemini accepts as a batch.
@@ -188,50 +299,45 @@ export function createPiSession(init: PiSessionInit): PiSession {
188
299
  usedTools.add(tu.name);
189
300
  if (init.abortController.signal.aborted) break;
190
301
  log.info(`[pi/session] tool call ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`);
191
- const res = await executeTool(tu);
192
- init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res.isError });
302
+ const res2 = await executeTool(tu);
303
+ init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res2.isError });
193
304
  toolResultBlocks.push({
194
305
  type: 'tool_result',
195
306
  toolUseId: tu.id,
196
- content: res.output,
197
- isError: res.isError,
307
+ content: res2.output,
308
+ isError: res2.isError,
198
309
  });
199
310
  }
200
311
  if (toolResultBlocks.length > 0) {
201
312
  messages.push({ role: 'user', content: toolResultBlocks });
202
313
  }
203
314
 
204
- // Fold any user messages that arrived during this round into history so
205
- // the next stream pass sees them. This is what makes the conversation
206
- // feel alive: while the agent is grinding on a long task, a question
207
- // typed mid-stream lands in the very next request as a user-role part,
208
- // and the model can answer it inline before continuing.
209
- const interleaved = input.drainPending();
210
- if (interleaved.length > 0) {
211
- log.info(`[pi/session] interleaved ${interleaved.length} mid-turn user message(s) into history`);
212
- for (const m of interleaved) messages.push(m);
213
- pendingInterleave = true;
214
- } else {
215
- pendingInterleave = false;
216
- }
217
-
218
- // Exit when the model has nothing more to do AND no new user messages
219
- // arrived mid-round. Either condition by itself keeps the loop alive.
220
- if (toolUses.length === 0 && !pendingInterleave) break;
315
+ // No tool calls — the model is done with this turn.
316
+ if (toolUses.length === 0) break;
221
317
  }
222
318
 
223
- // Emit text_end only on a clean turn (don't persist a half-baked answer from an errored
224
- // turn). But ALWAYS emit turn_complete on a non-aborted turn — including the errored path
225
- // — so the supervisor clears agentQueryActive (set on turn_started). Skipping it on error
226
- // wedged the flag true: backend auto-heal stayed deferred and chat stuck in "typing" until
227
- // the next successful turn. The 'error' event was already emitted by runOneRound, so the
228
- // user still sees the failure. Aborted turns are torn down via bot:conversation-ended.
319
+ // Turn-end emission order (audit D6-2, mirrors claude.ts:394-401):
320
+ // 1. text_end whenever ANY text streamed — even on errored turns, so the
321
+ // partial the user watched is committed, persisted, and consumes its
322
+ // routing-FIFO entry (the frontend's bot:error handler would
323
+ // otherwise erase it).
324
+ // 2. error only when the turn produced no text — EXCEPT fatal kinds
325
+ // (auth / context-overflow), which must surface regardless so the
326
+ // harness tears the poisoned session down.
327
+ // 3. turn_complete ALWAYS on a non-aborted turn — including errored
328
+ // paths — so the supervisor clears agentQueryActive. Skipping it
329
+ // wedged the flag true historically. Aborted turns are torn down via
330
+ // bot:conversation-ended.
229
331
  if (!init.abortController.signal.aborted) {
230
- if (!turnErrored && accumulatedText) {
332
+ if (accumulatedText) {
231
333
  init.onEvent({ type: 'text_end', text: accumulatedText });
232
334
  }
335
+ const fatal = turnErrorKind === 'auth' || turnErrorKind === 'context-overflow';
336
+ if (turnErrored && (!accumulatedText || fatal)) {
337
+ init.onEvent({ type: 'error', error: turnErrorMsg || 'Provider turn failed', kind: turnErrorKind });
338
+ }
233
339
  const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
234
- init.onEvent({ type: 'turn_complete', usedFileTools });
340
+ init.onEvent({ type: 'turn_complete', usedFileTools, usage: lastUsage, contextWindow: lastContextWindow });
235
341
  }
236
342
  }
237
343
 
@@ -240,7 +346,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
240
346
  for await (const userMsg of input) {
241
347
  if (init.abortController.signal.aborted) break;
242
348
  try {
243
- await runOneTurn(input, userMsg);
349
+ await runOneTurn(userMsg);
244
350
  } catch (err: any) {
245
351
  log.warn(`[pi/session] Turn failed: ${err?.message || err}`);
246
352
  init.onEvent({ type: 'error', error: err?.message || String(err) });
@@ -248,7 +354,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
248
354
  // and chat aren't wedged. Skip when aborting (teardown emits conversation-ended).
249
355
  // usedFileTools=false is the safe default (it only governs whether to auto-restart now).
250
356
  if (!init.abortController.signal.aborted) {
251
- init.onEvent({ type: 'turn_complete', usedFileTools: false });
357
+ init.onEvent({ type: 'turn_complete', usedFileTools: false, usage: lastUsage, contextWindow: lastContextWindow });
252
358
  }
253
359
  }
254
360
  }
@@ -15,7 +15,7 @@
15
15
  * synced from upstream pi via `npm run sync:pi-models`. Sub-providers without
16
16
  * a pi mapping (Ollama, LM Studio, custom) stay `'dynamic'` — free-form ID.
17
17
  */
18
- import { PI_MODELS_CATALOG } from './models-catalog.generated.js';
18
+ import { PI_MODELS_CATALOG, type PiCatalogModel } from './models-catalog.generated.js';
19
19
 
20
20
  export type PiApiFlavor = 'openai-completions' | 'anthropic-messages' | 'google-gemini';
21
21
 
@@ -24,6 +24,16 @@ export interface PiSubProviderModel {
24
24
  label: string;
25
25
  }
26
26
 
27
+ /**
28
+ * Catalog metadata for a saved sub-provider + model pair. Drives the per-model
29
+ * output cap (C-5), the context-window figure the supervisor's recycler needs
30
+ * (D2-1), and — later — the vision gate. Returns undefined for dynamic
31
+ * sub-providers (OpenRouter/Ollama/LM Studio/custom) and unknown model ids.
32
+ */
33
+ export function getCatalogModel(subProviderId: string, modelId: string): PiCatalogModel | undefined {
34
+ return PI_MODELS_CATALOG[subProviderId]?.find((m) => m.id === modelId);
35
+ }
36
+
27
37
  export interface PiSubProvider {
28
38
  id: string;
29
39
  name: string;
@@ -41,6 +51,23 @@ export interface PiSubProvider {
41
51
  models: PiSubProviderModel[] | 'dynamic';
42
52
  /** Default model selection when the user hasn't picked one. */
43
53
  defaultModel?: string;
54
+ /**
55
+ * openai-completions flavor only: which request field carries the output
56
+ * cap. OpenAI's reasoning models (gpt-5.x, o-series — 31 of 37 catalog
57
+ * entries) reject the legacy `max_tokens` with HTTP 400;
58
+ * `max_completion_tokens` is accepted by ALL OpenAI models, so the
59
+ * openai-api entry opts in. Other vendors stay on `max_tokens`, matching
60
+ * their current working behavior.
61
+ */
62
+ maxTokensField?: 'max_tokens' | 'max_completion_tokens';
63
+ /**
64
+ * openai-completions flavor only: set true for vendors whose request schema
65
+ * rejects unknown fields — Mistral 422s ("Extra inputs are not permitted")
66
+ * on `stream_options`, so it must not receive the include_usage opt-in.
67
+ * (Mistral still sends usage in the final streamed chunk by default, so the
68
+ * provider's chunk.usage read keeps working without it.)
69
+ */
70
+ noStreamUsage?: boolean;
44
71
  }
45
72
 
46
73
  function fromCatalog(key: string): PiSubProviderModel[] | 'dynamic' {
@@ -130,6 +157,7 @@ export const PI_SUB_PROVIDERS: PiSubProvider[] = [
130
157
  apiKeyUrl: 'https://console.mistral.ai/api-keys/',
131
158
  models: fromCatalog('mistral'),
132
159
  defaultModel: defaultFor('mistral'),
160
+ noStreamUsage: true,
133
161
  },
134
162
  {
135
163
  id: 'openai-api',
@@ -141,6 +169,7 @@ export const PI_SUB_PROVIDERS: PiSubProvider[] = [
141
169
  apiKeyUrl: 'https://platform.openai.com/api-keys',
142
170
  models: fromCatalog('openai-api'),
143
171
  defaultModel: defaultFor('openai-api'),
172
+ maxTokensField: 'max_completion_tokens',
144
173
  },
145
174
  {
146
175
  id: 'anthropic-api',
@@ -74,6 +74,7 @@ export async function runPiTestCompletion(input: PiTestCompletionInput): Promise
74
74
  modelId,
75
75
  apiKey: input.apiKey?.trim() || '',
76
76
  prompt: input.prompt,
77
+ maxTokensField: provider.maxTokensField,
77
78
  });
78
79
  return { ok: true, text, modelId, subProvider: provider.id };
79
80
  } catch (err: any) {
@@ -92,6 +93,8 @@ interface DispatchArgs {
92
93
  modelId: string;
93
94
  apiKey: string;
94
95
  prompt: string;
96
+ /** openai-completions only — gpt-5.x/o-series reject the legacy max_tokens (C-2). */
97
+ maxTokensField?: 'max_tokens' | 'max_completion_tokens';
95
98
  }
96
99
 
97
100
  async function callByFlavor(flavor: PiApiFlavor, args: DispatchArgs): Promise<string> {
@@ -107,17 +110,20 @@ async function callByFlavor(flavor: PiApiFlavor, args: DispatchArgs): Promise<st
107
110
 
108
111
  /* ── OpenAI / OpenAI-compatible ── */
109
112
 
110
- async function callOpenAICompletions({ baseUrl, modelId, apiKey, prompt }: DispatchArgs): Promise<string> {
113
+ async function callOpenAICompletions({ baseUrl, modelId, apiKey, prompt, maxTokensField }: DispatchArgs): Promise<string> {
111
114
  const headers: Record<string, string> = { 'content-type': 'application/json' };
112
115
  if (apiKey) headers['authorization'] = `Bearer ${apiKey}`;
113
116
 
117
+ // Reasoning models (gpt-5.x/o-series) spend the budget on hidden reasoning
118
+ // first — 256 would come back as an empty message, failing a valid key.
119
+ const capField = maxTokensField ?? 'max_tokens';
114
120
  const res = await timedFetch(`${baseUrl}/chat/completions`, {
115
121
  method: 'POST',
116
122
  headers,
117
123
  body: JSON.stringify({
118
124
  model: modelId,
119
125
  messages: [{ role: 'user', content: prompt }],
120
- max_tokens: 256,
126
+ [capField]: capField === 'max_completion_tokens' ? 2048 : 256,
121
127
  stream: false,
122
128
  }),
123
129
  });
@@ -1,8 +1,10 @@
1
1
  /**
2
2
  * Tool registry — the bag of tools the pi session passes to the model.
3
3
  *
4
- * Phase 2 ships the four core coding tools. Phase 3 or later will add Grep,
5
- * Glob, LS, NotebookEdit, etc. so the surface fully matches Claude SDK's.
4
+ * Read/Write/Edit/Bash mirror the Claude SDK tools; Task is the background
5
+ * sub-agent delegator (Phase B of the parity plan). Grep, Glob, LS,
6
+ * NotebookEdit etc. are still pending (Phase D) to fully match Claude SDK's
7
+ * surface.
6
8
  */
7
9
  import type { PiTool } from './types.js';
8
10
  import type { PiToolDef } from '../providers/types.js';
@@ -10,8 +12,9 @@ import { readTool } from './read.js';
10
12
  import { writeTool } from './write.js';
11
13
  import { editTool } from './edit.js';
12
14
  import { bashTool } from './bash.js';
15
+ import { taskTool, taskToolDef } from './task.js';
13
16
 
14
- export const PI_TOOLS: PiTool[] = [readTool, writeTool, editTool, bashTool];
17
+ export const PI_TOOLS: PiTool[] = [readTool, writeTool, editTool, bashTool, taskTool];
15
18
 
16
19
  const TOOL_BY_NAME = new Map<string, PiTool>();
17
20
  for (const t of PI_TOOLS) {
@@ -20,15 +23,28 @@ for (const t of PI_TOOLS) {
20
23
  // common aliases so we don't 404 a legitimate call over a casing nit.
21
24
  TOOL_BY_NAME.set(t.name.toLowerCase(), t);
22
25
  }
26
+ // The pi system prompt calls background delegation "the Agent tool" (claude
27
+ // heritage) — alias it so a model following the prompt verbatim still lands
28
+ // on the Task implementation.
29
+ TOOL_BY_NAME.set('Agent', taskTool);
30
+ TOOL_BY_NAME.set('agent', taskTool);
23
31
 
24
32
  export function findTool(name: string): PiTool | undefined {
25
33
  return TOOL_BY_NAME.get(name) || TOOL_BY_NAME.get(name.toLowerCase());
26
34
  }
27
35
 
28
- export function toolDefsForProvider(): PiToolDef[] {
29
- return PI_TOOLS.map((t) => ({
30
- name: t.name,
31
- description: t.description,
32
- inputSchema: t.inputSchema,
33
- }));
36
+ export function toolDefsForProvider(opts?: { forSubagent?: boolean }): PiToolDef[] {
37
+ const defs: PiToolDef[] = [];
38
+ for (const t of PI_TOOLS) {
39
+ if (t.name === 'Task') {
40
+ // Children cannot spawn grandchildren (Claude SDK parity) — a child that
41
+ // hallucinates a Task call still fails gracefully (ctx.tasks is unset).
42
+ if (opts?.forSubagent) continue;
43
+ // Rebuilt fresh so agent-roster/prompt edits apply per session start.
44
+ defs.push(taskToolDef());
45
+ continue;
46
+ }
47
+ defs.push({ name: t.name, description: t.description, inputSchema: t.inputSchema });
48
+ }
49
+ return defs;
34
50
  }