bloby-bot 0.70.8 → 0.70.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/dist-bloby/assets/{bloby-CXmOcb1r.js → bloby-DSNB0g4w.js} +4 -4
  2. package/dist-bloby/assets/{globals-DpO5tO92.js → globals-B3cTbITX.js} +1 -1
  3. package/dist-bloby/assets/{highlighted-body-OFNGDK62-D7cU1Y-Z.js → highlighted-body-OFNGDK62-BLforpkr.js} +1 -1
  4. package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +1 -0
  5. package/dist-bloby/assets/{onboard-B96ELhXn.js → onboard-Dn2Ws_G2.js} +1 -1
  6. package/dist-bloby/bloby.html +2 -2
  7. package/dist-bloby/onboard.html +2 -2
  8. package/package.json +1 -1
  9. package/scripts/sync-pi-models.ts +37 -6
  10. package/supervisor/chat/OnboardWizard.tsx +4 -4
  11. package/supervisor/harnesses/pi/async-queue.ts +7 -11
  12. package/supervisor/harnesses/pi/index.ts +232 -65
  13. package/supervisor/harnesses/pi/models-catalog.generated.ts +840 -210
  14. package/supervisor/harnesses/pi/providers/humanize-error.ts +125 -0
  15. package/supervisor/harnesses/pi/providers/retry.ts +87 -0
  16. package/supervisor/harnesses/pi/providers/stream-anthropic.ts +73 -11
  17. package/supervisor/harnesses/pi/providers/stream-google.ts +15 -5
  18. package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +55 -19
  19. package/supervisor/harnesses/pi/providers/types.ts +26 -1
  20. package/supervisor/harnesses/pi/session.ts +164 -70
  21. package/supervisor/harnesses/pi/sub-providers.ts +30 -1
  22. package/supervisor/harnesses/pi/test-completion.ts +8 -2
  23. package/supervisor/index.ts +11 -10
  24. package/supervisor/public/morphy_sad.mov +0 -0
  25. package/supervisor/public/morphy_sad.webm +0 -0
  26. package/supervisor/shell.ts +1 -1
  27. package/supervisor/workspace-guard.js +1 -1
  28. package/workspace/client/public/morphy_bounce.mov +0 -0
  29. package/workspace/client/public/morphy_bounce.webm +0 -0
  30. package/workspace/client/public/morphy_hi.mov +0 -0
  31. package/workspace/client/public/morphy_hi.webm +0 -0
  32. package/workspace/client/src/App.tsx +5 -3
  33. package/dist-bloby/assets/mermaid-GHXKKRXX-D5YxphBn.js +0 -1
  34. package/supervisor/public/what-happened.mp4 +0 -0
  35. package/supervisor/public/what-happened.webm +0 -0
@@ -4,22 +4,38 @@
4
4
  * Mirrors the *shape* of the Claude harness loop in `harnesses/claude.ts`:
5
5
  * - one long-lived session per conversation
6
6
  * - user messages arrive via an `AsyncQueue<PiMessage>` input
7
- * - the loop drains the queue one turn at a time
7
+ * - the loop drains the queue ONE MESSAGE PER TURN — exactly like the Claude
8
+ * SDK's input queue: each pushed message gets its own turn, its own
9
+ * text_end, and its own turn_complete. (An earlier design folded mid-turn
10
+ * messages into the in-flight turn; that broke the channel manager's
11
+ * one-response-per-push routing FIFO — see PI-PARITY-AUDIT-2026-06-11.md
12
+ * D1-1 — so queued messages now simply wait for their own turn.)
8
13
  * - each turn streams provider events back through a single `onEvent`
9
14
  * callback the caller hooked up
10
15
  *
11
- * Phase 2: each user turn is an inner loop — provider call → if the model
12
- * asked for tool calls, execute them and feed results back → call provider
13
- * again — until the model finishes without requesting more tools. Tokens
14
- * stream live; `text_end` only fires once at the very end of the turn so the
15
- * UI doesn't display half-answers between tool rounds.
16
+ * Each user turn is an inner loop — provider call → if the model asked for
17
+ * tool calls, execute them and feed results back → call provider again — until
18
+ * the model finishes without requesting more tools. Tokens stream live;
19
+ * `text_end` only fires once at the very end of the turn so the UI doesn't
20
+ * display half-answers between tool rounds.
16
21
  *
17
- * Sub-agents are NOT spawned here Bruno will add those later.
22
+ * Error precedence matches claude (audit D6-2): streamed partial text is
23
+ * always committed via `text_end` (the consumer persists it and the routing
24
+ * FIFO consumes normally); the `error` event fires only when a failed turn
25
+ * produced no text — except fatal kinds (auth / context-overflow), which are
26
+ * surfaced even after partial text so the harness can tear the session down.
27
+ *
28
+ * Auth (key/model/base URL/flavor) is resolved via `getAuth()` on every
29
+ * provider round (audit D6-8): fixing a revoked key or switching models in the
30
+ * wizard applies on the very next round, with full history intact.
31
+ *
32
+ * Sub-agents are NOT spawned here — Bruno will add those later (Phase B).
18
33
  */
19
34
  import { log } from '../../../shared/logger.js';
20
35
  import type { PiApiFlavor } from './sub-providers.js';
21
36
  import { streamProvider } from './providers/stream.js';
22
- import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock } from './providers/types.js';
37
+ import type { PiMessage, PiStreamEvent, PiToolDef, PiContentBlock, PiUsage, PiErrorKind } from './providers/types.js';
38
+ import { sleep } from './providers/retry.js';
23
39
  import type { AsyncQueue } from './async-queue.js';
24
40
  import { findTool } from './tools/registry.js';
25
41
  import type { PiTool } from './tools/types.js';
@@ -30,14 +46,31 @@ export type PiSessionEvent =
30
46
  | { type: 'text_end'; text: string }
31
47
  | { type: 'tool_use'; id: string; name: string; input: any }
32
48
  | { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
33
- | { type: 'turn_complete'; usedFileTools: boolean }
34
- | { type: 'error'; error: string };
49
+ | { type: 'turn_complete'; usedFileTools: boolean; usage?: PiUsage; contextWindow?: number }
50
+ | { type: 'error'; error: string; kind?: PiErrorKind };
35
51
 
36
- export interface PiSessionInit {
52
+ /** Everything the providers need that can change while a session is alive. */
53
+ export interface PiSessionAuth {
37
54
  flavor: PiApiFlavor;
38
55
  modelId: string;
39
56
  baseUrl: string;
40
57
  apiKey: string;
58
+ /** Per-model output cap from the catalog; providers fall back to safe defaults. */
59
+ maxOutputTokens?: number;
60
+ /** openai-completions only: which field carries the output cap (C-2). */
61
+ maxTokensField?: 'max_tokens' | 'max_completion_tokens';
62
+ /** openai-completions only: false for strict-schema vendors that 422 on stream_options. */
63
+ includeStreamUsage?: boolean;
64
+ /** Model context window from the catalog — reported on turn_complete for the recycler. */
65
+ contextWindow?: number;
66
+ }
67
+
68
+ export interface PiSessionInit {
69
+ /**
70
+ * Resolved on EVERY provider round (not captured once) so wizard-side
71
+ * key/model fixes heal a live conversation on the next round.
72
+ */
73
+ getAuth: () => PiSessionAuth;
41
74
  systemPrompt: string;
42
75
  /** Pre-loaded history before the first new user turn. */
43
76
  initialMessages?: PiMessage[];
@@ -45,7 +78,6 @@ export interface PiSessionInit {
45
78
  tools?: PiToolDef[];
46
79
  /** Resolved every time a tool fires (registry → run). */
47
80
  cwd: string;
48
- maxOutputTokens?: number;
49
81
  /** Used to interrupt in-flight provider calls when the session ends. */
50
82
  abortController: AbortController;
51
83
  /** Caller's event sink — translated to bloby's `bot:*` events one layer up. */
@@ -61,28 +93,45 @@ export interface PiSession {
61
93
 
62
94
  const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'MultiEdit', 'NotebookEdit', 'write', 'edit', 'multiEdit', 'notebookEdit']);
63
95
  const MAX_TOOL_ROUNDS = 25;
96
+ /** Transparent re-runs of a failed round that produced nothing (audit D6-1). */
97
+ const MAX_ROUND_RETRIES = 2;
64
98
 
65
99
  export function createPiSession(init: PiSessionInit): PiSession {
66
100
  const messages: PiMessage[] = init.initialMessages ? [...init.initialMessages] : [];
67
101
 
102
+ // Last provider-reported usage + window, session-scoped so even an errored
103
+ // turn's turn_complete carries the most recent context occupancy (D2-1).
104
+ let lastUsage: PiUsage | undefined;
105
+ let lastContextWindow: number | undefined;
106
+
68
107
  /** One stream round — collect the assistant blocks the model emits this pass. */
69
108
  interface RoundResult {
70
109
  text: string;
71
110
  toolUses: { id: string; name: string; input: any; thoughtSignature?: string }[];
72
111
  errored: boolean;
112
+ /** Stashed, NOT emitted inline — the turn decides response-vs-error precedence (D6-2). */
113
+ errorMsg?: string;
114
+ errorKind?: PiErrorKind;
115
+ /** True when re-sending the identical round can plausibly succeed (429/5xx/network). */
116
+ retryable?: boolean;
73
117
  }
74
118
 
75
- async function runOneRound(): Promise<RoundResult> {
119
+ async function runOneRound(emitSeparatorFirst: boolean): Promise<RoundResult> {
76
120
  const result: RoundResult = { text: '', toolUses: [], errored: false };
121
+ let firstDelta = true;
77
122
  try {
78
- const stream = streamProvider(init.flavor, {
79
- modelId: init.modelId,
80
- baseUrl: init.baseUrl,
81
- apiKey: init.apiKey,
123
+ const auth = init.getAuth();
124
+ lastContextWindow = auth.contextWindow ?? lastContextWindow;
125
+ const stream = streamProvider(auth.flavor, {
126
+ modelId: auth.modelId,
127
+ baseUrl: auth.baseUrl,
128
+ apiKey: auth.apiKey,
82
129
  systemPrompt: init.systemPrompt,
83
130
  messages,
84
131
  tools: init.tools,
85
- maxOutputTokens: init.maxOutputTokens,
132
+ maxOutputTokens: auth.maxOutputTokens,
133
+ maxTokensField: auth.maxTokensField,
134
+ includeStreamUsage: auth.includeStreamUsage,
86
135
  signal: init.abortController.signal,
87
136
  });
88
137
 
@@ -90,6 +139,14 @@ export function createPiSession(init: PiSessionInit): PiSession {
90
139
  if (init.abortController.signal.aborted) break;
91
140
  switch (evt.type) {
92
141
  case 'text_delta':
142
+ // Round separator rides BEFORE the new round's first token —
143
+ // claude.ts:374-379 ordering — so the streamed bytes stay a true
144
+ // prefix of the final bot:response even when the dashboard commits
145
+ // the buffer at a tool boundary mid-turn (audit D1-5/PI-SES-1).
146
+ if (firstDelta && emitSeparatorFirst) {
147
+ init.onEvent({ type: 'text_delta', delta: '\n\n' });
148
+ }
149
+ firstDelta = false;
93
150
  result.text += evt.delta;
94
151
  init.onEvent({ type: 'text_delta', delta: evt.delta });
95
152
  break;
@@ -110,17 +167,24 @@ export function createPiSession(init: PiSessionInit): PiSession {
110
167
  break;
111
168
  case 'error':
112
169
  result.errored = true;
113
- init.onEvent({ type: 'error', error: evt.error });
170
+ result.errorMsg = evt.error;
171
+ result.errorKind = evt.kind;
172
+ result.retryable = evt.retryable;
114
173
  break;
115
174
  case 'done':
116
- // Loop control is by tool_use presence, not stop reason.
175
+ // Loop control is by tool_use presence, not stop reason — but the
176
+ // usage rides here and feeds the supervisor's session recycling.
177
+ if (evt.usage) lastUsage = evt.usage;
117
178
  break;
118
179
  }
119
180
  }
120
181
  } catch (err: any) {
121
182
  if (!init.abortController.signal.aborted) {
122
183
  result.errored = true;
123
- init.onEvent({ type: 'error', error: err?.message || String(err) });
184
+ result.errorMsg = err?.message || String(err);
185
+ // A throw mid-iteration is a network/stream failure — transient.
186
+ result.errorKind = 'transient';
187
+ result.retryable = true;
124
188
  }
125
189
  }
126
190
  return result;
@@ -141,45 +205,80 @@ export function createPiSession(init: PiSessionInit): PiSession {
141
205
  }
142
206
  }
143
207
 
144
- async function runOneTurn(input: AsyncQueue<PiMessage>, firstUserMsg: PiMessage): Promise<void> {
208
+ async function runOneTurn(userMsg: PiMessage): Promise<void> {
145
209
  if (init.abortController.signal.aborted) return;
146
- // Stack any messages that arrived in the same millisecond into one turn.
147
- messages.push(firstUserMsg);
148
- for (const extra of input.drainPending()) messages.push(extra);
210
+ // ONE message per turn queued messages wait for their own turn so each
211
+ // push gets its own bot:response (routing-FIFO invariant, audit D1-1).
212
+ messages.push(userMsg);
149
213
  init.onEvent({ type: 'turn_started' });
150
214
 
151
215
  let accumulatedText = '';
152
216
  const usedTools = new Set<string>();
153
217
  let turnErrored = false;
154
- let pendingInterleave = false;
218
+ let turnErrorMsg: string | undefined;
219
+ let turnErrorKind: PiErrorKind | undefined;
155
220
 
156
221
  for (let round = 0; round < MAX_TOOL_ROUNDS; round++) {
157
222
  if (init.abortController.signal.aborted) break;
158
- const { text, toolUses, errored } = await runOneRound();
223
+ // The separator condition is decided BEFORE the round so the round can
224
+ // emit it ahead of its first token (claude.ts ordering — see runOneRound).
225
+ const needsSeparator = accumulatedText.length > 0 && !accumulatedText.endsWith('\n');
226
+ let res = await runOneRound(needsSeparator);
227
+
228
+ // Transparent round retry (D6-1): a transient failure that produced
229
+ // NOTHING is safe to re-run — requests are stateless full-history
230
+ // resends. Never retry a round that already streamed text or tool calls.
231
+ for (
232
+ let attempt = 0;
233
+ attempt < MAX_ROUND_RETRIES &&
234
+ res.errored && res.retryable && !res.text && res.toolUses.length === 0 &&
235
+ !init.abortController.signal.aborted;
236
+ attempt++
237
+ ) {
238
+ log.info(`[pi/session] transient round failure — retrying (${attempt + 1}/${MAX_ROUND_RETRIES}): ${res.errorMsg?.slice(0, 160)}`);
239
+ try { await sleep(1000 * 2 ** attempt, init.abortController.signal); } catch { break; }
240
+ res = await runOneRound(needsSeparator);
241
+ }
242
+
243
+ const { text, toolUses, errored } = res;
159
244
 
160
245
  // Append whatever the model produced this round to history so subsequent
161
246
  // rounds (and the next user turn) see it.
162
247
  const assistantContent: PiContentBlock[] = [];
163
248
  if (text) {
164
- accumulatedText += (accumulatedText && !accumulatedText.endsWith('\n') ? '\n\n' : '') + text;
249
+ // Matches the separator runOneRound streamed before this round's
250
+ // first delta — accumulatedText and the token stream stay byte-equal.
251
+ if (needsSeparator) accumulatedText += '\n\n';
252
+ accumulatedText += text;
165
253
  assistantContent.push({ type: 'text', text });
166
254
  }
167
- for (const tu of toolUses) {
168
- assistantContent.push({
169
- type: 'tool_use',
170
- id: tu.id,
171
- name: tu.name,
172
- input: tu.input,
173
- // Forward Gemini's thoughtSignature unchanged so the next turn's
174
- // request echoes it back; without it the API rejects with 400.
175
- thoughtSignature: tu.thoughtSignature,
176
- });
255
+ if (!errored) {
256
+ // On an errored round, keep the text but DROP the round's tool_use
257
+ // blocks: the turn ends before executing them, and a dangling
258
+ // tool_use with no tool_result poisons the history (Anthropic and
259
+ // Gemini reject the next request outright).
260
+ for (const tu of toolUses) {
261
+ assistantContent.push({
262
+ type: 'tool_use',
263
+ id: tu.id,
264
+ name: tu.name,
265
+ input: tu.input,
266
+ // Forward Gemini's thoughtSignature unchanged so the next turn's
267
+ // request echoes it back; without it the API rejects with 400.
268
+ thoughtSignature: tu.thoughtSignature,
269
+ });
270
+ }
177
271
  }
178
272
  if (assistantContent.length > 0) {
179
273
  messages.push({ role: 'assistant', content: assistantContent });
180
274
  }
181
275
 
182
- if (errored) { turnErrored = true; break; }
276
+ if (errored) {
277
+ turnErrored = true;
278
+ turnErrorMsg = res.errorMsg;
279
+ turnErrorKind = res.errorKind;
280
+ break;
281
+ }
183
282
 
184
283
  // Run every tool the model asked for this round, then feed the results
185
284
  // back as a single user message Gemini accepts as a batch.
@@ -188,50 +287,45 @@ export function createPiSession(init: PiSessionInit): PiSession {
188
287
  usedTools.add(tu.name);
189
288
  if (init.abortController.signal.aborted) break;
190
289
  log.info(`[pi/session] tool call ${tu.name}(${JSON.stringify(tu.input).slice(0, 200)})`);
191
- const res = await executeTool(tu);
192
- init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res.isError });
290
+ const res2 = await executeTool(tu);
291
+ init.onEvent({ type: 'tool_result', toolUseId: tu.id, name: tu.name, isError: !!res2.isError });
193
292
  toolResultBlocks.push({
194
293
  type: 'tool_result',
195
294
  toolUseId: tu.id,
196
- content: res.output,
197
- isError: res.isError,
295
+ content: res2.output,
296
+ isError: res2.isError,
198
297
  });
199
298
  }
200
299
  if (toolResultBlocks.length > 0) {
201
300
  messages.push({ role: 'user', content: toolResultBlocks });
202
301
  }
203
302
 
204
- // Fold any user messages that arrived during this round into history so
205
- // the next stream pass sees them. This is what makes the conversation
206
- // feel alive: while the agent is grinding on a long task, a question
207
- // typed mid-stream lands in the very next request as a user-role part,
208
- // and the model can answer it inline before continuing.
209
- const interleaved = input.drainPending();
210
- if (interleaved.length > 0) {
211
- log.info(`[pi/session] interleaved ${interleaved.length} mid-turn user message(s) into history`);
212
- for (const m of interleaved) messages.push(m);
213
- pendingInterleave = true;
214
- } else {
215
- pendingInterleave = false;
216
- }
217
-
218
- // Exit when the model has nothing more to do AND no new user messages
219
- // arrived mid-round. Either condition by itself keeps the loop alive.
220
- if (toolUses.length === 0 && !pendingInterleave) break;
303
+ // No tool calls the model is done with this turn.
304
+ if (toolUses.length === 0) break;
221
305
  }
222
306
 
223
- // Emit text_end only on a clean turn (don't persist a half-baked answer from an errored
224
- // turn). But ALWAYS emit turn_complete on a non-aborted turn including the errored path
225
- // — so the supervisor clears agentQueryActive (set on turn_started). Skipping it on error
226
- // wedged the flag true: backend auto-heal stayed deferred and chat stuck in "typing" until
227
- // the next successful turn. The 'error' event was already emitted by runOneRound, so the
228
- // user still sees the failure. Aborted turns are torn down via bot:conversation-ended.
307
+ // Turn-end emission order (audit D6-2, mirrors claude.ts:394-401):
308
+ // 1. text_end whenever ANY text streamed even on errored turns, so the
309
+ // partial the user watched is committed, persisted, and consumes its
310
+ // routing-FIFO entry (the frontend's bot:error handler would
311
+ // otherwise erase it).
312
+ // 2. error only when the turn produced no text EXCEPT fatal kinds
313
+ // (auth / context-overflow), which must surface regardless so the
314
+ // harness tears the poisoned session down.
315
+ // 3. turn_complete ALWAYS on a non-aborted turn — including errored
316
+ // paths — so the supervisor clears agentQueryActive. Skipping it
317
+ // wedged the flag true historically. Aborted turns are torn down via
318
+ // bot:conversation-ended.
229
319
  if (!init.abortController.signal.aborted) {
230
- if (!turnErrored && accumulatedText) {
320
+ if (accumulatedText) {
231
321
  init.onEvent({ type: 'text_end', text: accumulatedText });
232
322
  }
323
+ const fatal = turnErrorKind === 'auth' || turnErrorKind === 'context-overflow';
324
+ if (turnErrored && (!accumulatedText || fatal)) {
325
+ init.onEvent({ type: 'error', error: turnErrorMsg || 'Provider turn failed', kind: turnErrorKind });
326
+ }
233
327
  const usedFileTools = Array.from(usedTools).some((t) => FILE_TOOL_NAMES.has(t));
234
- init.onEvent({ type: 'turn_complete', usedFileTools });
328
+ init.onEvent({ type: 'turn_complete', usedFileTools, usage: lastUsage, contextWindow: lastContextWindow });
235
329
  }
236
330
  }
237
331
 
@@ -240,7 +334,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
240
334
  for await (const userMsg of input) {
241
335
  if (init.abortController.signal.aborted) break;
242
336
  try {
243
- await runOneTurn(input, userMsg);
337
+ await runOneTurn(userMsg);
244
338
  } catch (err: any) {
245
339
  log.warn(`[pi/session] Turn failed: ${err?.message || err}`);
246
340
  init.onEvent({ type: 'error', error: err?.message || String(err) });
@@ -248,7 +342,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
248
342
  // and chat aren't wedged. Skip when aborting (teardown emits conversation-ended).
249
343
  // usedFileTools=false is the safe default (it only governs whether to auto-restart now).
250
344
  if (!init.abortController.signal.aborted) {
251
- init.onEvent({ type: 'turn_complete', usedFileTools: false });
345
+ init.onEvent({ type: 'turn_complete', usedFileTools: false, usage: lastUsage, contextWindow: lastContextWindow });
252
346
  }
253
347
  }
254
348
  }
@@ -15,7 +15,7 @@
15
15
  * synced from upstream pi via `npm run sync:pi-models`. Sub-providers without
16
16
  * a pi mapping (Ollama, LM Studio, custom) stay `'dynamic'` — free-form ID.
17
17
  */
18
- import { PI_MODELS_CATALOG } from './models-catalog.generated.js';
18
+ import { PI_MODELS_CATALOG, type PiCatalogModel } from './models-catalog.generated.js';
19
19
 
20
20
  export type PiApiFlavor = 'openai-completions' | 'anthropic-messages' | 'google-gemini';
21
21
 
@@ -24,6 +24,16 @@ export interface PiSubProviderModel {
24
24
  label: string;
25
25
  }
26
26
 
27
+ /**
28
+ * Catalog metadata for a saved sub-provider + model pair. Drives the per-model
29
+ * output cap (C-5), the context-window figure the supervisor's recycler needs
30
+ * (D2-1), and — later — the vision gate. Returns undefined for dynamic
31
+ * sub-providers (OpenRouter/Ollama/LM Studio/custom) and unknown model ids.
32
+ */
33
+ export function getCatalogModel(subProviderId: string, modelId: string): PiCatalogModel | undefined {
34
+ return PI_MODELS_CATALOG[subProviderId]?.find((m) => m.id === modelId);
35
+ }
36
+
27
37
  export interface PiSubProvider {
28
38
  id: string;
29
39
  name: string;
@@ -41,6 +51,23 @@ export interface PiSubProvider {
41
51
  models: PiSubProviderModel[] | 'dynamic';
42
52
  /** Default model selection when the user hasn't picked one. */
43
53
  defaultModel?: string;
54
+ /**
55
+ * openai-completions flavor only: which request field carries the output
56
+ * cap. OpenAI's reasoning models (gpt-5.x, o-series — 31 of 37 catalog
57
+ * entries) reject the legacy `max_tokens` with HTTP 400;
58
+ * `max_completion_tokens` is accepted by ALL OpenAI models, so the
59
+ * openai-api entry opts in. Other vendors stay on `max_tokens`, matching
60
+ * their current working behavior.
61
+ */
62
+ maxTokensField?: 'max_tokens' | 'max_completion_tokens';
63
+ /**
64
+ * openai-completions flavor only: set true for vendors whose request schema
65
+ * rejects unknown fields — Mistral 422s ("Extra inputs are not permitted")
66
+ * on `stream_options`, so it must not receive the include_usage opt-in.
67
+ * (Mistral still sends usage in the final streamed chunk by default, so the
68
+ * provider's chunk.usage read keeps working without it.)
69
+ */
70
+ noStreamUsage?: boolean;
44
71
  }
45
72
 
46
73
  function fromCatalog(key: string): PiSubProviderModel[] | 'dynamic' {
@@ -130,6 +157,7 @@ export const PI_SUB_PROVIDERS: PiSubProvider[] = [
130
157
  apiKeyUrl: 'https://console.mistral.ai/api-keys/',
131
158
  models: fromCatalog('mistral'),
132
159
  defaultModel: defaultFor('mistral'),
160
+ noStreamUsage: true,
133
161
  },
134
162
  {
135
163
  id: 'openai-api',
@@ -141,6 +169,7 @@ export const PI_SUB_PROVIDERS: PiSubProvider[] = [
141
169
  apiKeyUrl: 'https://platform.openai.com/api-keys',
142
170
  models: fromCatalog('openai-api'),
143
171
  defaultModel: defaultFor('openai-api'),
172
+ maxTokensField: 'max_completion_tokens',
144
173
  },
145
174
  {
146
175
  id: 'anthropic-api',
@@ -74,6 +74,7 @@ export async function runPiTestCompletion(input: PiTestCompletionInput): Promise
74
74
  modelId,
75
75
  apiKey: input.apiKey?.trim() || '',
76
76
  prompt: input.prompt,
77
+ maxTokensField: provider.maxTokensField,
77
78
  });
78
79
  return { ok: true, text, modelId, subProvider: provider.id };
79
80
  } catch (err: any) {
@@ -92,6 +93,8 @@ interface DispatchArgs {
92
93
  modelId: string;
93
94
  apiKey: string;
94
95
  prompt: string;
96
+ /** openai-completions only — gpt-5.x/o-series reject the legacy max_tokens (C-2). */
97
+ maxTokensField?: 'max_tokens' | 'max_completion_tokens';
95
98
  }
96
99
 
97
100
  async function callByFlavor(flavor: PiApiFlavor, args: DispatchArgs): Promise<string> {
@@ -107,17 +110,20 @@ async function callByFlavor(flavor: PiApiFlavor, args: DispatchArgs): Promise<st
107
110
 
108
111
  /* ── OpenAI / OpenAI-compatible ── */
109
112
 
110
- async function callOpenAICompletions({ baseUrl, modelId, apiKey, prompt }: DispatchArgs): Promise<string> {
113
+ async function callOpenAICompletions({ baseUrl, modelId, apiKey, prompt, maxTokensField }: DispatchArgs): Promise<string> {
111
114
  const headers: Record<string, string> = { 'content-type': 'application/json' };
112
115
  if (apiKey) headers['authorization'] = `Bearer ${apiKey}`;
113
116
 
117
+ // Reasoning models (gpt-5.x/o-series) spend the budget on hidden reasoning
118
+ // first — 256 would come back as an empty message, failing a valid key.
119
+ const capField = maxTokensField ?? 'max_tokens';
114
120
  const res = await timedFetch(`${baseUrl}/chat/completions`, {
115
121
  method: 'POST',
116
122
  headers,
117
123
  body: JSON.stringify({
118
124
  model: modelId,
119
125
  messages: [{ role: 'user', content: prompt }],
120
- max_tokens: 256,
126
+ [capField]: capField === 'max_completion_tokens' ? 2048 : 256,
121
127
  stream: false,
122
128
  }),
123
129
  });
@@ -91,8 +91,8 @@ const PLATFORM_ASSETS = new Set([
91
91
  '/pi-logo.svg',
92
92
  '/codex.svg',
93
93
  '/manifest.json',
94
- '/what-happened.webm',
95
- '/what-happened.mp4',
94
+ '/morphy_sad.webm',
95
+ '/morphy_sad.mov',
96
96
  ]);
97
97
 
98
98
  // Directory-prefix platform assets — anything under these is served from supervisor/public/.
@@ -313,8 +313,8 @@ const RECOVERING_HTML = `<!DOCTYPE html>
313
313
  </style></head>
314
314
  <body><div class="c">
315
315
  <div class="video-wrap"><video autoplay loop muted playsinline>
316
- <source src="/what-happened.webm" type="video/webm">
317
- <source src="/what-happened.mp4" type="video/mp4">
316
+ <source src="/morphy_sad.mov" type='video/mp4; codecs="hvc1"'>
317
+ <source src="/morphy_sad.webm" type="video/webm">
318
318
  </video></div>
319
319
  <h1>Reconnecting…</h1>
320
320
  <p class="lead">Hang tight — your app is coming back online.</p>
@@ -378,8 +378,8 @@ function backendDownPage(logTail: string): string {
378
378
  </style></head>
379
379
  <body><div class="c">
380
380
  <div class="video-wrap"><video autoplay loop muted playsinline>
381
- <source src="/what-happened.webm" type="video/webm">
382
- <source src="/what-happened.mp4" type="video/mp4">
381
+ <source src="/morphy_sad.mov" type='video/mp4; codecs="hvc1"'>
382
+ <source src="/morphy_sad.webm" type="video/webm">
383
383
  </video></div>
384
384
  <h1>Your app's backend is down</h1>
385
385
  <p class="lead">The workspace server crashed and couldn't restart on its own.</p>
@@ -1140,7 +1140,7 @@ export async function startSupervisor() {
1140
1140
  <div class="container">
1141
1141
  ${connected
1142
1142
  ? `<div class="confetti-wrap">${confettiHTML}</div>
1143
- <div class="video-wrap"><video autoplay muted playsinline><source src="/bloby_happy_reappearing.mov" type='video/mp4; codecs="hvc1"'><source src="/bloby_happy_reappearing.webm" type="video/webm"></video></div>
1143
+ <div class="video-wrap"><video autoplay muted playsinline><source src="/morphy_bounce.mov" type='video/mp4; codecs="hvc1"'><source src="/morphy_bounce.webm" type="video/webm"></video></div>
1144
1144
  <div class="text-wrap"><div class="title">Connected!</div><p class="subtitle">WhatsApp is linked. You can close this page.</p>
1145
1145
  <button onclick="relink()" style="margin-top:20px;padding:10px 24px;background:#2a2a2a;border:1px solid rgba(255,255,255,0.15);border-radius:10px;color:#999;font-size:13px;cursor:pointer;font-family:inherit;transition:all .2s" onmouseover="this.style.borderColor='#0069FE';this.style.color='#f5f5f5'" onmouseout="this.style.borderColor='rgba(255,255,255,0.15)';this.style.color='#999'">Relink to a different number</button>
1146
1146
  </div>
@@ -1628,7 +1628,7 @@ ${!connected ? `<script>
1628
1628
  <div class="container" id="root">
1629
1629
  ${alreadyLinked
1630
1630
  ? `<div class="confetti-wrap">${confettiHTML}</div>
1631
- <div class="video-wrap"><video autoplay muted playsinline><source src="/bloby_happy_reappearing.mov" type='video/mp4; codecs="hvc1"'><source src="/bloby_happy_reappearing.webm" type="video/webm"></video></div>
1631
+ <div class="video-wrap"><video autoplay muted playsinline><source src="/morphy_bounce.mov" type='video/mp4; codecs="hvc1"'><source src="/morphy_bounce.webm" type="video/webm"></video></div>
1632
1632
  <div class="text-wrap">
1633
1633
  <div class="title">Connected!</div>
1634
1634
  <p class="success-sub">Alexa is linked. Say <b style="color:#f5f5f5">"Alexa, open Morphy Agent"</b> to start a conversation, or <b style="color:#f5f5f5">"Alexa, tell Morphy Agent &lt;command&gt;"</b> for one-shots.</p>
@@ -1991,7 +1991,7 @@ mint();
1991
1991
  <div class="container" id="root">
1992
1992
  ${alreadyLinked
1993
1993
  ? `<div class="confetti-wrap">${confettiHTML}</div>
1994
- <div class="video-wrap"><video autoplay muted playsinline><source src="/bloby_happy_reappearing.mov" type='video/mp4; codecs="hvc1"'><source src="/bloby_happy_reappearing.webm" type="video/webm"></video></div>
1994
+ <div class="video-wrap"><video autoplay muted playsinline><source src="/morphy_bounce.mov" type='video/mp4; codecs="hvc1"'><source src="/morphy_bounce.webm" type="video/webm"></video></div>
1995
1995
  <div class="text-wrap">
1996
1996
  <div class="title">Connected!</div>
1997
1997
  <p class="success-sub">Telegram is linked${linkedUsername ? ` to <b style="color:#f5f5f5">@${linkedUsername}</b>` : ''}. Open Telegram and message your bot to start chatting.</p>
@@ -2844,7 +2844,8 @@ ${alreadyLinked ? '' : `
2844
2844
  }
2845
2845
 
2846
2846
  // Bloby routes → serve pre-built static files from dist-bloby/
2847
- // Note: must check '/bloby/' (with slash) to avoid matching '/bloby_tilts.webm' etc.
2847
+ // Note: must check '/bloby/' (with slash) so the route only claims the chat UI under
2848
+ // /bloby/, never a root-served asset that merely starts with "bloby".
2848
2849
  if (req.url === '/bloby' || req.url?.startsWith('/bloby/')) {
2849
2850
  // Strip /bloby prefix, then query strings, then resolve file path
2850
2851
  let filePath = req.url!.replace(/^\/bloby\/?/, '').split('?')[0] || 'bloby.html';
Binary file
@@ -170,7 +170,7 @@ export const SHELL_HTML = `<!DOCTYPE html>
170
170
  // preload=auto so the clip is fetched (and SW-cached) while the supervisor is
171
171
  // still up — by the time we show this, the origin is unreachable.
172
172
  '<video autoplay loop muted playsinline preload="auto" style="position:relative;width:100%;height:100%;object-fit:contain;border-radius:50%">' +
173
- '<source src="/what-happened.webm" type="video/webm"><source src="/what-happened.mp4" type="video/mp4">' +
173
+ '<source src="/morphy_sad.mov" type=\'video/mp4; codecs="hvc1"\'><source src="/morphy_sad.webm" type="video/webm">' +
174
174
  '</video>' +
175
175
  '</div>' +
176
176
  '<h1 style="font-size:1.5rem;font-weight:700;margin:0 0 .6rem;background:linear-gradient(135deg,#0166FF,#009AFE,#4AEEFF);-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text">Workspace is restarting&hellip;</h1>' +
@@ -562,7 +562,7 @@
562
562
  '<div style="position:relative;width:160px;height:160px;margin:0 auto 1.2rem">' +
563
563
  '<div style="position:absolute;inset:-18px;background:radial-gradient(circle,rgba(1,102,255,.18) 0%,transparent 60%);filter:blur(18px)"></div>' +
564
564
  '<video autoplay loop muted playsinline style="position:relative;width:100%;height:100%;object-fit:contain;border-radius:50%">' +
565
- '<source src="/what-happened.webm" type="video/webm"><source src="/what-happened.mp4" type="video/mp4">' +
565
+ '<source src="/morphy_sad.mov" type=\'video/mp4; codecs="hvc1"\'><source src="/morphy_sad.webm" type="video/webm">' +
566
566
  '</video>' +
567
567
  '</div>' +
568
568
  '<h1 style="font-size:1.5rem;font-weight:700;margin:0 0 .6rem;background:linear-gradient(135deg,#0166FF,#009AFE,#4AEEFF);-webkit-background-clip:text;-webkit-text-fill-color:transparent;background-clip:text">Workspace error</h1>' +
@@ -9,13 +9,15 @@ function DashboardError() {
9
9
  return (
10
10
  <div style={{ background: '#222122', color: '#fff', display: 'flex', flexDirection: 'column', alignItems: 'center', justifyContent: 'center', height: '100dvh', width: '100vw', position: 'fixed', inset: 0, zIndex: 50, fontFamily: 'system-ui, -apple-system, sans-serif', textAlign: 'center', padding: '24px' }}>
11
11
  <video
12
- src="/bloby_say_hi.webm"
13
12
  autoPlay
14
13
  loop
15
14
  muted
16
15
  playsInline
17
- style={{ height: 120, width: 120, borderRadius: '50%', objectFit: 'cover', marginBottom: 32 }}
18
- />
16
+ style={{ height: 120, objectFit: 'contain', marginBottom: 32 }}
17
+ >
18
+ <source src="/morphy_hi.mov" type='video/mp4; codecs="hvc1"' />
19
+ <source src="/morphy_hi.webm" type="video/webm" />
20
+ </video>
19
21
  <h1 style={{ fontSize: 20, fontWeight: 600, marginBottom: 8 }}>Oopss.. Something wrong is not right</h1>
20
22
  <p style={{ fontSize: 14, color: 'rgba(255,255,255,0.5)', maxWidth: 320, lineHeight: 1.5 }}>
21
23
  If your agent is working, this is normal. If not, go poke them
@@ -1 +0,0 @@
1
- import{i as e}from"./bloby-CXmOcb1r.js";export{e as Mermaid};
Binary file