bloby-bot 0.70.7 → 0.70.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/dist-bloby/assets/{bloby-CXmOcb1r.js → bloby-DSNB0g4w.js} +4 -4
  2. package/dist-bloby/assets/{globals-DpO5tO92.js → globals-B3cTbITX.js} +1 -1
  3. package/dist-bloby/assets/{highlighted-body-OFNGDK62-D7cU1Y-Z.js → highlighted-body-OFNGDK62-BLforpkr.js} +1 -1
  4. package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +1 -0
  5. package/dist-bloby/assets/{onboard-B96ELhXn.js → onboard-Dn2Ws_G2.js} +1 -1
  6. package/dist-bloby/bloby.html +2 -2
  7. package/dist-bloby/onboard.html +2 -2
  8. package/package.json +1 -1
  9. package/scripts/sync-pi-models.ts +37 -6
  10. package/supervisor/chat/OnboardWizard.tsx +4 -4
  11. package/supervisor/harnesses/pi/async-queue.ts +7 -11
  12. package/supervisor/harnesses/pi/index.ts +232 -65
  13. package/supervisor/harnesses/pi/models-catalog.generated.ts +840 -210
  14. package/supervisor/harnesses/pi/providers/humanize-error.ts +125 -0
  15. package/supervisor/harnesses/pi/providers/retry.ts +87 -0
  16. package/supervisor/harnesses/pi/providers/stream-anthropic.ts +73 -11
  17. package/supervisor/harnesses/pi/providers/stream-google.ts +15 -5
  18. package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +55 -19
  19. package/supervisor/harnesses/pi/providers/types.ts +26 -1
  20. package/supervisor/harnesses/pi/session.ts +164 -70
  21. package/supervisor/harnesses/pi/sub-providers.ts +30 -1
  22. package/supervisor/harnesses/pi/test-completion.ts +8 -2
  23. package/supervisor/index.ts +11 -10
  24. package/supervisor/public/morphy_sad.mov +0 -0
  25. package/supervisor/public/morphy_sad.webm +0 -0
  26. package/supervisor/shell.ts +1 -1
  27. package/supervisor/workspace-guard.js +1 -1
  28. package/workspace/client/public/morphy_bounce.mov +0 -0
  29. package/workspace/client/public/morphy_bounce.webm +0 -0
  30. package/workspace/client/public/morphy_hi.mov +0 -0
  31. package/workspace/client/public/morphy_hi.webm +0 -0
  32. package/workspace/client/src/App.tsx +5 -3
  33. package/dist-bloby/assets/mermaid-GHXKKRXX-D5YxphBn.js +0 -1
  34. package/supervisor/public/what-happened.mp4 +0 -0
  35. package/supervisor/public/what-happened.webm +0 -0
@@ -6,9 +6,11 @@
6
6
  * matches the Claude harness so the dispatcher needs no provider-specific
7
7
  * code.
8
8
  *
9
- * Phase 1 scope: live conversation + one-shot text only (no tools). The
9
+ * Live conversations run the full tool loop (session.ts); one-shots are still
10
+ * tool-less (audit Phase C will route them through createPiSession). The
10
11
  * non-blocking feel — user keeps typing while the model is still answering —
11
- * comes from the same `AsyncQueue` pattern Claude uses; see `async-queue.ts`.
12
+ * comes from the same `AsyncQueue` pattern Claude uses (one message per turn);
13
+ * see `async-queue.ts` and PI-PARITY-AUDIT-2026-06-11.md.
12
14
  */
13
15
  import { log } from '../../../shared/logger.js';
14
16
  import { WORKSPACE_DIR } from '../../../shared/paths.js';
@@ -26,8 +28,8 @@ export type { RecentMessage, AgentAttachment };
26
28
 
27
29
  import { buildSkillsIndex } from '../skills.js';
28
30
  import { createAsyncQueue, type AsyncQueue } from './async-queue.js';
29
- import { createPiSession, type PiSessionEvent } from './session.js';
30
- import { getPiSubProvider } from './sub-providers.js';
31
+ import { createPiSession, type PiSessionEvent, type PiSessionAuth } from './session.js';
32
+ import { getPiSubProvider, getCatalogModel } from './sub-providers.js';
31
33
  import { readPiAuth } from './auth-storage.js';
32
34
  import { streamProvider } from './providers/stream.js';
33
35
  import type { PiMessage } from './providers/types.js';
@@ -41,11 +43,50 @@ interface LiveConversation {
41
43
  abortController: AbortController;
42
44
  onMessage: (type: string, data: any) => void;
43
45
  busy: boolean;
46
+ /** Messages pushed but not yet completed (1 turn-complete per message) — mirrors
47
+ * claude.ts pendingCount. idle:true on turn-complete only when this hits 0, so
48
+ * the supervisor's session recycling never fires with a message still queued. */
49
+ pendingCount: number;
50
+ /** 60ms micro-batcher for bot:token — collapses per-delta WS frame floods. */
51
+ batcher: TokenBatcher;
44
52
  loopDone: Promise<void> | null;
45
53
  }
46
54
 
47
55
  const liveConversations = new Map<string, LiveConversation>();
48
56
 
57
+ /**
58
+ * Micro-batch streamed deltas into ~60ms bot:token frames (house standard
59
+ * from the codex parity pass — an order-of-magnitude WS frame reduction with
60
+ * no visible change in streaming feel). Callers MUST flush() before emitting
61
+ * any non-token event so ordering and the streamed-text == bot:response
62
+ * contract are preserved; discard() on teardown drops post-abort stragglers.
63
+ */
64
+ interface TokenBatcher {
65
+ add(delta: string): void;
66
+ flush(): void;
67
+ discard(): void;
68
+ }
69
+
70
+ function createTokenBatcher(emit: (text: string) => void, intervalMs = 60): TokenBatcher {
71
+ let buf = '';
72
+ let timer: NodeJS.Timeout | null = null;
73
+ const flush = () => {
74
+ if (timer) { clearTimeout(timer); timer = null; }
75
+ if (buf) { const out = buf; buf = ''; emit(out); }
76
+ };
77
+ return {
78
+ add(delta: string) {
79
+ buf += delta;
80
+ if (!timer) timer = setTimeout(flush, intervalMs);
81
+ },
82
+ flush,
83
+ discard() {
84
+ if (timer) { clearTimeout(timer); timer = null; }
85
+ buf = '';
86
+ },
87
+ };
88
+ }
89
+
49
90
  export function hasConversation(conversationId: string): boolean {
50
91
  return liveConversations.has(conversationId);
51
92
  }
@@ -100,7 +141,7 @@ You are running in a streaming chat where the user can keep typing while you wor
100
141
 
101
142
  - Before kicking off a multi-step task, say one short line acknowledging it ("On it, looking at the widget now.").
102
143
  - Between tool calls on long tasks, drop a brief progress note ("Found the file, checking the layout next.") so the user knows you're still working.
103
- - If a new user message arrives while you're mid-task, you'll see it as a fresh user-role message in the conversation history. Answer it briefly inline, mention you're still working on the main task, then continue.
144
+ - Messages the user sends while you're working are queued and delivered to you one at a time after the current task finishes — each gets its own answer, so never assume you missed one.
104
145
  - Final answers should be concise and concrete.`;
105
146
 
106
147
  async function buildSystemPrompt(
@@ -133,29 +174,56 @@ async function buildSystemPrompt(
133
174
  return systemPrompt;
134
175
  }
135
176
 
136
- /** Resolve sub-provider, base url, api key, model id from saved pi-auth.json. */
137
- function resolveAuth(): {
138
- ok: true;
139
- flavor: ReturnType<typeof getPiSubProvider> extends undefined ? never : NonNullable<ReturnType<typeof getPiSubProvider>>['flavor'];
140
- modelId: string;
141
- baseUrl: string;
142
- apiKey: string;
143
- } | { ok: false; error: string } {
144
- const auth = readPiAuth();
145
- if (!auth) return { ok: false, error: 'Bloby provider is not configured. Run the onboarding wizard.' };
146
- const sub = getPiSubProvider(auth.subProvider);
147
- if (!sub) return { ok: false, error: `Unknown sub-provider in pi-auth.json: ${auth.subProvider}` };
148
- const baseUrl = (auth.baseUrl || sub.baseUrl || '').replace(/\/+$/, '');
177
+ /**
178
+ * Resolve the full provider auth bundle from saved pi-auth.json: sub-provider
179
+ * flavor, base url, api key, model id, plus catalog metadata (per-model output
180
+ * cap, context window) and the sub-provider's max-tokens field quirk.
181
+ *
182
+ * Called at session/one-shot start AND re-called on every live provider round
183
+ * via the session's getAuth thunk — so fixing a revoked key or switching
184
+ * models in the wizard heals a live conversation on its very next round.
185
+ */
186
+ function resolveAuth(): { ok: true; auth: PiSessionAuth } | { ok: false; error: string } {
187
+ const saved = readPiAuth();
188
+ if (!saved) return { ok: false, error: 'Bloby provider is not configured. Run the onboarding wizard.' };
189
+ const sub = getPiSubProvider(saved.subProvider);
190
+ if (!sub) return { ok: false, error: `Unknown sub-provider in pi-auth.json: ${saved.subProvider}` };
191
+ const baseUrl = (saved.baseUrl || sub.baseUrl || '').replace(/\/+$/, '');
149
192
  if (!baseUrl) return { ok: false, error: `No base URL configured for ${sub.id}` };
150
- const modelId = auth.modelId || sub.defaultModel || '';
193
+ const modelId = saved.modelId || sub.defaultModel || '';
151
194
  if (!modelId) return { ok: false, error: `No model selected for ${sub.id}` };
152
- if (sub.needsApiKey && !auth.apiKey) return { ok: false, error: `Missing API key for ${sub.id}` };
195
+ if (sub.needsApiKey && !saved.apiKey) return { ok: false, error: `Missing API key for ${sub.id}` };
196
+ const catalog = getCatalogModel(sub.id, modelId);
197
+
198
+ // Effective window reported to the supervisor's recycler. Two corrections
199
+ // over the raw catalog figure (audit review F1):
200
+ // 1. Anthropic catalog windows can reflect the 1M-context beta; without the
201
+ // beta header (we don't send it) the real window is 200k.
202
+ // 2. Since every request reserves max_tokens of output budget, providers
203
+ // enforce input + max_tokens <= window — the usable INPUT ceiling is
204
+ // window - maxOutputTokens. Reporting the raw window would put the 70%
205
+ // recycle threshold ABOVE that ceiling (e.g. 140k > 200k-64k=136k on
206
+ // claude-haiku-4-5) and the recycler could never preempt the wall.
207
+ let contextWindow = catalog?.contextWindow;
208
+ if (contextWindow && sub.flavor === 'anthropic-messages') {
209
+ contextWindow = Math.min(contextWindow, 200_000);
210
+ }
211
+ if (contextWindow && catalog?.maxOutputTokens) {
212
+ contextWindow = Math.max(0, contextWindow - catalog.maxOutputTokens);
213
+ }
214
+
153
215
  return {
154
216
  ok: true,
155
- flavor: sub.flavor,
156
- modelId,
157
- baseUrl,
158
- apiKey: auth.apiKey || '',
217
+ auth: {
218
+ flavor: sub.flavor,
219
+ modelId,
220
+ baseUrl,
221
+ apiKey: saved.apiKey || '',
222
+ maxOutputTokens: catalog?.maxOutputTokens,
223
+ maxTokensField: sub.maxTokensField,
224
+ includeStreamUsage: sub.noStreamUsage ? false : undefined,
225
+ contextWindow,
226
+ },
159
227
  };
160
228
  }
161
229
 
@@ -208,14 +276,14 @@ export async function startConversation(
208
276
  endConversation(conversationId);
209
277
  }
210
278
 
211
- const auth = resolveAuth();
212
- if (!auth.ok) {
213
- log.warn(`[pi/conversation] Cannot start: ${auth.error}`);
214
- onMessage('bot:error', { conversationId, error: auth.error });
279
+ const resolved = resolveAuth();
280
+ if (!resolved.ok) {
281
+ log.warn(`[pi/conversation] Cannot start: ${resolved.error}`);
282
+ onMessage('bot:error', { conversationId, error: resolved.error });
215
283
  return false;
216
284
  }
217
285
 
218
- log.info(`[pi/conversation] Sub-provider: ${auth.flavor} · model: ${auth.modelId}`);
286
+ log.info(`[pi/conversation] Sub-provider: ${resolved.auth.flavor} · model: ${resolved.auth.modelId}`);
219
287
 
220
288
  const systemPrompt = await buildSystemPrompt(names, recentMessages);
221
289
  log.info(`[pi/conversation] System prompt: ${systemPrompt.length} chars`);
@@ -229,15 +297,24 @@ export async function startConversation(
229
297
  abortController,
230
298
  onMessage,
231
299
  busy: false,
300
+ pendingCount: 0,
301
+ batcher: createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text })),
232
302
  loopDone: null,
233
303
  };
234
304
  liveConversations.set(conversationId, conv);
235
305
 
306
+ // Re-resolve auth on every provider round so a key/model fix in the wizard
307
+ // applies to the next round with full history intact (audit D6-8). Falls
308
+ // back to the last good bundle if pi-auth.json turns unreadable mid-session.
309
+ let currentAuth: PiSessionAuth = resolved.auth;
310
+ const getAuth = (): PiSessionAuth => {
311
+ const fresh = resolveAuth();
312
+ if (fresh.ok) currentAuth = fresh.auth;
313
+ return currentAuth;
314
+ };
315
+
236
316
  const session = createPiSession({
237
- flavor: auth.flavor,
238
- modelId: auth.modelId,
239
- baseUrl: auth.baseUrl,
240
- apiKey: auth.apiKey,
317
+ getAuth,
241
318
  systemPrompt,
242
319
  tools: toolDefsForProvider(),
243
320
  cwd: WORKSPACE_DIR,
@@ -258,6 +335,10 @@ export async function startConversation(
258
335
  }
259
336
  } finally {
260
337
  log.info(`[pi/conversation] Cleaning up conversation ${conversationId}`);
338
+ // Drop any unflushed token stragglers — at teardown the turn is either
339
+ // complete (already flushed before turn_complete) or aborted (tokens
340
+ // from an aborted stream must not surface after the fact).
341
+ conv.batcher.discard();
261
342
  liveConversations.delete(conversationId);
262
343
  onMessage('bot:conversation-ended', { conversationId });
263
344
  }
@@ -268,28 +349,74 @@ export async function startConversation(
268
349
 
269
350
  /** Map session-level events back into bloby's `bot:*` vocabulary. */
270
351
  function translateAndEmit(conv: LiveConversation, evt: PiSessionEvent) {
352
+ if (evt.type === 'text_delta') {
353
+ conv.batcher.add(evt.delta);
354
+ return;
355
+ }
356
+ // Any non-token event flushes the batch first — ordering (tokens before the
357
+ // tool chip / final response) and the streamed-text == bot:response
358
+ // invariant both depend on it.
359
+ conv.batcher.flush();
360
+
271
361
  switch (evt.type) {
272
362
  case 'turn_started':
273
363
  // No bloby event for this — `bot:typing` is already emitted by pushMessage().
274
364
  break;
275
- case 'text_delta':
276
- conv.onMessage('bot:token', { conversationId: conv.id, token: evt.delta });
277
- break;
278
365
  case 'text_end':
279
366
  conv.onMessage('bot:response', { conversationId: conv.id, content: evt.text });
280
367
  break;
281
368
  case 'tool_use':
282
369
  conv.onMessage('bot:tool', { conversationId: conv.id, name: evt.name, input: evt.input });
283
370
  break;
284
- case 'turn_complete':
285
- conv.busy = false;
286
- conv.onMessage('bot:turn-complete', { conversationId: conv.id, usedFileTools: evt.usedFileTools });
287
- log.info(`[pi/conversation] ──── TURN COMPLETE ──── busy=false`);
371
+ case 'tool_result':
372
+ // Not surfaced yet (Phase D: translate to a bot:tool progress pulse).
288
373
  break;
289
- case 'error':
374
+ case 'turn_complete': {
290
375
  conv.busy = false;
291
- conv.onMessage('bot:error', { conversationId: conv.id, error: evt.error });
376
+ // One turn-complete per pushed message (D1-1 restored that invariant);
377
+ // idle gates the supervisor's proactive recycling so it never fires with
378
+ // a message still queued — claude.ts pendingCount semantics exactly.
379
+ conv.pendingCount = Math.max(0, conv.pendingCount - 1);
380
+ const idle = conv.pendingCount === 0;
381
+ // Prompt occupancy of the last provider round — input + cache reads +
382
+ // cache writes, exactly claude.ts's contextTokens math. Output tokens
383
+ // are NOT added (claude doesn't either; the recycler's 70% threshold
384
+ // absorbs the next-turn growth).
385
+ const contextTokens = evt.usage
386
+ ? (evt.usage.inputTokens || 0) + (evt.usage.cacheReadTokens || 0) + (evt.usage.cacheCreationTokens || 0)
387
+ : 0;
388
+ conv.onMessage('bot:turn-complete', {
389
+ conversationId: conv.id,
390
+ usedFileTools: evt.usedFileTools,
391
+ contextTokens,
392
+ contextWindow: evt.contextWindow || 0,
393
+ idle,
394
+ });
395
+ log.info(`[pi/conversation] ──── TURN COMPLETE ──── busy=false ctx=${contextTokens}/${evt.contextWindow || 'n/a'} idle=${idle}`);
396
+ break;
397
+ }
398
+ case 'error': {
399
+ // busy is NOT cleared here (audit D1-9): turn_complete is the single
400
+ // busy=false site and the session guarantees it on every non-aborted
401
+ // turn; an aborted/fatal path is torn down via bot:conversation-ended.
402
+ const fatal = evt.kind === 'auth' || evt.kind === 'context-overflow';
403
+ const remedy = evt.kind === 'context-overflow'
404
+ ? ' Starting a fresh session — send your message again to continue.'
405
+ : evt.kind === 'auth'
406
+ ? ' I\'ll reconnect with the new key as soon as it\'s saved.'
407
+ : '';
408
+ conv.onMessage('bot:error', { conversationId: conv.id, error: `${evt.error}${remedy}` });
409
+ if (fatal) {
410
+ // Unrecoverable for this session (audit D6-4): an over-window history
411
+ // would re-fail on every future turn, and a dead key has no business
412
+ // keeping the loop alive. Tear down — the finally emits
413
+ // bot:conversation-ended (routes + flags clear) and the next user
414
+ // message cold-starts a fresh session with re-injected history.
415
+ log.warn(`[pi/conversation] Fatal provider error (${evt.kind}) — recycling session ${conv.id}`);
416
+ endConversation(conv.id);
417
+ }
292
418
  break;
419
+ }
293
420
  }
294
421
  }
295
422
 
@@ -305,8 +432,9 @@ export function pushMessage(
305
432
  return false;
306
433
  }
307
434
 
308
- log.info(`[pi/conversation] ──── PUSH MESSAGE ──── busy=${conv.busy}`);
435
+ log.info(`[pi/conversation] ──── PUSH MESSAGE ──── busy=${conv.busy} pending=${conv.pendingCount + 1}`);
309
436
  conv.busy = true;
437
+ conv.pendingCount += 1;
310
438
  conv.inputQueue.push(buildUserMessage(content, attachments, savedFiles));
311
439
  conv.onMessage('bot:typing', { conversationId });
312
440
  return true;
@@ -317,6 +445,7 @@ export function endConversation(conversationId: string): void {
317
445
  if (!conv) return;
318
446
 
319
447
  log.info(`[pi/conversation] ──── ENDING CONVERSATION ${conversationId} ────`);
448
+ conv.batcher.discard();
320
449
  conv.inputQueue.end();
321
450
  conv.abortController.abort();
322
451
  liveConversations.delete(conversationId);
@@ -373,21 +502,21 @@ export async function startBlobyAgentQuery(
373
502
  supportPrompt?: string,
374
503
  _maxTurns?: number,
375
504
  ): Promise<void> {
376
- const auth = resolveAuth();
377
- if (!auth.ok) {
378
- onMessage('bot:error', { conversationId, error: auth.error });
505
+ const resolved = resolveAuth();
506
+ if (!resolved.ok) {
507
+ onMessage('bot:error', { conversationId, error: resolved.error });
508
+ // bot:done frees the caller's slot (WhatsApp activeAgents / scheduler) — without it
509
+ // each distinct customer hitting this path pins one of the 5 concurrent slots until
510
+ // supervisor restart (audit D3-2; mirrors claude.ts:620).
511
+ onMessage('bot:done', { conversationId, usedFileTools: false });
379
512
  return;
380
513
  }
514
+ const auth = resolved.auth;
381
515
 
382
- const abortController = new AbortController();
383
- activeQueries.set(conversationId, abortController);
384
- // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
385
- // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
386
- const watchdog = setTimeout(() => {
387
- log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
388
- abortController.abort();
389
- }, 300_000);
390
-
516
+ // Build the prompt BEFORE registering in activeQueries / arming the watchdog
517
+ // (claude.ts ordering): if anything in here ever rejected after registration,
518
+ // the entry would leak forever — anyOneShotActive() stuck true defers every
519
+ // backend restart/self-update, and the caller's slot never frees.
391
520
  let systemPrompt: string;
392
521
  if (supportPrompt) {
393
522
  systemPrompt = supportPrompt;
@@ -398,11 +527,23 @@ export async function startBlobyAgentQuery(
398
527
  const messages: PiMessage[] = recentToPiMessages(recentMessages);
399
528
  messages.push(buildUserMessage(prompt, attachments, savedFiles));
400
529
 
530
+ const abortController = new AbortController();
531
+ activeQueries.set(conversationId, abortController);
532
+ // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
533
+ // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
534
+ const watchdog = setTimeout(() => {
535
+ log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
536
+ abortController.abort();
537
+ }, 300_000);
538
+
401
539
  onMessage('bot:typing', { conversationId });
402
540
 
403
541
  let accumulated = '';
404
542
  const usedTools = new Set<string>();
405
- let errored = false;
543
+ // Errors are stashed, not emitted inline — at the end, partial text wins
544
+ // over the error bubble (audit D3-5/D6-2, claude.ts:730-737 precedence).
545
+ let errorMsg: string | null = null;
546
+ const batcher = createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text }));
406
547
 
407
548
  try {
408
549
  const stream = streamProvider(auth.flavor, {
@@ -411,6 +552,9 @@ export async function startBlobyAgentQuery(
411
552
  apiKey: auth.apiKey,
412
553
  systemPrompt,
413
554
  messages,
555
+ maxOutputTokens: auth.maxOutputTokens,
556
+ maxTokensField: auth.maxTokensField,
557
+ includeStreamUsage: auth.includeStreamUsage,
414
558
  signal: abortController.signal,
415
559
  });
416
560
 
@@ -419,30 +563,46 @@ export async function startBlobyAgentQuery(
419
563
  switch (evt.type) {
420
564
  case 'text_delta':
421
565
  accumulated += evt.delta;
422
- onMessage('bot:token', { conversationId, token: evt.delta });
566
+ batcher.add(evt.delta);
423
567
  break;
424
568
  case 'text_end':
569
+ batcher.flush();
425
570
  accumulated = evt.text;
426
571
  break;
427
572
  case 'tool_use':
573
+ batcher.flush();
428
574
  usedTools.add(evt.name);
429
575
  onMessage('bot:tool', { conversationId, name: evt.name, input: evt.input });
430
576
  break;
431
577
  case 'error':
432
- errored = true;
433
- onMessage('bot:error', { conversationId, error: evt.error });
578
+ batcher.flush();
579
+ errorMsg = evt.error;
434
580
  break;
435
581
  }
436
582
  }
437
- if (accumulated && !errored) {
438
- onMessage('bot:response', { conversationId, content: accumulated });
583
+ // Abort guard (audit D3-8): a watchdog-aborted run must not surface a
584
+ // truncated reply — a stopped pulse could otherwise still fire <Message>
585
+ // pushes with half-finished content.
586
+ if (!abortController.signal.aborted) {
587
+ batcher.flush();
588
+ if (accumulated) {
589
+ onMessage('bot:response', { conversationId, content: accumulated });
590
+ } else if (errorMsg) {
591
+ onMessage('bot:error', { conversationId, error: errorMsg });
592
+ }
439
593
  }
440
594
  } catch (err: any) {
441
595
  if (!abortController.signal.aborted) {
442
596
  log.warn(`[pi/bloby-agent] one-shot error: ${err?.message || err}`);
443
- onMessage('bot:error', { conversationId, error: err?.message || String(err) });
597
+ batcher.flush();
598
+ if (accumulated) {
599
+ onMessage('bot:response', { conversationId, content: accumulated });
600
+ } else {
601
+ onMessage('bot:error', { conversationId, error: err?.message || String(err) });
602
+ }
444
603
  }
445
604
  } finally {
605
+ batcher.discard();
446
606
  clearTimeout(watchdog);
447
607
  activeQueries.delete(conversationId);
448
608
  const FILE_TOOL_NAMES = ['Write', 'Edit', 'write', 'edit'];
@@ -462,8 +622,9 @@ export function stopBlobyAgentQuery(conversationId: string): void {
462
622
  // ── Workspace agent endpoint (POST /api/agent/query) ──────────────────────
463
623
 
464
624
  export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryResult> {
465
- const auth = resolveAuth();
466
- if (!auth.ok) return { ok: false, error: auth.error };
625
+ const resolved = resolveAuth();
626
+ if (!resolved.ok) return { ok: false, error: resolved.error };
627
+ const auth = resolved.auth;
467
628
 
468
629
  const timeout = Math.min(Math.max(req.timeout || 120_000, 5_000), 300_000);
469
630
  const abortController = new AbortController();
@@ -487,6 +648,9 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
487
648
  apiKey: auth.apiKey,
488
649
  systemPrompt,
489
650
  messages,
651
+ maxOutputTokens: auth.maxOutputTokens,
652
+ maxTokensField: auth.maxTokensField,
653
+ includeStreamUsage: auth.includeStreamUsage,
490
654
  signal: abortController.signal,
491
655
  });
492
656
 
@@ -517,7 +681,10 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
517
681
  clearTimeout(timeoutHandle);
518
682
  }
519
683
 
520
- if (errored) return { ok: false, error: errorMsg || 'Agent query failed' };
684
+ // Partial-text precedence (claude parity, audit D6-2): if the model streamed
685
+ // anything before failing, return it as a successful (truncated) response —
686
+ // claude's runAgentQuery only reports the error when nothing streamed.
687
+ if (errored && !fullText) return { ok: false, error: errorMsg || 'Agent query failed' };
521
688
 
522
689
  const usedFileTools = ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
523
690
  return { ok: true, response: fullText, toolsUsed: Array.from(usedTools), usedFileTools };