bloby-bot 0.70.8 → 0.70.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist-bloby/assets/{bloby-CXmOcb1r.js → bloby-DSNB0g4w.js} +4 -4
- package/dist-bloby/assets/{globals-DpO5tO92.js → globals-B3cTbITX.js} +1 -1
- package/dist-bloby/assets/{highlighted-body-OFNGDK62-D7cU1Y-Z.js → highlighted-body-OFNGDK62-BLforpkr.js} +1 -1
- package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +1 -0
- package/dist-bloby/assets/{onboard-B96ELhXn.js → onboard-Dn2Ws_G2.js} +1 -1
- package/dist-bloby/bloby.html +2 -2
- package/dist-bloby/onboard.html +2 -2
- package/package.json +1 -1
- package/scripts/sync-pi-models.ts +37 -6
- package/supervisor/chat/OnboardWizard.tsx +4 -4
- package/supervisor/harnesses/pi/async-queue.ts +7 -11
- package/supervisor/harnesses/pi/index.ts +232 -65
- package/supervisor/harnesses/pi/models-catalog.generated.ts +840 -210
- package/supervisor/harnesses/pi/providers/humanize-error.ts +125 -0
- package/supervisor/harnesses/pi/providers/retry.ts +87 -0
- package/supervisor/harnesses/pi/providers/stream-anthropic.ts +73 -11
- package/supervisor/harnesses/pi/providers/stream-google.ts +15 -5
- package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +55 -19
- package/supervisor/harnesses/pi/providers/types.ts +26 -1
- package/supervisor/harnesses/pi/session.ts +164 -70
- package/supervisor/harnesses/pi/sub-providers.ts +30 -1
- package/supervisor/harnesses/pi/test-completion.ts +8 -2
- package/supervisor/index.ts +11 -10
- package/supervisor/public/morphy_sad.mov +0 -0
- package/supervisor/public/morphy_sad.webm +0 -0
- package/supervisor/shell.ts +1 -1
- package/supervisor/workspace-guard.js +1 -1
- package/workspace/client/public/morphy_bounce.mov +0 -0
- package/workspace/client/public/morphy_bounce.webm +0 -0
- package/workspace/client/public/morphy_hi.mov +0 -0
- package/workspace/client/public/morphy_hi.webm +0 -0
- package/workspace/client/src/App.tsx +5 -3
- package/dist-bloby/assets/mermaid-GHXKKRXX-D5YxphBn.js +0 -1
- package/supervisor/public/what-happened.mp4 +0 -0
- package/supervisor/public/what-happened.webm +0 -0
|
@@ -6,9 +6,11 @@
|
|
|
6
6
|
* matches the Claude harness so the dispatcher needs no provider-specific
|
|
7
7
|
* code.
|
|
8
8
|
*
|
|
9
|
-
*
|
|
9
|
+
* Live conversations run the full tool loop (session.ts); one-shots are still
|
|
10
|
+
* tool-less (audit Phase C will route them through createPiSession). The
|
|
10
11
|
* non-blocking feel — user keeps typing while the model is still answering —
|
|
11
|
-
* comes from the same `AsyncQueue` pattern Claude uses
|
|
12
|
+
* comes from the same `AsyncQueue` pattern Claude uses (one message per turn);
|
|
13
|
+
* see `async-queue.ts` and PI-PARITY-AUDIT-2026-06-11.md.
|
|
12
14
|
*/
|
|
13
15
|
import { log } from '../../../shared/logger.js';
|
|
14
16
|
import { WORKSPACE_DIR } from '../../../shared/paths.js';
|
|
@@ -26,8 +28,8 @@ export type { RecentMessage, AgentAttachment };
|
|
|
26
28
|
|
|
27
29
|
import { buildSkillsIndex } from '../skills.js';
|
|
28
30
|
import { createAsyncQueue, type AsyncQueue } from './async-queue.js';
|
|
29
|
-
import { createPiSession, type PiSessionEvent } from './session.js';
|
|
30
|
-
import { getPiSubProvider } from './sub-providers.js';
|
|
31
|
+
import { createPiSession, type PiSessionEvent, type PiSessionAuth } from './session.js';
|
|
32
|
+
import { getPiSubProvider, getCatalogModel } from './sub-providers.js';
|
|
31
33
|
import { readPiAuth } from './auth-storage.js';
|
|
32
34
|
import { streamProvider } from './providers/stream.js';
|
|
33
35
|
import type { PiMessage } from './providers/types.js';
|
|
@@ -41,11 +43,50 @@ interface LiveConversation {
|
|
|
41
43
|
abortController: AbortController;
|
|
42
44
|
onMessage: (type: string, data: any) => void;
|
|
43
45
|
busy: boolean;
|
|
46
|
+
/** Messages pushed but not yet completed (1 turn-complete per message) — mirrors
|
|
47
|
+
* claude.ts pendingCount. idle:true on turn-complete only when this hits 0, so
|
|
48
|
+
* the supervisor's session recycling never fires with a message still queued. */
|
|
49
|
+
pendingCount: number;
|
|
50
|
+
/** 60ms micro-batcher for bot:token — collapses per-delta WS frame floods. */
|
|
51
|
+
batcher: TokenBatcher;
|
|
44
52
|
loopDone: Promise<void> | null;
|
|
45
53
|
}
|
|
46
54
|
|
|
47
55
|
const liveConversations = new Map<string, LiveConversation>();
|
|
48
56
|
|
|
57
|
+
/**
 * Micro-batch streamed deltas into ~60ms bot:token frames (house standard
 * from the codex parity pass — an order-of-magnitude WS frame reduction with
 * no visible change in streaming feel). Callers MUST flush() before emitting
 * any non-token event so ordering and the streamed-text == bot:response
 * contract are preserved.
 *
 * discard() is terminal: it drops whatever is buffered, cancels the pending
 * timer and closes the batcher, so deltas that arrive after teardown (e.g.
 * chunks still draining from an aborted stream) can never re-arm the timer
 * and surface as a late bot:token frame.
 */
interface TokenBatcher {
  /** Buffer a delta and arm the flush timer if none is pending. No-op once discarded. */
  add(delta: string): void;
  /** Cancel the pending timer and emit the buffered text, if any, as one frame. */
  flush(): void;
  /** Drop buffered text, cancel the pending timer and ignore every later add(). */
  discard(): void;
}

/**
 * Create a batcher that coalesces deltas and hands them to `emit` as a single
 * string, either when `intervalMs` elapses after the first buffered delta or
 * when flush() is called explicitly.
 *
 * @param emit       Sink for each coalesced chunk; never called with an empty string.
 * @param intervalMs Maximum time a delta may sit in the buffer before it is emitted.
 * @returns The batcher handle (add / flush / discard).
 */
function createTokenBatcher(emit: (text: string) => void, intervalMs = 60): TokenBatcher {
  let buf = '';
  // ReturnType<typeof setTimeout> keeps the handle type correct without
  // depending on the NodeJS namespace being declared.
  let timer: ReturnType<typeof setTimeout> | null = null;
  // Set by discard(); once true the batcher accepts no further input.
  let closed = false;

  const cancelTimer = () => {
    if (timer !== null) {
      clearTimeout(timer);
      timer = null;
    }
  };

  const flush = () => {
    cancelTimer();
    if (!buf) return;
    // Clear the buffer BEFORE emitting so a throwing or re-entrant emit
    // cannot cause the same text to be sent twice.
    const out = buf;
    buf = '';
    emit(out);
  };

  return {
    add(delta: string) {
      // Post-teardown stragglers and empty deltas must not arm a timer.
      if (closed || !delta) return;
      buf += delta;
      if (timer === null) timer = setTimeout(flush, intervalMs);
    },
    flush,
    discard() {
      closed = true;
      cancelTimer();
      buf = '';
    },
  };
}
|
|
89
|
+
|
|
49
90
|
export function hasConversation(conversationId: string): boolean {
|
|
50
91
|
return liveConversations.has(conversationId);
|
|
51
92
|
}
|
|
@@ -100,7 +141,7 @@ You are running in a streaming chat where the user can keep typing while you wor
|
|
|
100
141
|
|
|
101
142
|
- Before kicking off a multi-step task, say one short line acknowledging it ("On it, looking at the widget now.").
|
|
102
143
|
- Between tool calls on long tasks, drop a brief progress note ("Found the file, checking the layout next.") so the user knows you're still working.
|
|
103
|
-
-
|
|
144
|
+
- Messages the user sends while you're working are queued and delivered to you one at a time after the current task finishes — each gets its own answer, so never assume you missed one.
|
|
104
145
|
- Final answers should be concise and concrete.`;
|
|
105
146
|
|
|
106
147
|
async function buildSystemPrompt(
|
|
@@ -133,29 +174,56 @@ async function buildSystemPrompt(
|
|
|
133
174
|
return systemPrompt;
|
|
134
175
|
}
|
|
135
176
|
|
|
136
|
-
/**
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
const
|
|
147
|
-
if (!
|
|
148
|
-
const
|
|
177
|
+
/**
 * Build the provider auth bundle from the saved pi-auth.json.
 *
 * Combines the saved settings (sub-provider, base URL, API key, model id)
 * with the sub-provider's defaults and the catalog entry for the chosen
 * model (output cap, context window). Returns `{ ok: false, error }` with a
 * user-facing message as soon as a required piece is missing.
 *
 * Used by the one-shot entry points and by startConversation, which also
 * wraps it in the `getAuth` thunk handed to createPiSession so that a
 * corrected key or model in the wizard can be picked up by a live session.
 */
function resolveAuth(): { ok: true; auth: PiSessionAuth } | { ok: false; error: string } {
  const fail = (error: string): { ok: false; error: string } => ({ ok: false, error });

  const stored = readPiAuth();
  if (!stored) {
    return fail('Bloby provider is not configured. Run the onboarding wizard.');
  }

  const provider = getPiSubProvider(stored.subProvider);
  if (!provider) {
    return fail(`Unknown sub-provider in pi-auth.json: ${stored.subProvider}`);
  }

  // Saved value wins over the sub-provider default; trailing slashes are stripped.
  const baseUrl = (stored.baseUrl || provider.baseUrl || '').replace(/\/+$/, '');
  if (!baseUrl) {
    return fail(`No base URL configured for ${provider.id}`);
  }

  const modelId = stored.modelId || provider.defaultModel || '';
  if (!modelId) {
    return fail(`No model selected for ${provider.id}`);
  }

  if (provider.needsApiKey && !stored.apiKey) {
    return fail(`Missing API key for ${provider.id}`);
  }

  const model = getCatalogModel(provider.id, modelId);

  // Effective window reported to the supervisor's recycler. The raw catalog
  // figure is corrected twice (audit review F1):
  //   1. Anthropic catalog windows can reflect the 1M-context beta; the beta
  //      header is not sent, so the window is capped at 200k.
  //   2. Every request reserves max_tokens of output budget and providers
  //      enforce input + max_tokens <= window, so the usable INPUT ceiling is
  //      window - maxOutputTokens. Reporting the raw window would place the
  //      70% recycle threshold above that ceiling (e.g. 140k > 200k-64k=136k
  //      on claude-haiku-4-5) and the recycler could never preempt the wall.
  // An unknown (missing or zero) window is passed through untouched.
  let contextWindow = model?.contextWindow;
  if (contextWindow) {
    if (provider.flavor === 'anthropic-messages') {
      contextWindow = Math.min(contextWindow, 200_000);
    }
    if (model?.maxOutputTokens) {
      contextWindow = Math.max(0, contextWindow - model.maxOutputTokens);
    }
  }

  const auth: PiSessionAuth = {
    flavor: provider.flavor,
    modelId,
    baseUrl,
    apiKey: stored.apiKey || '',
    maxOutputTokens: model?.maxOutputTokens,
    maxTokensField: provider.maxTokensField,
    // Only force the flag off for sub-providers marked noStreamUsage;
    // otherwise leave it undefined so the provider layer's default applies.
    includeStreamUsage: provider.noStreamUsage ? false : undefined,
    contextWindow,
  };
  return { ok: true, auth };
}
|
|
161
229
|
|
|
@@ -208,14 +276,14 @@ export async function startConversation(
|
|
|
208
276
|
endConversation(conversationId);
|
|
209
277
|
}
|
|
210
278
|
|
|
211
|
-
const
|
|
212
|
-
if (!
|
|
213
|
-
log.warn(`[pi/conversation] Cannot start: ${
|
|
214
|
-
onMessage('bot:error', { conversationId, error:
|
|
279
|
+
const resolved = resolveAuth();
|
|
280
|
+
if (!resolved.ok) {
|
|
281
|
+
log.warn(`[pi/conversation] Cannot start: ${resolved.error}`);
|
|
282
|
+
onMessage('bot:error', { conversationId, error: resolved.error });
|
|
215
283
|
return false;
|
|
216
284
|
}
|
|
217
285
|
|
|
218
|
-
log.info(`[pi/conversation] Sub-provider: ${auth.flavor} · model: ${auth.modelId}`);
|
|
286
|
+
log.info(`[pi/conversation] Sub-provider: ${resolved.auth.flavor} · model: ${resolved.auth.modelId}`);
|
|
219
287
|
|
|
220
288
|
const systemPrompt = await buildSystemPrompt(names, recentMessages);
|
|
221
289
|
log.info(`[pi/conversation] System prompt: ${systemPrompt.length} chars`);
|
|
@@ -229,15 +297,24 @@ export async function startConversation(
|
|
|
229
297
|
abortController,
|
|
230
298
|
onMessage,
|
|
231
299
|
busy: false,
|
|
300
|
+
pendingCount: 0,
|
|
301
|
+
batcher: createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text })),
|
|
232
302
|
loopDone: null,
|
|
233
303
|
};
|
|
234
304
|
liveConversations.set(conversationId, conv);
|
|
235
305
|
|
|
306
|
+
// Re-resolve auth on every provider round so a key/model fix in the wizard
|
|
307
|
+
// applies to the next round with full history intact (audit D6-8). Falls
|
|
308
|
+
// back to the last good bundle if pi-auth.json turns unreadable mid-session.
|
|
309
|
+
let currentAuth: PiSessionAuth = resolved.auth;
|
|
310
|
+
const getAuth = (): PiSessionAuth => {
|
|
311
|
+
const fresh = resolveAuth();
|
|
312
|
+
if (fresh.ok) currentAuth = fresh.auth;
|
|
313
|
+
return currentAuth;
|
|
314
|
+
};
|
|
315
|
+
|
|
236
316
|
const session = createPiSession({
|
|
237
|
-
|
|
238
|
-
modelId: auth.modelId,
|
|
239
|
-
baseUrl: auth.baseUrl,
|
|
240
|
-
apiKey: auth.apiKey,
|
|
317
|
+
getAuth,
|
|
241
318
|
systemPrompt,
|
|
242
319
|
tools: toolDefsForProvider(),
|
|
243
320
|
cwd: WORKSPACE_DIR,
|
|
@@ -258,6 +335,10 @@ export async function startConversation(
|
|
|
258
335
|
}
|
|
259
336
|
} finally {
|
|
260
337
|
log.info(`[pi/conversation] Cleaning up conversation ${conversationId}`);
|
|
338
|
+
// Drop any unflushed token stragglers — at teardown the turn is either
|
|
339
|
+
// complete (already flushed before turn_complete) or aborted (tokens
|
|
340
|
+
// from an aborted stream must not surface after the fact).
|
|
341
|
+
conv.batcher.discard();
|
|
261
342
|
liveConversations.delete(conversationId);
|
|
262
343
|
onMessage('bot:conversation-ended', { conversationId });
|
|
263
344
|
}
|
|
@@ -268,28 +349,74 @@ export async function startConversation(
|
|
|
268
349
|
|
|
269
350
|
/**
 * Translate one session-level event into bloby's `bot:*` vocabulary and
 * deliver it through the conversation's `onMessage` callback.
 *
 * Text deltas are routed through the conversation's token batcher; every
 * other event flushes the batcher first so streamed tokens always precede
 * the tool chip / final response they belong to. `turn_started` and
 * `tool_result` produce no bloby event.
 */
function translateAndEmit(conv: LiveConversation, evt: PiSessionEvent) {
  if (evt.type === 'text_delta') {
    conv.batcher.add(evt.delta);
    return;
  }

  // Any non-token event flushes the batch first — ordering (tokens before the
  // tool chip / final response) and the streamed-text == bot:response
  // invariant both depend on it.
  conv.batcher.flush();

  const conversationId = conv.id;

  if (evt.type === 'text_end') {
    conv.onMessage('bot:response', { conversationId, content: evt.text });
    return;
  }

  if (evt.type === 'tool_use') {
    conv.onMessage('bot:tool', { conversationId, name: evt.name, input: evt.input });
    return;
  }

  if (evt.type === 'turn_complete') {
    // NOTE(review): busy is cleared even when pendingCount stays above zero
    // (a message is still queued) — confirm that consumers of `busy` expect this.
    conv.busy = false;

    // One turn-complete per pushed message; `idle` is true only when nothing
    // is left in flight, which gates the supervisor's proactive recycling.
    conv.pendingCount = Math.max(0, conv.pendingCount - 1);
    const idle = conv.pendingCount === 0;

    // Prompt occupancy of the last provider round: input + cache reads +
    // cache writes. Output tokens are deliberately not counted.
    let contextTokens = 0;
    if (evt.usage) {
      const { inputTokens, cacheReadTokens, cacheCreationTokens } = evt.usage;
      contextTokens = (inputTokens || 0) + (cacheReadTokens || 0) + (cacheCreationTokens || 0);
    }

    conv.onMessage('bot:turn-complete', {
      conversationId,
      usedFileTools: evt.usedFileTools,
      contextTokens,
      contextWindow: evt.contextWindow || 0,
      idle,
    });
    log.info(`[pi/conversation] ──── TURN COMPLETE ──── busy=false ctx=${contextTokens}/${evt.contextWindow || 'n/a'} idle=${idle}`);
    return;
  }

  if (evt.type === 'error') {
    // busy is NOT cleared here (audit D1-9): turn_complete is the single
    // busy=false site; a fatal path is torn down below instead.
    let remedy = '';
    if (evt.kind === 'context-overflow') {
      remedy = ' Starting a fresh session — send your message again to continue.';
    } else if (evt.kind === 'auth') {
      remedy = ' I\'ll reconnect with the new key as soon as it\'s saved.';
    }

    conv.onMessage('bot:error', { conversationId, error: `${evt.error}${remedy}` });

    // Auth and context-overflow failures are unrecoverable for this session
    // (audit D6-4): end the conversation so the next user message starts a
    // fresh one.
    const fatal = evt.kind === 'auth' || evt.kind === 'context-overflow';
    if (fatal) {
      log.warn(`[pi/conversation] Fatal provider error (${evt.kind}) — recycling session ${conv.id}`);
      endConversation(conv.id);
    }
    return;
  }

  // turn_started: no bloby event — `bot:typing` is already emitted by pushMessage().
  // tool_result: not surfaced yet (Phase D: translate to a bot:tool progress pulse).
}
|
|
295
422
|
|
|
@@ -305,8 +432,9 @@ export function pushMessage(
|
|
|
305
432
|
return false;
|
|
306
433
|
}
|
|
307
434
|
|
|
308
|
-
log.info(`[pi/conversation] ──── PUSH MESSAGE ──── busy=${conv.busy}`);
|
|
435
|
+
log.info(`[pi/conversation] ──── PUSH MESSAGE ──── busy=${conv.busy} pending=${conv.pendingCount + 1}`);
|
|
309
436
|
conv.busy = true;
|
|
437
|
+
conv.pendingCount += 1;
|
|
310
438
|
conv.inputQueue.push(buildUserMessage(content, attachments, savedFiles));
|
|
311
439
|
conv.onMessage('bot:typing', { conversationId });
|
|
312
440
|
return true;
|
|
@@ -317,6 +445,7 @@ export function endConversation(conversationId: string): void {
|
|
|
317
445
|
if (!conv) return;
|
|
318
446
|
|
|
319
447
|
log.info(`[pi/conversation] ──── ENDING CONVERSATION ${conversationId} ────`);
|
|
448
|
+
conv.batcher.discard();
|
|
320
449
|
conv.inputQueue.end();
|
|
321
450
|
conv.abortController.abort();
|
|
322
451
|
liveConversations.delete(conversationId);
|
|
@@ -373,21 +502,21 @@ export async function startBlobyAgentQuery(
|
|
|
373
502
|
supportPrompt?: string,
|
|
374
503
|
_maxTurns?: number,
|
|
375
504
|
): Promise<void> {
|
|
376
|
-
const
|
|
377
|
-
if (!
|
|
378
|
-
onMessage('bot:error', { conversationId, error:
|
|
505
|
+
const resolved = resolveAuth();
|
|
506
|
+
if (!resolved.ok) {
|
|
507
|
+
onMessage('bot:error', { conversationId, error: resolved.error });
|
|
508
|
+
// bot:done frees the caller's slot (WhatsApp activeAgents / scheduler) — without it
|
|
509
|
+
// each distinct customer hitting this path pins one of the 5 concurrent slots until
|
|
510
|
+
// supervisor restart (audit D3-2; mirrors claude.ts:620).
|
|
511
|
+
onMessage('bot:done', { conversationId, usedFileTools: false });
|
|
379
512
|
return;
|
|
380
513
|
}
|
|
514
|
+
const auth = resolved.auth;
|
|
381
515
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
//
|
|
385
|
-
//
|
|
386
|
-
const watchdog = setTimeout(() => {
|
|
387
|
-
log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
|
|
388
|
-
abortController.abort();
|
|
389
|
-
}, 300_000);
|
|
390
|
-
|
|
516
|
+
// Build the prompt BEFORE registering in activeQueries / arming the watchdog
|
|
517
|
+
// (claude.ts ordering): if anything in here ever rejected after registration,
|
|
518
|
+
// the entry would leak forever — anyOneShotActive() stuck true defers every
|
|
519
|
+
// backend restart/self-update, and the caller's slot never frees.
|
|
391
520
|
let systemPrompt: string;
|
|
392
521
|
if (supportPrompt) {
|
|
393
522
|
systemPrompt = supportPrompt;
|
|
@@ -398,11 +527,23 @@ export async function startBlobyAgentQuery(
|
|
|
398
527
|
const messages: PiMessage[] = recentToPiMessages(recentMessages);
|
|
399
528
|
messages.push(buildUserMessage(prompt, attachments, savedFiles));
|
|
400
529
|
|
|
530
|
+
const abortController = new AbortController();
|
|
531
|
+
activeQueries.set(conversationId, abortController);
|
|
532
|
+
// Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
|
|
533
|
+
// runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
|
|
534
|
+
const watchdog = setTimeout(() => {
|
|
535
|
+
log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
|
|
536
|
+
abortController.abort();
|
|
537
|
+
}, 300_000);
|
|
538
|
+
|
|
401
539
|
onMessage('bot:typing', { conversationId });
|
|
402
540
|
|
|
403
541
|
let accumulated = '';
|
|
404
542
|
const usedTools = new Set<string>();
|
|
405
|
-
|
|
543
|
+
// Errors are stashed, not emitted inline — at the end, partial text wins
|
|
544
|
+
// over the error bubble (audit D3-5/D6-2, claude.ts:730-737 precedence).
|
|
545
|
+
let errorMsg: string | null = null;
|
|
546
|
+
const batcher = createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text }));
|
|
406
547
|
|
|
407
548
|
try {
|
|
408
549
|
const stream = streamProvider(auth.flavor, {
|
|
@@ -411,6 +552,9 @@ export async function startBlobyAgentQuery(
|
|
|
411
552
|
apiKey: auth.apiKey,
|
|
412
553
|
systemPrompt,
|
|
413
554
|
messages,
|
|
555
|
+
maxOutputTokens: auth.maxOutputTokens,
|
|
556
|
+
maxTokensField: auth.maxTokensField,
|
|
557
|
+
includeStreamUsage: auth.includeStreamUsage,
|
|
414
558
|
signal: abortController.signal,
|
|
415
559
|
});
|
|
416
560
|
|
|
@@ -419,30 +563,46 @@ export async function startBlobyAgentQuery(
|
|
|
419
563
|
switch (evt.type) {
|
|
420
564
|
case 'text_delta':
|
|
421
565
|
accumulated += evt.delta;
|
|
422
|
-
|
|
566
|
+
batcher.add(evt.delta);
|
|
423
567
|
break;
|
|
424
568
|
case 'text_end':
|
|
569
|
+
batcher.flush();
|
|
425
570
|
accumulated = evt.text;
|
|
426
571
|
break;
|
|
427
572
|
case 'tool_use':
|
|
573
|
+
batcher.flush();
|
|
428
574
|
usedTools.add(evt.name);
|
|
429
575
|
onMessage('bot:tool', { conversationId, name: evt.name, input: evt.input });
|
|
430
576
|
break;
|
|
431
577
|
case 'error':
|
|
432
|
-
|
|
433
|
-
|
|
578
|
+
batcher.flush();
|
|
579
|
+
errorMsg = evt.error;
|
|
434
580
|
break;
|
|
435
581
|
}
|
|
436
582
|
}
|
|
437
|
-
|
|
438
|
-
|
|
583
|
+
// Abort guard (audit D3-8): a watchdog-aborted run must not surface a
|
|
584
|
+
// truncated reply — a stopped pulse could otherwise still fire <Message>
|
|
585
|
+
// pushes with half-finished content.
|
|
586
|
+
if (!abortController.signal.aborted) {
|
|
587
|
+
batcher.flush();
|
|
588
|
+
if (accumulated) {
|
|
589
|
+
onMessage('bot:response', { conversationId, content: accumulated });
|
|
590
|
+
} else if (errorMsg) {
|
|
591
|
+
onMessage('bot:error', { conversationId, error: errorMsg });
|
|
592
|
+
}
|
|
439
593
|
}
|
|
440
594
|
} catch (err: any) {
|
|
441
595
|
if (!abortController.signal.aborted) {
|
|
442
596
|
log.warn(`[pi/bloby-agent] one-shot error: ${err?.message || err}`);
|
|
443
|
-
|
|
597
|
+
batcher.flush();
|
|
598
|
+
if (accumulated) {
|
|
599
|
+
onMessage('bot:response', { conversationId, content: accumulated });
|
|
600
|
+
} else {
|
|
601
|
+
onMessage('bot:error', { conversationId, error: err?.message || String(err) });
|
|
602
|
+
}
|
|
444
603
|
}
|
|
445
604
|
} finally {
|
|
605
|
+
batcher.discard();
|
|
446
606
|
clearTimeout(watchdog);
|
|
447
607
|
activeQueries.delete(conversationId);
|
|
448
608
|
const FILE_TOOL_NAMES = ['Write', 'Edit', 'write', 'edit'];
|
|
@@ -462,8 +622,9 @@ export function stopBlobyAgentQuery(conversationId: string): void {
|
|
|
462
622
|
// ── Workspace agent endpoint (POST /api/agent/query) ──────────────────────
|
|
463
623
|
|
|
464
624
|
export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryResult> {
|
|
465
|
-
const
|
|
466
|
-
if (!
|
|
625
|
+
const resolved = resolveAuth();
|
|
626
|
+
if (!resolved.ok) return { ok: false, error: resolved.error };
|
|
627
|
+
const auth = resolved.auth;
|
|
467
628
|
|
|
468
629
|
const timeout = Math.min(Math.max(req.timeout || 120_000, 5_000), 300_000);
|
|
469
630
|
const abortController = new AbortController();
|
|
@@ -487,6 +648,9 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
|
|
|
487
648
|
apiKey: auth.apiKey,
|
|
488
649
|
systemPrompt,
|
|
489
650
|
messages,
|
|
651
|
+
maxOutputTokens: auth.maxOutputTokens,
|
|
652
|
+
maxTokensField: auth.maxTokensField,
|
|
653
|
+
includeStreamUsage: auth.includeStreamUsage,
|
|
490
654
|
signal: abortController.signal,
|
|
491
655
|
});
|
|
492
656
|
|
|
@@ -517,7 +681,10 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
|
|
|
517
681
|
clearTimeout(timeoutHandle);
|
|
518
682
|
}
|
|
519
683
|
|
|
520
|
-
|
|
684
|
+
// Partial-text precedence (claude parity, audit D6-2): if the model streamed
|
|
685
|
+
// anything before failing, return it as a successful (truncated) response —
|
|
686
|
+
// claude's runAgentQuery only reports the error when nothing streamed.
|
|
687
|
+
if (errored && !fullText) return { ok: false, error: errorMsg || 'Agent query failed' };
|
|
521
688
|
|
|
522
689
|
const usedFileTools = ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
|
|
523
690
|
return { ok: true, response: fullText, toolsUsed: Array.from(usedTools), usedFileTools };
|