bloby-bot 0.70.8 → 0.70.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist-bloby/assets/{bloby-CXmOcb1r.js → bloby-DSNB0g4w.js} +4 -4
- package/dist-bloby/assets/{globals-DpO5tO92.js → globals-B3cTbITX.js} +1 -1
- package/dist-bloby/assets/{highlighted-body-OFNGDK62-D7cU1Y-Z.js → highlighted-body-OFNGDK62-BLforpkr.js} +1 -1
- package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +1 -0
- package/dist-bloby/assets/{onboard-B96ELhXn.js → onboard-Dn2Ws_G2.js} +1 -1
- package/dist-bloby/bloby.html +2 -2
- package/dist-bloby/onboard.html +2 -2
- package/package.json +1 -1
- package/scripts/sync-pi-models.ts +37 -6
- package/supervisor/chat/OnboardWizard.tsx +4 -4
- package/supervisor/harnesses/pi/async-queue.ts +7 -11
- package/supervisor/harnesses/pi/index.ts +475 -73
- package/supervisor/harnesses/pi/models-catalog.generated.ts +840 -210
- package/supervisor/harnesses/pi/providers/humanize-error.ts +125 -0
- package/supervisor/harnesses/pi/providers/retry.ts +87 -0
- package/supervisor/harnesses/pi/providers/stream-anthropic.ts +73 -11
- package/supervisor/harnesses/pi/providers/stream-google.ts +15 -5
- package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +55 -19
- package/supervisor/harnesses/pi/providers/types.ts +26 -1
- package/supervisor/harnesses/pi/session.ts +179 -73
- package/supervisor/harnesses/pi/sub-providers.ts +30 -1
- package/supervisor/harnesses/pi/test-completion.ts +8 -2
- package/supervisor/harnesses/pi/tools/registry.ts +25 -9
- package/supervisor/harnesses/pi/tools/task.ts +108 -0
- package/supervisor/harnesses/pi/tools/types.ts +15 -0
- package/supervisor/index.ts +11 -10
- package/supervisor/public/morphy_sad.mov +0 -0
- package/supervisor/public/morphy_sad.webm +0 -0
- package/supervisor/shell.ts +1 -1
- package/supervisor/workspace-guard.js +1 -1
- package/workspace/client/public/morphy_bounce.mov +0 -0
- package/workspace/client/public/morphy_bounce.webm +0 -0
- package/workspace/client/public/morphy_hi.mov +0 -0
- package/workspace/client/public/morphy_hi.webm +0 -0
- package/workspace/client/src/App.tsx +5 -3
- package/dist-bloby/assets/mermaid-GHXKKRXX-D5YxphBn.js +0 -1
- package/supervisor/public/what-happened.mp4 +0 -0
- package/supervisor/public/what-happened.webm +0 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider error classification + humanization for the pi harness.
|
|
3
|
+
*
|
|
4
|
+
* Every non-OK HTTP response and network failure from the three stream
|
|
5
|
+
* providers funnels through here so the user sees an actionable message
|
|
6
|
+
* ("update your key in the dashboard") instead of a raw JSON wall, and so the
|
|
7
|
+
* session/harness can react structurally: retry `retryable` rounds, tear the
|
|
8
|
+
* conversation down on `auth` / `context-overflow` (a poisoned history would
|
|
9
|
+
* otherwise re-fail forever).
|
|
10
|
+
*
|
|
11
|
+
* Mirrors the codex harness's codexErrorInfo mapping (house standard M4).
|
|
12
|
+
*/
|
|
13
|
+
import type { PiErrorKind } from './types.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Result of classifying a provider failure: a structured verdict for the
 * session/harness plus the text shown to the user.
 */
export interface ClassifiedPiError {
  /** Coarse category the caller can branch on instead of string-matching. */
  kind: PiErrorKind;
  /** Whether sending the very same request again has a realistic chance of working. */
  retryable: boolean;
  /** HTTP status of the failed response, when there was one. */
  status?: number;
  /** Friendly user-facing text, followed by a short raw detail to aid debugging. */
  message: string;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Pull the provider's human-readable message out of an error body and reduce
 * it to a single short line suitable for appending to a user-facing message.
 *
 * Lookup order inside a JSON body: `error.message`, then a top-level
 * `message`, then a string-valued `error`. A body whose top level is an array
 * is unwrapped to its first element before the lookup. When nothing usable is
 * found (or the body is not JSON at all, e.g. an HTML error page) the raw body
 * itself is used.
 *
 * @param body Raw response body text; empty/whitespace-only yields `''`.
 * @returns At most 300 characters, with every whitespace run (including
 *          newlines) collapsed to one space so the detail stays on one line.
 */
function extractDetail(body: string): string {
  const MAX_DETAIL_CHARS = 300;
  const toOneLine = (text: string): string =>
    text.replace(/\s+/g, ' ').trim().slice(0, MAX_DETAIL_CHARS);

  const trimmed = (body || '').trim();
  if (!trimmed) return '';

  let parsed: unknown;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    // Not JSON (plain text, HTML from a proxy, ...) — deliberately best-effort:
    // fall back to the raw body rather than failing the classification.
    return toOneLine(trimmed);
  }

  const root: unknown = Array.isArray(parsed) ? parsed[0] : parsed;
  if (root !== null && typeof root === 'object') {
    const fields = root as Record<string, unknown>;
    const errorField = fields.error;
    const nestedMessage =
      errorField !== null && typeof errorField === 'object'
        ? (errorField as Record<string, unknown>).message
        : undefined;
    // First non-blank string wins; non-string values are skipped instead of
    // short-circuiting the whole lookup.
    for (const candidate of [nestedMessage, fields.message, errorField]) {
      if (typeof candidate === 'string' && candidate.trim()) return toOneLine(candidate);
    }
  }
  return toOneLine(trimmed);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Case-insensitive match for the phrasings providers use when the prompt no
 * longer fits the model's context window. One alternative per entry.
 */
const CONTEXT_OVERFLOW_RE = new RegExp(
  [
    'context.length',
    'context_length_exceeded',
    'maximum context length',
    'prompt is too long',
    'too many tokens',
    'input token count.*exceed',
    'token count exceeds',
    'exceeds the maximum number of tokens',
    'request exceeds the.*token',
    'exceeds? (the )?context limit',
    'input length and .{0,3}max_tokens',
  ].join('|'),
  'i',
);
|
|
40
|
+
|
|
41
|
+
/**
 * Case-insensitive match for error bodies that indicate a rejected or missing
 * credential. One alternative per entry.
 */
const AUTH_RE = new RegExp(
  [
    'api key not valid',
    'invalid api key',
    'invalid x-api-key',
    'incorrect api key',
    'invalid_api_key',
    'authentication[_ ]error',
    'permission_error',
    'invalid bearer token',
    'no auth credentials',
  ].join('|'),
  'i',
);
|
|
43
|
+
|
|
44
|
+
// Kept intentionally tight: only markers that unambiguously mean "out of
// credit". Gemini's ordinary per-minute 429 mentions "check your plan and
// billing details", yet that is a RATE LIMIT (retryable) rather than a billing
// failure. OpenAI's genuine quota exhaustion is told apart by its
// insufficient_quota code, which Gemini bodies do not contain.
const BILLING_RE = new RegExp(
  [
    'insufficient_quota',
    'credit balance is too low',
    'payment required',
    'purchase more credits',
  ].join('|'),
  'i',
);
|
|
50
|
+
|
|
51
|
+
/**
 * Classify a failed provider response (or an in-stream error payload) into a
 * structured verdict plus a user-facing message.
 *
 * Checks run in a fixed order and the first match wins:
 *   1. context overflow — status 400/413 whose body matches CONTEXT_OVERFLOW_RE
 *   2. billing          — body matches BILLING_RE, or status 402
 *   3. auth             — status 401, or body matches AUTH_RE
 *   4. rate limit       — status 429
 *   5. transient        — status 408 or any status >= 500
 *   6. other            — everything else
 * Only rate-limit and transient results are marked retryable.
 *
 * @param providerLabel Display name of the provider, used in the message.
 * @param status        HTTP status, or undefined when there is none.
 * @param statusText    HTTP reason phrase; may be empty.
 * @param body          Raw error body; the patterns are tested against it as-is.
 * @returns The classification, echoing `status` back unchanged.
 */
export function classifyPiError(
  providerLabel: string,
  status: number | undefined,
  statusText: string,
  body: string,
): ClassifiedPiError {
  const detail = extractDetail(body);
  const suffix = detail ? ` (${detail})` : '';
  const verdict = (kind: PiErrorKind, retryable: boolean, message: string): ClassifiedPiError => ({
    kind,
    retryable,
    status,
    message,
  });

  // Order matters: overflow and billing hide behind generic 400/429 statuses,
  // so they must be recognised before the status-only checks further down.
  if ((status === 400 || status === 413) && CONTEXT_OVERFLOW_RE.test(body)) {
    return verdict(
      'context-overflow',
      false,
      `The conversation has outgrown ${providerLabel}'s context window.${suffix}`,
    );
  }
  if (BILLING_RE.test(body) || status === 402) {
    return verdict(
      'billing',
      false,
      `${providerLabel} reports a quota/billing problem — check your plan or credits on the provider's console.${suffix}`,
    );
  }
  // 401 is always auth. Every other status (403 included) counts as auth only
  // when the body matches AUTH_RE: a bare 403 is NOT enough, because vendors
  // also use 403 for per-message moderation/guardrail blocks (e.g. OpenRouter)
  // and auth is a fatal kind that recycles the session.
  if (status === 401 || AUTH_RE.test(body)) {
    return verdict(
      'auth',
      false,
      `${providerLabel} rejected your API key. Update it from the dashboard (Bloby provider settings).${suffix}`,
    );
  }
  if (status === 429) {
    return verdict(
      'rate-limit',
      true,
      `${providerLabel} rate limit reached — give it a moment and try again.${suffix}`,
    );
  }
  if (status === 408 || (status !== undefined && status >= 500)) {
    return verdict(
      'transient',
      true,
      `${providerLabel} is having trouble right now (HTTP ${status}) — try again in a moment.${suffix}`,
    );
  }

  // Fallback: "<label> <status> <statusText>: <detail>". Absent pieces are
  // dropped before joining so a missing status cannot leave a double space.
  const headline = [providerLabel, status, statusText]
    .filter((part) => part != null && part !== '')
    .join(' ')
    .trim();
  return verdict('other', false, headline + (detail ? `: ${detail}` : ''));
}
|
|
111
|
+
|
|
112
|
+
/**
 * Network-level failures (DNS, refused, reset, undici timeouts) — always
 * classified as transient and retryable.
 *
 * The raw text is taken from `err.message` and, when present, from
 * `err.cause` (its `message`, else its `code`): fetch failures commonly carry
 * only a generic top-level message with the real reason attached as the
 * cause, so both are needed to recognise a stalled stream and to give the
 * user something diagnosable.
 *
 * @param providerLabel Display name of the provider, used in the message.
 * @param err           Whatever was thrown; may be an Error, a string, or nullish.
 * @returns A `transient`, retryable classification with no `status`.
 */
export function classifyPiNetworkError(providerLabel: string, err: any): ClassifiedPiError {
  const describe = (value: any): string =>
    typeof value === 'string' ? value : value?.message || value?.code || '';

  const headline: string = err?.message || String(err);
  const causeText: string = describe(err?.cause);
  // Append the cause only when it adds information beyond the headline.
  const raw = causeText && !headline.includes(causeText) ? `${headline}: ${causeText}` : headline;

  // undici's body/headers timeouts surface as the famously cryptic 'terminated'
  // and 'Headers Timeout Error' — translate them.
  const stalled = /terminated|timeout/i.test(raw);
  return {
    kind: 'transient',
    retryable: true,
    message: stalled
      ? `${providerLabel} stream stalled (no data from the provider). Try again in a moment. (${raw})`
      : `Could not reach ${providerLabel}: ${raw}`,
  };
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* fetchWithRetry — transient-failure absorption for the pi providers.
|
|
3
|
+
*
|
|
4
|
+
* The Claude SDK retries transient provider errors inside its subprocess and
|
|
5
|
+
* codex suppresses willRetry errors; pi's hand-rolled providers previously did
|
|
6
|
+
* exactly one fetch, so a single 429 (routine on Gemini free tier) or a 5xx
|
|
7
|
+
* blip killed an entire multi-minute agentic turn. This wraps the initial
|
|
8
|
+
* request only — once a stream is open, mid-stream failures are handled by the
|
|
9
|
+
* session's round-retry (a full-history resend is stateless, so re-running a
|
|
10
|
+
* round that produced nothing is safe).
|
|
11
|
+
*
|
|
12
|
+
* Policy: up to 3 attempts on network errors and HTTP 408/429/5xx, exponential
|
|
13
|
+
* backoff 1s/2s with jitter, honoring Retry-After when it's short. A long
|
|
14
|
+
* Retry-After (> 15s) means the provider really wants us to back off — return
|
|
15
|
+
* the response and let the classifier surface a friendly rate-limit message.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/** HTTP statuses for which the initial request is attempted again. */
const RETRYABLE_STATUS = new Set<number>([408, 429, 500, 502, 503, 504, 529]);
/** Total number of tries, counting the first request. */
const MAX_ATTEMPTS = 3;
/** Longest Retry-After hint (ms) that is waited out; longer hints are surfaced to the caller. */
const MAX_HONORED_RETRY_AFTER_MS = 15 * 1000;
|
|
21
|
+
|
|
22
|
+
/**
 * Read a response's `retry-after` header as a delay in milliseconds.
 *
 * The header is first interpreted as a number of seconds; if it is not a
 * finite number it is parsed as a date and measured against the current time.
 *
 * @returns The delay clamped to be non-negative, or `undefined` when the
 *          header is missing/empty or parses as neither form.
 */
function retryAfterMs(res: Response): number | undefined {
  const header = res.headers.get('retry-after');
  if (!header) return undefined;

  const asSeconds = Number(header);
  const delayMs = Number.isFinite(asSeconds)
    ? asSeconds * 1000
    : Date.parse(header) - Date.now(); // NaN when the header is not a parseable date

  return Number.isNaN(delayMs) ? undefined : Math.max(0, delayMs);
}
|
|
31
|
+
|
|
32
|
+
/** Build a fresh Error whose `name` is 'AbortError', the marker callers check for cancellation. */
function abortError(): Error {
  return Object.assign(new Error('This operation was aborted'), { name: 'AbortError' });
}
|
|
37
|
+
|
|
38
|
+
/**
 * Wait for `ms` milliseconds, cutting the wait short if `signal` aborts.
 *
 * @param ms     Delay handed to `setTimeout`.
 * @param signal Optional abort signal. If it is already aborted, or aborts
 *               during the wait, the promise rejects with an AbortError and
 *               the pending timer is cleared.
 * @returns A promise that resolves once the delay has elapsed.
 */
export function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  if (signal?.aborted) return Promise.reject(abortError());

  return new Promise<void>((resolve, reject) => {
    function onAbort(): void {
      clearTimeout(timer);
      reject(abortError());
    }

    const timer = setTimeout(() => {
      // Normal wake-up: detach the listener so it does not linger on the signal.
      signal?.removeEventListener('abort', onAbort);
      resolve();
    }, ms);

    signal?.addEventListener('abort', onAbort, { once: true });
  });
}
|
|
53
|
+
|
|
54
|
+
/**
 * `fetch` with bounded retries for transient failures of the initial request.
 *
 * Makes up to MAX_ATTEMPTS tries. A try is repeated when `fetch` throws a
 * non-abort error or the response status is in RETRYABLE_STATUS. The wait
 * between tries is the response's Retry-After hint when present, otherwise
 * 1s * 2^attempt plus up to 250ms of jitter.
 *
 * @param url  Request URL.
 * @param init Fetch options; `init.signal` cancels both in-flight requests
 *             and the waits between tries.
 * @returns The first OK or non-retryable response; or the last response once
 *          tries are exhausted; or a retryable response whose Retry-After
 *          exceeds MAX_HONORED_RETRY_AFTER_MS (returned with its body intact
 *          so the caller can report it).
 * @throws An error named 'AbortError' whenever `init.signal` is aborted,
 *         whatever abort reason the signal carried; otherwise the last
 *         network error once tries are exhausted.
 */
export async function fetchWithRetry(
  url: string,
  init: RequestInit & { signal?: AbortSignal },
): Promise<Response> {
  const backoffMs = (attempt: number): number => 1000 * 2 ** attempt + Math.random() * 250;

  let lastErr: unknown;
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    if (init.signal?.aborted) throw abortError();
    const isLastAttempt = attempt === MAX_ATTEMPTS - 1;

    let res: Response;
    try {
      res = await fetch(url, init);
    } catch (err: any) {
      if (err?.name === 'AbortError') throw err;
      // A signal aborted with a custom reason (abort(reason), timeout signals)
      // makes fetch reject with that reason instead of an AbortError. Treat it
      // as the cancellation it is, matching the loop head and sleep(), rather
      // than retrying it or reporting it as a network failure.
      if (init.signal?.aborted) throw abortError();
      lastErr = err;
      if (isLastAttempt) throw err;
      await sleep(backoffMs(attempt), init.signal);
      continue;
    }

    if (res.ok || !RETRYABLE_STATUS.has(res.status) || isLastAttempt) {
      return res;
    }

    const hinted = retryAfterMs(res);
    if (hinted !== undefined && hinted > MAX_HONORED_RETRY_AFTER_MS) {
      return res; // provider asked for a long back-off — surface it instead of stalling the turn
    }
    // Discard the unread body before retrying; a failure to cancel is
    // irrelevant to the retry, so it is deliberately ignored.
    try { await res.body?.cancel(); } catch {}
    await sleep(hinted ?? backoffMs(attempt), init.signal);
  }
  // Unreachable (the last attempt always returns or throws), but keeps TS happy.
  throw lastErr ?? new Error('fetchWithRetry: exhausted attempts');
}
|
|
@@ -18,7 +18,10 @@ import type {
|
|
|
18
18
|
PiMessage,
|
|
19
19
|
PiContentBlock,
|
|
20
20
|
PiStopReason,
|
|
21
|
+
PiUsage,
|
|
21
22
|
} from './types.js';
|
|
23
|
+
import { fetchWithRetry } from './retry.js';
|
|
24
|
+
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
22
25
|
|
|
23
26
|
/* ── SSE parser (shares the LF/CRLF-tolerant pattern from the other providers) ── */
|
|
24
27
|
|
|
@@ -150,12 +153,30 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
150
153
|
max_tokens: req.maxOutputTokens ?? 8192,
|
|
151
154
|
stream: true,
|
|
152
155
|
};
|
|
153
|
-
|
|
154
|
-
|
|
156
|
+
// Prompt caching (3 of the 4 allowed breakpoints). Without these, every tool
|
|
157
|
+
// round re-prefills the full system prompt + history at full input price —
|
|
158
|
+
// up to 25x per agentic turn. The request prefix is tools → system →
|
|
159
|
+
// messages, so: last tool def caches the tool block, the system block caches
|
|
160
|
+
// tools+system as one prefix, and the last history block caches the
|
|
161
|
+
// conversation so far (Anthropic checks previous breakpoint positions for
|
|
162
|
+
// the longest cached prefix as the marker moves forward each round).
|
|
163
|
+
if (req.systemPrompt?.trim()) {
|
|
164
|
+
body.system = [{ type: 'text', text: req.systemPrompt, cache_control: { type: 'ephemeral' } }];
|
|
165
|
+
}
|
|
166
|
+
if (req.tools && req.tools.length > 0) {
|
|
167
|
+
body.tools = toAnthropicTools(req.tools);
|
|
168
|
+
body.tools[body.tools.length - 1].cache_control = { type: 'ephemeral' };
|
|
169
|
+
}
|
|
170
|
+
if (Array.isArray(body.messages) && body.messages.length > 0) {
|
|
171
|
+
const lastContent = body.messages[body.messages.length - 1].content;
|
|
172
|
+
if (Array.isArray(lastContent) && lastContent.length > 0) {
|
|
173
|
+
lastContent[lastContent.length - 1].cache_control = { type: 'ephemeral' };
|
|
174
|
+
}
|
|
175
|
+
}
|
|
155
176
|
|
|
156
177
|
let res: Response;
|
|
157
178
|
try {
|
|
158
|
-
res = await
|
|
179
|
+
res = await fetchWithRetry(url, {
|
|
159
180
|
method: 'POST',
|
|
160
181
|
headers: {
|
|
161
182
|
'content-type': 'application/json',
|
|
@@ -167,14 +188,20 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
167
188
|
signal: req.signal,
|
|
168
189
|
});
|
|
169
190
|
} catch (err: any) {
|
|
170
|
-
|
|
191
|
+
if (err?.name === 'AbortError') {
|
|
192
|
+
yield { type: 'done', stopReason: 'aborted' };
|
|
193
|
+
return;
|
|
194
|
+
}
|
|
195
|
+
const cls = classifyPiNetworkError('Anthropic', err);
|
|
196
|
+
yield { type: 'error', error: cls.message, kind: cls.kind, retryable: cls.retryable };
|
|
171
197
|
return;
|
|
172
198
|
}
|
|
173
199
|
|
|
174
200
|
if (!res.ok) {
|
|
175
201
|
let detail = '';
|
|
176
202
|
try { detail = await res.text(); } catch {}
|
|
177
|
-
|
|
203
|
+
const cls = classifyPiError('Anthropic', res.status, res.statusText, detail);
|
|
204
|
+
yield { type: 'error', error: cls.message, status: cls.status, kind: cls.kind, retryable: cls.retryable };
|
|
178
205
|
return;
|
|
179
206
|
}
|
|
180
207
|
|
|
@@ -183,7 +210,7 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
183
210
|
const blocks = new Map<number, PartialBlock>();
|
|
184
211
|
let accumulated = '';
|
|
185
212
|
let lastStop: string | undefined;
|
|
186
|
-
let usage:
|
|
213
|
+
let usage: PiUsage | undefined;
|
|
187
214
|
let chunkCount = 0;
|
|
188
215
|
let firstChunkSummary = '';
|
|
189
216
|
|
|
@@ -197,7 +224,17 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
197
224
|
switch (type) {
|
|
198
225
|
case 'message_start': {
|
|
199
226
|
const u = evt?.message?.usage;
|
|
200
|
-
if (u)
|
|
227
|
+
if (u) {
|
|
228
|
+
usage = {
|
|
229
|
+
inputTokens: u.input_tokens,
|
|
230
|
+
outputTokens: u.output_tokens,
|
|
231
|
+
// With prompt caching on, the bulk of the prompt is cache reads —
|
|
232
|
+
// input_tokens alone would massively under-report occupancy and
|
|
233
|
+
// the supervisor's recycler would never fire.
|
|
234
|
+
cacheReadTokens: u.cache_read_input_tokens,
|
|
235
|
+
cacheCreationTokens: u.cache_creation_input_tokens,
|
|
236
|
+
};
|
|
237
|
+
}
|
|
201
238
|
break;
|
|
202
239
|
}
|
|
203
240
|
case 'content_block_start': {
|
|
@@ -241,7 +278,19 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
241
278
|
let input: any = {};
|
|
242
279
|
if (slot.toolArgsBuf) {
|
|
243
280
|
try { input = JSON.parse(slot.toolArgsBuf); }
|
|
244
|
-
catch {
|
|
281
|
+
catch {
|
|
282
|
+
// Truncated tool-call JSON (output cap hit mid-arguments).
|
|
283
|
+
// Executing a fabricated {_raw} input sends the model into an
|
|
284
|
+
// unwinnable retry loop — fail the round loudly instead.
|
|
285
|
+
yield {
|
|
286
|
+
type: 'error',
|
|
287
|
+
error: `The model's ${slot.toolName} call was cut off by the output-token limit (${req.maxOutputTokens ?? 8192} tokens) — the arguments did not fit. Try a smaller change, or raise the model's output budget.`,
|
|
288
|
+
kind: 'other',
|
|
289
|
+
retryable: false,
|
|
290
|
+
};
|
|
291
|
+
yield { type: 'done', stopReason: 'error', usage };
|
|
292
|
+
return;
|
|
293
|
+
}
|
|
245
294
|
}
|
|
246
295
|
yield {
|
|
247
296
|
type: 'tool_use',
|
|
@@ -259,13 +308,25 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
259
308
|
usage = {
|
|
260
309
|
inputTokens: u.input_tokens ?? usage?.inputTokens,
|
|
261
310
|
outputTokens: u.output_tokens ?? usage?.outputTokens,
|
|
311
|
+
cacheReadTokens: u.cache_read_input_tokens ?? usage?.cacheReadTokens,
|
|
312
|
+
cacheCreationTokens: u.cache_creation_input_tokens ?? usage?.cacheCreationTokens,
|
|
262
313
|
};
|
|
263
314
|
}
|
|
264
315
|
break;
|
|
265
316
|
}
|
|
266
317
|
case 'error': {
|
|
267
|
-
|
|
268
|
-
|
|
318
|
+
// In-stream error event (e.g. overloaded_error) — classify so the
|
|
319
|
+
// session can retry transient ones and the user sees friendly text.
|
|
320
|
+
const cls = classifyPiError('Anthropic', undefined, '', JSON.stringify(evt?.error ?? evt ?? {}));
|
|
321
|
+
const isOverloaded = (evt?.error?.type || '') === 'overloaded_error';
|
|
322
|
+
yield {
|
|
323
|
+
type: 'error',
|
|
324
|
+
error: cls.kind === 'other' && !isOverloaded
|
|
325
|
+
? `Anthropic stream error: ${evt?.error?.message || evt?.message || 'Unknown error'}`
|
|
326
|
+
: (isOverloaded ? 'Anthropic is overloaded right now — try again in a moment.' : cls.message),
|
|
327
|
+
kind: isOverloaded ? 'transient' : cls.kind,
|
|
328
|
+
retryable: isOverloaded || cls.retryable,
|
|
329
|
+
};
|
|
269
330
|
yield { type: 'done', stopReason: 'error', usage };
|
|
270
331
|
return;
|
|
271
332
|
}
|
|
@@ -281,7 +342,8 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
281
342
|
yield { type: 'done', stopReason: 'aborted' };
|
|
282
343
|
return;
|
|
283
344
|
}
|
|
284
|
-
|
|
345
|
+
const cls = classifyPiNetworkError('Anthropic', err);
|
|
346
|
+
yield { type: 'error', error: cls.message, kind: cls.kind, retryable: cls.retryable };
|
|
285
347
|
return;
|
|
286
348
|
}
|
|
287
349
|
|
|
@@ -16,7 +16,10 @@ import type {
|
|
|
16
16
|
PiMessage,
|
|
17
17
|
PiContentBlock,
|
|
18
18
|
PiStopReason,
|
|
19
|
+
PiUsage,
|
|
19
20
|
} from './types.js';
|
|
21
|
+
import { fetchWithRetry } from './retry.js';
|
|
22
|
+
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
20
23
|
|
|
21
24
|
/** Walk an SSE byte stream and yield each parsed JSON event. */
|
|
22
25
|
async function* parseSse(res: Response, dbg: { firstBytes: string }): AsyncIterable<any> {
|
|
@@ -209,21 +212,27 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
209
212
|
|
|
210
213
|
let res: Response;
|
|
211
214
|
try {
|
|
212
|
-
res = await
|
|
215
|
+
res = await fetchWithRetry(url, {
|
|
213
216
|
method: 'POST',
|
|
214
217
|
headers: { 'content-type': 'application/json' },
|
|
215
218
|
body: JSON.stringify(body),
|
|
216
219
|
signal: req.signal,
|
|
217
220
|
});
|
|
218
221
|
} catch (err: any) {
|
|
219
|
-
|
|
222
|
+
if (err?.name === 'AbortError') {
|
|
223
|
+
yield { type: 'done', stopReason: 'aborted' };
|
|
224
|
+
return;
|
|
225
|
+
}
|
|
226
|
+
const cls = classifyPiNetworkError('Google Gemini', err);
|
|
227
|
+
yield { type: 'error', error: cls.message, kind: cls.kind, retryable: cls.retryable };
|
|
220
228
|
return;
|
|
221
229
|
}
|
|
222
230
|
|
|
223
231
|
if (!res.ok) {
|
|
224
232
|
let detail = '';
|
|
225
233
|
try { detail = await res.text(); } catch {}
|
|
226
|
-
|
|
234
|
+
const cls = classifyPiError('Google Gemini', res.status, res.statusText, detail);
|
|
235
|
+
yield { type: 'error', error: cls.message, status: cls.status, kind: cls.kind, retryable: cls.retryable };
|
|
227
236
|
return;
|
|
228
237
|
}
|
|
229
238
|
|
|
@@ -231,7 +240,7 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
231
240
|
let toolCallCount = 0;
|
|
232
241
|
let lastFinish: string | undefined;
|
|
233
242
|
let promptBlockReason: string | undefined;
|
|
234
|
-
let usage:
|
|
243
|
+
let usage: PiUsage | undefined;
|
|
235
244
|
// Debug counters — drop once this stabilises.
|
|
236
245
|
let chunkCount = 0;
|
|
237
246
|
let thoughtPartCount = 0;
|
|
@@ -293,7 +302,8 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
293
302
|
yield { type: 'done', stopReason: 'aborted' };
|
|
294
303
|
return;
|
|
295
304
|
}
|
|
296
|
-
|
|
305
|
+
const cls = classifyPiNetworkError('Google Gemini', err);
|
|
306
|
+
yield { type: 'error', error: cls.message, kind: cls.kind, retryable: cls.retryable };
|
|
297
307
|
return;
|
|
298
308
|
}
|
|
299
309
|
|
|
@@ -16,7 +16,10 @@ import type {
|
|
|
16
16
|
PiMessage,
|
|
17
17
|
PiContentBlock,
|
|
18
18
|
PiStopReason,
|
|
19
|
+
PiUsage,
|
|
19
20
|
} from './types.js';
|
|
21
|
+
import { fetchWithRetry } from './retry.js';
|
|
22
|
+
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
20
23
|
|
|
21
24
|
/* ── SSE parser (LF or CRLF tolerant, flushes the trailing event) ── */
|
|
22
25
|
|
|
@@ -187,8 +190,17 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
187
190
|
model: req.modelId,
|
|
188
191
|
messages: openaiMessages,
|
|
189
192
|
stream: true,
|
|
190
|
-
|
|
193
|
+
// gpt-5.x / o-series reject the legacy `max_tokens`; the openai-api
|
|
194
|
+
// sub-provider routes the cap through `max_completion_tokens` instead.
|
|
195
|
+
[req.maxTokensField ?? 'max_tokens']: req.maxOutputTokens ?? 8192,
|
|
191
196
|
};
|
|
197
|
+
// Without this opt-in, OpenAI/OpenRouter streams carry NO usage at all — and
|
|
198
|
+
// usage is what feeds the supervisor's proactive session recycling. Gated
|
|
199
|
+
// per sub-provider: Mistral's strict schema 422s on unknown fields
|
|
200
|
+
// (noStreamUsage in sub-providers.ts); everyone else tolerates or needs it.
|
|
201
|
+
if (req.includeStreamUsage !== false) {
|
|
202
|
+
body.stream_options = { include_usage: true };
|
|
203
|
+
}
|
|
192
204
|
if (req.tools && req.tools.length > 0) {
|
|
193
205
|
body.tools = toOpenAITools(req.tools);
|
|
194
206
|
body.tool_choice = 'auto';
|
|
@@ -201,47 +213,58 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
201
213
|
'accept': 'text/event-stream',
|
|
202
214
|
};
|
|
203
215
|
if (req.apiKey) headers['authorization'] = `Bearer ${req.apiKey}`;
|
|
204
|
-
res = await
|
|
216
|
+
res = await fetchWithRetry(url, {
|
|
205
217
|
method: 'POST',
|
|
206
218
|
headers,
|
|
207
219
|
body: JSON.stringify(body),
|
|
208
220
|
signal: req.signal,
|
|
209
221
|
});
|
|
210
222
|
} catch (err: any) {
|
|
211
|
-
|
|
223
|
+
if (err?.name === 'AbortError') {
|
|
224
|
+
yield { type: 'done', stopReason: 'aborted' };
|
|
225
|
+
return;
|
|
226
|
+
}
|
|
227
|
+
const cls = classifyPiNetworkError('OpenAI-compat', err);
|
|
228
|
+
yield { type: 'error', error: cls.message, kind: cls.kind, retryable: cls.retryable };
|
|
212
229
|
return;
|
|
213
230
|
}
|
|
214
231
|
|
|
215
232
|
if (!res.ok) {
|
|
216
233
|
let detail = '';
|
|
217
234
|
try { detail = await res.text(); } catch {}
|
|
218
|
-
|
|
235
|
+
const cls = classifyPiError('OpenAI-compat', res.status, res.statusText, detail);
|
|
236
|
+
yield { type: 'error', error: cls.message, status: cls.status, kind: cls.kind, retryable: cls.retryable };
|
|
219
237
|
return;
|
|
220
238
|
}
|
|
221
239
|
|
|
222
240
|
let accumulated = '';
|
|
223
241
|
let lastFinish: string | undefined;
|
|
224
|
-
let usage:
|
|
242
|
+
let usage: PiUsage | undefined;
|
|
225
243
|
const toolCallsByIndex = new Map<number, PartialToolCall>();
|
|
226
244
|
let chunkCount = 0;
|
|
227
245
|
let firstChunkSummary = '';
|
|
228
246
|
|
|
247
|
+
// Vendors disagree on where streamed usage lives: spec says a final
|
|
248
|
+
// choice-less chunk's `usage`, Groq defaults to nesting under `x_groq.usage`,
|
|
249
|
+
// Moonshot tucks it onto the choice itself. Read all three.
|
|
250
|
+
const readUsage = (u: any) => {
|
|
251
|
+
if (!u || (u.prompt_tokens === undefined && u.completion_tokens === undefined)) return;
|
|
252
|
+
usage = { inputTokens: u.prompt_tokens, outputTokens: u.completion_tokens };
|
|
253
|
+
};
|
|
254
|
+
|
|
229
255
|
try {
|
|
230
256
|
for await (const chunk of parseSse(res)) {
|
|
231
257
|
chunkCount++;
|
|
232
258
|
if (chunkCount === 1) {
|
|
233
259
|
try { firstChunkSummary = JSON.stringify(chunk).slice(0, 600); } catch {}
|
|
234
260
|
}
|
|
261
|
+
readUsage(chunk?.x_groq?.usage);
|
|
235
262
|
const choice = chunk?.choices?.[0];
|
|
236
263
|
if (!choice) {
|
|
237
|
-
|
|
238
|
-
usage = {
|
|
239
|
-
inputTokens: chunk.usage.prompt_tokens,
|
|
240
|
-
outputTokens: chunk.usage.completion_tokens,
|
|
241
|
-
};
|
|
242
|
-
}
|
|
264
|
+
readUsage(chunk?.usage);
|
|
243
265
|
continue;
|
|
244
266
|
}
|
|
267
|
+
readUsage(choice?.usage);
|
|
245
268
|
const delta = choice.delta || {};
|
|
246
269
|
|
|
247
270
|
if (typeof delta.content === 'string' && delta.content.length > 0) {
|
|
@@ -267,19 +290,15 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
267
290
|
}
|
|
268
291
|
|
|
269
292
|
if (choice.finish_reason) lastFinish = choice.finish_reason;
|
|
270
|
-
|
|
271
|
-
usage = {
|
|
272
|
-
inputTokens: chunk.usage.prompt_tokens,
|
|
273
|
-
outputTokens: chunk.usage.completion_tokens,
|
|
274
|
-
};
|
|
275
|
-
}
|
|
293
|
+
readUsage(chunk?.usage);
|
|
276
294
|
}
|
|
277
295
|
} catch (err: any) {
|
|
278
296
|
if (err?.name === 'AbortError') {
|
|
279
297
|
yield { type: 'done', stopReason: 'aborted' };
|
|
280
298
|
return;
|
|
281
299
|
}
|
|
282
|
-
|
|
300
|
+
const cls = classifyPiNetworkError('OpenAI-compat', err);
|
|
301
|
+
yield { type: 'error', error: cls.message, kind: cls.kind, retryable: cls.retryable };
|
|
283
302
|
return;
|
|
284
303
|
}
|
|
285
304
|
|
|
@@ -300,7 +319,24 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
300
319
|
let input: any = {};
|
|
301
320
|
if (partial.argsBuf) {
|
|
302
321
|
try { input = JSON.parse(partial.argsBuf); }
|
|
303
|
-
catch {
|
|
322
|
+
catch {
|
|
323
|
+
// Truncated/malformed tool-call JSON — almost always the output-token
|
|
324
|
+
// cap cutting the arguments mid-stream. Executing a fabricated {_raw}
|
|
325
|
+
// input produces misleading tool errors the model retries forever;
|
|
326
|
+
// fail the round loudly instead (the session strips dangling tool_use
|
|
327
|
+
// blocks from history on errored rounds).
|
|
328
|
+
const capped = lastFinish === 'length';
|
|
329
|
+
yield {
|
|
330
|
+
type: 'error',
|
|
331
|
+
error: capped
|
|
332
|
+
? `The model's ${partial.name} call was cut off by the output-token limit (${req.maxOutputTokens ?? 8192} tokens) — the arguments did not fit. Try a smaller change, or raise the model's output budget.`
|
|
333
|
+
: `The model emitted a malformed ${partial.name} tool call (arguments were not valid JSON).`,
|
|
334
|
+
kind: 'other',
|
|
335
|
+
retryable: !capped,
|
|
336
|
+
};
|
|
337
|
+
yield { type: 'done', stopReason: 'error', usage };
|
|
338
|
+
return;
|
|
339
|
+
}
|
|
304
340
|
}
|
|
305
341
|
yield {
|
|
306
342
|
type: 'tool_use',
|
|
@@ -44,20 +44,45 @@ export interface PiStreamRequest {
|
|
|
44
44
|
tools?: PiToolDef[];
|
|
45
45
|
/** Hard cap on output tokens for a single turn. */
|
|
46
46
|
maxOutputTokens?: number;
|
|
47
|
+
/**
|
|
48
|
+
* Which request field carries the output cap on the openai-completions
|
|
49
|
+
* flavor. OpenAI's reasoning models (gpt-5.x, o-series) reject the legacy
|
|
50
|
+
* `max_tokens` — the openai-api sub-provider sets `max_completion_tokens`
|
|
51
|
+
* (accepted by ALL OpenAI models); other vendors stay on `max_tokens`.
|
|
52
|
+
*/
|
|
53
|
+
maxTokensField?: 'max_tokens' | 'max_completion_tokens';
|
|
54
|
+
/**
|
|
55
|
+
* openai-completions flavor: set false for strict-schema vendors (Mistral)
|
|
56
|
+
* that 422 on the `stream_options.include_usage` opt-in. Default true.
|
|
57
|
+
*/
|
|
58
|
+
includeStreamUsage?: boolean;
|
|
47
59
|
/** Optional abort signal so the session can interrupt in-flight requests. */
|
|
48
60
|
signal?: AbortSignal;
|
|
49
61
|
}
|
|
50
62
|
|
|
51
63
|
export type PiStopReason = 'end_turn' | 'tool_use' | 'max_tokens' | 'error' | 'aborted';
|
|
52
64
|
|
|
65
|
+
/**
|
|
66
|
+
* Coarse error classification so the session/harness can react without
|
|
67
|
+
* string-matching: retry transient rounds, tear down on auth/overflow, and
|
|
68
|
+
* show actionable messages instead of raw provider JSON.
|
|
69
|
+
*/
|
|
70
|
+
export type PiErrorKind = 'auth' | 'context-overflow' | 'rate-limit' | 'billing' | 'transient' | 'other';
|
|
71
|
+
|
|
53
72
|
export type PiStreamEvent =
|
|
54
73
|
| { type: 'text_delta'; delta: string }
|
|
55
74
|
| { type: 'text_end'; text: string }
|
|
56
75
|
| { type: 'tool_use'; id: string; name: string; input: any; thoughtSignature?: string }
|
|
57
76
|
| { type: 'done'; stopReason: PiStopReason; usage?: PiUsage }
|
|
58
|
-
| { type: 'error'; error: string };
|
|
77
|
+
| { type: 'error'; error: string; status?: number; kind?: PiErrorKind; retryable?: boolean };
|
|
59
78
|
|
|
60
79
|
export interface PiUsage {
|
|
80
|
+
/** Non-cached prompt tokens. NOTE: Anthropic's input_tokens EXCLUDES cache
|
|
81
|
+
* reads/writes — prompt occupancy is input + cacheRead + cacheCreation
|
|
82
|
+
* (Gemini's promptTokenCount and OpenAI's prompt_tokens already include
|
|
83
|
+
* cached tokens, so their providers leave the cache fields unset). */
|
|
61
84
|
inputTokens?: number;
|
|
62
85
|
outputTokens?: number;
|
|
86
|
+
cacheReadTokens?: number;
|
|
87
|
+
cacheCreationTokens?: number;
|
|
63
88
|
}
|