bloby-bot 0.70.12 → 0.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +234 -48
- package/dist-bloby/assets/{bloby-DSNB0g4w.js → bloby-es6cZJzs.js} +6 -6
- package/dist-bloby/assets/globals-DBqwNiJV.css +2 -0
- package/dist-bloby/assets/{globals-B3cTbITX.js → globals-DN3F0CQE.js} +1 -1
- package/dist-bloby/assets/{highlighted-body-OFNGDK62-BLforpkr.js → highlighted-body-OFNGDK62-8PiOHw9p.js} +1 -1
- package/dist-bloby/assets/mermaid-GHXKKRXX-BJWX8urU.js +1 -0
- package/dist-bloby/assets/{onboard-Dn2Ws_G2.js → onboard-BKgy17OU.js} +1 -1
- package/dist-bloby/bloby.html +3 -3
- package/dist-bloby/onboard.html +3 -3
- package/package.json +3 -4
- package/scripts/install +156 -41
- package/scripts/install.ps1 +146 -29
- package/scripts/install.sh +156 -41
- package/shared/config.ts +37 -2
- package/shared/relay.ts +3 -1
- package/supervisor/channels/manager.ts +84 -44
- package/supervisor/channels/telegram.ts +57 -16
- package/supervisor/channels/types.ts +4 -1
- package/supervisor/channels/whatsapp.ts +57 -10
- package/supervisor/chat/OnboardWizard.tsx +0 -15
- package/supervisor/chat/src/components/Chat/AudioBubble.tsx +1 -1
- package/supervisor/chat/src/components/Chat/AuthedImage.tsx +16 -3
- package/supervisor/chat/src/components/Chat/BlobyImageCard.tsx +2 -2
- package/supervisor/chat/src/components/Chat/ImageLightbox.tsx +25 -8
- package/supervisor/chat/src/components/Chat/InputBar.tsx +62 -7
- package/supervisor/chat/src/components/Chat/MessageBubble.tsx +37 -18
- package/supervisor/chat/src/components/Chat/MessageList.tsx +3 -3
- package/supervisor/chat/src/hooks/useChat.ts +52 -0
- package/supervisor/chat/src/lib/authedFile.ts +24 -12
- package/supervisor/file-saver.ts +92 -19
- package/supervisor/harnesses/attachment-policy.ts +111 -0
- package/supervisor/harnesses/claude.ts +62 -15
- package/supervisor/harnesses/codex.ts +69 -43
- package/supervisor/harnesses/pi/index.ts +367 -112
- package/supervisor/harnesses/pi/providers/humanize-error.ts +27 -2
- package/supervisor/harnesses/pi/providers/retry.ts +31 -0
- package/supervisor/harnesses/pi/providers/stream-anthropic.ts +31 -3
- package/supervisor/harnesses/pi/providers/stream-google.ts +26 -3
- package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +32 -9
- package/supervisor/harnesses/pi/providers/types.ts +29 -1
- package/supervisor/harnesses/pi/session.ts +143 -3
- package/supervisor/harnesses/pi/test-completion.ts +56 -0
- package/supervisor/harnesses/pi/tools/bash.ts +198 -22
- package/supervisor/harnesses/pi/tools/glob.ts +79 -0
- package/supervisor/harnesses/pi/tools/grep.ts +0 -0
- package/supervisor/harnesses/pi/tools/registry.ts +18 -6
- package/supervisor/harnesses/pi/tools/todo-write.ts +45 -0
- package/supervisor/harnesses/pi/tools/web-fetch.ts +129 -0
- package/supervisor/index.ts +93 -18
- package/supervisor/widget.js +19 -5
- package/worker/db.ts +2 -0
- package/worker/index.ts +18 -1
- package/worker/prompts/bloby-system-prompt-codex.txt +1 -1
- package/worker/prompts/bloby-system-prompt-pi.txt +6 -24
- package/worker/prompts/bloby-system-prompt.txt +1 -1
- package/workspace/client/src/components/Dashboard/DashboardPage.tsx +4 -117
- package/workspace/client/src/components/Dashboard/deleteme_placeholders.tsx +194 -0
- package/workspace/client/src/components/Layout/Sidebar.tsx +52 -30
- package/workspace/client/src/components/deleteme_onboarding/WorkspaceTour.tsx +25 -15
- package/workspace/client/src/components/deleteme_onboarding/tour-theme.css +24 -0
- package/workspace/skills/mac/SKILL.md +13 -4
- package/dist-bloby/assets/globals-DyeW509Y.css +0 -2
- package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +0 -1
- package/supervisor/public/headphones_spritesheet.webp +0 -0
- package/supervisor/public/spritesheet.webp +0 -0
|
@@ -48,6 +48,18 @@ const AUTH_RE =
|
|
|
48
48
|
const BILLING_RE =
|
|
49
49
|
/insufficient_quota|credit balance is too low|payment required|purchase more credits/i;
|
|
50
50
|
|
|
51
|
+
// A text-only model rejecting an attached image. Vendors phrase it many ways:
|
|
52
|
+
// OpenAI "Invalid content type. image_url is only supported by certain models",
|
|
53
|
+
// OpenRouter "No endpoints found that support image input", others mention
|
|
54
|
+
// "image input" / "does not support images" / "unsupported content type".
|
|
55
|
+
// Only EXPLICIT image-naming phrases — the bare tokens "vision"/"multimodal"/
|
|
56
|
+
// "modality" were removed because the provider body routinely echoes the model id
|
|
57
|
+
// (e.g. "gpt-4-vision-preview", "llama-3.2-90b-vision-instruct"), which would
|
|
58
|
+
// mis-classify an unrelated 400 from a vision-capable model and wrongly disable
|
|
59
|
+
// vision for the rest of the session. Paired with a 400/415/422 status below.
|
|
60
|
+
const IMAGE_UNSUPPORTED_RE =
|
|
61
|
+
/image[_ ]?url|image input|images?(?: are| is)? not supported|does not support images?|no endpoints? .*support image|unsupported content type/i;
|
|
62
|
+
|
|
51
63
|
export function classifyPiError(
|
|
52
64
|
providerLabel: string,
|
|
53
65
|
status: number | undefined,
|
|
@@ -85,6 +97,19 @@ export function classifyPiError(
|
|
|
85
97
|
message: `${providerLabel} rejected your API key. Update it from the dashboard (Bloby provider settings).${suffix}`,
|
|
86
98
|
};
|
|
87
99
|
}
|
|
100
|
+
// A text-only model that the catalog couldn't flag up front (dynamic/unknown
|
|
101
|
+
// sub-providers) 400/415/422s on the attached image. The session reacts by
|
|
102
|
+
// disabling vision for the rest of the session and re-running the round with
|
|
103
|
+
// images downgraded — self-healing so a single screenshot can't permanently
|
|
104
|
+
// 400-poison the conversation (it rides every stateless resend otherwise).
|
|
105
|
+
if ((status === 400 || status === 415 || status === 422) && IMAGE_UNSUPPORTED_RE.test(body)) {
|
|
106
|
+
return {
|
|
107
|
+
kind: 'image-unsupported',
|
|
108
|
+
retryable: false,
|
|
109
|
+
status,
|
|
110
|
+
message: `${providerLabel} rejected the attached image — this model appears to be text-only. Retrying without the image; switch to a vision-capable model to send images.${suffix}`,
|
|
111
|
+
};
|
|
112
|
+
}
|
|
88
113
|
if (status === 429) {
|
|
89
114
|
return {
|
|
90
115
|
kind: 'rate-limit',
|
|
@@ -113,8 +138,8 @@ export function classifyPiError(
|
|
|
113
138
|
export function classifyPiNetworkError(providerLabel: string, err: any): ClassifiedPiError {
|
|
114
139
|
const raw = err?.message || String(err);
|
|
115
140
|
// undici's body/headers timeouts surface as the famously cryptic 'terminated'
|
|
116
|
-
// and 'Headers Timeout Error'
|
|
117
|
-
const stalled = /terminated|timeout/i.test(raw);
|
|
141
|
+
// and 'Headers Timeout Error'; our own SSE idle guard says 'stalled'.
|
|
142
|
+
const stalled = /terminated|timeout|stalled/i.test(raw);
|
|
118
143
|
return {
|
|
119
144
|
kind: 'transient',
|
|
120
145
|
retryable: true,
|
|
@@ -51,6 +51,37 @@ export function sleep(ms: number, signal?: AbortSignal): Promise<void> {
|
|
|
51
51
|
});
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
/**
 * Per-chunk SSE idle guard (audit D6-7). Without it, a stalled-but-open
 * stream waits ~300s for Node's undici body timeout and then surfaces a
 * cryptic 'terminated'. 120s is generous: Anthropic pings every ~20s and
 * Gemini/OpenAI chunk every few seconds while healthy.
 */
export const SSE_IDLE_TIMEOUT_MS = 120_000;

/**
 * Read one chunk from `reader`, racing the read against an idle timer of
 * SSE_IDLE_TIMEOUT_MS.
 *
 * @param reader        Anything with a promise-returning `read()`; `cancel` is
 *                      optional and is invoked best-effort when the read fails
 *                      or the idle timer fires.
 * @param providerLabel Human-readable provider name used in the stall message.
 * @returns Whatever `reader.read()` resolves with.
 * @throws The read's own rejection, or an Error whose message contains
 *         "stream stalled" when no data arrived before the idle timer fired.
 */
export async function readWithIdleTimeout<T>(
  reader: { read(): Promise<T>; cancel?: (reason?: any) => Promise<void> | void },
  providerLabel: string,
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeoutP = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`${providerLabel} stream stalled — no data received for ${SSE_IDLE_TIMEOUT_MS / 1000}s.`)),
      SSE_IDLE_TIMEOUT_MS,
    );
  });
  const readP = reader.read();
  // Mark the losing read promise handled so a post-timeout rejection (after
  // reader.cancel) never surfaces as an unhandledRejection.
  readP.catch?.(() => {});
  try {
    return await Promise.race([readP, timeoutP]);
  } catch (err) {
    try {
      // cancel() may return a promise that REJECTS (a stream that already
      // errored rejects cancel() with the stored error). Attach a handler so
      // this best-effort cleanup can never become an unhandledRejection; the
      // outer try still covers a cancel() that throws synchronously.
      Promise.resolve(reader.cancel?.()).catch(() => {});
    } catch {
      /* best-effort cleanup — the original error below is what matters */
    }
    throw err;
  } finally {
    if (timer !== undefined) clearTimeout(timer);
  }
}
|
|
84
|
+
|
|
54
85
|
export async function fetchWithRetry(
|
|
55
86
|
url: string,
|
|
56
87
|
init: RequestInit & { signal?: AbortSignal },
|
|
@@ -20,7 +20,7 @@ import type {
|
|
|
20
20
|
PiStopReason,
|
|
21
21
|
PiUsage,
|
|
22
22
|
} from './types.js';
|
|
23
|
-
import { fetchWithRetry } from './retry.js';
|
|
23
|
+
import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
|
|
24
24
|
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
25
25
|
|
|
26
26
|
/* ── SSE parser (shares the LF/CRLF-tolerant pattern from the other providers) ── */
|
|
@@ -32,7 +32,7 @@ async function* parseSse(res: Response): AsyncIterable<any> {
|
|
|
32
32
|
let buffer = '';
|
|
33
33
|
try {
|
|
34
34
|
while (true) {
|
|
35
|
-
const { value, done } = await reader
|
|
35
|
+
const { value, done } = await readWithIdleTimeout(reader, 'Anthropic');
|
|
36
36
|
if (done) break;
|
|
37
37
|
buffer += decoder.decode(value, { stream: true });
|
|
38
38
|
let idx;
|
|
@@ -79,12 +79,24 @@ function toAnthropicContent(blocks: PiContentBlock[]): any[] {
|
|
|
79
79
|
const out: any[] = [];
|
|
80
80
|
for (const b of blocks) {
|
|
81
81
|
if (b.type === 'text') {
|
|
82
|
+
// The Messages API rejects empty/whitespace-only text blocks ("text
|
|
83
|
+
// content blocks must be non-empty") — drop them; an all-empty message
|
|
84
|
+
// is then filtered by the content-length guards in toAnthropicMessages.
|
|
85
|
+
if (!b.text || !b.text.trim()) continue;
|
|
82
86
|
out.push({ type: 'text', text: b.text });
|
|
83
87
|
} else if (b.type === 'image') {
|
|
84
88
|
out.push({
|
|
85
89
|
type: 'image',
|
|
86
90
|
source: { type: 'base64', media_type: b.mediaType, data: b.data },
|
|
87
91
|
});
|
|
92
|
+
} else if (b.type === 'document') {
|
|
93
|
+
// Native PDF document block — the Messages API renders the pages and the
|
|
94
|
+
// model reads them as vision. The base64 document source accepts ONLY
|
|
95
|
+
// application/pdf (buildUserMessage gates it on canNativeDocument).
|
|
96
|
+
out.push({
|
|
97
|
+
type: 'document',
|
|
98
|
+
source: { type: 'base64', media_type: b.mediaType, data: b.data },
|
|
99
|
+
});
|
|
88
100
|
} else if (b.type === 'tool_use') {
|
|
89
101
|
out.push({
|
|
90
102
|
type: 'tool_use',
|
|
@@ -105,13 +117,19 @@ function toAnthropicContent(blocks: PiContentBlock[]): any[] {
|
|
|
105
117
|
}
|
|
106
118
|
|
|
107
119
|
/**
 * Convert Pi messages into the message list sent to the Messages API.
 *
 * Messages with no content — either before or after block conversion via
 * toAnthropicContent — are dropped. Any role other than 'assistant' is sent
 * as 'user'.
 */
function toAnthropicMessages(pi: PiMessage[]): any[] {
  const converted: any[] = [];
  for (const message of pi) {
    if (message.content.length === 0) continue;
    const content = toAnthropicContent(message.content);
    if (content.length === 0) continue;
    converted.push({
      role: message.role === 'assistant' ? 'assistant' : 'user',
      content,
    });
  }
  // The Messages API requires the first message to be user-role. Rolling
  // history windows (customer buffers) are trimmed user-first at the source
  // (channels/manager.ts trimCustomerBuffer), but defend here too — a leading
  // assistant message 400s the whole request (audit C-7).
  const firstUserIndex = converted.findIndex((m) => m.role === 'user');
  return firstUserIndex === -1 ? [] : converted.slice(firstUserIndex);
}
|
|
116
134
|
|
|
117
135
|
function toAnthropicTools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
|
|
@@ -166,6 +184,9 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
166
184
|
if (req.tools && req.tools.length > 0) {
|
|
167
185
|
body.tools = toAnthropicTools(req.tools);
|
|
168
186
|
body.tools[body.tools.length - 1].cache_control = { type: 'ephemeral' };
|
|
187
|
+
// Round-cap wrap-up: forbid further tool calls; tools stay declared so
|
|
188
|
+
// tool_use/tool_result blocks in history remain valid.
|
|
189
|
+
if (req.toolChoice === 'none') body.tool_choice = { type: 'none' };
|
|
169
190
|
}
|
|
170
191
|
if (Array.isArray(body.messages) && body.messages.length > 0) {
|
|
171
192
|
const lastContent = body.messages[body.messages.length - 1].content;
|
|
@@ -213,6 +234,7 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
213
234
|
let usage: PiUsage | undefined;
|
|
214
235
|
let chunkCount = 0;
|
|
215
236
|
let firstChunkSummary = '';
|
|
237
|
+
let thinkingEmitted = false;
|
|
216
238
|
|
|
217
239
|
try {
|
|
218
240
|
for await (const evt of parseSse(res)) {
|
|
@@ -250,6 +272,12 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
250
272
|
toolArgsBuf: '',
|
|
251
273
|
});
|
|
252
274
|
} else {
|
|
275
|
+
// Extended-thinking blocks (not requested today, future-proofed):
|
|
276
|
+
// one liveness pulse, text never forwarded.
|
|
277
|
+
if (block.type === 'thinking' && !thinkingEmitted) {
|
|
278
|
+
thinkingEmitted = true;
|
|
279
|
+
yield { type: 'thinking' };
|
|
280
|
+
}
|
|
253
281
|
blocks.set(idx, { kind: 'other' });
|
|
254
282
|
}
|
|
255
283
|
break;
|
|
@@ -18,7 +18,7 @@ import type {
|
|
|
18
18
|
PiStopReason,
|
|
19
19
|
PiUsage,
|
|
20
20
|
} from './types.js';
|
|
21
|
-
import { fetchWithRetry } from './retry.js';
|
|
21
|
+
import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
|
|
22
22
|
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
23
23
|
|
|
24
24
|
/** Walk an SSE byte stream and yield each parsed JSON event. */
|
|
@@ -30,7 +30,7 @@ async function* parseSse(res: Response, dbg: { firstBytes: string }): AsyncItera
|
|
|
30
30
|
let totalBytes = 0;
|
|
31
31
|
try {
|
|
32
32
|
while (true) {
|
|
33
|
-
const { value, done } = await reader
|
|
33
|
+
const { value, done } = await readWithIdleTimeout(reader, 'Google Gemini');
|
|
34
34
|
if (done) break;
|
|
35
35
|
if (value) totalBytes += value.byteLength;
|
|
36
36
|
buffer += decoder.decode(value, { stream: true });
|
|
@@ -102,6 +102,11 @@ function toGeminiParts(content: PiContentBlock[]): any[] {
|
|
|
102
102
|
parts.push({ text: b.text });
|
|
103
103
|
} else if (b.type === 'image') {
|
|
104
104
|
parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
|
|
105
|
+
} else if (b.type === 'document') {
|
|
106
|
+
// Gemini ingests application/pdf inline via the same inlineData shape as
|
|
107
|
+
// images (it OCRs/renders the document). buildUserMessage only routes a
|
|
108
|
+
// document block here when the flavor supports it.
|
|
109
|
+
parts.push({ inlineData: { mimeType: b.mediaType, data: b.data } });
|
|
105
110
|
} else if (b.type === 'tool_use') {
|
|
106
111
|
// Assistant turn: the model asked to invoke a tool. Thinking-capable
|
|
107
112
|
// Gemini 3.x rejects (HTTP 400) any echoed functionCall whose
|
|
@@ -203,11 +208,25 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
203
208
|
maxOutputTokens: req.maxOutputTokens ?? 32768,
|
|
204
209
|
},
|
|
205
210
|
};
|
|
211
|
+
// Thinking-capable families (2.5+/3.x): ask for thought summaries so the
|
|
212
|
+
// harness can emit a liveness pulse — without this, Gemini 3 burns its
|
|
213
|
+
// output budget on invisible reasoning and the chat looks hung. Gated by
|
|
214
|
+
// model id; unknown/dynamic ids skip it (older models reject the field).
|
|
215
|
+
// The rolling aliases (gemini-flash-latest / gemini-flash-lite-latest)
|
|
216
|
+
// resolve to 2.5+/3.x thinking models too (review PI-D-2).
|
|
217
|
+
if (/gemini-(2\.5|[3-9]|flash(-lite)?-latest)/i.test(req.modelId)) {
|
|
218
|
+
body.generationConfig.thinkingConfig = { includeThoughts: true };
|
|
219
|
+
}
|
|
206
220
|
if (req.systemPrompt?.trim()) {
|
|
207
221
|
body.systemInstruction = { parts: [{ text: req.systemPrompt }] };
|
|
208
222
|
}
|
|
209
223
|
if (req.tools && req.tools.length > 0) {
|
|
210
224
|
body.tools = toGeminiTools(req.tools);
|
|
225
|
+
// Round-cap wrap-up: forbid further function calls; tools stay declared so
|
|
226
|
+
// functionCall/functionResponse parts in history remain valid.
|
|
227
|
+
if (req.toolChoice === 'none') {
|
|
228
|
+
body.toolConfig = { functionCallingConfig: { mode: 'NONE' } };
|
|
229
|
+
}
|
|
211
230
|
}
|
|
212
231
|
|
|
213
232
|
let res: Response;
|
|
@@ -263,7 +282,11 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
263
282
|
for (const part of parts) {
|
|
264
283
|
// Thinking models emit reasoning parts with `thought: true`. They
|
|
265
284
|
// shouldn't be shown to the user as part of the visible answer.
|
|
266
|
-
if (part?.thought) {
|
|
285
|
+
if (part?.thought) {
|
|
286
|
+
thoughtPartCount++;
|
|
287
|
+
if (thoughtPartCount === 1) yield { type: 'thinking' };
|
|
288
|
+
continue;
|
|
289
|
+
}
|
|
267
290
|
if (part?.functionCall && typeof part.functionCall.name === 'string') {
|
|
268
291
|
// Gemini doesn't surface a tool-call id of its own; bake the tool
|
|
269
292
|
// name into the id so the session can echo it back as a
|
|
@@ -18,7 +18,7 @@ import type {
|
|
|
18
18
|
PiStopReason,
|
|
19
19
|
PiUsage,
|
|
20
20
|
} from './types.js';
|
|
21
|
-
import { fetchWithRetry } from './retry.js';
|
|
21
|
+
import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
|
|
22
22
|
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
23
23
|
|
|
24
24
|
/* ── SSE parser (LF or CRLF tolerant, flushes the trailing event) ── */
|
|
@@ -30,7 +30,7 @@ async function* parseSse(res: Response): AsyncIterable<any> {
|
|
|
30
30
|
let buffer = '';
|
|
31
31
|
try {
|
|
32
32
|
while (true) {
|
|
33
|
-
const { value, done } = await reader
|
|
33
|
+
const { value, done } = await readWithIdleTimeout(reader, 'OpenAI-compat');
|
|
34
34
|
if (done) break;
|
|
35
35
|
buffer += decoder.decode(value, { stream: true });
|
|
36
36
|
let idx;
|
|
@@ -119,24 +119,33 @@ function toOpenAIMessages(pi: PiMessage[]): any[] {
|
|
|
119
119
|
out.push(msg);
|
|
120
120
|
continue;
|
|
121
121
|
}
|
|
122
|
-
// role === 'user' with non-tool-result content (text + optional images)
|
|
122
|
+
// role === 'user' with non-tool-result content (text + optional images).
|
|
123
|
+
// Media parts go first; text is appended last (parity with the other
|
|
124
|
+
// providers and pi/index's media-first block ordering).
|
|
123
125
|
const contentBlocks: any[] = [];
|
|
124
126
|
let plainText = '';
|
|
125
|
-
let
|
|
127
|
+
let hasMedia = false;
|
|
126
128
|
for (const b of m.content) {
|
|
127
129
|
if (b.type === 'text') {
|
|
128
130
|
plainText += (plainText ? '\n' : '') + b.text;
|
|
129
131
|
} else if (b.type === 'image') {
|
|
130
|
-
|
|
132
|
+
hasMedia = true;
|
|
131
133
|
contentBlocks.push({
|
|
132
134
|
type: 'image_url',
|
|
133
135
|
image_url: { url: `data:${b.mediaType};base64,${b.data}` },
|
|
134
136
|
});
|
|
137
|
+
} else if (b.type === 'document') {
|
|
138
|
+
// The Chat Completions schema has no document part — degrade to a text
|
|
139
|
+
// note rather than crashing. The file is also on disk (saved-files
|
|
140
|
+
// note), so the agent can open it with its tools. This shouldn't
|
|
141
|
+
// normally happen: buildUserMessage gates documents on canNativeDocument
|
|
142
|
+
// (false for this flavor), so a PDF here rides as the disk pointer.
|
|
143
|
+
plainText += (plainText ? '\n' : '') +
|
|
144
|
+
`[Attached document${b.name ? ` "${b.name}"` : ''} (${b.mediaType}) could not be inlined for this model — it is saved to disk; open it with your file tools.]`;
|
|
135
145
|
}
|
|
136
146
|
}
|
|
137
|
-
if (
|
|
138
|
-
|
|
139
|
-
if (plainText) contentBlocks.unshift({ type: 'text', text: plainText });
|
|
147
|
+
if (hasMedia) {
|
|
148
|
+
if (plainText) contentBlocks.push({ type: 'text', text: plainText });
|
|
140
149
|
out.push({ role: 'user', content: contentBlocks });
|
|
141
150
|
} else {
|
|
142
151
|
out.push({ role: 'user', content: plainText });
|
|
@@ -203,7 +212,10 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
203
212
|
}
|
|
204
213
|
if (req.tools && req.tools.length > 0) {
|
|
205
214
|
body.tools = toOpenAITools(req.tools);
|
|
206
|
-
|
|
215
|
+
// 'none' = the round-cap wrap-up round: the model must summarize, not
|
|
216
|
+
// start more work. Tools stay declared so histories containing tool calls
|
|
217
|
+
// remain valid.
|
|
218
|
+
body.tool_choice = req.toolChoice === 'none' ? 'none' : 'auto';
|
|
207
219
|
}
|
|
208
220
|
|
|
209
221
|
let res: Response;
|
|
@@ -243,6 +255,7 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
243
255
|
const toolCallsByIndex = new Map<number, PartialToolCall>();
|
|
244
256
|
let chunkCount = 0;
|
|
245
257
|
let firstChunkSummary = '';
|
|
258
|
+
let thinkingEmitted = false;
|
|
246
259
|
|
|
247
260
|
// Vendors disagree on where streamed usage lives: spec says a final
|
|
248
261
|
// choice-less chunk's `usage`, Groq defaults to nesting under `x_groq.usage`,
|
|
@@ -267,6 +280,16 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
267
280
|
readUsage(choice?.usage);
|
|
268
281
|
const delta = choice.delta || {};
|
|
269
282
|
|
|
283
|
+
// Reasoning models stream hidden thinking under vendor-specific fields
|
|
284
|
+
// (DeepSeek/OpenRouter: reasoning_content; others: reasoning /
|
|
285
|
+
// reasoning_text — upstream pi's field priority). Emit ONE liveness
|
|
286
|
+
// pulse so the chat doesn't look hung; never forward the text itself.
|
|
287
|
+
const reasoningDelta = delta.reasoning_content ?? delta.reasoning ?? delta.reasoning_text;
|
|
288
|
+
if (!thinkingEmitted && typeof reasoningDelta === 'string' && reasoningDelta.length > 0) {
|
|
289
|
+
thinkingEmitted = true;
|
|
290
|
+
yield { type: 'thinking' };
|
|
291
|
+
}
|
|
292
|
+
|
|
270
293
|
if (typeof delta.content === 'string' && delta.content.length > 0) {
|
|
271
294
|
accumulated += delta.content;
|
|
272
295
|
yield { type: 'text_delta', delta: delta.content };
|
|
@@ -17,6 +17,11 @@ export type PiRole = 'user' | 'assistant' | 'tool';
|
|
|
17
17
|
export type PiContentBlock =
|
|
18
18
|
| { type: 'text'; text: string }
|
|
19
19
|
| { type: 'image'; mediaType: string; data: string } // base64
|
|
20
|
+
// Native document block (PDF). Only the flavors with native document support
|
|
21
|
+
// (anthropic-messages, google-gemini) ever receive one — buildUserMessage
|
|
22
|
+
// gates it on canNativeDocument; openai-completions degrades it to a text
|
|
23
|
+
// note rather than crashing if one ever reaches it.
|
|
24
|
+
| { type: 'document'; mediaType: string; data: string; name?: string } // base64
|
|
20
25
|
// `thoughtSignature` is a Gemini 3.x thinking-model field. Pi-flavored
|
|
21
26
|
// providers that emit reasoning attach it to function-call parts; the API
|
|
22
27
|
// rejects the next turn with HTTP 400 if we don't echo it back verbatim.
|
|
@@ -56,6 +61,13 @@ export interface PiStreamRequest {
|
|
|
56
61
|
* that 422 on the `stream_options.include_usage` opt-in. Default true.
|
|
57
62
|
*/
|
|
58
63
|
includeStreamUsage?: boolean;
|
|
64
|
+
/**
|
|
65
|
+
* 'none' forbids tool calls for this request (mapped per flavor: OpenAI
|
|
66
|
+
* tool_choice:'none', Anthropic {type:'none'}, Gemini functionCallingConfig
|
|
67
|
+
* mode NONE). Used by the session's round-cap wrap-up round, where the model
|
|
68
|
+
* must summarize instead of starting more work.
|
|
69
|
+
*/
|
|
70
|
+
toolChoice?: 'auto' | 'none';
|
|
59
71
|
/** Optional abort signal so the session can interrupt in-flight requests. */
|
|
60
72
|
signal?: AbortSignal;
|
|
61
73
|
}
|
|
@@ -67,11 +79,27 @@ export type PiStopReason = 'end_turn' | 'tool_use' | 'max_tokens' | 'error' | 'a
|
|
|
67
79
|
* string-matching: retry transient rounds, tear down on auth/overflow, and
|
|
68
80
|
* show actionable messages instead of raw provider JSON.
|
|
69
81
|
*/
|
|
70
|
-
export type PiErrorKind =
|
|
82
|
+
export type PiErrorKind =
|
|
83
|
+
| 'auth'
|
|
84
|
+
| 'context-overflow'
|
|
85
|
+
| 'rate-limit'
|
|
86
|
+
| 'billing'
|
|
87
|
+
| 'transient'
|
|
88
|
+
/** The model rejected an image/vision/modality block (a text-only model 400/
|
|
89
|
+
* 415/422s on the attached image). The session reacts by disabling vision
|
|
90
|
+
* for the rest of the session and re-running the round with images
|
|
91
|
+
* downgraded to placeholders — self-healing for dynamic/unknown models whose
|
|
92
|
+
* catalog can't tell us up front whether they see images. */
|
|
93
|
+
| 'image-unsupported'
|
|
94
|
+
| 'other';
|
|
71
95
|
|
|
72
96
|
export type PiStreamEvent =
|
|
73
97
|
| { type: 'text_delta'; delta: string }
|
|
74
98
|
| { type: 'text_end'; text: string }
|
|
99
|
+
/** Emitted when the model starts (visibly) reasoning — a liveness pulse for
|
|
100
|
+
* thinking models so the chat doesn't look hung. Reasoning TEXT is never
|
|
101
|
+
* forwarded (it would corrupt the streamed-text == response contract). */
|
|
102
|
+
| { type: 'thinking' }
|
|
75
103
|
| { type: 'tool_use'; id: string; name: string; input: any; thoughtSignature?: string }
|
|
76
104
|
| { type: 'done'; stopReason: PiStopReason; usage?: PiUsage }
|
|
77
105
|
| { type: 'error'; error: string; status?: number; kind?: PiErrorKind; retryable?: boolean };
|
|
@@ -44,6 +44,8 @@ export type PiSessionEvent =
|
|
|
44
44
|
| { type: 'turn_started' }
|
|
45
45
|
| { type: 'text_delta'; delta: string }
|
|
46
46
|
| { type: 'text_end'; text: string }
|
|
47
|
+
/** Liveness pulse: the model is reasoning (thinking models) — no text attached. */
|
|
48
|
+
| { type: 'thinking' }
|
|
47
49
|
| { type: 'tool_use'; id: string; name: string; input: any }
|
|
48
50
|
| { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
|
|
49
51
|
| {
|
|
@@ -78,6 +80,10 @@ export interface PiSessionAuth {
|
|
|
78
80
|
includeStreamUsage?: boolean;
|
|
79
81
|
/** Model context window from the catalog — reported on turn_complete for the recycler. */
|
|
80
82
|
contextWindow?: number;
|
|
83
|
+
/** False when the catalog says the model is text-only — image blocks are
|
|
84
|
+
* downgraded to placeholders on send so one screenshot can't 400-poison
|
|
85
|
+
* the session (audit C-8). Undefined (dynamic models) ⇒ assume vision. */
|
|
86
|
+
supportsImages?: boolean;
|
|
81
87
|
}
|
|
82
88
|
|
|
83
89
|
export interface PiSessionInit {
|
|
@@ -117,6 +123,51 @@ export interface PiSession {
|
|
|
117
123
|
getMessages(): PiMessage[];
|
|
118
124
|
}
|
|
119
125
|
|
|
126
|
+
/**
 * Transform-on-send for text-only models (audit C-8).
 *
 * Returns a copy of `messages` in which every image block is replaced by a
 * text placeholder. The input array and its message objects are never
 * mutated, so the stored history keeps the images and switching to a vision
 * model later restores them. When no message carries an image, the original
 * array is returned as-is.
 */
function downgradeImages(messages: PiMessage[]): PiMessage[] {
  const placeholderText =
    '[An image was attached here, but the current model cannot view images. Tell the user to switch to a vision-capable model if the image matters.]';
  let replacedAny = false;
  const rewritten: PiMessage[] = [];
  for (const message of messages) {
    const carriesImage = message.content.some((block) => block.type === 'image');
    if (!carriesImage) {
      // Untouched messages are passed through by reference.
      rewritten.push(message);
      continue;
    }
    replacedAny = true;
    const content = message.content.map((block): PiContentBlock => {
      if (block.type === 'image') {
        return { type: 'text', text: placeholderText };
      }
      return block;
    });
    rewritten.push({ ...message, content });
  }
  return replacedAny ? rewritten : messages;
}
|
|
145
|
+
|
|
146
|
+
/**
 * Emergency in-turn context relief (audit D2-6): when occupancy crosses the
 * threshold MID-turn (recycling only acts between idle turns), stub out the
 * oldest large tool_result payloads — never user/assistant text, never the
 * protected tail (the current round's results). Cruder than real compaction,
 * but the turn finishes instead of 400ing on the context wall.
 *
 * Mutates `messages` in place: each qualifying tool_result string (longer
 * than 2048 chars, inside a 'user'-role message) is overwritten with a short
 * stub, oldest first, until enough characters have been reclaimed.
 *
 * @param messages    History to trim (modified in place).
 * @param charsToFree Target number of characters to reclaim; <= 0 trims nothing.
 * @param protectTail Number of trailing messages that must not be touched.
 * @returns Net characters reclaimed (original payload length minus the stub
 *          that replaced it), which may exceed or fall short of `charsToFree`.
 */
function trimOldToolResults(messages: PiMessage[], charsToFree: number, protectTail: number): number {
  let freed = 0;
  // Clamp to the array bounds so a negative protectTail cannot index past the end.
  const limit = Math.min(messages.length, Math.max(0, messages.length - protectTail));
  for (let i = 0; i < limit && freed < charsToFree; i++) {
    const m = messages[i];
    if (m.role !== 'user') continue;
    for (const b of m.content) {
      if (b.type !== 'tool_result' || typeof b.content !== 'string' || b.content.length <= 2048) continue;
      const originalLength = b.content.length;
      const stub = `[tool output trimmed to fit the context window — ~${Math.round(originalLength / 1024)} KB removed]`;
      b.content = stub;
      // Count only what was actually reclaimed: the stub itself still occupies
      // context, so crediting the full payload would stop trimming too early.
      freed += originalLength - stub.length;
      if (freed >= charsToFree) break;
    }
  }
  return freed;
}
|
|
167
|
+
|
|
168
|
+
const ROUND_CAP_NOTICE =
|
|
169
|
+
'[System: the tool budget for this turn is exhausted. Stop working now. In 2-3 sentences, summarize what you completed, what remains, and the exact next step.]';
|
|
170
|
+
|
|
120
171
|
const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'MultiEdit', 'NotebookEdit', 'write', 'edit', 'multiEdit', 'notebookEdit']);
|
|
121
172
|
const MAX_TOOL_ROUNDS = 25;
|
|
122
173
|
/** Transparent re-runs of a failed round that produced nothing (audit D6-1). */
|
|
@@ -130,6 +181,14 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
130
181
|
let lastUsage: PiUsage | undefined;
|
|
131
182
|
let lastContextWindow: number | undefined;
|
|
132
183
|
|
|
184
|
+
// Self-healing vision (audit D rank 12): when a model the catalog couldn't
|
|
185
|
+
// classify (dynamic/unknown sub-providers ⇒ supportsImages undefined) rejects
|
|
186
|
+
// an image with an 'image-unsupported' error, latch this for the rest of the
|
|
187
|
+
// session and downgrade images on every subsequent send. The IMAGE stays in
|
|
188
|
+
// history (downgradeImages is transform-on-send only), so switching to a
|
|
189
|
+
// vision-capable model later restores it.
|
|
190
|
+
let visionDisabled = false;
|
|
191
|
+
|
|
133
192
|
/** One stream round — collect the assistant blocks the model emits this pass. */
|
|
134
193
|
interface RoundResult {
|
|
135
194
|
text: string;
|
|
@@ -142,7 +201,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
142
201
|
retryable?: boolean;
|
|
143
202
|
}
|
|
144
203
|
|
|
145
|
-
async function runOneRound(emitSeparatorFirst: boolean): Promise<RoundResult> {
|
|
204
|
+
async function runOneRound(emitSeparatorFirst: boolean, opts?: { wrapUp?: boolean }): Promise<RoundResult> {
|
|
146
205
|
const result: RoundResult = { text: '', toolUses: [], errored: false };
|
|
147
206
|
let firstDelta = true;
|
|
148
207
|
try {
|
|
@@ -153,8 +212,13 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
153
212
|
baseUrl: auth.baseUrl,
|
|
154
213
|
apiKey: auth.apiKey,
|
|
155
214
|
systemPrompt: init.systemPrompt,
|
|
156
|
-
|
|
215
|
+
// Downgrade images when the catalog says text-only (supportsImages
|
|
216
|
+
// false) OR a prior round in THIS session learned it the hard way via
|
|
217
|
+
// an 'image-unsupported' error (visionDisabled). The stored history
|
|
218
|
+
// keeps the image so a later vision-capable model still restores it.
|
|
219
|
+
messages: auth.supportsImages === false || visionDisabled ? downgradeImages(messages) : messages,
|
|
157
220
|
tools: init.tools,
|
|
221
|
+
toolChoice: opts?.wrapUp ? 'none' : undefined,
|
|
158
222
|
maxOutputTokens: auth.maxOutputTokens,
|
|
159
223
|
maxTokensField: auth.maxTokensField,
|
|
160
224
|
includeStreamUsage: auth.includeStreamUsage,
|
|
@@ -182,6 +246,9 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
182
246
|
// at the end of the whole turn so the UI doesn't show half-answers.
|
|
183
247
|
result.text = evt.text;
|
|
184
248
|
break;
|
|
249
|
+
case 'thinking':
|
|
250
|
+
init.onEvent({ type: 'thinking' });
|
|
251
|
+
break;
|
|
185
252
|
case 'tool_use':
|
|
186
253
|
result.toolUses.push({
|
|
187
254
|
id: evt.id,
|
|
@@ -189,7 +256,12 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
189
256
|
input: evt.input,
|
|
190
257
|
thoughtSignature: evt.thoughtSignature,
|
|
191
258
|
});
|
|
192
|
-
|
|
259
|
+
// Wrap-up rounds forbid tools (toolChoice 'none'); if a vendor
|
|
260
|
+
// ignores that, swallow the phantom call silently — it is never
|
|
261
|
+
// executed or persisted.
|
|
262
|
+
if (!opts?.wrapUp) {
|
|
263
|
+
init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
|
|
264
|
+
}
|
|
193
265
|
break;
|
|
194
266
|
case 'error':
|
|
195
267
|
result.errored = true;
|
|
@@ -270,6 +342,21 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
270
342
|
res = await runOneRound(needsSeparator);
|
|
271
343
|
}
|
|
272
344
|
|
|
345
|
+
// Self-healing vision (audit D rank 12): a model the catalog couldn't
|
|
346
|
+
// classify just rejected an attached image with HTTP 400/415/422. Latch visionDisabled
|
|
347
|
+
// and re-run the round ONCE — runOneRound now downgrades images on send,
|
|
348
|
+
// so the resend succeeds. Guarded by !visionDisabled so it fires at most
|
|
349
|
+
// once per session; an image rides every stateless resend, so without
|
|
350
|
+
// this, every subsequent request in the conversation would be rejected again.
|
|
351
|
+
if (
|
|
352
|
+
res.errored && res.errorKind === 'image-unsupported' && !visionDisabled &&
|
|
353
|
+
!init.abortController.signal.aborted
|
|
354
|
+
) {
|
|
355
|
+
log.info('[pi/session] model rejected image — disabling vision for this session and retrying without it');
|
|
356
|
+
visionDisabled = true;
|
|
357
|
+
res = await runOneRound(needsSeparator);
|
|
358
|
+
}
|
|
359
|
+
|
|
273
360
|
const { text, toolUses, errored } = res;
|
|
274
361
|
|
|
275
362
|
// Append whatever the model produced this round to history so subsequent
|
|
@@ -331,10 +418,63 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
331
418
|
messages.push({ role: 'user', content: toolResultBlocks });
|
|
332
419
|
}
|
|
333
420
|
|
|
421
|
+
// Emergency in-turn context relief (audit D2-6): recycling only acts
|
|
422
|
+
// between idle turns, so a single heavy tool loop could cross the wall
|
|
423
|
+
// mid-turn. Above 85% occupancy, stub the oldest large tool outputs to
|
|
424
|
+
// bring the next request back toward 70%.
|
|
425
|
+
if (lastContextWindow && lastUsage) {
|
|
426
|
+
const occupancy =
|
|
427
|
+
(lastUsage.inputTokens || 0) + (lastUsage.cacheReadTokens || 0) + (lastUsage.cacheCreationTokens || 0);
|
|
428
|
+
if (occupancy > 0.85 * lastContextWindow) {
|
|
429
|
+
const charsToFree = (occupancy - Math.floor(0.7 * lastContextWindow)) * 4; // ~4 chars/token
|
|
430
|
+
const freed = trimOldToolResults(messages, charsToFree, 4);
|
|
431
|
+
if (freed > 0) {
|
|
432
|
+
log.info(`[pi/session] context at ${occupancy}/${lastContextWindow} tok mid-turn — trimmed ~${Math.round(freed / 1024)} KB of old tool output`);
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
|
|
334
437
|
// No tool calls ⇒ the model is done with this turn.
|
|
335
438
|
if (toolUses.length === 0) { roundCapHit = false; break; }
|
|
336
439
|
}
|
|
337
440
|
|
|
441
|
+
// Round-cap wrap-up (audit D5-8): the budget ran out with the model still
|
|
442
|
+
// mid-task. Run ONE final no-tools round so the turn ends with an honest
|
|
443
|
+
// status summary instead of silent truncation. roundCapHit stays true on
|
|
444
|
+
// turn_complete — consumers still know the work is incomplete.
|
|
445
|
+
if (roundCapHit && !turnErrored && !init.abortController.signal.aborted) {
|
|
446
|
+
log.info(`[pi/session] tool-round budget (${maxRounds}) exhausted — running a no-tools wrap-up round`);
|
|
447
|
+
messages.push({ role: 'user', content: [{ type: 'text', text: ROUND_CAP_NOTICE }] });
|
|
448
|
+
const needsSeparator = accumulatedText.length > 0 && !accumulatedText.endsWith('\n');
|
|
449
|
+
const res = await runOneRound(needsSeparator, { wrapUp: true });
|
|
450
|
+
if (res.text) {
|
|
451
|
+
if (needsSeparator) accumulatedText += '\n\n';
|
|
452
|
+
accumulatedText += res.text;
|
|
453
|
+
messages.push({ role: 'assistant', content: [{ type: 'text', text: res.text }] });
|
|
454
|
+
} else {
|
|
455
|
+
// The notice was never answered — pop it so the NEXT turn doesn't
|
|
456
|
+
// open under a stale "stop working now" instruction (review PI-D-1).
|
|
457
|
+
const last = messages[messages.length - 1];
|
|
458
|
+
if (last?.role === 'user' && last.content.length === 1 &&
|
|
459
|
+
last.content[0].type === 'text' && last.content[0].text === ROUND_CAP_NOTICE) {
|
|
460
|
+
messages.pop();
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
// Fatal wrap-up failures (dead key / context wall) must still tear the
|
|
464
|
+
// session down, and a cap-hit turn with NO text at all must not end in
|
|
465
|
+
// total silence — claude surfaces error_max_turns and pi's one-shot
|
|
466
|
+
// paths guard this state too (PI-C-2). Set the turn-error fields so the
|
|
467
|
+
// standard emission below handles both (review PI-D-1).
|
|
468
|
+
if (res.errored && (res.errorKind === 'auth' || res.errorKind === 'context-overflow')) {
|
|
469
|
+
turnErrored = true;
|
|
470
|
+
turnErrorMsg = res.errorMsg;
|
|
471
|
+
turnErrorKind = res.errorKind;
|
|
472
|
+
} else if (!accumulatedText) {
|
|
473
|
+
turnErrored = true;
|
|
474
|
+
turnErrorMsg = `I hit my tool budget for this turn (${maxRounds} rounds) before finishing — say "continue" and I'll pick up where I left off.`;
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
|
|
338
478
|
// Turn-end emission order (audit D6-2, mirrors claude.ts:394-401):
|
|
339
479
|
// 1. text_end whenever ANY text streamed — even on errored turns, so the
|
|
340
480
|
// partial the user watched is committed, persisted, and consumes its
|